Applications of Naive Bayes:
1. Email Filtering: It is used to classify emails as spam or not spam, which helps filter unwanted emails.
2. Credit Approval: Banks and credit providers use it to identify potential clients who will not default on loans in the future.
3. Medical Diagnosis: Based on input in the form of symptoms and other features, it is widely used by hospitals to diagnose various medical conditions.
Download The Dataset
Download The Code File
Problem statement: Classify SMS text messages as spam or ham.
Variables:
type: The message label, one of two classes: spam or ham.
text: The SMS message in the form of text.
# Set working directory
# NOTE: this path is machine-specific; point it at the folder that holds
# sms_spam.csv on your system.
setwd("/Users/Planet Analytics/Documents/RDirectory")

# Read file
# stringsAsFactors = FALSE keeps the SMS text as character on every R version
# (the default differed before R 4.0).
Data <- read.csv("sms_spam.csv", stringsAsFactors = FALSE)
# The label is categorical, so store it explicitly as a factor.
Data$type <- factor(Data$type)

# Explore variables
names(Data)

# See the structure of the data
str(Data)

# Check the summary
summary(Data)

# Check the top data points
head(Data)

# Check the number of spams and hams
table(Data$type)
# Create a corpus
library(tm)
library(SnowballC)
# Each SMS message becomes one document in the corpus.
corpus <- VCorpus(VectorSource(Data$text))
inspect(head(corpus))

# Check the text in corpus
lapply(head(corpus, 1), as.character)

# Clean the text
# The steps run in this order: lower-case, drop digits, drop stop words,
# drop punctuation, stem, and finally collapse the leftover whitespace.
# Stop words are removed before punctuation so that contractions in the
# stop word list (e.g. "don't") still match.
Clean_corpus <- tm_map(corpus, content_transformer(tolower))
Clean_corpus <- tm_map(Clean_corpus, removeNumbers)
Clean_corpus <- tm_map(Clean_corpus, removeWords, stopwords())
Clean_corpus <- tm_map(Clean_corpus, removePunctuation)
Clean_corpus <- tm_map(Clean_corpus, stemDocument)
Clean_corpus <- tm_map(Clean_corpus, stripWhitespace)

# Check the cleaned text of the first two messages
lapply(head(Clean_corpus, 2), as.character)
# Tokenization of the corpus
# Rows are messages, columns are terms, cells are term counts.
corpus_dtm <- DocumentTermMatrix(Clean_corpus)

# Creating Train and Test datasets
# Hold out the last 1000 messages for testing and train on the rest. The
# boundary is derived from the row count rather than hard-coded: for a
# 5574-row file this gives rows 1:4574 and 4575:5574, and it keeps working
# when the file has a different number of rows.
TotalRows <- nrow(corpus_dtm)
TestSize <- 1000
# The labels are taken from Data by position, so both must line up.
stopifnot(TotalRows > TestSize, nrow(Data) == TotalRows)
TrainRows <- seq_len(TotalRows - TestSize)
TestRows <- seq(TotalRows - TestSize + 1, TotalRows)

Train <- corpus_dtm[TrainRows, ]
Test <- corpus_dtm[TestRows, ]
TrainLabels <- Data$type[TrainRows]
TestLabels <- Data$type[TestRows]

# Check the proportion of ham and spam in Train and Test dataset
prop.table(table(TrainLabels))
prop.table(table(TestLabels))
# Creating indicator features
# Keep only the terms that occur at least 10 times in the training data.
FreqWords <- findFreqTerms(Train, lowfreq = 10)

# Filter DTM to only include frequent terms
# The same term list is used for both sets so their columns match.
TrainFreq <- Train[, FreqWords]
TestFreq <- Test[, FreqWords]
# Convert counts into categorical form
# Maps each term count to a presence label: "Yes" when the count is greater
# than zero, "No" otherwise.
#   x: numeric vector (or matrix) of term counts.
# Returns a character vector (or matrix) with the same shape as x.
convert <- function(x) {
  # Return the expression directly so the result is visible to the caller
  # instead of being the invisible value of an assignment.
  ifelse(x > 0, "Yes", "No")
}
# Apply convert() to every column (MARGIN = 2) of the filtered matrices,
# turning the term counts into "Yes"/"No" indicators.
TrainSet <- apply(TrainFreq, MARGIN = 2, convert)
TestSet <- apply(TestFreq, MARGIN = 2, convert)
# Train the model
library(e1071)
Model <- naiveBayes(TrainSet, TrainLabels)

# Predicting Spam or Ham on test set
Predictions <- predict(Model, TestSet)

# Checking accuracy
# Confusion matrix: predicted class (rows) against actual class (columns).
table(Predictions, TestLabels)
# Overall accuracy: share of test messages whose prediction matches the
# label. Both sides are compared as character so differing factor level
# sets cannot raise an error.
mean(as.character(Predictions) == as.character(TestLabels))





















