-
Notifications
You must be signed in to change notification settings - Fork 0
/
Logistic_Regression_LEAD.Rmd
73 lines (60 loc) · 1.68 KB
/
Logistic_Regression_LEAD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
---
title: "LEAD_Propensity_Model"
author: "Vijay S"
date: "9 August 2018"
output: html_document
---
```{r setup, include=FALSE, warning=FALSE}
# Echo the source of every chunk in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
# Read the data through an explicit path rather than setwd(): changing the
# working directory is a session-wide side effect, and nothing else in this
# document depends on it.
# NOTE(review): this is still a machine-specific absolute path; consider
# keeping LEAD.csv next to the .Rmd and using a relative path instead.
data_dir <- "E:/PGDDS/Marketing/Advanced Martketing Analysis/Case Study/Assignment_No.1"
lead <- read.csv(file.path(data_dir, "LEAD.csv"))
```
### View the structure of the dataset
```{r}
# Compact overview of every column: type and first few values.
utils::str(lead)
# Number of rows and columns, in that order.
c(nrow(lead), ncol(lead))
```
### Correlation analysis
```{r, warning=FALSE}
library(corrplot)
# Pairwise correlations between all columns of the data set.
# NOTE(review): cor() requires every column to be numeric -- confirm against
# the str() output.
matr <- cor(lead)
# Draw only the upper triangle, one pie glyph per coefficient.
corrplot(
  matr,
  method = "pie",
  type = "upper"
)
```
### Convert dependent variable to categorical column
```{r}
# Treat the response as a categorical outcome so glm() and caret see classes
# instead of numbers.
lead[["PURCHASE"]] <- as.factor(lead[["PURCHASE"]])
```
### Check for NA values
```{r}
# Logical mask of missing cells, then the per-column count of missing values.
missing_mask <- is.na(lead)
colSums(missing_mask)
```
### Split into training and test datasets
```{r}
# Split the data 60/40 into disjoint training and test sets.
#
# The training rows are sampled once and the test set is the complement, so
# no observation appears in both sets. Sampling the two sets independently
# would let them overlap and inflate the reported accuracy through leakage.
set.seed(123) # fixed seed so the split, and the report, is reproducible
lead_df <- lead
n_obs <- nrow(lead_df)
train_idx <- sample(seq_len(n_obs), size = floor(0.6 * n_obs))
# setdiff() rather than negative indexing: `-integer(0)` would select no rows
# if the training sample ever came out empty.
test_idx <- setdiff(seq_len(n_obs), train_idx)
lead_train <- lead_df[train_idx, ]
lead_test <- lead_df[test_idx, ]
```
### Logistic Regression
```{r, warning=FALSE}
library(class)
# Fit a binomial GLM (logistic regression) of PURCHASE on six predictors,
# using the training rows only.
lead_model <- glm(
  PURCHASE ~ OWN_HOUSE + CAR_LOAN + HEART_DISEASE + DIABETES +
    OTHER_INSURANCE + CHILDREN,
  family = "binomial",
  data = lead_train
)
# Coefficients, standard errors and deviance of the fitted model.
summary(lead_model)
```
### Predict whether the customer will purchase or not
```{r}
# Predicted purchase probability for every test row.
purchase_prob <- predict(lead_model, newdata = lead_test, type = "response")
# Classify as 1 when the probability exceeds the 0.4 cutoff, else 0.
# The levels are fixed to 0/1 explicitly: as.factor() would create a one-level
# factor when every prediction falls on the same side of the cutoff, and
# caret::confusionMatrix() then fails because the predicted and reference
# levels do not match.
# NOTE(review): assumes PURCHASE is coded 0/1, consistent with the 1/0 labels
# used here and positive = "1" in the confusion matrix -- confirm.
lead_test$predict <- factor(ifelse(purchase_prob > 0.4, 1, 0), levels = c(0, 1))
# Make sure the reference column is a factor as well.
lead_test$PURCHASE <- as.factor(lead_test$PURCHASE)
```
### Check for accuracy using Confusion Matrix
```{r, warning=FALSE}
library(caret)
# Cross-tabulate predicted against observed classes; "1" is the positive
# class for sensitivity, specificity and related statistics.
cm <- confusionMatrix(
  data = lead_test$predict,
  reference = lead_test$PURCHASE,
  positive = "1"
)
cm
```
### Accuracy of the Model
```{r}
# Overall accuracy as a percentage, rounded to two decimal places.
round(cm$overall["Accuracy"] * 100, digits = 2)
```