-
Notifications
You must be signed in to change notification settings - Fork 0
/
LogisticRegression.py
34 lines (31 loc) · 1.4 KB
/
LogisticRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Logistic Regression
import pandas as pd
import time
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
# Classifier Model (Logistic Regression)
from sklearn.linear_model import LogisticRegression
import joblib
# Train and evaluate a character-level TF-IDF + logistic-regression text
# classifier on data.csv, then persist the fitted pipeline with joblib.

# Load the dataset. The text is taken from the second column and the integer
# class label from the last column of the CSV.
data = pd.read_csv('data.csv')
features = data.values[:, 1].astype('str')
labels = data.values[:, -1].astype('int')

# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.25, random_state=0)

# NOTE(review): newer scikit-learn releases appear to deprecate the
# `multi_class` argument -- confirm against the installed version.
classifier_model = Pipeline([
    ('tfidf', TfidfVectorizer(analyzer='char')),
    ('logisticRegression',
     LogisticRegression(multi_class='multinomial', solver='sag')),
])

# Fit the model. The timer brackets only the fit call so that the figure
# reported below is training time, not CSV loading or evaluation.
start_time = time.time()
classifier_model.fit(X_train, y_train)
training_seconds = round((time.time() - start_time), 2)

# Evaluate on the held-out split.
y_pred = classifier_model.predict(X_test)
from sklearn.metrics import confusion_matrix, classification_report
cm = confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred, digits=4))
print("Confusion Matrix: \n", cm)

# Accuracy = correctly classified samples (the diagonal of the confusion
# matrix) over all samples. Using trace/sum works for any number of classes
# instead of assuming exactly three.
accuracy = cm.trace() / cm.sum()

# Score on the training split only; scoring on the full dataset would mix the
# held-out test rows into the "training" figure.
print('Training Accuracy: ', classifier_model.score(X_train, y_train))
print("Testing Accuracy = ", accuracy)
print("Time Taken to train the model = %s seconds" % training_seconds)

# Save model
joblib.dump(classifier_model, 'LogisticRegression_Model.joblib')