Skip to content

Commit

Permalink
mozilla#3 data-split space mapped
Browse files Browse the repository at this point in the history
  • Loading branch information
Addi-11 committed Mar 7, 2020
1 parent 652c40e commit b65565c
Show file tree
Hide file tree
Showing 5 changed files with 450 additions and 267 deletions.
9 changes: 8 additions & 1 deletion dev/Addi-11/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import plot_precision_recall_curve, confusion_matrix, plot_confusion_matrix
import matplotlib.pyplot as plt

class Classifier:

Expand Down Expand Up @@ -39,12 +41,17 @@ def Gaussian(self, X_train,y_train):
classifier.fit(X_train, y_train)
return classifier

""" This function is to evaluate classifier's performancen"""
def evaluation(self, classifier, X_test, y_test):
    """Print a classifier's test metrics and draw its diagnostic plots.

    Prints accuracy, precision, recall and F1 score as returned by
    ``evaluate``, then draws the precision-recall curve and the confusion
    matrix with the scikit-learn plotting helpers.

    Args:
        classifier: Estimator to assess; it is passed unchanged to
            ``evaluate`` and to the plotting helpers, so it is expected to
            be fitted already.
        X_test: Features of the held-out samples.
        y_test: True labels matching ``X_test``.
    """
    # ``evaluate`` returns five values; the fifth (the score vector) is not
    # used here. The earlier four-value unpacking was a leftover that would
    # raise ValueError, so only this single call remains.
    accuracy, precision, recall, f_score, _ = evaluate(classifier, X_test, y_test)
    print("Accuracy : ", accuracy)
    print("Precision: ", precision)
    print("Recall: ", recall)
    print("F1 score : ", f_score)

    # NOTE(review): plot_precision_recall_curve and plot_confusion_matrix were
    # deprecated in scikit-learn 1.0 and removed in 1.2; switch to
    # PrecisionRecallDisplay.from_estimator / ConfusionMatrixDisplay.from_estimator
    # when the scikit-learn pin is raised.
    print("Precision vs Recall Curve")
    plot_precision_recall_curve(classifier, X_test, y_test)
    print("Confusion Matrix")
    plot_confusion_matrix(classifier, X_test, y_test)



34 changes: 34 additions & 0 deletions dev/Addi-11/data_split_examine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
""" This file compares various evaluation metrics for different data splits """

import numpy as np
from dataloader import train_test_split_data
from evaluation import evaluate
from classifiers import Classifier
import pandas as pd
from IPython.display import HTML
from pylab import *

""" for now KNeighbors will be used as it gave the highest accuracy """
# Shared Classifier instance; data_split_examine() calls its KNeighbors method.
model = Classifier()

# Test-set fractions to sweep: start at 0.0001 and advance by 0.05 while below 1.
test_sizes = np.arange(0.0001, 1, 0.05)

# Result table filled one row per split by data_split_examine().
columns = [
    'Training data',
    'Testing Data',
    'Accuracy',
    'Precision',
    'Recall',
    'F1_score',
]
df = pd.DataFrame(columns=columns)

def data_split_examine():
    """Train and score a KNeighbors classifier for every test-set fraction.

    For each fraction in the module-level ``test_sizes`` the data is split,
    a KNeighbors classifier is fitted on the training part and evaluated on
    the test part. The rounded train/test percentages and the four metrics
    are stored as one row of the module-level ``df`` (rows are labelled from
    1), and the completed table is displayed at the end.
    """
    # ``display`` is only a builtin inside IPython/Jupyter sessions; importing
    # it here keeps the function usable from a plain Python process as well.
    from IPython.display import display

    for row, test_size in enumerate(test_sizes, start=1):
        X_train, X_test, y_train, y_test = train_test_split_data(test_size)
        classifier = model.KNeighbors(X_train, y_train)
        # The fifth value returned by ``evaluate`` is not needed for the table.
        accuracy, precision, recall, f_score, _ = evaluate(classifier, X_test, y_test)
        train = round((1 - test_size) * 100)
        test = round(test_size * 100)
        df.loc[row] = [train, test, accuracy, precision, recall, f_score]

    display(df)

def visualise_split():
    """Plot accuracy against the test-set fraction.

    Reads the module-level ``test_sizes`` (x axis) and the ``Accuracy``
    column of the module-level ``df`` (y axis), so ``data_split_examine()``
    must have filled ``df`` beforehand.
    """
    _, axes = plt.subplots()
    # Labels follow the plotted data: test_sizes on x, accuracy on y
    # (they were previously swapped).
    axes.set_xlabel("Test Data Size")
    axes.set_ylabel("Accuracy")
    axes.set_title("Relation btw accuracy and test data size")
    axes.plot(test_sizes, df.Accuracy)
5 changes: 5 additions & 0 deletions dev/Addi-11/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns

""" read data file """
# Relative path to the dataset CSV, so it depends on the current working directory.
address = "../../datasets/eeg.csv"
Expand All @@ -21,6 +22,10 @@ def data_info():
display(df.info()) # info of each column of dataset
print("\nChecking for null values: \n")
print(df.isnull().sum()) # sum of all null values in a dataset for preprocessing
""" Function specific to given dataset"""
def data_visuals():
    """Show the distinct labels of the ``Class`` column and their counts.

    Specific to the loaded dataset: prints the unique values of
    ``df.Class`` and draws a seaborn count plot of that column.
    """
    # The bare ``df.Class.unique()`` expression discarded its result, which
    # shows nothing when executed inside a function; print it instead.
    print(df.Class.unique())
    # Pass the series by keyword: newer seaborn releases no longer treat the
    # first positional argument as ``x``.
    sns.countplot(x=df.Class)

""" splitting the data """
def train_test_split_data(test_size):
Expand Down
Loading

0 comments on commit b65565c

Please sign in to comment.