-
Notifications
You must be signed in to change notification settings - Fork 5
/
LDAClassifier.py
49 lines (40 loc) · 1.38 KB
/
LDAClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier as RFC
import csv
import random
def main():
    """Train a random forest on the RDK dataset and print its test accuracy.

    Rows of ``Dataset/dnd/RDK.csv`` are split at random by ``loadDataset``
    (each row goes to the test set with probability 0.25). The last column
    of every row is used as the label and all preceding columns as the
    features. A 100-tree ``RandomForestClassifier`` is fitted on the
    training rows and its accuracy on the test rows is printed as a
    percentage.

    Raises:
        ValueError: If the random split left the training or test set empty.
    """
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.25
    loadDataset('Dataset/dnd/RDK.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))

    # The split is random, so a small file can leave one side empty; fail
    # with a clear message instead of an indexing error further down.
    if not trainingSet or not testSet:
        raise ValueError('training and test sets must both be non-empty')

    # Convert once with the builtin float dtype: the np.float alias was
    # removed in NumPy 1.24 and raises AttributeError there.
    train = np.asarray(trainingSet, dtype=float)
    test = np.asarray(testSet, dtype=float)

    # Last column is the label, everything before it is a feature.
    X, y = train[:, :-1], train[:, -1]
    X_test, y_test = test[:, :-1], test[:, -1]

    # fit the classifier and score it on the held-out rows
    clf = RFC(n_estimators=100)
    clf.fit(X, y)
    accuracy = clf.score(X_test, y_test) * 100
    print("Accuracy %:", accuracy)
def loadDataset(filename, split, trainingSet=None, testSet=None):
    """Read a numeric CSV file and randomly split its rows into two lists.

    Every field of every row is converted to ``float``. One
    ``random.random()`` value is drawn per row: when it is below ``split``
    the row is appended to ``testSet``, otherwise to ``trainingSet``.
    Blank lines in the file are skipped.

    Args:
        filename: Path of the CSV file to read.
        split: Threshold compared against ``random.random()``; effectively
            the probability that a row lands in ``testSet``.
        trainingSet: List extended in place with the training rows. A new
            list is created when omitted.
        testSet: List extended in place with the test rows. A new list is
            created when omitted.

    Returns:
        The ``(trainingSet, testSet)`` pair, so the rows are reachable even
        when the caller did not pass its own lists.

    Raises:
        ValueError: If a field cannot be parsed as a float.
    """
    # Create fresh lists per call: a mutable default would be shared by
    # every call that relies on it and would keep accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []

    # newline='' lets the csv module handle line endings itself.
    with open(filename, 'r', newline='') as csvfile:
        for fields in csv.reader(csvfile):
            if not fields:
                # csv.reader yields an empty list for a blank line.
                continue
            row = [float(value) for value in fields]
            if random.random() < split:
                testSet.append(row)
            else:
                trainingSet.append(row)
    return trainingSet, testSet
# Run the experiment only when executed as a script, so importing this
# module (e.g. to reuse loadDataset) does not train a model as a side effect.
if __name__ == "__main__":
    main()