-
Notifications
You must be signed in to change notification settings - Fork 0
/
ecoc_classifier.py
156 lines (99 loc) · 4.72 KB
/
ecoc_classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
from sklearn.utils.multiclass import unique_labels
from typing import List
import numpy as np
class ecoc_classifier(ClassifierMixin, BaseEstimator):
    """Error-correcting output code (ECOC) classifier built from binary models.

    One model is trained per column of the ECOC matrix. At prediction time
    the outputs of all models are rounded to bits, assembled into one code
    word per sample, and replaced by the row of the ECOC matrix that has the
    smallest Hamming distance to it.

    A wrapped model only needs a ``fit(X, y, **kwargs)`` and a ``predict(X)``
    method; ``predict`` may return either a flat vector or a single-column
    array with one value per sample.

    Parameters
    ----------
    model_constructer : callable, optional
        Zero-argument factory returning a new, untrained model. It is called
        once per ECOC column during ``fit`` when ``model_list`` is None.
    ecoc_matrix : 2-D sequence
        One code word per row. Required by ``fit`` and ``determinLable``.
    model_list : list, optional
        Ready-made untrained models, one per ECOC column. When it is given
        it takes precedence over ``model_constructer``.
    code_word_length : int, default 0
        Overwritten by ``fit`` with the length of the first code word. The
        default only exists because scikit-learn expects every constructor
        argument to have one.
    """

    def __init__(
        self,
        model_constructer=None,
        ecoc_matrix=None,
        model_list=None,
        code_word_length=0,
    ):
        # Plain assignments only: BaseEstimator derives get_params/set_params
        # from this signature, so no validation happens here.
        self.model_constructer = model_constructer
        self.ecoc_matrix = ecoc_matrix
        self.model_list = model_list
        self.code_word_length = code_word_length

    def Hdistance(self, model_output: List, code_word: List) -> int:
        """Return the Hamming distance between two code words.

        Only the first ``self.code_word_length`` positions are compared, and
        every entry of ``model_output`` is truncated with ``int`` before it is
        compared with the matching entry of ``code_word``.
        """
        return sum(
            1
            for pos in range(self.code_word_length)
            if int(model_output[pos]) != code_word[pos]
        )

    def determinLable(self, results):
        """Map each output code to the nearest code word of the ECOC matrix.

        Parameters
        ----------
        results : 2-D array-like
            One row of model outputs per sample, with at least as many
            columns as a code word has bits.

        Returns
        -------
        numpy.ndarray
            Float array with one ECOC code word per sample. On ties the
            code word that appears first in the matrix is chosen.

        Raises
        ------
        ValueError
            If the ECOC matrix is not a non-empty 2-D table, or ``results``
            has fewer columns than a code word.
        """
        codes = np.asarray(self.ecoc_matrix)
        if codes.ndim != 2 or codes.shape[0] == 0:
            raise ValueError(
                "ecoc_matrix must be a non-empty 2-D table of code words"
            )
        n_bits = codes.shape[1]

        outputs = np.asarray(results)
        if outputs.ndim != 2 or outputs.shape[1] < n_bits:
            raise ValueError(
                "results must be 2-D with at least %d columns" % n_bits
            )

        # Same truncation toward zero that Hdistance applies with int().
        bits = outputs[:, :n_bits].astype(int)

        # One vectorised pass per code word keeps the temporaries at
        # (n_samples, n_bits) instead of (n_samples, n_codes, n_bits).
        distances = np.stack(
            [(bits != code).sum(axis=1) for code in codes], axis=1
        )

        # argmin returns the first minimum, i.e. the earliest code word wins.
        nearest = distances.argmin(axis=1)
        return codes[nearest].astype(float)

    def fit(self, X, y, **kwargs):
        """Train one model per column of the ECOC matrix.

        Parameters
        ----------
        X : array-like
            Training samples, passed unchanged to every model.
        y : 2-D array-like
            One code word per sample; column ``i`` is the target of model
            ``i``. A 1-D ``y`` is accepted when code words have one bit.
        **kwargs
            Forwarded to the ``fit`` method of every model.

        Returns
        -------
        ecoc_classifier
            The fitted estimator itself.

        Raises
        ------
        ValueError
            If no ECOC matrix is set, if neither ``model_list`` nor
            ``model_constructer`` is available, if ``model_list`` is shorter
            than a code word, or if ``y`` has too few columns.

        Notes
        -----
        When ``model_list`` is None the models created here are stored in
        ``self.model_list``, so a later call to ``fit`` trains those same
        objects again instead of calling ``model_constructer`` anew.
        """
        if self.ecoc_matrix is None or len(self.ecoc_matrix) == 0:
            raise ValueError("ecoc_matrix must be supplied before calling fit")
        self.code_word_length = len(self.ecoc_matrix[0])
        n_bits = self.code_word_length
        if n_bits == 0:
            raise ValueError("ecoc_matrix code words must not be empty")

        # Plain Python sequences are converted so they can be sliced by
        # column. Objects that already expose ``shape`` (NumPy arrays,
        # framework tensors) pass through so models get the type they expect.
        targets = y if hasattr(y, "shape") else np.asarray(y)
        if len(targets.shape) == 1:
            targets = targets.reshape(-1, 1)
        if len(targets.shape) != 2 or targets.shape[1] < n_bits:
            raise ValueError(
                "y must provide %d target columns, one per code word bit"
                % n_bits
            )

        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        if self.model_list is None:
            if self.model_constructer is None:
                raise ValueError(
                    "supply either model_constructer or model_list"
                )
            self.model_list = [self.model_constructer() for _ in range(n_bits)]
        elif len(self.model_list) < n_bits:
            raise ValueError(
                "model_list holds %d models but code words have %d bits"
                % (len(self.model_list), n_bits)
            )

        for bit_pos, model in enumerate(self.model_list[:n_bits]):
            model.fit(X, targets[:, bit_pos], **kwargs)

        return self

    def predict(self, X, y=None):
        """Predict the ECOC code word of every sample in ``X``.

        Every model predicts one bit per sample; the rounded bits form an
        output code that ``determinLable`` snaps to the nearest code word.

        Parameters
        ----------
        X : array-like
            Samples, passed unchanged to every model.
        y : ignored
            Present only to keep the call signature unchanged.

        Returns
        -------
        numpy.ndarray
            Float array holding one ECOC code word per sample.

        Raises
        ------
        ValueError
            If a model returns anything other than one value per sample.
        """
        check_is_fitted(self, ['X_', 'y_'])

        columns = []
        for model in self.model_list[:self.code_word_length]:
            raw = np.asarray(model.predict(X), dtype=float)
            # Accept both (n,) and (n, 1) outputs; anything with more than
            # one value per sample cannot be read as a single bit.
            if raw.ndim == 0 or raw.size != raw.shape[0]:
                raise ValueError(
                    "each model must predict exactly one value per sample"
                )
            columns.append(raw.reshape(-1))

        output_codes = np.column_stack(columns).round()
        return self.determinLable(output_codes)

    def score(self, X, y):
        """Return the fraction of samples whose whole code word is correct.

        Parameters
        ----------
        X : array-like
            Samples to classify.
        y : array-like
            Expected code words, one row per sample. A 1-D ``y`` is compared
            as a single column.

        Returns
        -------
        float
            Number of exactly matching code words divided by the number of
            samples.

        Raises
        ------
        ValueError
            If there are no samples or ``y`` does not line up with the
            predictions.
        """
        predicted = self.predict(X)
        n_samples, n_bits = predicted.shape

        expected = np.asarray(y)
        if expected.ndim == 1:
            expected = expected.reshape(-1, 1)
        if (
            expected.ndim != 2
            or expected.shape[0] != n_samples
            or expected.shape[1] not in (1, n_bits)
        ):
            raise ValueError(
                "y must hold one code word for each of the %d samples"
                % n_samples
            )
        if n_samples == 0:
            raise ValueError("cannot score an empty sample set")

        matches = np.all(predicted == expected, axis=1)
        return float(matches.mean())