-
Notifications
You must be signed in to change notification settings - Fork 0
/
naive_bayes.py
130 lines (122 loc) · 7.42 KB
/
naive_bayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# The Naive Bayes algorithm is implemented in this file.
import utility
# *********************************************************************************************************************
# Prior probabilities of the "yes" and "no" classifications. Both start at 0
# and are overwritten by NaiveBayes.create_classification_dictionary.
positive_probability = negative_probability = 0
# *********************************************************************************************************************
# *********************************************************************************************************************
# *********************************************************************************************************************
# The Naive Bayes class will classify
# examples based on the Bayes classification algorithm.
class NaiveBayes(object):
    """Naive Bayes classifier for examples labelled "yes" or "no".

    Every example is a sequence of pairs that are indexed with
    ``utility.feature_name`` and ``utility.feature_value``: the first
    ``num_of_features`` pairs are the features and the pair right after them
    is the classification.
    """

    def __init__(self, num_of_features):
        """Remember how many feature pairs precede the classification pair.

        :param num_of_features: number of features in every example.
        """
        self.num_of_features = num_of_features

    def classify(self, train, test):
        """Predict the classification of every example in ``test``.

        :param train: labelled examples used to estimate the probabilities.
        :param test: examples to classify. The pair at position
            ``num_of_features`` is only used for the classification's name.
        :return: one list per test example holding its
            ``(feature name, feature value)`` tuples followed by a
            ``(classification name, predicted value)`` tuple.
        """
        classification_dictionary = self.create_classification_dictionary(train)
        # Position of the classification pair. Using the attribute instead of
        # a leaked loop variable keeps this valid even with zero features.
        label_index = self.num_of_features
        results = []
        for example in test:
            # Copy the features of the example without its classification.
            prediction = [
                (example[index][utility.feature_name],
                 example[index][utility.feature_value])
                for index in range(label_index)
            ]
            # Append the predicted classification under the original name.
            prediction.append((example[label_index][utility.feature_name],
                               self.predict(example, classification_dictionary)))
            results.append(prediction)
        return results

    def predict(self, example_to_predict, classification_dictionary):
        """Predict the classification of a single example.

        :param example_to_predict: the example whose features are scored.
        :param classification_dictionary: dictionary with the keys ``"yes"``
            and ``"no"`` as built by ``create_classification_dictionary``.
        :return: ``utility.confirm_value('no')`` when the negative class is
            strictly more probable, otherwise ``utility.confirm_value('yes')``
            (ties go to the positive class).
        """
        yes_examples = classification_dictionary["yes"]
        no_examples = classification_dictionary["no"]
        # The likelihoods are keyed by feature index. Keying them by feature
        # value would let two features that share a value overwrite each
        # other and silently drop a factor from the product.
        positive_value_dictionary = {}
        negative_value_dictionary = {}
        for index in range(self.num_of_features):
            feature = example_to_predict[index]
            value = feature[utility.feature_value]
            # Number of values this feature can take (smoothing denominator).
            option_count = len(utility.all_feature_types[feature[utility.feature_name]])
            positive_value_dictionary[index] = self._smoothed_likelihood(
                value, index, yes_examples, option_count)
            negative_value_dictionary[index] = self._smoothed_likelihood(
                value, index, no_examples, option_count)
        # Derive the priors from the dictionary that was passed in, so the
        # result never depends on stale module-level state.
        positive_prior, negative_prior = self._class_priors(
            len(yes_examples), len(no_examples))
        positive_result_probability = (
            self.classification_probability(positive_value_dictionary) * positive_prior)
        negative_result_probability = (
            self.classification_probability(negative_value_dictionary) * negative_prior)
        # A single decision guarantees that a classification is returned.
        if negative_result_probability > positive_result_probability:
            return utility.confirm_value('no')
        return utility.confirm_value('yes')

    def classification_probability(self, result_dictionary):
        """Multiply all values of ``result_dictionary``.

        :param result_dictionary: dictionary whose values are probabilities.
        :return: the product of the values, or 1 for an empty dictionary.
        """
        result = 1
        for value in result_dictionary.values():
            result *= value
        return result

    def create_classification_dictionary(self, train):
        """Split the training examples by their classification.

        Examples whose classification equals ``utility.confirm_value('yes')``
        go to the ``"yes"`` list and those equal to
        ``utility.confirm_value('no')`` go to the ``"no"`` list; any other
        example is ignored. As a side effect the module-level
        ``positive_probability`` and ``negative_probability`` are updated.

        :param train: labelled training examples.
        :return: dictionary with the keys ``"yes"`` and ``"no"``, each holding
            a list of examples reduced to ``(feature name, feature value)``
            tuples.
        """
        global positive_probability, negative_probability
        label_index = self.num_of_features
        yes_classification = []
        no_classification = []
        for example in train:
            # Rebuild the example without its classification pair.
            build_example = [
                (example[index][utility.feature_name],
                 example[index][utility.feature_value])
                for index in range(label_index)
            ]
            label = example[label_index][utility.feature_value]
            if label == utility.confirm_value('yes'):
                yes_classification.append(build_example)
            elif label == utility.confirm_value('no'):
                no_classification.append(build_example)
        # Keep the module-level priors up to date for code that reads them.
        positive_probability, negative_probability = self._class_priors(
            len(yes_classification), len(no_classification))
        return {"yes": yes_classification, "no": no_classification}

    @staticmethod
    def _smoothed_likelihood(value, index, class_examples, option_count):
        """Return the add-one smoothed P(feature at ``index`` == value | class)."""
        matches = sum(
            1 for classification in class_examples
            if classification[index][utility.feature_value] == value
        )
        # The "+ 1" pairs with the "+ option_count" in the denominator, so a
        # value never seen with this class does not zero the whole product.
        return float(matches + 1) / (len(class_examples) + option_count)

    @staticmethod
    def _class_priors(yes_count, no_count):
        """Return the (positive, negative) priors for the given class sizes."""
        total = yes_count + no_count
        if not total:
            # No labelled examples: avoid dividing by zero and report no
            # evidence for either class.
            return 0.0, 0.0
        positive = float(yes_count) / total
        return positive, 1 - positive