forked from insightbook/data-science-from-scratch
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ch11_machine_learning.py
50 lines (38 loc) · 1.35 KB
/
ch11_machine_learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from __future__ import division
from collections import Counter
import math, random
#
# data splitting
#
def split_data(data, prob):
    """Randomly partition data into two lists with fractions [prob, 1 - prob].

    One random.random() draw is made per row: the row goes to the first
    list when the draw is below prob, otherwise to the second list.
    Returns the pair (first_list, second_list).
    """
    chosen, remaining = [], []
    for row in data:
        if random.random() < prob:
            chosen.append(row)
        else:
            remaining.append(row)
    return chosen, remaining
def train_test_split(x, y, test_pct):
    """Split paired inputs x and outputs y into train and test portions.

    Corresponding elements of x and y stay together. split_data sends each
    pair to the training portion with probability 1 - test_pct and to the
    test portion otherwise.

    Returns the tuple (x_train, x_test, y_train, y_test), each a tuple of
    values. A portion that received no pairs comes back as empty tuples
    instead of raising ValueError.
    """
    data = zip(x, y)                              # pair corresponding values
    train, test = split_data(data, 1 - test_pct)  # split the dataset of pairs
    # zip(*pairs) produces nothing for an empty list, so unpacking it into
    # two names would fail; fall back to two empty tuples in that case.
    x_train, y_train = zip(*train) if train else ((), ())
    x_test, y_test = zip(*test) if test else ((), ())
    return x_train, x_test, y_train, y_test
#
# correctness
#
def accuracy(tp, fp, fn, tn):
    """Return the fraction of correct predictions: (tp + tn) over all four counts.

    Raises ZeroDivisionError when all four counts are zero.
    """
    return (tp + tn) / (tp + fp + fn + tn)
def precision(tp, fp, fn, tn):
    """Return tp / (tp + fp), the share of positive predictions that were right.

    fn and tn are accepted so the signature matches the other metrics but
    are not used. Raises ZeroDivisionError when tp + fp is zero.
    """
    predicted_positives = tp + fp
    return tp / predicted_positives
def recall(tp, fp, fn, tn):
    """Return tp / (tp + fn), the share of actual positives that were found.

    fp and tn are accepted so the signature matches the other metrics but
    are not used. Raises ZeroDivisionError when tp + fn is zero.
    """
    actual_positives = tp + fn
    return tp / actual_positives
def f1_score(tp, fp, fn, tn):
    """Return the harmonic mean of precision and recall.

    When precision and recall are both zero (no true positives, but at
    least one false positive and one false negative) the result is 0.0
    rather than a ZeroDivisionError from the 0 / 0 harmonic mean.

    A ZeroDivisionError raised by precision or recall themselves (their own
    denominators being zero) still propagates unchanged.
    """
    p = precision(tp, fp, fn, tn)
    r = recall(tp, fp, fn, tn)
    # Both terms are zero here, so the harmonic mean would be 0 / 0;
    # 0.0 is the conventional value for a classifier with no true positives.
    if p + r == 0:
        return 0.0
    return 2 * p * r / (p + r)
if __name__ == "__main__":
    # Demo: evaluate every metric on one confusion matrix, passed in the
    # (tp, fp, fn, tn) order the metric functions expect.
    counts = (70, 4930, 13930, 981070)
    for metric in (accuracy, precision, recall, f1_score):
        # A single parenthesised argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        # "%s%r" renders e.g. "accuracy(70, 4930, 13930, 981070)".
        print("%s%r %s" % (metric.__name__, counts, metric(*counts)))