-
Notifications
You must be signed in to change notification settings - Fork 1k
/
benchmark.py
124 lines (112 loc) · 3.59 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""This module runs a 5-Fold CV for all the algorithms (default parameters) on
the MovieLens datasets, and reports average RMSE, MAE, and total computation
time. It is used for making the tables in the README.md file."""
# flake8: noqa
import datetime
import random
import time
import numpy as np
from surprise import (
BaselineOnly,
CoClustering,
Dataset,
KNNBaseline,
KNNBasic,
KNNWithMeans,
NMF,
NormalPredictor,
SlopeOne,
SVD,
SVDpp,
)
from surprise.model_selection import cross_validate, KFold
from tabulate import tabulate
# Algorithms to cross-validate. The benchmark loop walks this tuple in order,
# so the order here is also the order of the rows in the printed table.
# random_state is pinned to 0 for every algorithm that is given one here.
algos = (
    # Matrix-factorization family. SVDpp appears twice, once per value of
    # cache_ratings, so both variants get their own row.
    SVD(random_state=0),
    SVDpp(cache_ratings=False, random_state=0),
    SVDpp(cache_ratings=True, random_state=0),
    NMF(random_state=0),
    # Slope One.
    SlopeOne(),
    # Neighbourhood (k-NN) family.
    KNNBasic(),
    KNNWithMeans(),
    KNNBaseline(),
    # Co-clustering.
    CoClustering(random_state=0),
    # Basic algorithms.
    BaselineOnly(),
    NormalPredictor(),
)
# Markdown links shown in the results table. LINK is keyed by algorithm class
# name (row labels) and by built-in dataset name (table header).
stable = "https://surprise.readthedocs.io/en/stable/"

# One entry per link: (LINK key, text shown in the table, target URL).
_LINK_SPECS = (
    (
        "SVD",
        "SVD",
        stable
        + "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD",
    ),
    (
        "SVDpp",
        "SVD++",
        stable
        + "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVDpp",
    ),
    (
        "NMF",
        "NMF",
        stable
        + "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.NMF",
    ),
    (
        "SlopeOne",
        "Slope One",
        stable + "slope_one.html#surprise.prediction_algorithms.slope_one.SlopeOne",
    ),
    (
        "KNNBasic",
        "k-NN",
        stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic",
    ),
    (
        "KNNWithMeans",
        "Centered k-NN",
        stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNWithMeans",
    ),
    (
        "KNNBaseline",
        "k-NN Baseline",
        stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline",
    ),
    (
        "CoClustering",
        "Co-Clustering",
        stable
        + "co_clustering.html#surprise.prediction_algorithms.co_clustering.CoClustering",
    ),
    (
        "BaselineOnly",
        "Baseline",
        stable
        + "basic_algorithms.html#surprise.prediction_algorithms.baseline_only.BaselineOnly",
    ),
    (
        "NormalPredictor",
        "Random",
        stable
        + "basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor",
    ),
    ("ml-100k", "Movielens 100k", "https://grouplens.org/datasets/movielens/100k"),
    ("ml-1m", "Movielens 1M", "https://grouplens.org/datasets/movielens/1m"),
)

# Render every spec as a markdown link: [text](url).
LINK = {key: "[{}]({})".format(text, url) for key, text, url in _LINK_SPECS}
# Seed both the NumPy and the standard-library global RNGs.
np.random.seed(0)
random.seed(0)

# Name of the built-in dataset to benchmark; it must also be a key of LINK,
# because it is used to build the table header below.
dataset = "ml-100k"
data = Dataset.load_builtin(dataset)
kf = KFold(random_state=0)  # folds will be the same for all algorithms.

# One row per algorithm: [markdown link, mean RMSE, mean MAE, elapsed time].
table = []
for algo in algos:
    # Use perf_counter() rather than time(): it is monotonic, so a system
    # clock adjustment during a long cross-validation cannot distort (or make
    # negative) the reported duration.
    start = time.perf_counter()
    out = cross_validate(algo, data, ["rmse", "mae"], kf)
    elapsed_seconds = int(time.perf_counter() - start)  # drop sub-second part
    cv_time = str(datetime.timedelta(seconds=elapsed_seconds))  # H:MM:SS

    link = LINK[algo.__class__.__name__]
    mean_rmse = "{:.3f}".format(np.mean(out["test_rmse"]))
    mean_mae = "{:.3f}".format(np.mean(out["test_mae"]))

    new_line = [link, mean_rmse, mean_mae, cv_time]
    print(tabulate([new_line], tablefmt="pipe"))  # print current algo perf
    table.append(new_line)

# Final summary: every row under a header whose first cell links the dataset.
header = [LINK[dataset], "RMSE", "MAE", "Time"]
print(tabulate(table, header, tablefmt="pipe"))