-
Notifications
You must be signed in to change notification settings - Fork 3
/
collaborativefiltering.py
96 lines (78 loc) · 2.36 KB
/
collaborativefiltering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- coding: utf-8 -*-
'''
Collaborative filtering
Alternating least squares approach
@author [email protected]
'''
import math
import itertools
import numpy
import matplotlib
import loader
import pickle
import dummyloader
import loader
# Hyper-parameters: p is the number of rows in each factor matrix, lmba the
# regularisation weight used by update_A/update_B, epochs the number of
# alternating passes run by the training loop.
p = 3
lmba = 0.001
epochs = 20

# NOTE: this rebinds the name of the imported `loader` module to an instance.
loader = loader.MovieLoader()

print('Loading movies...')
# NOTE(review): X is presumably a scipy sparse matrix -- it is column-sliced
# and .todense()'d below; confirm against MovieLoader.
X = loader.loadMovies()
m = loader.getM()
n = loader.getN()

print('Processing data...')
# Both factor matrices start as small Gaussian noise (std 0.1) in float32.
# A is drawn before B so the random stream is consumed in the same order.
A = numpy.matrix(
    numpy.random.normal(0, 0.1, size=(p, m))).astype(numpy.float32)
B = numpy.matrix(
    numpy.random.normal(0, 0.1, size=(p, n))).astype(numpy.float32)

# Seed the first row of B with the mean of the strictly positive entries of
# the matching column of X.
for i in range(X.shape[1]):
    column = X[:, i].todense()
    positive = column > 0
    B[0, i] = numpy.mean(column[positive])
def update_A(X, A, B):
    '''
    Re-solve every column of the row-side factor matrix with B held fixed.

    For each row i of X, the strictly positive entries are taken as the
    observed values x_i, Bi collects the matching columns of B, and the
    regularised normal equations

        (Bi * Bi.T + lmba * I) * a_i = Bi * x_i

    are solved in the least-squares sense for the new column a_i.

    Parameters:
        X -- ratings matrix; each row must support X[i, :].todense().
        A -- current row-side factors.  Not read (the solve is closed-form
             in X and B); kept so the signature mirrors update_B.
        B -- column-side factor matrix with p rows and one column per
             column of X.

    Reads the module-level p (factor rank), m (number of rows of X to
    update) and lmba (regularisation weight).

    Returns a new (p, m) float32 numpy.matrix; A is left untouched.
    '''
    # The penalty enters as lmba * I on the diagonal of the Gram matrix.
    # Adding lamI * A[:, i] instead would broadcast a (p, 1) column across
    # the (p, p) system and tie the solve to the previous iterate.
    lamI = numpy.matrix(numpy.eye(p) * lmba).astype(numpy.float32)
    Anew = numpy.matrix(numpy.zeros((p, m))).astype(numpy.float32)
    for i in range(m):
        ratings = numpy.array(X[i, :].todense())[0]
        observed = ratings > 0
        Bi = B[:, observed]
        rhs = numpy.dot(Bi, numpy.matrix(ratings[observed]).T)
        gram = numpy.dot(Bi, Bi.T) + lamI
        # lstsq (rather than solve) still returns an answer when lmba is 0
        # and the Gram matrix is singular.
        Anew[:, i] = numpy.linalg.lstsq(gram, rhs)[0]
    return Anew
def update_B(X, A, B):
    '''
    Re-solve every column of the column-side factor matrix with A held fixed.

    For each column i of X, the strictly positive entries are taken as the
    observed values x_i, Ai collects the matching columns of A, and the
    regularised normal equations

        (Ai * Ai.T + lmba * I) * b_i = Ai * x_i

    are solved in the least-squares sense for the new column b_i.

    Parameters:
        X -- ratings matrix; each column must support X[:, i].todense().
        A -- row-side factor matrix with p rows and one column per row
             of X.
        B -- current column-side factors.  Not read (the solve is
             closed-form in X and A); kept so the signature mirrors
             update_A.

    Reads the module-level p (factor rank), n (number of columns of X to
    update) and lmba (regularisation weight).

    Returns a new (p, n) float32 numpy.matrix; B is left untouched.
    '''
    # The penalty enters as lmba * I on the diagonal of the Gram matrix.
    # Adding lamI * B[:, i] instead would broadcast a (p, 1) column across
    # the (p, p) system and tie the solve to the previous iterate.
    lamI = numpy.matrix(numpy.eye(p) * lmba).astype(numpy.float32)
    Bnew = numpy.matrix(numpy.zeros((p, n))).astype(numpy.float32)
    for i in range(n):
        ratings = numpy.array(X[:, i].todense()).T[0]
        observed = ratings > 0
        Ai = A[:, observed]
        rhs = numpy.dot(Ai, numpy.matrix(ratings[observed]).T)
        gram = numpy.dot(Ai, Ai.T) + lamI
        # lstsq (rather than solve) still returns an answer when lmba is 0
        # and the Gram matrix is singular.
        Bnew[:, i] = numpy.linalg.lstsq(gram, rhs)[0]
    return Bnew
def RMSE(X, A, B):
    '''
    Root-mean-square error of the factorisation over the stored entries of X.

    Every entry (i, j, v) stored in X.tocoo() is compared with the
    prediction dot(A[:, i], B[:, j]).

    Parameters:
        X -- sparse matrix providing .tocoo().
        A -- factor matrix whose column i belongs to row i of X.
        B -- factor matrix whose column j belongs to column j of X.
        A and B may be numpy.matrix or ndarray with the same row count.

    Returns the RMSE as a Python float, or NaN when X stores no entries
    (the error is undefined there; this avoids a ZeroDivisionError).
    '''
    C = X.tocoo()
    count = len(C.data)
    if count == 0:
        return float('nan')
    # Work on plain float64 ndarrays: numpy.matrix would yield 1x1 matrices
    # for each prediction and float32 would lose precision in the sum.
    row_factors = numpy.asarray(A, dtype=numpy.float64)
    col_factors = numpy.asarray(B, dtype=numpy.float64)
    # Column k of each gathered array belongs to the k-th stored entry, so
    # the column-wise sum of products is that entry's prediction.
    predictions = numpy.sum(
        row_factors[:, C.row] * col_factors[:, C.col], axis=0)
    residuals = numpy.asarray(C.data, dtype=numpy.float64) - predictions
    return math.sqrt(float(numpy.dot(residuals, residuals)) / count)
# Alternate the two least-squares solves and report the fit after each pass.
# B is updated from the freshly solved A, not from the previous one.
for epoch in range(epochs):
    A = update_A(X, A, B)
    B = update_B(X, A, B)
    E = RMSE(X, A, B)
    # Single-argument print in parentheses behaves the same under Python 2.
    print("Epoch %d: RMSE %.4f..." % (epoch, E))

# Show the input next to its reconstruction A.T * B.  todense() materialises
# the full matrix, so this is only practical for small data sets.
print(X.todense())
numpy.set_printoptions(precision=2)
print(numpy.dot(A.T, B))

print('Saving to factorization.pkl...')
data = {
    'm': m,
    'n': n,
    'lmba': lmba,
    'A': A,
    'B': B,
}
# The with-block closes the file even if pickle.dump raises.
with open('factorization.pkl', 'wb') as output:
    pickle.dump(data, output)
print('Done!')