forked from lancedb/vectordb-recipes
-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
70 lines (56 loc) · 2.01 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import lancedb
import numpy as np
import pandas as pd
import pytest
import subprocess
from main import get_recommendations, data
import main
# DOWNLOAD ======================================================
# Fetch the MovieLens "ml-latest-small" archive into the working directory and
# unpack it. Commands are passed as argument lists (no shell) so nothing is
# re-parsed by a shell. Return codes are deliberately not enforced, matching
# the original best-effort behaviour (e.g. an offline re-run with an archive
# that is already on disk).
subprocess.run(
    [
        "curl",
        "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip",
        "-o",
        "ml-latest-small.zip",
    ],
    check=False,
)
# -o: overwrite files left behind by a previous run instead of stopping at
# unzip's interactive "replace?" prompt.
subprocess.run(["unzip", "-o", "ml-latest-small.zip"], check=False)
# TESTING ======================================================
def test_main():
    """Build movie vectors from MovieLens ratings and request recommendations.

    Steps performed:
      1. Load ``ratings.csv`` and pivot it into a user x movie rating matrix
         (missing ratings filled with 0).
      2. Run an SVD on that matrix and use the transposed ``vh`` factor as one
         vector per movie column.
      3. Load ``movies.csv``, keep only movies that appear in the rating
         matrix, and append one record per movie to ``main.data`` (the list
         imported as ``data``).
      4. Create (or, if creation fails, open) the ``movie_set`` LanceDB table
         and store it on ``main.table``.
      5. Print the recommendations returned for two titles.

    The test makes no assertions; it passes as long as nothing raises.
    """
    # header=0 replaces the CSV's own header row with ``names``; dtypes are
    # fixed at parse time rather than reading the header as data and casting.
    ratings = pd.read_csv(
        "./ml-latest-small/ratings.csv",
        header=0,
        names=["user id", "movie id", "rating", "timestamp"],
        dtype={"user id": np.int32, "movie id": np.int32, "rating": np.float32},
    )
    ratings = ratings.drop(columns=["timestamp"])

    reviewmatrix = ratings.pivot(
        index="user id", columns="movie id", values="rating"
    ).fillna(0)

    # SVD
    matrix = reviewmatrix.values
    u, s, vh = np.linalg.svd(matrix, full_matrices=False)
    # Same array as np.rot90(np.fliplr(vh)): one row per movie column.
    vectors = vh.T
    print(vectors.shape)

    # Metadata
    movies = pd.read_csv(
        "./ml-latest-small/movies.csv", header=0, names=["movie id", "title", "genres"]
    )
    movies = movies[movies["movie id"].isin(reviewmatrix.columns)]

    # Pair the i-th remaining movie with the i-th vector, as the original
    # index loop did.
    # NOTE(review): this assumes the filtered movies are in the same order as
    # the pivot's movie-id columns - confirm against the dataset.
    # ``data`` is the list object owned by ``main``; extend it in place so the
    # module sees the new records.
    data.extend(
        {"id": movie_id, "title": title, "vector": vector, "genre": genres}
        for movie_id, title, vector, genres in zip(
            movies["movie id"].to_numpy(),
            movies["title"],
            vectors,
            movies["genres"],
        )
    )
    print(pd.DataFrame(data))

    # Connect to LanceDB
    db = lancedb.connect("./data/test-db")
    try:
        main.table = db.create_table("movie_set", data=data)
    except Exception:
        # Presumably the table already exists from an earlier run; reuse it.
        # Narrower than a bare ``except`` so Ctrl-C / SystemExit still propagate.
        main.table = db.open_table("movie_set")

    print(get_recommendations("Moana (2016)"))
    print(get_recommendations("Rogue One: A Star Wars Story (2016)"))