-
Notifications
You must be signed in to change notification settings - Fork 0
/
import_secns.m
executable file
·74 lines (53 loc) · 1.99 KB
/
import_secns.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
function [uxi, movies_md] = import_secns()
% Analyse the 100k movielens dataset.
%
% [UXI MOVIES_MD] = IMPORT_SECNS()
%
% UXI = USERS x ITEMS dense matrix of ratings, based on the
% (user, movie, rating) triples returned by LOAD_DATA. Cells with no
% rating are NaN.
%
% MOVIES_MD = per-movie metadata as returned by LOAD_MOVIES_MD.

% only USERS, MOVIES and RATINGS are used below; the other outputs of
% LOAD_DATA are kept for convenience when debugging
[dat, cols, users, movies, ratings, timestamps] = load_data();
[movies_md] = load_movies_md();

% Size the matrix by the largest id rather than by the number of
% distinct ids: the ids are used directly as row/column subscripts, so
% a gap in the id sequence would otherwise make the matrix too small.
nUsers = max(users);
nMovies = max(movies);
uxi = generate_uxi(nUsers, nMovies, users, movies, ratings);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [dat, cols, users, movies, ratings, timestamps] = load_data()
% Read the raw ratings table and split it into one vector per column.
%
% DAT = nPoints x 4 numeric table loaded from 'data/ml-100k/u.data'
% COLS = struct mapping each field name to its column index in DAT
% USERS, MOVIES, RATINGS, TIMESTAMPS = the corresponding columns of DAT

% column layout of the table
cols = struct('user', 1, 'movie', 2, 'rating', 3, 'timestamp', 4);

dat = load('data/ml-100k/u.data');

timestamps = dat(:, cols.timestamp);
ratings    = dat(:, cols.rating);
movies     = dat(:, cols.movie);
users      = dat(:, cols.user);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [uxi] = generate_uxi(nUsers, nMovies, users, movies, ratings)
% Build the dense USERS x MOVIES ratings matrix.
%
% [UXI] = GENERATE_UXI(NUSERS, NMOVIES, USERS, MOVIES, RATINGS)
%
% NUSERS, NMOVIES = minimum number of rows / columns of UXI
% USERS, MOVIES = row / column subscript for each rating
% RATINGS = value stored at (USERS(r), MOVIES(r))
%
% UXI = matrix holding each rating at its (user, movie) cell and NaN
% everywhere else. If the same (user, movie) pair occurs more than
% once, the last occurrence wins.
nPoints = numel(ratings);

% Assigning past the end of a matrix makes MATLAB grow it and pad the
% new cells with zeros, which would be indistinguishable from a real
% rating of 0. Allocate enough NaN cells up front to cover the largest
% subscript that will be written.
rowIdx = users(1:nPoints);
colIdx = movies(1:nPoints);
nRows = max([nUsers; rowIdx(:)]);
nCols = max([nMovies; colIdx(:)]);
uxi = nan(nRows, nCols);

% explicit loop keeps the last-write-wins behaviour for duplicates
for r = 1:nPoints
    uxi(rowIdx(r), colIdx(r)) = ratings(r);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [] = plot_uxi2(ratings)
% Plot a 5-bin histogram of RATINGS with a red vertical line at the mean.
%
% PLOT_UXI2(RATINGS)
%
% Draws into the current figure (creating one if none exists) and
% leaves HOLD switched on.
mean_rating = mean(ratings);
figure(gcf);
hold on
hist(ratings, 5);
title('Histogram of ratings');

% Draw the mean as a two-point vertical line spanning the y-range the
% histogram produced. Passing a scalar x with a vector y would instead
% plot disconnected points, which a line-only style does not render.
y_limits = get(gca, 'YLim');
plot([mean_rating mean_rating], y_limits, 'r-');
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [movies_md] = load_movies_md()
% Load the per-MOVIE metadata table.
%
% Each line of the input file is one pipe-delimited record, e.g.
% 1|Toy Story (1995)|01-Jan-1995||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0
%
% which is returned as
% - movies_md.textdata(1,:) = unstructured metadata
% '1' 'Toy Story (1995)' '01-Jan-1995' '' [1x53 char]
% - movies_md.data(1,:) = genre booleans
% 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
item_file = 'data/ml-100k/u.item';
field_sep = '|';
movies_md = importdata(item_file, field_sep);