forked from fyang93/diffusion
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ts-classifier.py
116 lines (87 loc) · 2.89 KB
/
ts-classifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env python
"""
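ts-classifier.py

Compare baseline classifiers (LinearSVC, 1-nearest-neighbour) against
diffusion-based label scoring on a UCR time-series problem and print the
resulting scores.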
"""
# Toggle for silencing every warning raised through ``warnings.warn``.
# (The spelling of the constant is kept as-is so existing references keep working.)
SUPRESS_WARNINGS = True

if SUPRESS_WARNINGS:
    import warnings

    def warn(*args, **kwargs):
        """Drop-in replacement for ``warnings.warn`` that discards the warning."""
        return None

    # Replace the function itself rather than installing a filter, so that
    # libraries imported afterwards which call ``warnings.warn(...)`` stay
    # quiet even if they reconfigure the warning filters.
    # NOTE(review): presumably aimed at the sklearn imports below -- confirm.
    warnings.warn = warn
import os
os.environ['NUMEXPR_MAX_THREADS'] = '80'
import sys
import argparse
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from diffusion import VanillaDiffusion
from helpers import squeezed_array, permute_data, metric_fns
# --
# CLI
def parse_args(argv=None):
    """Parse the command-line options of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv[1:]``, which is the
            behaviour of the original zero-argument call.

    Returns:
        argparse.Namespace with:
            prob_name (str): value of ``--prob-name``, default ``'Adiac'``.
            seed (int): value of ``--seed``, default ``123``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--prob-name', type=str, default='Adiac')
    parser.add_argument('--seed', type=int, default=123)
    return parser.parse_args(argv)
# Read the CLI options once, then seed NumPy's global RNG with --seed.
# NOTE(review): presumably this makes permute_data and the estimators below
# reproducible -- confirm they draw from the global NumPy RNG.
args = parse_args()
np.random.seed(seed=args.seed)
# --
# IO
def _read_split(tsv_path):
    """Load one headerless, tab-separated split and pass it through permute_data.

    Column 0 is taken as the label vector and the remaining columns as the
    feature matrix. Returns whatever ``permute_data`` returns for that pair
    (unpacked by the callers as ``X, y``).
    """
    table = pd.read_csv(tsv_path, header=None, sep='\t').values
    return permute_data(table[:, 1:], table[:, 0])


# Train split is read (and permuted) first, then the test split.
X_train, y_train = _read_split(f'data/ucr/{args.prob_name}/{args.prob_name}_TRAIN.tsv')
X_test, y_test = _read_split(f'data/ucr/{args.prob_name}/{args.prob_name}_TEST.tsv')

# Test rows come first in the stacked matrix; n_test marks the boundary
# between test rows and train rows.
n_test = X_test.shape[0]
X = np.vstack([X_test, X_train])

# Two distinct training labels -> 'f1', anything else -> 'f1_macro'.
if len(set(y_train)) == 2:
    metric_fn = metric_fns['f1']
else:
    metric_fn = metric_fns['f1_macro']
# --
# Baselines
# Baseline 1: linear SVM with default settings, trained on the raw features.
svc_model = LinearSVC()
svc_model.fit(X_train, y_train)
svc_score = metric_fn(y_test, svc_model.predict(X_test))

# Baseline 2: 1-nearest-neighbour classifier on the raw features.
knn_model = KNeighborsClassifier(n_neighbors=1)
knn_model.fit(X_train, y_train)
knn_score = metric_fn(y_test, knn_model.predict(X_test))
# Does this match the Exline algorithms?
# --
# Settings handed to the diffusion model.
# NOTE(review): kd is presumably the neighbourhood size of the graph and
# sym_fn the symmetrisation rule -- confirm against diffusion.VanillaDiffusion.
kd = 8
sym_fn = 'mean'
n_trunc = np.inf  # only referenced by the disabled TDiffusion variant below

# Disabled alternative, kept for reference:
# diffusion_model = TDiffusion(features=X, kd=kd, metric='l2', sym_fn=sym_fn, alpha=0.9)
# d = diffusion_model.run(n_trunc=n_trunc, do_norm=False)

# Run diffusion over the stacked test+train feature matrix.
diffusion_model = VanillaDiffusion(features=X, kd=kd, sym_fn=sym_fn)
d = diffusion_model.run()

# X stacks test rows first, so this slice keeps test rows against train columns.
scores = d[:n_test, n_test:]
# Same scores with every row scaled to unit L2 norm.
nscores = normalize(scores, norm='l2', axis=1)
# --
# Diffusion w/ subsets of data
def _predict_by_class_sum(score_matrix):
    """Predict, for every row, the training label with the largest summed score.

    For each distinct label in ``y_train`` the columns belonging to that label
    are summed along the last axis; the label with the largest total wins.
    """
    classes = np.unique(y_train)
    totals = [score_matrix[:, y_train == c].sum(axis=-1) for c in classes]
    stacked = np.column_stack([squeezed_array(col) for col in totals])
    return classes[stacked.argmax(axis=-1)]


# Top-1: copy the label of the single highest-scoring training item.
best_train_idx = squeezed_array(scores.argmax(axis=-1))
top1_score = metric_fn(y_test, y_train[best_train_idx])

# Sum: class-wise sum over the raw scores.
sum_score = metric_fn(y_test, _predict_by_class_sum(scores))

# Norm sum: class-wise sum over the row-normalised scores.
nsum_score = metric_fn(y_test, _predict_by_class_sum(nscores))
# Collect every score under its name (insertion order is the print order)
# and emit the dict on stdout.
results = dict(
    svc_score=svc_score,
    knn_score=knn_score,
    top1_score=top1_score,
    sum_score=sum_score,
    nsum_score=nsum_score,
)
print(results)