-
-
Notifications
You must be signed in to change notification settings - Fork 24
/
knn_example.py
80 lines (68 loc) · 2.47 KB
/
knn_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# -*- coding: utf-8 -*-
"""Example of using kNN for outlier detection
"""
# Author: Yue Zhao <[email protected]>
# License: BSD 2 clause
import os
import sys
import time
import torch
from pyod.models.knn import KNN as KNN_PyOD
from pyod.utils.data import evaluate_print
from pyod.utils.data import generate_data
# Temporary solution for relative imports in case pytod is not installed:
# make the parent of this script's directory importable.
# If pytod is installed, the following lines are not needed.
try:
    # Resolve against this script's own location so the example also works
    # when it is launched from a directory other than the one it lives in.
    _example_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # __file__ is undefined when the code is pasted into an interactive
    # session; fall back to the current working directory, which is what the
    # previous dirname("__file__") expression effectively resolved to.
    _example_dir = os.getcwd()
sys.path.append(os.path.abspath(os.path.join(_example_dir, '..')))
from pytod.models.knn import KNN
from pytod.utils.utility import validate_device
# ---- experiment configuration ----
contamination = 0.1  # proportion of outliers passed to generate_data
n_train = 30000  # number of training points
n_test = 5000  # number of testing points
n_features = 20  # number of features per sample
k = 10  # n_neighbors handed to both kNN detectors below

# Build the synthetic train/test split; random_state is fixed at 42.
X_train, X_test, y_train, y_test = generate_data(
    n_train=n_train,
    n_test=n_test,
    n_features=n_features,
    contamination=contamination,
    random_state=42,
)
# --- Baseline: PyOD's kNN detector ---
clf_name = 'KNN-PyOD'
clf = KNN_PyOD(n_neighbors=k)

# Time only the fit call. perf_counter() is a monotonic, high-resolution
# clock meant for measuring intervals; time.time() can jump if the system
# clock is adjusted mid-run.
start = time.perf_counter()
clf.fit(X_train)
end = time.perf_counter()
pyod_time = end - start  # seconds; reused for the speed-up ratio at the end

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print('Execution time', pyod_time)
# Wrap each NumPy array in a torch tensor, rebinding the same four names.
X_train, y_train, X_test, y_test = (
    torch.from_numpy(array)
    for array in (X_train, y_train, X_test, y_test)
)
# Two blank lines to separate the PyOD and PyTOD reports in the output.
print()
print()

# try to access the GPU, fall back to cpu if no gpu is available
device = validate_device(0)
# NOTE(review): the next line discards the validate_device() result and forces
# CPU execution. It looks like a debugging leftover that contradicts the
# comment above, but it is kept so the example's behavior does not change --
# confirm the intent before removing it.
device = 'cpu'

clf_name = 'KNN-PyTOD'
# Alternative configuration kept for reference:
# clf = KNN(n_neighbors=k, batch_size=10000, device=device)
clf = KNN(n_neighbors=k, batch_size=None, device=device)

# Time only the fit call. perf_counter() is a monotonic, high-resolution
# clock meant for measuring intervals; time.time() can jump if the system
# clock is adjusted mid-run.
start = time.perf_counter()
clf.fit(X_train)
end = time.perf_counter()
tod_time = end - start  # seconds

# get the prediction labels and outlier scores of the training data
y_train_pred = clf.labels_  # binary labels (0: inliers, 1: outliers)
y_train_scores = clf.decision_scores_  # raw outlier scores

# evaluate and print the results
print("\nOn Training Data:")
evaluate_print(clf_name, y_train, y_train_scores)
print('Execution time', tod_time)

# Ratio of the two fit durations; pyod_time is measured earlier in the script.
print('TOD is', round(pyod_time / tod_time, ndigits=2),
      'times faster than PyOD')