-
Notifications
You must be signed in to change notification settings - Fork 0
/
inference.py
137 lines (95 loc) · 3.32 KB
/
inference.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#%%
# Inference script: load a pretrained slow_r50 video classifier and run it
# on clips served by the project's CustomVideoDataset.
import json
import logging
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
# Project-local modules (not on PyPI): dataset and per-frame transform.
from CustomVideoDataset import CustomVideoDataset
from video_transforms import VideoTransform
logging.basicConfig()
# NOTE(review): this import is never used below, and sits after a statement —
# consider removing it or moving it up with the other torch imports.
from torch.utils.tensorboard import SummaryWriter
#%% Load the model:
# Choose the `slow_r50` model, pretrained on Kinetics-400, from PyTorch Hub.
# Requires network access on the first run; cached locally afterwards.
model = torch.hub.load('facebookresearch/pytorchvideo', 'slow_r50', pretrained=True)
# This script only runs inference: switch BatchNorm/Dropout layers to
# evaluation mode, otherwise predictions use training-mode statistics.
model.eval()
#%% Get Kinetics class names from json file
json_filename = "k400/kinetics_classnames.json"
with open(json_filename, "r") as f:
    kinetics_classnames = json.load(f)
# Invert the mapping: the JSON maps class-name -> integer id; we want
# integer id -> class-name, with stray double quotes stripped from names.
kinetics_id_to_classname = {
    class_id: str(name).replace('"', "")
    for name, class_id in kinetics_classnames.items()
}
#%% Load data
logger = logging.getLogger()
# logger.setLevel(logging.INFO)
data_path = "k400/k400_paths.csv"
# Each CSV row is "<video_path>,<label>" with no header line.
data = pd.read_csv(data_path, header=None, delimiter=",")
samples = list(data.iloc[:, 0])
labels = list(data.iloc[:, 1])  # disregarded for SSL training
num_samples = len(data)
#%% Define input transform
# Spatial resolution (pixels) each frame is resized/cropped to by the dataset.
RESOLUTION = 256
# Project-local transform; exact operations defined in video_transforms.py.
transform = VideoTransform()
#%% Initialize data set and data loader
NUM_FRAMES = 8      # frames sampled per clip
SAMPLING_RATE = 8   # stride (in frames) between consecutive sampled frames
# Build the clip dataset from the CSV of video paths.
dataset = CustomVideoDataset(
    data_path=data_path,
    frames_per_clip=NUM_FRAMES,  # 8
    frame_step=SAMPLING_RATE,  # 8
    num_clips=1,
    random_clip_sampling=True,
    allow_clip_overlap=False,
    resolution=RESOLUTION,
    transform=transform
)
logger.info('CustomVideoDataset dataset created')
# Run configuration.
N_EPOCHS = 1   # NOTE(review): defined but not used below
BATCH_SIZE = 1
DEVICE = "cuda"  # NOTE(review): defined but never used — model/inputs stay on CPU
PIN_MEMORY = True
NUM_WORKERS = 0  # load in the main process
# Sequential (non-shuffled) loader over single-clip batches.
data_loader = DataLoader(
    dataset,
    # collate_fn=collator,
    # sampler=dist_sampler,
    batch_size=BATCH_SIZE,
    # drop_last=drop_last,
    pin_memory=PIN_MEMORY,
    num_workers=NUM_WORKERS,
    # persistent_workers=num_workers > 0,
    shuffle=False
)
logger.info('VideoDataset unsupervised data loader created')
#%% Load and visualize 1 clip from dataset, then classify it with the model
# vid, label, clip_indices, path_video = next(iter(data_loader))
model.eval()  # ensure inference mode (idempotent if already set upstream)
post_act = torch.nn.Softmax(dim=1)  # hoisted out of the loop: loop-invariant
for vid, label, clip_indices, path_video in data_loader:
    for idx, clip in enumerate(vid):
        # Visualize the 8 sampled frames of the 0th batch element in a 2x4 grid.
        rows, cols = 2, 4
        fig, axes = plt.subplots(rows, cols)
        for i in range(rows):
            for j in range(cols):
                # Use i*cols + j (not a hard-coded i*4+j) so the grid shape
                # can be changed without silently re-indexing the frames.
                img = clip[0][i * cols + j].numpy().astype(np.uint8)  # 0th batch
                axes[i, j].imshow(img)
        fig.suptitle(f"{path_video}_clip{idx}", fontsize=12)
        fig.show()
        # Pass the input clip through the model.
        inputs = clip.squeeze()  # assumes BATCH_SIZE == 1: drops the batch dim
        print("Shape inputs: ", inputs.shape)
        # inputs = inputs[0:8,...]
        inputs = inputs.permute(3, 0, 1, 2)  # (T, H, W, C) -> (C, T, H, W)
        # NOTE(review): normalization is commented out; the pretrained slow_r50
        # expects normalized float input — confirm VideoTransform already
        # scales the frames, otherwise predictions will be unreliable.
        # inputs = inputs / 255.0
        with torch.no_grad():  # inference only: skip autograd bookkeeping
            preds = model(inputs[None, ...])  # restore batch dim: (1, C, T, H, W)
        # Get the predicted classes (softmax over the class dimension).
        preds = post_act(preds)
        top5 = preds.topk(k=5)
        pred_classes = top5.indices[0]
        print(top5)
        print(f"\nLabel: {kinetics_id_to_classname[label.item()]} \t| Prediction: {kinetics_id_to_classname[pred_classes[0].item()]}")
        # Map the predicted classes to the label names
        pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes]
        print("Top 5 predicted labels: %s" % ", ".join(pred_class_names))
#%%