Initial commit

hehefan committed Aug 25, 2021
1 parent dc59026 commit e97bc79
Showing 51 changed files with 5,550 additions and 1 deletion.
11 changes: 10 additions & 1 deletion README.md
@@ -1 +1,10 @@
# PST-Transformer

The code has been tested with Red Hat Enterprise Linux Workstation release 7.7 (Maipo), g++ (GCC) 8.3.1, PyTorch (both v1.4.0 and v1.9.0 are supported), CUDA 10.2, and cuDNN v7.6.

Compile the CUDA layers for [PointNet++](http://arxiv.org/abs/1706.02413), which we use for furthest point sampling (FPS) and radius-based neighbour search:
```
# pick the folder that matches your PyTorch version, e.g. for PyTorch 1.4.0:
mv modules-pytorch-1.4.0 modules
# (or: mv modules-pytorch-1.9.0 modules)
cd modules
python setup.py install
```
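
If in doubt, you can check the installed PyTorch version first (a quick sanity check, not part of the original instructions):
```
python -c "import torch; print(torch.__version__)"
```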
79 changes: 79 additions & 0 deletions datasets/msr.py
@@ -0,0 +1,79 @@
import os
import sys
import numpy as np
from torch.utils.data import Dataset

class MSRAction3D(Dataset):
    def __init__(self, root, frames_per_clip=16, step_between_clips=1, num_points=2048, train=True):
        super(MSRAction3D, self).__init__()

        self.videos = []
        self.labels = []
        self.index_map = []
        index = 0
        for video_name in os.listdir(root):
            # file names follow a<action>_s<subject>_... (e.g. a01_s01_e01_sdepth.npz);
            # subjects 1-5 form the training split, the remaining subjects the test split
            subject_id = int(video_name.split('_')[1].split('s')[1])
            if (train and subject_id <= 5) or (not train and subject_id > 5):
                video = np.load(os.path.join(root, video_name), allow_pickle=True)['point_clouds']
                self.videos.append(video)
                label = int(video_name.split('_')[0][1:]) - 1
                self.labels.append(label)

                # index every valid clip start frame of this video
                nframes = video.shape[0]
                for t in range(0, nframes - step_between_clips * (frames_per_clip - 1), step_between_clips):
                    self.index_map.append((index, t))
                index += 1

        self.frames_per_clip = frames_per_clip
        self.step_between_clips = step_between_clips
        self.num_points = num_points
        self.train = train
        self.num_classes = max(self.labels) + 1


    def __len__(self):
        return len(self.index_map)

    def __getitem__(self, idx):
        index, t = self.index_map[idx]

        video = self.videos[index]
        label = self.labels[index]

        # gather frames_per_clip consecutive frames starting at t
        clip = [video[t + i * self.step_between_clips] for i in range(self.frames_per_clip)]
        for i, p in enumerate(clip):
            if p.shape[0] > self.num_points:
                # more points than needed: subsample without replacement
                r = np.random.choice(p.shape[0], size=self.num_points, replace=False)
            else:
                # too few points: tile the whole frame, then pad the remainder randomly
                repeat, residue = self.num_points // p.shape[0], self.num_points % p.shape[0]
                r = np.random.choice(p.shape[0], size=residue, replace=False)
                r = np.concatenate([np.arange(p.shape[0]) for _ in range(repeat)] + [r], axis=0)
            clip[i] = p[r, :]
        clip = np.array(clip)

        if self.train:
            # random per-axis scaling as training augmentation
            scales = np.random.uniform(0.9, 1.1, size=3)
            clip = clip * scales

        # normalise coordinates
        clip = clip / 300

        return clip.astype(np.float32), label, index

if __name__ == '__main__':
    dataset = MSRAction3D(root='../data/msr_action', frames_per_clip=16)
    clip, label, video_idx = dataset[0]
    print(clip)
    print(label)
    print(video_idx)
    print(dataset.num_classes)
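
For orientation, a minimal usage sketch (not part of this commit) of how this dataset plugs into a standard PyTorch `DataLoader`; the batch shapes follow from the defaults above:
```
from torch.utils.data import DataLoader

dataset = MSRAction3D(root='../data/msr_action', frames_per_clip=16, num_points=2048, train=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4)

for clip, label, video_idx in loader:
    # clip: (8, 16, 2048, 3) float32 point-cloud clips
    # label: (8,) action labels; video_idx: (8,) source video indices
    print(clip.shape, label.shape, video_idx.shape)
    break
```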
193 changes: 193 additions & 0 deletions datasets/synthia.py
@@ -0,0 +1,193 @@
import os
import sys
import numpy as np
from pyquaternion import Quaternion
from torch.utils.data import Dataset

# mapping between raw Synthia class indices and training labels (-1 = unused, 12 = Void)
index_to_label = np.array([12, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, -1, -1, 11], dtype='int32')
label_to_index = np.array([2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 0], dtype='int32')
index_to_class = ['Void', 'Sky', 'Building', 'Road', 'Sidewalk', 'Fence', 'Vegetation', 'Pole', 'Car', 'Traffic Sign', 'Pedestrian', 'Bicycle', 'Lanemarking', 'Reserved', 'Reserved', 'Traffic Light']

def index_to_label_func(x):
    return index_to_label[x]
index_to_label_vec_func = np.vectorize(index_to_label_func)

class SegDataset(Dataset):
    def __init__(self, root='data/pc', meta='data/train_raw.txt', labelweight='data/labelweights.npz', frames_per_clip=3, num_points=16384, train=True):
        super(SegDataset, self).__init__()

        self.num_points = num_points
        self.train = train
        self.root = root
        self.frames_per_clip = frames_per_clip

        # inverse-log frequency weights for training; uniform weights for evaluation
        labelweights = np.load(labelweight)['labelweights'].astype(np.float32)
        if train:
            labelweights = 1 / np.log(1.2 + labelweights)
            self.labelweights = labelweights / labelweights.min()
        else:
            self.labelweights = np.ones_like(labelweights)

        # load every frame listed in the meta file into memory
        self.meta = []
        self.data = {}
        with open(meta, 'r') as f:
            for line in f:
                line = line.split(' ')[0]
                line = line.split('/')
                sequence_name = line[0]
                frame_id = int(line[-1].split('.')[0])

                fn = os.path.join(root, sequence_name + '-' + str(frame_id).zfill(6) + '.npz')
                data = np.load(fn)

                pc = data['pc']              # (16384, 3) point coordinates
                rgb = data['rgb']            # (16384, 3) point colours
                semantic = data['semantic']  # (16384,) raw class indices
                center = data['center']      # (3,) frame centre
                semantic = semantic.astype('uint8')

                self.data[sequence_name + '-' + str(frame_id)] = (pc, rgb, semantic, center)
                self.meta.append([sequence_name, frame_id])
        self.meta.sort()

    def __len__(self):
        return len(self.meta)

    def read_training_data_point(self, index):
        sequence_name, frame_id = self.meta[index]

        pcs = []
        rgbs = []
        semantics = []
        center_0 = None

        # walk backwards in time; if a frame is missing, reuse the most recent available one
        most_recent_success = -1
        for diff in range(0, self.frames_per_clip):
            key = sequence_name + '-' + str(frame_id - diff)
            if key in self.data:
                pc, rgb, semantic, center = self.data[key]
                most_recent_success = frame_id - diff
            else:
                pc, rgb, semantic, center = self.data[sequence_name + '-' + str(most_recent_success)]

            if diff == 0:
                center_0 = center

            pcs.append(pc)
            rgbs.append(rgb)
            semantics.append(semantic)

        pc = np.stack(pcs, axis=0)
        rgb = np.stack(rgbs, axis=0)
        semantic = np.stack(semantics, axis=0)

        return pc, rgb, semantic, center_0


    def half_crop_w_context(self, half, context, pc, rgb, semantic, center):
        # split the scene at the frame centre along z, keeping a `context`-wide band
        # beyond the cut; the loss is only evaluated on points strictly inside the half
        frames_per_clip = pc.shape[0]
        all_idx = np.arange(pc.shape[1])
        sample_indicies_half_w_context = []
        if half == 0:
            for f in range(frames_per_clip):
                sample_idx_half_w_context = all_idx[pc[f, :, 2] > (center[2] - context)]
                sample_indicies_half_w_context.append(sample_idx_half_w_context)
        else:
            for f in range(frames_per_clip):
                sample_idx_half_w_context = all_idx[pc[f, :, 2] < (center[2] + context)]
                sample_indicies_half_w_context.append(sample_idx_half_w_context)

        pc_half_w_context = [pc[f, s] for f, s in enumerate(sample_indicies_half_w_context)]
        rgb_half_w_context = [rgb[f, s] for f, s in enumerate(sample_indicies_half_w_context)]
        semantic_half_w_context = [semantic[f, s] for f, s in enumerate(sample_indicies_half_w_context)]
        if half == 0:
            loss_masks = [p[:, 2] > center[2] for p in pc_half_w_context]
        else:
            loss_masks = [p[:, 2] < center[2] for p in pc_half_w_context]
        valid_pred_idx_in_full = sample_indicies_half_w_context

        return pc_half_w_context, rgb_half_w_context, semantic_half_w_context, loss_masks, valid_pred_idx_in_full

    def augment(self, pc, center):
        # random mirror flip of the x axis about the clip centre
        # (pc arrives stacked as (frames, num_points, 3), hence the ellipsis index)
        flip = np.random.uniform(0, 1) > 0.5
        if flip:
            pc = (pc - center)
            pc[..., 0] *= -1
            pc += center

        # random uniform scaling
        scale = np.random.uniform(0.8, 1.2)
        pc = (pc - center) * scale + center

        # random rotation about the vertical (y) axis
        rot_axis = np.array([0, 1, 0])
        rot_angle = np.random.uniform(np.pi * 2)
        q = Quaternion(axis=rot_axis, angle=rot_angle)
        R = q.rotation_matrix

        pc = np.dot(pc - center, R) + center
        return pc

    def mask_and_label_conversion(self, semantic, loss_mask):
        # map raw Synthia indices to training labels and exclude Void points from the loss
        labels = []
        loss_masks = []
        for i, s in enumerate(semantic):
            sem = s.astype('int32')
            label = index_to_label_vec_func(sem)
            loss_mask_ = (label != 12) * loss_mask[i]
            label[label == 12] = 0

            labels.append(label)
            loss_masks.append(loss_mask_)
        return labels, loss_masks

    def choice_to_num_points(self, pc, rgb, label, loss_mask, valid_pred_idx_in_full):
        # shuffle point order per frame (changes FPS behaviour) and
        # sample or pad each frame to exactly num_points points
        for f in range(self.frames_per_clip):
            idx = np.arange(pc[f].shape[0])
            choice_num = self.num_points
            if pc[f].shape[0] > choice_num:
                shuffle_idx = np.random.choice(idx, choice_num, replace=False)
            else:
                shuffle_idx = np.concatenate([np.random.choice(idx, choice_num - idx.shape[0]), np.arange(idx.shape[0])])
            pc[f] = pc[f][shuffle_idx]
            rgb[f] = rgb[f][shuffle_idx]
            label[f] = label[f][shuffle_idx]
            loss_mask[f] = loss_mask[f][shuffle_idx]
            valid_pred_idx_in_full[f] = valid_pred_idx_in_full[f][shuffle_idx]

        pc = np.stack(pc, axis=0)
        rgb = np.stack(rgb, axis=0)
        label = np.stack(label, axis=0)
        loss_mask = np.stack(loss_mask, axis=0)
        valid_pred_idx_in_full = np.stack(valid_pred_idx_in_full, axis=0)

        return pc, rgb, label, loss_mask, valid_pred_idx_in_full

    def __getitem__(self, index):
        context = 1.

        pc, rgb, semantic, center = self.read_training_data_point(index)

        # process the two halves of the scene independently
        half = 0
        pc1, rgb1, semantic1, mask1, valid_pred_idx_in_full1 = self.half_crop_w_context(half, context, pc, rgb, semantic, center)
        label1, mask1 = self.mask_and_label_conversion(semantic1, mask1)
        pc1, rgb1, label1, mask1, valid_pred_idx_in_full1 = self.choice_to_num_points(pc1, rgb1, label1, mask1, valid_pred_idx_in_full1)

        half = 1
        pc2, rgb2, semantic2, mask2, valid_pred_idx_in_full2 = self.half_crop_w_context(half, context, pc, rgb, semantic, center)
        label2, mask2 = self.mask_and_label_conversion(semantic2, mask2)
        pc2, rgb2, label2, mask2, valid_pred_idx_in_full2 = self.choice_to_num_points(pc2, rgb2, label2, mask2, valid_pred_idx_in_full2)

        if self.train:
            pc1 = self.augment(pc1, center)
            pc2 = self.augment(pc2, center)

        # (frames, num_points, 3) -> (frames, 3, num_points)
        rgb1 = np.swapaxes(rgb1, 1, 2)
        rgb2 = np.swapaxes(rgb2, 1, 2)

        return pc1.astype(np.float32), rgb1.astype(np.float32), label1.astype(np.int64), mask1.astype(np.float32), pc2.astype(np.float32), rgb2.astype(np.float32), label2.astype(np.int64), mask2.astype(np.float32)
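
A minimal usage sketch (not part of this commit): iterating over SegDataset with a standard PyTorch `DataLoader`. The eight returned arrays pair up as (points, colours, labels, loss mask) for each scene half, with shapes following from the defaults above:
```
from torch.utils.data import DataLoader

dataset = SegDataset(root='data/pc', meta='data/train_raw.txt', labelweight='data/labelweights.npz', train=True)
loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=4)

for pc1, rgb1, label1, mask1, pc2, rgb2, label2, mask2 in loader:
    # pc*: (2, 3, 16384, 3), rgb*: (2, 3, 3, 16384), label*/mask*: (2, 3, 16384)
    print(pc1.shape, rgb1.shape, label1.shape, mask1.shape)
    break
```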

Binary file added models/.synthia2.py.swp
Binary file not shown.
Binary file added models/.video.py.swp
Binary file not shown.