-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathdataset_folder.py
100 lines (82 loc) · 3.73 KB
/
dataset_folder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# --------------------------------------------------------
# Copyright (c) 2022 Microsoft
# Licensed under The MIT License
# Written by Guoqiang Wei
# --------------------------------------------------------
from torchvision.datasets.vision import VisionDataset
from torchvision.datasets.folder import IMG_EXTENSIONS, make_dataset, default_loader
import os
import os.path
from typing import Any, Callable, Dict, List, Optional, Tuple
def make_dataset_with_ann(ann_file, img_prefix, extensions):
images = []
with open(ann_file, "r") as f:
contents = f.readlines()
for line_str in contents:
path_contents = [c for c in line_str.split('\t')]
im_file_name = path_contents[0]
class_index = int(path_contents[1])
assert str.lower(os.path.splitext(im_file_name)[-1]) in extensions
item = (os.path.join(img_prefix, im_file_name), class_index)
images.append(item)
return images
class CustomDatasetFolder(VisionDataset):
def __init__(
self,
root: str,
loader: Callable[[str], Any],
extensions: Optional[Tuple[str, ...]] = None,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
is_valid_file: Optional[Callable[[str], bool]] = None,
map_txt: str = None,
) -> None:
super(CustomDatasetFolder, self).__init__(root, transform=transform, target_transform=target_transform)
if os.path.isfile(map_txt):
samples = make_dataset_with_ann(map_txt, root, extensions=extensions)
else:
classes, class_to_idx = self._find_classes(self.root)
samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file)
if len(samples) == 0:
msg = "Found 0 files in subfolders of: {}\n".format(self.root)
if extensions is not None:
msg += "Supported extensions are: {}".format(",".join(extensions))
raise RuntimeError(msg)
self.loader = loader
self.extensions = extensions
self.labels = [y_1k for _, y_1k in samples]
self.classes = list(set(self.labels))
# self.class_to_idx = class_to_idx
self.samples = samples
self.targets = [s[1] for s in samples]
def _find_classes(self, dir: str) -> Tuple[List[str], Dict[str, int]]:
classes = [d.name for d in os.scandir(dir) if d.is_dir()]
classes.sort()
class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
return classes, class_to_idx
def __getitem__(self, index: int) -> Tuple[Any, Any]:
path, target = self.samples[index]
sample = self.loader(path)
if self.transform is not None:
sample = self.transform(sample)
if self.target_transform is not None:
target = self.target_transform(target)
return sample, target
def __len__(self) -> int:
return len(self.samples)
class ImageFolder(CustomDatasetFolder):
def __init__(
self,
root: str,
transform: Optional[Callable] = None,
target_transform: Optional[Callable] = None,
loader: Callable[[str], Any] = default_loader,
is_valid_file: Optional[Callable[[str], bool]] = None,
map_txt: str = None,
):
super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None,
transform=transform,
target_transform=target_transform,
is_valid_file=is_valid_file,
map_txt=map_txt)
self.imgs = self.samples