Skip to content

Commit

Permalink
Using Path and glob instead of walk_files (pytorch#1069)
Browse files: browse the repository at this point in the history
- yesno
- librispeech
- libritts
- speechcommands

Co-authored-by: krishnakalyan3 <[email protected]>
Co-authored-by: Vincent Quenneville-Belair <[email protected]>
  • Loading branch information
3 people authored Dec 15, 2020
1 parent 79c97fb commit d25a4dd
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 18 deletions.
6 changes: 1 addition & 5 deletions torchaudio/datasets/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from torchaudio.datasets.utils import (
download_url,
extract_archive,
- walk_files,
)

URL = "train-clean-100"
Expand Down Expand Up @@ -125,10 +124,7 @@ def __init__(self,
download_url(url, root, hash_value=checksum)
extract_archive(archive)

- walker = walk_files(
- self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
- )
- self._walker = list(walker)
+ self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Expand Down
6 changes: 1 addition & 5 deletions torchaudio/datasets/libritts.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from torchaudio.datasets.utils import (
download_url,
extract_archive,
- walk_files,
)

URL = "train-clean-100"
Expand Down Expand Up @@ -126,10 +125,7 @@ def __init__(
download_url(url, root, hash_value=checksum)
extract_archive(archive)

- walker = walk_files(
- self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
- )
- self._walker = list(walker)
+ self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio))

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Expand Down
5 changes: 2 additions & 3 deletions torchaudio/datasets/speechcommands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from torchaudio.datasets.utils import (
download_url,
extract_archive,
- walk_files
)

FOLDER_IN_ARCHIVE = "SpeechCommands"
Expand Down Expand Up @@ -110,15 +109,15 @@ def __init__(self,
self._walker = _load_list(self._path, "testing_list.txt")
elif subset == "training":
excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt"))
- walker = walk_files(self._path, suffix=".wav", prefix=True)
+ walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav'))
self._walker = [
w for w in walker
if HASH_DIVIDER in w
and EXCEPT_FOLDER not in w
and os.path.normpath(w) not in excludes
]
else:
- walker = walk_files(self._path, suffix=".wav", prefix=True)
+ walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav'))
self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w]

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]:
Expand Down
6 changes: 1 addition & 5 deletions torchaudio/datasets/yesno.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from torchaudio.datasets.utils import (
download_url,
extract_archive,
- walk_files
)

URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz"
Expand Down Expand Up @@ -85,10 +84,7 @@ def __init__(self,
"Dataset not found. Please use `download=True` to download it."
)

- walker = walk_files(
- self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True
- )
- self._walker = list(walker)
+ self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio))

def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]:
"""Load the n-th sample from the dataset.
Expand Down

0 comments on commit d25a4dd

Please sign in to comment.