From 33911065dd8986893d901d1777b7974dcfa07e05 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 5 Dec 2020 18:24:32 +0100 Subject: [PATCH 01/17] initial changes for yesno dataset --- torchaudio/datasets/yesno.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 182d224ba4..18758cbae1 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -8,8 +8,7 @@ from torch.utils.data import Dataset from torchaudio.datasets.utils import ( download_url, - extract_archive, - walk_files + extract_archive ) URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz" @@ -85,9 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*.wav')]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: From 79968d6fd0f5a3145198fea66f5fe93ebf6cf415 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 5 Dec 2020 20:50:56 +0100 Subject: [PATCH 02/17] librispeech to glob + path --- torchaudio/datasets/librispeech.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index c13fb312b8..8f12ae0812 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -125,9 +125,7 @@ def __init__(self, download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*'+self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: From 0ba43ac8180d48dc576e85dfceaf5b8719bfb2d9 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 5 Dec 2020 20:54:05 +0100 Subject: [PATCH 03/17] minor changes --- torchaudio/datasets/yesno.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 18758cbae1..7b99d883a9 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -8,7 +8,7 @@ from torch.utils.data import Dataset from torchaudio.datasets.utils import ( download_url, - extract_archive + extract_archive, ) URL = "http://www.openslr.org/resources/1/waves_yesno.tar.gz" From 4bc29cbce3e53758173b341b77984942223a2519 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 12 Dec 2020 09:50:25 +0100 Subject: [PATCH 04/17] minor comment --- torchaudio/datasets/librispeech.py | 2 +- torchaudio/datasets/yesno.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 8f12ae0812..12f1870c2a 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -125,7 +125,7 @@ def __init__(self, download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*'+self._ext_audio)]) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 7b99d883a9..bfad3e97ca 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -84,7 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*.wav')]) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*.' + self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: From c97918e970e20e8008ce0c3d6255305ef63a5d51 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 12 Dec 2020 10:21:58 +0100 Subject: [PATCH 05/17] sc update --- torchaudio/datasets/speechcommands.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/torchaudio/datasets/speechcommands.py b/torchaudio/datasets/speechcommands.py index 7d774da943..b98089f8ba 100644 --- a/torchaudio/datasets/speechcommands.py +++ b/torchaudio/datasets/speechcommands.py @@ -7,8 +7,7 @@ from torch import Tensor from torchaudio.datasets.utils import ( download_url, - extract_archive, - walk_files + extract_archive ) FOLDER_IN_ARCHIVE = "SpeechCommands" @@ -110,7 +109,7 @@ def __init__(self, self._walker = _load_list(self._path, "testing_list.txt") elif subset == "training": excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt")) - walker = walk_files(self._path, suffix=".wav", prefix=True) + walker = sorted([str(p) for p in Path(self._path).glob('*/*.wav')]) self._walker = [ w for w in walker if HASH_DIVIDER in w @@ -118,7 +117,7 @@ def __init__(self, and os.path.normpath(w) not in excludes ] else: - walker = walk_files(self._path, suffix=".wav", prefix=True) + walker = sorted([str(p) for p in Path(self._path).glob('*/*.wav')]) self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w] def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]: From f44d9854bf3bcbdc347863d63d8c6b8462d4f9c1 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 12 Dec 2020 22:10:53 +0100 Subject: [PATCH 06/17] change to Path libritts --- torchaudio/datasets/libritts.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index 8ed57d9b52..ba7243222d 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -8,7 +8,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files, ) URL = "train-clean-100" @@ -126,9 +125,7 @@ def __init__( download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = walk_files( - self._path, suffix=self._ext_audio, prefix=False, remove_suffix=True - ) + walker = sorted([str(p.stem) for p in Path(lt_data._path).glob('*/*/*' + self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: From 9081235c5d674731468d767a5c629fd49f4f2a68 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 12 Dec 2020 22:11:09 +0100 Subject: [PATCH 07/17] change to Path libritts --- torchaudio/datasets/libritts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index ba7243222d..57d85f99ae 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -125,7 +125,7 @@ def __init__( download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = sorted([str(p.stem) for p in Path(lt_data._path).glob('*/*/*' + self._ext_audio)]) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: From 5aab320569a1a626782749122889dc06a2deea6c Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Sat, 12 Dec 2020 22:13:06 +0100 Subject: [PATCH 08/17] yes no update --- torchaudio/datasets/yesno.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index bfad3e97ca..4fbb240653 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -84,7 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*.' + self._ext_audio)]) + walker = sorted([str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)]) self._walker = list(walker) def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: From 41d742f11813ffca64ac77fac8f11f40588fb4a0 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 12:34:33 +0100 Subject: [PATCH 09/17] remove redundant list --- torchaudio/datasets/librispeech.py | 2 +- torchaudio/datasets/yesno.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 12f1870c2a..93655fa98c 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -126,7 +126,7 @@ def __init__(self, extract_archive(archive) walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) - self._walker = list(walker) + self._walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 4fbb240653..1915f52880 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -85,7 +85,7 @@ def __init__(self, ) walker = sorted([str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)]) - self._walker = list(walker) + self._walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: """Load the n-th sample from the dataset. From f8fbbcb9383b66259d4c66a82e6b80a7c4ae543b Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 12:35:28 +0100 Subject: [PATCH 10/17] remove redundant list --- torchaudio/datasets/libritts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index 57d85f99ae..8b4d94f3ae 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -126,7 +126,7 @@ def __init__( extract_archive(archive) walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) - self._walker = list(walker) + self._walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: """Load the n-th sample from the dataset. From bbed79c4e845349a3115c934d8722953ba78575d Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 14:37:14 +0100 Subject: [PATCH 11/17] add missing comma --- torchaudio/datasets/speechcommands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchaudio/datasets/speechcommands.py b/torchaudio/datasets/speechcommands.py index b98089f8ba..eec31e7ac0 100644 --- a/torchaudio/datasets/speechcommands.py +++ b/torchaudio/datasets/speechcommands.py @@ -7,7 +7,7 @@ from torch import Tensor from torchaudio.datasets.utils import ( download_url, - extract_archive + extract_archive, ) FOLDER_IN_ARCHIVE = "SpeechCommands" From f70914bb123323b7f4ded82258402769d53b2beb Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 14:39:20 +0100 Subject: [PATCH 12/17] remove walk_files --- torchaudio/datasets/librispeech.py | 1 - 1 file changed, 1 deletion(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 93655fa98c..9690b4f1c7 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -8,7 +8,6 @@ from torchaudio.datasets.utils import ( download_url, extract_archive, - walk_files, ) URL = "train-clean-100" From f162d64c1a407c08603644ab1e5918523893feb1 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 15:12:28 +0100 Subject: [PATCH 13/17] walker sort --- torchaudio/datasets/librispeech.py | 2 +- torchaudio/datasets/libritts.py | 2 +- torchaudio/datasets/yesno.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 9690b4f1c7..9d7cf5cd78 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -125,7 +125,7 @@ def __init__(self, extract_archive(archive) walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) - self._walker.sort() + self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index 8b4d94f3ae..c07b70f7fc 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -126,7 +126,7 @@ def __init__( extract_archive(archive) walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) - self._walker.sort() + self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 1915f52880..5adbf65ca3 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -85,7 +85,7 @@ def __init__(self, ) walker = sorted([str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)]) - self._walker.sort() + self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: """Load the n-th sample from the dataset. From a6632a553c38cc3c71a97713cf83a4c814809507 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 15:15:48 +0100 Subject: [PATCH 14/17] fix sort --- torchaudio/datasets/librispeech.py | 2 +- torchaudio/datasets/libritts.py | 2 +- torchaudio/datasets/yesno.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 9d7cf5cd78..218c2aaa19 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -124,7 +124,7 @@ def __init__(self, download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) + walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index c07b70f7fc..4b2aa7cb42 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -125,7 +125,7 @@ def __init__( download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)]) + walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 5adbf65ca3..8a967c20a0 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -84,7 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = sorted([str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)]) + walker = [str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)] self._walker = walker.sort() def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: From 95251717f62a9c65261546b03c55c55e66e88d21 Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 15:39:26 +0100 Subject: [PATCH 15/17] inplace sorting --- torchaudio/datasets/librispeech.py | 3 ++- torchaudio/datasets/libritts.py | 3 ++- torchaudio/datasets/yesno.py | 3 ++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 218c2aaa19..5b3458a004 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -125,7 +125,8 @@ def __init__(self, extract_archive(archive) walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] - self._walker = walker.sort() + walker.sort() + self._walker = walker def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index 4b2aa7cb42..b09190afe6 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -126,7 +126,8 @@ def __init__( extract_archive(archive) walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] - self._walker = walker.sort() + walker.sort() + self._walker = walker def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index 8a967c20a0..fa841a533b 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -85,7 +85,8 @@ def __init__(self, ) walker = [str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)] - self._walker = walker.sort() + walker.sort() + self._walker = walker def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: """Load the n-th sample from the dataset. From bf3b26783d6b58e73ef0c9068ac20df2fb8f8add Mon Sep 17 00:00:00 2001 From: krishnakalyan3 Date: Tue, 15 Dec 2020 15:41:18 +0100 Subject: [PATCH 16/17] inplace sort --- torchaudio/datasets/speechcommands.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torchaudio/datasets/speechcommands.py b/torchaudio/datasets/speechcommands.py index eec31e7ac0..929f972084 100644 --- a/torchaudio/datasets/speechcommands.py +++ b/torchaudio/datasets/speechcommands.py @@ -109,7 +109,8 @@ def __init__(self, self._walker = _load_list(self._path, "testing_list.txt") elif subset == "training": excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt")) - walker = sorted([str(p) for p in Path(self._path).glob('*/*.wav')]) + walker = [str(p) for p in Path(self._path).glob('*/*.wav')] + walker.sort() self._walker = [ w for w in walker if HASH_DIVIDER in w @@ -117,7 +118,8 @@ def __init__(self, and os.path.normpath(w) not in excludes ] else: - walker = sorted([str(p) for p in Path(self._path).glob('*/*.wav')]) + walker = [str(p) for p in Path(self._path).glob('*/*.wav')] + walker.sort() self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w] def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]: From ce060262d3704372c97b56a2e9f5602205814369 Mon Sep 17 00:00:00 2001 From: Vincent Quenneville-Belair Date: Tue, 15 Dec 2020 10:57:38 -0500 Subject: [PATCH 17/17] use sorted(generator) --- torchaudio/datasets/librispeech.py | 4 +--- torchaudio/datasets/libritts.py | 4 +--- torchaudio/datasets/speechcommands.py | 6 ++---- torchaudio/datasets/yesno.py | 4 +--- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/torchaudio/datasets/librispeech.py b/torchaudio/datasets/librispeech.py index 5b3458a004..8447c4c8a2 100644 --- a/torchaudio/datasets/librispeech.py +++ b/torchaudio/datasets/librispeech.py @@ -124,9 +124,7 @@ def __init__(self, download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] - walker.sort() - self._walker = walker + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/libritts.py b/torchaudio/datasets/libritts.py index b09190afe6..9f0c38a751 100644 --- a/torchaudio/datasets/libritts.py +++ b/torchaudio/datasets/libritts.py @@ -125,9 +125,7 @@ def __init__( download_url(url, root, hash_value=checksum) extract_archive(archive) - walker = [str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)] - walker.sort() - self._walker = walker + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*/*/*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]: """Load the n-th sample from the dataset. diff --git a/torchaudio/datasets/speechcommands.py b/torchaudio/datasets/speechcommands.py index 929f972084..5264ea24de 100644 --- a/torchaudio/datasets/speechcommands.py +++ b/torchaudio/datasets/speechcommands.py @@ -109,8 +109,7 @@ def __init__(self, self._walker = _load_list(self._path, "testing_list.txt") elif subset == "training": excludes = set(_load_list(self._path, "validation_list.txt", "testing_list.txt")) - walker = [str(p) for p in Path(self._path).glob('*/*.wav')] - walker.sort() + walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav')) self._walker = [ w for w in walker if HASH_DIVIDER in w @@ -118,8 +117,7 @@ def __init__(self, and os.path.normpath(w) not in excludes ] else: - walker = [str(p) for p in Path(self._path).glob('*/*.wav')] - walker.sort() + walker = sorted(str(p) for p in Path(self._path).glob('*/*.wav')) self._walker = [w for w in walker if HASH_DIVIDER in w and EXCEPT_FOLDER not in w] def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int]: diff --git a/torchaudio/datasets/yesno.py b/torchaudio/datasets/yesno.py index fa841a533b..21d67f8ecc 100644 --- a/torchaudio/datasets/yesno.py +++ b/torchaudio/datasets/yesno.py @@ -84,9 +84,7 @@ def __init__(self, "Dataset not found. Please use `download=True` to download it." ) - walker = [str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)] - walker.sort() - self._walker = walker + self._walker = sorted(str(p.stem) for p in Path(self._path).glob('*' + self._ext_audio)) def __getitem__(self, n: int) -> Tuple[Tensor, int, List[int]]: """Load the n-th sample from the dataset.