Skip to content

Commit

Permalink
Make walk_files traverse in alphabetically breadth-first order.
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok committed Jul 23, 2020
1 parent 3cdcd7b commit 90ccc57
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 8 deletions.
5 changes: 1 addition & 4 deletions test/datasets/libritts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,13 @@ def setUpClass(cls):

def test_libritts(self):
dataset = LIBRITTS(self.root_dir)
samples = list(dataset)
samples.sort(key=lambda s: s[4])

for i, (waveform,
sample_rate,
original_text,
normalized_text,
speaker_id,
chapter_id,
utterance_id) in enumerate(samples):
utterance_id) in enumerate(dataset):

expected_ids = self.utterance_ids[i]
expected_data = self.data[i]
Expand Down
44 changes: 44 additions & 0 deletions test/datasets/utils_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import os
from pathlib import Path

from torchaudio.datasets import utils as dataset_utils

from ..common_utils import (
TempDirMixin,
TorchaudioTestCase,
)


class TestWalkFiles(TempDirMixin, TorchaudioTestCase):
    """Check the order in which ``dataset_utils.walk_files`` yields files.

    ``setUp`` builds a three-level tree of empty ``.txt`` files and records
    every created path in ``self.expected``, in the order the walk is
    expected to report them.
    """
    # Root directory handed to walk_files; assigned in setUp.
    root = None
    # Paths of the created files, in expected traversal order; assigned in setUp.
    expected = None

    def _add_file(self, *parts):
        """Create an empty file at ``parts`` and append its path to ``self.expected``."""
        path = self.get_temp_path(*parts)
        self.expected.append(path)
        # NOTE(review): presumably get_temp_path creates missing parent
        # directories; touch() would fail otherwise -- confirm in TempDirMixin.
        Path(path).touch()

    def setUp(self):
        """Create the directory tree; files are added in expected walk order."""
        self.root = self.get_temp_path()
        self.expected = []

        # level 1
        for filename in ['a.txt', 'b.txt', 'c.txt']:
            self._add_file(filename)

        # level 2
        for dir1 in ['d1', 'd2', 'd3']:
            for filename in ['d.txt', 'e.txt', 'f.txt']:
                self._add_file(dir1, filename)
            # level 3 -- nested under each level-2 directory, so a directory's
            # own files come before the files of its sub directories.
            for dir2 in ['d1', 'd2', 'd3']:
                for filename in ['g.txt', 'h.txt', 'i.txt']:
                    self._add_file(dir1, dir2, filename)

    def test_walk_files(self):
        """walk_files should traverse files in alphabetical order"""
        found = [
            os.path.join(self.root, path)
            for path in dataset_utils.walk_files(self.root, '.txt', prefix=True)
        ]
        # Compare the complete lists: asserting item by item inside the loop
        # would pass silently if walk_files yielded too few (or no) paths.
        self.assertListEqual(found, self.expected)
4 changes: 1 addition & 3 deletions test/datasets/yesno_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,7 @@ def setUpClass(cls):

def test_yesno(self):
dataset = yesno.YESNO(self.root_dir)
samples = list(dataset)
samples.sort(key=lambda s: s[2])
for i, (waveform, sample_rate, label) in enumerate(samples):
for i, (waveform, sample_rate, label) in enumerate(dataset):
expected_label = self.labels[i]
expected_data = self.data[i]
self.assertEqual(expected_data, waveform, atol=5e-5, rtol=1e-8)
Expand Down
8 changes: 7 additions & 1 deletion torchaudio/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,13 @@ def walk_files(root: str,

root = os.path.expanduser(root)

for dirpath, _, files in os.walk(root):
for dirpath, dirs, files in os.walk(root):
dirs.sort()
# `dirs` is the very list os.walk consults to decide which sub directories to descend into,
# so sorting it in place makes os.walk visit sub directories in alphabetical order.
# see also
# https://stackoverflow.com/questions/6670029/can-i-force-python3s-os-walk-to-visit-directories-in-alphabetical-order-how#comment71993866_6670926
files.sort()
for f in files:
if f.endswith(suffix):

Expand Down

0 comments on commit 90ccc57

Please sign in to comment.