Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support file-like object in apply_effects_file #1166

Merged
merged 3 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion test/torchaudio_unittest/sox_effect/smoke_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_apply_effects_tensor(self, args):
load_params("sox_effect_test_args.json"),
name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
)
def test_apply_effects(self, args):
def test_apply_effects_file(self, args):
"""`apply_effects_file` should return identical data as sox command"""
dtype = 'int32'
channels_first = True
Expand All @@ -57,3 +57,23 @@ def test_apply_effects(self, args):

_found, _sr = sox_effects.apply_effects_file(
input_path, effects, normalize=False, channels_first=channels_first)

@parameterized.expand(
    load_params("sox_effect_test_args.json"),
    name_func=lambda f, i, p: f'{f.__name__}_{i}_{p.args[0]["effects"][0][0]}',
)
def test_apply_effects_fileobj(self, args):
    """Smoke test: `apply_effects_file` accepts a file object opened in binary mode.

    A WAV fixture is written to the temp directory and handed to
    `apply_effects_file` as an open file object. Only successful completion
    is checked; the returned values are not compared against anything.

    Args:
        args (dict): One parameter set from ``sox_effect_test_args.json``.
            ``effects`` is required; ``num_channels`` and
            ``input_sample_rate`` fall back to 2 and 8000.
    """
    effect_chain = args['effects']
    channel_count = args.get("num_channels", 2)
    sample_rate = args.get("input_sample_rate", 8000)

    # Write the int32 fixture that will be re-opened as a file object.
    wav_path = self.get_temp_path('input.wav')
    waveform = get_wav_data('int32', channel_count, channels_first=True)
    save_wav(wav_path, waveform, sample_rate, channels_first=True)

    with open(wav_path, 'rb') as stream:
        # The result is unpacked (so a 2-tuple is still required) but unused.
        _found, _sr = sox_effects.apply_effects_file(
            stream, effect_chain, normalize=False, channels_first=True)
161 changes: 160 additions & 1 deletion test/torchaudio_unittest/sox_effect/sox_effect_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import io
import itertools
from pathlib import Path
import tarfile

from torchaudio import sox_effects
from parameterized import parameterized
from torchaudio import sox_effects
from torchaudio._internal import module_utils as _mod_utils

from torchaudio_unittest.common_utils import (
TempDirMixin,
HttpServerMixin,
PytorchTestCase,
skipIfNoExtension,
skipIfNoModule,
skipIfNoExec,
get_asset_path,
get_sinusoid,
get_wav_data,
Expand All @@ -21,6 +27,10 @@
)


if _mod_utils.is_module_available("requests"):
import requests


@skipIfNoExtension
class TestSoxEffects(PytorchTestCase):
def test_init(self):
Expand Down Expand Up @@ -262,3 +272,152 @@ def test_mp3(self):
path = get_asset_path("mp3_without_ext")
_, sr = sox_effects.apply_effects_file(path, effects, format="mp3")
assert sr == 16000


@skipIfNoExec('sox')
@skipIfNoExtension
class TestFileObject(TempDirMixin, PytorchTestCase):
    """Compare `apply_effects_file` on file-like inputs against the sox command.

    Every test follows the same recipe: generate an audio file with
    ``sox_utils.gen_audio_file``, build a reference WAV by running the same
    effect chain through ``sox_utils.run_sox_effect``, then feed the generated
    audio to `apply_effects_file` through a particular kind of file-like
    object and compare both the sample rate and the samples.
    """

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_fileobj(self, ext, compression):
        """Effects applied through an open binary file match the sox reference."""
        rate = 16000
        chain = [['band', '300', '10']]
        # Only mp3 is passed an explicit format; every other case passes None.
        fmt = 'mp3' if ext == 'mp3' else None
        src_path = self.get_temp_path(f'input.{ext}')
        ref_path = self.get_temp_path('reference.wav')

        # Build the input and the reference produced by the sox command.
        sox_utils.gen_audio_file(
            src_path, rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            src_path, ref_path, chain, output_bitdepth=32)
        expected, expected_sr = load_wav(ref_path)

        with open(src_path, 'rb') as stream:
            found, sr = sox_effects.apply_effects_file(
                stream, chain, channels_first=True, format=fmt)

        # The result is also written to the temp dir; nothing here reads it back.
        save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=True)
        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_bytesio(self, ext, compression):
        """Effects applied through an in-memory ``io.BytesIO`` match the sox reference."""
        rate = 16000
        chain = [['band', '300', '10']]
        # Only mp3 is passed an explicit format; every other case passes None.
        fmt = 'mp3' if ext == 'mp3' else None
        src_path = self.get_temp_path(f'input.{ext}')
        ref_path = self.get_temp_path('reference.wav')

        # Build the input and the reference produced by the sox command.
        sox_utils.gen_audio_file(
            src_path, rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            src_path, ref_path, chain, output_bitdepth=32)
        expected, expected_sr = load_wav(ref_path)

        # Copy the whole encoded file into memory and decode from the buffer.
        with open(src_path, 'rb') as handle:
            buffer = io.BytesIO(handle.read())
        found, sr = sox_effects.apply_effects_file(
            buffer, chain, channels_first=True, format=fmt)

        # The result is also written to the temp dir; nothing here reads it back.
        save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=True)
        assert sr == expected_sr
        self.assertEqual(found, expected)

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_tarfile(self, ext, compression):
        """Effects applied to a tar archive member match the sox reference."""
        rate = 16000
        chain = [['band', '300', '10']]
        # Only mp3 is passed an explicit format; every other case passes None.
        fmt = 'mp3' if ext == 'mp3' else None
        member_name = f'input.{ext}'

        src_path = self.get_temp_path(member_name)
        ref_path = self.get_temp_path('reference.wav')
        tar_path = self.get_temp_path('archive.tar.gz')

        # Build the input and the reference produced by the sox command.
        sox_utils.gen_audio_file(
            src_path, rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            src_path, ref_path, chain, output_bitdepth=32)
        expected, expected_sr = load_wav(ref_path)

        # Pack the audio into an archive, then read it back as a member stream.
        with tarfile.TarFile(tar_path, 'w') as archive:
            archive.add(src_path, arcname=member_name)
        with tarfile.TarFile(tar_path, 'r') as archive:
            # The member stream is consumed while the archive is still open.
            member = archive.extractfile(member_name)
            found, sr = sox_effects.apply_effects_file(
                member, chain, channels_first=True, format=fmt)

        # The result is also written to the temp dir; nothing here reads it back.
        save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=True)
        assert sr == expected_sr
        self.assertEqual(found, expected)


@skipIfNoExtension
@skipIfNoExec('sox')
@skipIfNoModule("requests")
class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
    """Compare `apply_effects_file` on an HTTP response stream against the sox command."""

    @parameterized.expand([
        ('wav', None),
        ('mp3', 128),
        ('mp3', 320),
        ('flac', 0),
        ('flac', 5),
        ('flac', 8),
        ('vorbis', -1),
        ('vorbis', 10),
        ('amb', None),
    ])
    def test_requests(self, ext, compression):
        """Effects applied to a streamed ``requests`` response match the sox reference."""
        rate = 16000
        chain = [['band', '300', '10']]
        # Only mp3 is passed an explicit format; every other case passes None.
        fmt = 'mp3' if ext == 'mp3' else None
        file_name = f'input.{ext}'
        src_path = self.get_temp_path(file_name)
        ref_path = self.get_temp_path('reference.wav')

        # Build the input and the reference produced by the sox command.
        sox_utils.gen_audio_file(
            src_path, rate, num_channels=2, compression=compression)
        sox_utils.run_sox_effect(
            src_path, ref_path, chain, output_bitdepth=32)
        expected, expected_sr = load_wav(ref_path)

        # NOTE(review): presumably HttpServerMixin serves the temp directory,
        # so the URL resolves to the file generated above -- confirm.
        target = self.get_url(file_name)
        with requests.get(target, stream=True) as response:
            # `response.raw` is passed as the file-like source of audio bytes.
            found, sr = sox_effects.apply_effects_file(
                response.raw, chain, channels_first=True, format=fmt)

        # The result is also written to the temp dir; nothing here reads it back.
        save_wav(self.get_temp_path('result.wav'), found, sr, channels_first=True)
        assert sr == expected_sr
        self.assertEqual(found, expected)
5 changes: 5 additions & 0 deletions torchaudio/csrc/pybind.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <torch/extension.h>
#include <torchaudio/csrc/sox/effects.h>
#include <torchaudio/csrc/sox/io.h>
#include <torchaudio/csrc/sox/legacy.h>

Expand Down Expand Up @@ -112,4 +113,8 @@ PYBIND11_MODULE(_torchaudio, m) {
"save_audio_fileobj",
&torchaudio::sox_io::save_audio_fileobj,
"Save audio to file obj.");
m.def(
"apply_effects_fileobj",
&torchaudio::sox_effects::apply_effects_fileobj,
"Decode audio data from file-like obj and apply effects.");
}
22 changes: 18 additions & 4 deletions torchaudio/sox_effects/sox_effects.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import os
from pathlib import Path
from typing import List, Tuple, Optional

import torch

import torchaudio
from torchaudio._internal import module_utils as _mod_utils
from torchaudio.utils.sox_utils import list_effects

Expand Down Expand Up @@ -170,8 +173,16 @@ def apply_effects_file(
rate and leave samples untouched.

Args:
path (str or pathlib.Path): Path to the audio file. This function also handles ``pathlib.Path`` objects, but is
annotated as ``str`` for TorchScript compiler compatibility.
path (path-like object or file-like object):
Source of audio data. When the function is not compiled by TorchScript
(e.g. with ``torch.jit.script``), the following types are accepted:
* ``path-like``: file path
* ``file-like``: Object with ``read(size: int) -> bytes`` method,
which returns byte string of at most ``size`` length.
When the function is compiled by TorchScript, only ``str`` type is allowed.
Note:
* This argument is intentionally annotated as ``str`` only for
TorchScript compiler compatibility.
effects (List[List[str]]): List of effects.
normalize (bool):
When ``True``, this function always returns ``float32``, and sample values are
Expand Down Expand Up @@ -252,8 +263,11 @@ def apply_effects_file(
>>> for batch in loader:
>>> pass
"""
# Get string representation of 'path' in case Path object is passed
path = str(path)
if not torch.jit.is_scripting():
if hasattr(path, 'read'):
return torchaudio._torchaudio.apply_effects_fileobj(
path, effects, normalize, channels_first, format)
path = os.fspath(path)
signal = torch.ops.torchaudio.sox_effects_apply_effects_file(
path, effects, normalize, channels_first, format)
return signal.get_tensor(), signal.get_sample_rate()