Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add recipe for the Santa Barbara Corpus of Spoken American English (SBCSAE) #1395

Merged
merged 14 commits into from
Oct 4, 2024
Merged
2 changes: 2 additions & 0 deletions docs/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ a CLI tool that creates the manifests given a corpus directory.
- :func:`lhotse.recipes.prepare_reazonspeech`
* - RIRs and Noises Corpus (OpenSLR 28)
- :func:`lhotse.recipes.prepare_rir_noise`
* - SBCSAE
- :func:`lhotse.recipes.prepare_sbcsae`
* - Spatial-LibriSpeech
- :func:`lhotse.recipes.prepare_spatial_librispeech`
* - Speech Commands
Expand Down
1 change: 1 addition & 0 deletions lhotse/bin/modes/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
from .primewords import *
from .reazonspeech import *
from .rir_noise import *
from .sbcsae import *
from .slu import *
from .spatial_librispeech import *
from .speechcommands import *
Expand Down
58 changes: 58 additions & 0 deletions lhotse/bin/modes/recipes/sbcsae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from typing import Optional, Sequence

import click

from lhotse.bin.modes import download, prepare
from lhotse.recipes.sbcsae import download_sbcsae, prepare_sbcsae
from lhotse.utils import Pathlike

# Two click commands in this module are both defined under the name ``sbcsae``
# (one decorated with ``prepare.command``, one with ``download.command``).
# The second definition rebinds the module-level name, so this export refers
# to the last one defined.
__all__ = ["sbcsae"]


# Declared through ``prepare.command``: two positional arguments followed by
# two boolean switches that both default to off.
@prepare.command(context_settings={"show_default": True})
@click.argument("corpus_dir", type=click.Path(exists=True, dir_okay=True))
@click.argument("output_dir", type=click.Path())
@click.option(
    "--geolocation",
    is_flag=True,
    type=bool,
    default=False,
    help="Include geographic coordinates of speakers' hometowns in the manifests.",
)
@click.option(
    "--omit-realignments",
    is_flag=True,
    type=bool,
    default=False,
    help="Only output the original corpus segmentation without boundary improvements.",
)
def sbcsae(
    corpus_dir: Pathlike,
    output_dir: Pathlike,
    geolocation: bool,
    omit_realignments: bool,
):
    """SBCSAE data preparation."""
    # NOTE: the docstring above is what click shows as this command's help
    # text, so it is intentionally left terse; details live in these comments.
    #
    # corpus_dir        -- existing path holding the corpus (checked by click).
    # output_dir        -- destination path handed to the recipe as ``output_dir``.
    # geolocation       -- value of the ``--geolocation`` switch.
    # omit_realignments -- value of the ``--omit-realignments`` switch.
    recipe_kwargs = {
        "output_dir": output_dir,
        "geolocation": geolocation,
        "omit_realignments": omit_realignments,
    }
    # The corpus location stays positional; everything else goes by keyword.
    prepare_sbcsae(corpus_dir, **recipe_kwargs)


# Declared through ``download.command``: one positional target path plus a
# boolean ``--force-download`` switch that defaults to off.
@download.command(context_settings={"show_default": True})
@click.argument("target_dir", type=click.Path())
@click.option(
    "--force-download",
    is_flag=True,
    type=bool,
    default=False,
    help="Force download.",
)
def sbcsae(
    target_dir: Pathlike,
    force_download: bool,
):
    """SBCSAE download."""
    # NOTE: the docstring above is what click shows as this command's help
    # text, so it is intentionally left terse.
    #
    # target_dir     -- path passed positionally to the download recipe.
    # force_download -- value of the ``--force-download`` switch, forwarded
    #                   unchanged as the ``force_download`` keyword.
    download_kwargs = {"force_download": force_download}
    download_sbcsae(target_dir, **download_kwargs)
1 change: 1 addition & 0 deletions lhotse/recipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
from .peoples_speech import prepare_peoples_speech
from .reazonspeech import download_reazonspeech, prepare_reazonspeech
from .rir_noise import download_rir_noise, prepare_rir_noise
from .sbcsae import download_sbcsae, prepare_sbcsae
from .slu import prepare_slu
from .spatial_librispeech import (
download_spatial_librispeech,
Expand Down
Loading
Loading