Get timestamps during decoding (#598)
* print out timestamps during decoding
* add word-level alignments
* support computing the mean symbol delay with word-level alignments (see the sketch below)
* print the variance of the symbol delay
* update doc
* support computing the delay for pruned_transducer_stateless4
* fix bug
* add doc
1 parent ff3f026, commit 0366877.
Showing 8 changed files with 1,094 additions and 150 deletions.
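The delay-related changes in this commit compare the time at which the decoder emits each word against the reference word-level alignment. As a hedged illustration only (this is not the code added in the commit; the function name symbol_delay_stats and the assumption that reference and hypothesis contain the same words in the same order are mine), the mean and variance of the symbol delay could be computed roughly like this:

#!/usr/bin/env python3
# Hedged sketch: illustrates the idea of "mean/variance of symbol delay",
# i.e. decoded emission time minus reference start time per word.
# It is NOT the implementation added in this commit.

from statistics import mean, pvariance
from typing import List, Tuple


def symbol_delay_stats(
    ref_starts: List[float], hyp_times: List[float]
) -> Tuple[float, float]:
    """Return (mean, variance) of (hyp_time - ref_start) over aligned words.

    Assumes both lists refer to the same words in the same order, e.g. after
    dropping utterances with insertion/deletion errors.
    """
    assert len(ref_starts) == len(hyp_times)
    delays = [h - r for r, h in zip(ref_starts, hyp_times)]
    return mean(delays), pvariance(delays)


if __name__ == "__main__":
    # Reference word start times (seconds) from the forced alignments,
    # and the times at which the decoder emitted the same words.
    ref = [0.10, 0.55, 1.20]
    hyp = [0.28, 0.71, 1.42]
    m, v = symbol_delay_stats(ref, hyp)
    print(f"mean symbol delay: {m:.3f} s, variance: {v:.4f}")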
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

set -eou pipefail

alignments_dir=data/alignment
cuts_in_dir=data/fbank
cuts_out_dir=data/fbank_ali

python3 ./local/add_alignment_librispeech.py \
  --alignments-dir $alignments_dir \
  --cuts-in-dir $cuts_in_dir \
  --cuts-out-dir $cuts_out_dir
@@ -0,0 +1,196 @@
#!/usr/bin/env python3
# Copyright 2022 Xiaomi Corp. (authors: Zengwei Yao)
#
# See ../../../../LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


""" | ||
This file adds alignments from https://github.com/CorentinJ/librispeech-alignments # noqa | ||
to the existing fbank features dir (e.g., data/fbank) | ||
and save cuts to a new dir (e.g., data/fbank_ali). | ||
""" | ||

import argparse
import logging
import zipfile
from pathlib import Path
from typing import List

from lhotse import CutSet, load_manifest_lazy
from lhotse.recipes.librispeech import parse_alignments
from lhotse.utils import is_module_available

LIBRISPEECH_ALIGNMENTS_URL = (
    "https://drive.google.com/uc?id=1WYfgr31T-PPwMcxuAq09XZfHQO5Mw8fE"
)

DATASET_PARTS = [
    "dev-clean",
    "dev-other",
    "test-clean",
    "test-other",
    "train-clean-100",
    "train-clean-360",
    "train-other-500",
]


def get_parser():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument(
        "--alignments-dir",
        type=str,
        default="data/alignment",
        help="The dir to save alignments.",
    )

    parser.add_argument(
        "--cuts-in-dir",
        type=str,
        default="data/fbank",
        help="The dir of the existing cuts without alignments.",
    )

    parser.add_argument(
        "--cuts-out-dir",
        type=str,
        default="data/fbank_ali",
        help="The dir to save the new cuts with alignments",
    )

    return parser


def download_alignments(
    target_dir: str, alignments_url: str = LIBRISPEECH_ALIGNMENTS_URL
):
""" | ||
Download and extract the alignments. | ||
Note: If you can not access drive.google.com, you could download the file | ||
`LibriSpeech-Alignments.zip` from huggingface: | ||
https://huggingface.co/Zengwei/librispeech-alignments | ||
and extract the zip file manually. | ||
Args: | ||
target_dir: | ||
The dir to save alignments. | ||
alignments_url: | ||
The URL of alignments. | ||
""" | ||
"""Modified from https://github.com/lhotse-speech/lhotse/blob/master/lhotse/recipes/librispeech.py""" # noqa | ||
    target_dir = Path(target_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    completed_detector = target_dir / ".ali_completed"
    if completed_detector.is_file():
        logging.info("The alignment files already exist.")
        return

    ali_zip_path = target_dir / "LibriSpeech-Alignments.zip"
    if not ali_zip_path.is_file():
        assert is_module_available(
            "gdown"
        ), 'To download LibriSpeech alignments, please run "pip install gdown"'  # noqa
        import gdown

        gdown.download(alignments_url, output=str(ali_zip_path))

    with zipfile.ZipFile(str(ali_zip_path)) as f:
        f.extractall(path=target_dir)
        completed_detector.touch()


def add_alignment(
    alignments_dir: str,
    cuts_in_dir: str = "data/fbank",
    cuts_out_dir: str = "data/fbank_ali",
    dataset_parts: List[str] = DATASET_PARTS,
):
    """
    Add alignment info to existing cuts.
    Args:
      alignments_dir:
        The dir of the alignments.
      cuts_in_dir:
        The dir of the existing cuts.
      cuts_out_dir:
        The dir to save the new cuts with alignments.
      dataset_parts:
        Librispeech parts to add alignments.
    """
    alignments_dir = Path(alignments_dir)
    cuts_in_dir = Path(cuts_in_dir)
    cuts_out_dir = Path(cuts_out_dir)
    cuts_out_dir.mkdir(parents=True, exist_ok=True)

    for part in dataset_parts:
        logging.info(f"Processing {part}")

        cuts_in_path = cuts_in_dir / f"librispeech_cuts_{part}.jsonl.gz"
        if not cuts_in_path.is_file():
            logging.info(f"{cuts_in_path} does not exist - skipping.")
            continue
        cuts_out_path = cuts_out_dir / f"librispeech_cuts_{part}.jsonl.gz"
        if cuts_out_path.is_file():
            logging.info(f"{part} already exists - skipping.")
            continue

        # parse alignments
        alignments = {}
        part_ali_dir = alignments_dir / "LibriSpeech" / part
        for ali_path in part_ali_dir.rglob("*.alignment.txt"):
            ali = parse_alignments(ali_path)
            alignments.update(ali)
        logging.info(
            f"{part} has {len(alignments.keys())} cuts with alignments."
        )

        # add alignment attribute and write out
        cuts_in = load_manifest_lazy(cuts_in_path)
        with CutSet.open_writer(cuts_out_path) as writer:
            for cut in cuts_in:
                for idx, subcut in enumerate(cut.supervisions):
                    origin_id = subcut.id.split("_")[0]
                    if origin_id in alignments:
                        ali = alignments[origin_id]
                    else:
                        logging.info(
f"Warning: {origin_id} does not has alignment." | ||
                        )
                        ali = []
                    subcut.alignment = {"word": ali}
                writer.write(cut, flush=True)


def main():
    formatter = (
        "%(asctime)s %(levelname)s [%(filename)s:%(lineno)d] %(message)s"
    )
    logging.basicConfig(format=formatter, level=logging.INFO)

    parser = get_parser()
    args = parser.parse_args()
    logging.info(vars(args))

    download_alignments(args.alignments_dir)
    add_alignment(args.alignments_dir, args.cuts_in_dir, args.cuts_out_dir)


if __name__ == "__main__":
    main()
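After the script above has been run, each supervision in the cuts under data/fbank_ali carries a "word" alignment. Here is a minimal sketch, not part of this commit, of how the output could be inspected; it assumes each alignment entry is a lhotse AlignmentItem exposing symbol, start, and duration, as produced by parse_alignments, and the manifest path should be adjusted to the part you processed.

#!/usr/bin/env python3
# Hedged usage sketch (not part of this commit): peek at the word-level
# alignments written by ./local/add_alignment_librispeech.py.

from lhotse import load_manifest_lazy

# Default --cuts-out-dir plus one dataset part; change to match your setup.
cuts = load_manifest_lazy("data/fbank_ali/librispeech_cuts_test-clean.jsonl.gz")

for cut in cuts:
    for sup in cut.supervisions:
        # Each item is expected to carry the word label and its start time and
        # duration in seconds.
        for item in sup.alignment["word"]:
            print(item.symbol, item.start, item.duration)
        break  # only the first supervision
    break  # only the first cut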