Skip to content

Commit

Do some changes for aishell/ASR/transducer_stateless/export.py (facebookresearch#347)

* do some changes for aishell/ASR/transducer_stateless/export.py
luomingshuang authored May 7, 2022
1 parent c059ef3 commit f783e10
Showing 1 changed file with 11 additions and 12 deletions.
23 changes: 11 additions & 12 deletions egs/aishell/ASR/transducer_stateless/export.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 #
 # Copyright 2021 Xiaomi Corporation (Author: Fangjun Kuang)
+# 2022 Xiaomi Corporation (Author: Mingshuang Luo)
 #
 # See ../../../../LICENSE for clarification regarding multiple authors
 #
@@ -22,7 +23,7 @@
 Usage:
 ./transducer_stateless/export.py \
   --exp-dir ./transducer_stateless/exp \
-  --bpe-model data/lang_bpe_500/bpe.model \
+  --lang-dir data/lang_char \
   --epoch 20 \
   --avg 10
@@ -33,20 +34,19 @@
 cd /path/to/exp_dir
 ln -s pretrained.pt epoch-9999.pt
-cd /path/to/egs/librispeech/ASR
+cd /path/to/egs/aishell/ASR
 ./transducer_stateless/decode.py \
   --exp-dir ./transducer_stateless/exp \
   --epoch 9999 \
   --avg 1 \
   --max-duration 1 \
-  --bpe-model data/lang_bpe_500/bpe.model
+  --lang-dir data/lang_char
 """

 import argparse
 import logging
 from pathlib import Path

-import sentencepiece as spm
 import torch
 import torch.nn as nn
 from conformer import Conformer
@@ -56,6 +56,7 @@

 from icefall.checkpoint import average_checkpoints, load_checkpoint
 from icefall.env import get_env_info
+from icefall.lexicon import Lexicon
 from icefall.utils import AttributeDict, str2bool


@@ -91,10 +92,10 @@ def get_parser():
     )

     parser.add_argument(
-        "--bpe-model",
+        "--lang-dir",
         type=str,
-        default="data/lang_bpe_500/bpe.model",
-        help="Path to the BPE model",
+        default="data/lang_char",
+        help="The lang dir",
     )

     parser.add_argument(
@@ -194,12 +195,10 @@ def main():

     logging.info(f"device: {device}")

-    sp = spm.SentencePieceProcessor()
-    sp.load(params.bpe_model)
+    lexicon = Lexicon(params.lang_dir)

-    # <blk> is defined in local/train_bpe_model.py
-    params.blank_id = sp.piece_to_id("<blk>")
-    params.vocab_size = sp.get_piece_size()
+    params.blank_id = 0
+    params.vocab_size = max(lexicon.tokens) + 1

     logging.info(params)

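The core of this commit is the last hunk: the SentencePiece model is replaced by icefall's char-based Lexicon when deriving blank_id and vocab_size. Below is a minimal standalone sketch of that derivation; it assumes icefall and k2 are installed and that data/lang_char has already been generated by the aishell data preparation, and it mirrors the calls shown in the diff rather than reproducing export.py itself.

from pathlib import Path

from icefall.lexicon import Lexicon

# Build the lexicon from the lang dir passed to export.py via --lang-dir
# (default in this commit: data/lang_char).
lexicon = Lexicon(Path("data/lang_char"))

# The blank token is assumed to occupy ID 0 in tokens.txt, matching the
# hard-coded value used in the diff above.
blank_id = 0

# lexicon.tokens is the list of integer token IDs, so the model's output
# dimension is the largest ID plus one.
vocab_size = max(lexicon.tokens) + 1

print(f"blank_id={blank_id}, vocab_size={vocab_size}")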

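The pretrained.pt workflow described in the updated docstring can be sanity-checked with a plain torch.load. Treating the file as a dict holding the model's state_dict under a "model" key (the format load_checkpoint consumes) is an assumption about how export.py writes it, not something shown in this diff.

import torch

# Load the file written by export.py (path taken from the usage example above).
ckpt = torch.load("transducer_stateless/exp/pretrained.pt", map_location="cpu")

# Assumption: the checkpoint is a dict with the model's state_dict under "model".
state_dict = ckpt["model"]
print(f"{len(state_dict)} parameter tensors exported")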