-
Notifications
You must be signed in to change notification settings - Fork 109
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add doc for Russian onnx ASR models (#664)
- Loading branch information
1 parent
b95d1e2
commit 731a391
Showing
27 changed files
with
616 additions
and
159 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
...ls/offline-ctc/nemo/code-russian/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.int8.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:375 ./build/bin/sherpa-onnx-offline --nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx --tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt ./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav | ||
|
||
OfflineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="", decoder_filename="", joiner_filename=""), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model="./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx"), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), sense_voice=OfflineSenseVoiceModelConfig(model="", language="auto", use_itn=False), telespeech_ctc="", tokens="./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type="", modeling_unit="cjkchar", bpe_vocab=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0, rule_fsts="", rule_fars="") | ||
Creating recognizer ... | ||
Started | ||
Done! | ||
|
||
./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav | ||
{"lang": "", "emotion": "", "event": "", "text": "ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуп зеленый", "timestamps": [0.04, 0.12, 0.20, 0.24, 0.32, 0.40, 0.44, 0.56, 0.60, 0.64, 0.72, 0.76, 0.80, 0.84, 0.88, 1.00, 1.04, 1.16, 1.20, 1.28, 1.36, 1.40, 1.48, 1.64, 1.76, 1.84, 1.88, 1.92, 2.00, 2.04, 2.08, 2.16, 2.20, 2.28, 2.36, 2.40, 2.52, 2.56, 2.68, 2.72, 2.80, 2.84, 2.92, 3.00, 3.04, 3.08, 3.12, 3.20, 3.28, 3.32, 3.36, 3.44, 3.48, 3.56, 3.60, 3.68, 3.72, 3.76, 3.80, 3.88, 3.96, 4.00, 4.04, 4.12, 4.20, 4.24, 4.32, 4.36, 4.40, 4.48, 4.52, 4.56, 4.64, 4.68, 4.76, 4.88, 4.92, 4.96, 5.04, 5.08, 5.20, 5.40, 5.44, 5.56, 5.64, 5.68, 5.72, 5.80, 5.84, 5.92, 5.96, 6.08, 6.12, 6.16, 6.20, 6.24, 6.28, 6.36, 6.40, 6.48, 6.52, 6.56, 6.64, 6.72, 6.76, 6.80, 6.84, 6.96, 7.00, 7.04, 7.08, 7.20, 7.24, 7.28, 7.36, 7.40, 7.44, 7.52, 7.56, 7.64, 7.72, 7.80, 7.84, 7.92, 8.04, 8.08, 8.16, 8.20, 8.32, 8.36, 8.44, 9.12, 9.28, 9.32, 9.44, 9.48, 9.56, 9.60, 9.72, 9.76, 9.88, 9.92, 10.04, 10.08, 10.20, 10.24, 10.36, 10.40, 10.52, 10.56, 10.64, 10.68, 10.80, 10.84, 10.92], "tokens":["н", "и", "ч", "ь", "и", "х", " ", "н", "е", " ", "т", "р", "е", "б", "у", "я", " ", "п", "о", "х", "в", "а", "л", " ", "с", "ч", "а", "с", "т", "л", "и", "в", " ", "у", "ж", " ", "я", " ", "н", "а", "д", "е", "ж", "д", "о", "й", " ", "с", "л", "а", "д", "к", "о", "й", " ", "ч", "т", "о", " ", "д", "е", "в", "а", " ", "с", " ", "т", "р", "е", "п", "е", "т", "о", "м", " ", "л", "ю", "б", "в", "и", " ", "п", "о", "с", "м", "о", "т", "р", "и", "т", " ", "м", "о", "ж", "е", "т", " ", "б", "ы", "т", "ь", " ", "у", "к", "р", "а", "д", "к", "о", "й", " ", "н", "а", " ", "п", "е", "с", "н", "и", " ", "г", "р", "е", "ш", "н", "ы", "е", " ", "м", "о", "и", " ", "у", " ", "л", "у", "к", "о", "м", "о", "р", "ь", "я", " ", "д", "у", "п", " ", "з", "е", "л", "е", "н", "ы", "й"], "words": []} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 1.868 s | ||
Real time factor (RTF): 1.868 / 11.290 = 0.165 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ This page lists all offline CTC models from `NeMo`_. | |
|
||
how-to-export | ||
english | ||
russian |
112 changes: 112 additions & 0 deletions
112
docs/source/onnx/pretrained_models/offline-ctc/nemo/russian.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
Russian | ||
======= | ||
|
||
.. hint:: | ||
|
||
Please refer to :ref:`install_sherpa_onnx` to install `sherpa-onnx`_ | ||
before you read this section. | ||
|
||
This page lists offline CTC models from `NeMo`_ for Russian. | ||
|
||
sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24 | ||
----------------------------------------------- | ||
|
||
This model is converted from | ||
|
||
`<https://github.com/salute-developers/GigaAM>`_ | ||
|
||
You can find the conversion script at | ||
|
||
`<https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/nemo/GigaAM/run-ctc.sh>`_ | ||
|
||
.. warning:: | ||
|
||
The license of the model can be found at `<https://github.com/salute-developers/GigaAM/blob/main/GigaAM%20License_NC.pdf>`_. | ||
|
||
It is for non-commercial use only. | ||
|
||
In the following, we describe how to download it and use it with `sherpa-onnx`_. | ||
|
||
Download the model | ||
~~~~~~~~~~~~~~~~~~ | ||
|
||
Please use the following commands to download it. | ||
|
||
.. code-block:: bash | ||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 | ||
tar xvf sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 | ||
rm sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.tar.bz2 | ||
You should see something like below after downloading:: | ||
|
||
ls -lh sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/ | ||
total 558904 | ||
-rw-r--r-- 1 fangjun staff 89K Oct 24 21:20 GigaAM%20License_NC.pdf | ||
-rw-r--r-- 1 fangjun staff 318B Oct 24 21:20 README.md | ||
-rwxr-xr-x 1 fangjun staff 3.5K Oct 24 21:20 export-onnx-ctc.py | ||
-rw-r--r-- 1 fangjun staff 262M Oct 24 21:24 model.int8.onnx | ||
-rwxr-xr-x 1 fangjun staff 1.2K Oct 24 21:20 run-ctc.sh | ||
-rwxr-xr-x 1 fangjun staff 4.1K Oct 24 21:20 test-onnx-ctc.py | ||
drwxr-xr-x 4 fangjun staff 128B Oct 24 21:24 test_wavs | ||
-rw-r--r--@ 1 fangjun staff 196B Oct 24 21:31 tokens.txt | ||
|
||
Decode wave files | ||
~~~~~~~~~~~~~~~~~ | ||
|
||
.. hint:: | ||
|
||
It supports decoding only wave files of a single channel with 16-bit | ||
encoded samples, while the sampling rate does not need to be 16 kHz. | ||
|
||
.. code-block:: bash | ||
cd /path/to/sherpa-onnx | ||
./build/bin/sherpa-onnx-offline \ | ||
--nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \ | ||
--tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt \ | ||
./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/test_wavs/example.wav | ||
.. note:: | ||
|
||
Please use ``./build/bin/Release/sherpa-onnx-offline.exe`` for Windows. | ||
|
||
.. caution:: | ||
|
||
If you use Windows and get encoding issues, please run: | ||
|
||
.. code-block:: bash | ||
CHCP 65001 | ||
in your command line. | ||
|
||
You should see the following output: | ||
|
||
.. literalinclude:: ./code-russian/sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24.int8.txt | ||
|
||
Speech recognition from a microphone | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
.. code-block:: bash | ||
cd /path/to/sherpa-onnx | ||
./build/bin/sherpa-onnx-microphone-offline \ | ||
--nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \ | ||
--tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt | ||
Speech recognition from a microphone with VAD | ||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
|
||
.. code-block:: bash | ||
cd /path/to/sherpa-onnx | ||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx | ||
./build/bin/sherpa-onnx-vad-microphone-offline-asr \ | ||
--silero-vad-model=./silero_vad.onnx \ | ||
--nemo-ctc-model=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/model.int8.onnx \ | ||
--tokens=./sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24/tokens.txt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
...line-transducer/code-nemo/sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24.int8.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
/Users/fangjun/open-source/sherpa-onnx/sherpa-onnx/csrc/parse-options.cc:Read:375 ./build/bin/sherpa-onnx-offline --encoder=./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/encoder.int8.onnx --decoder=./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/decoder.onnx --joiner=./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/joiner.onnx --tokens=./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/tokens.txt --model-type=nemo_transducer ./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/test_wavs/example.wav | ||
|
||
OfflineRecognizerConfig(feat_config=FeatureExtractorConfig(sampling_rate=16000, feature_dim=80, low_freq=20, high_freq=-400, dither=0), model_config=OfflineModelConfig(transducer=OfflineTransducerModelConfig(encoder_filename="./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/encoder.int8.onnx", decoder_filename="./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/decoder.onnx", joiner_filename="./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/joiner.onnx"), paraformer=OfflineParaformerModelConfig(model=""), nemo_ctc=OfflineNemoEncDecCtcModelConfig(model=""), whisper=OfflineWhisperModelConfig(encoder="", decoder="", language="", task="transcribe", tail_paddings=-1), tdnn=OfflineTdnnModelConfig(model=""), zipformer_ctc=OfflineZipformerCtcModelConfig(model=""), wenet_ctc=OfflineWenetCtcModelConfig(model=""), sense_voice=OfflineSenseVoiceModelConfig(model="", language="auto", use_itn=False), telespeech_ctc="", tokens="./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/tokens.txt", num_threads=2, debug=False, provider="cpu", model_type="nemo_transducer", modeling_unit="cjkchar", bpe_vocab=""), lm_config=OfflineLMConfig(model="", scale=0.5), ctc_fst_decoder_config=OfflineCtcFstDecoderConfig(graph="", max_active=3000), decoding_method="greedy_search", max_active_paths=4, hotwords_file="", hotwords_score=1.5, blank_penalty=0, rule_fsts="", rule_fars="") | ||
Creating recognizer ... | ||
Started | ||
Done! | ||
|
||
./sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24/test_wavs/example.wav | ||
{"lang": "", "emotion": "", "event": "", "text": " ничьих не требуя похвал счастлив уж я надеждой сладкой что дева с трепетом любви посмотрит может быть украдкой на песни грешные мои у лукоморья дуб зеленый", "timestamps": [0.04, 0.16, 0.24, 0.28, 0.40, 0.48, 0.60, 0.68, 0.80, 0.92, 1.04, 1.20, 1.28, 1.44, 1.76, 1.88, 2.00, 2.08, 2.16, 2.28, 2.36, 2.44, 2.64, 2.76, 2.92, 3.00, 3.04, 3.16, 3.24, 3.36, 3.48, 3.56, 3.68, 3.88, 4.04, 4.16, 4.24, 4.32, 4.40, 4.56, 4.76, 4.88, 4.92, 5.36, 5.64, 5.84, 5.92, 6.04, 6.32, 6.52, 6.60, 6.72, 6.84, 6.92, 7.04, 7.16, 7.28, 7.36, 7.44, 7.56, 7.68, 7.72, 7.88, 8.00, 8.20, 8.36, 9.28, 9.40, 9.44, 9.52, 9.68, 9.84, 9.88, 9.92, 10.12, 10.32, 10.40, 10.52, 10.56, 10.76, 10.84], "tokens":[" ни", "ч", "ь", "и", "х", " не", " т", "ре", "бу", "я", " по", "х", "ва", "л", " с", "ча", "ст", "ли", "в", " у", "ж", " я", " на", "де", "ж", "до", "й", " с", "ла", "д", "ко", "й", " что", " де", "ва", " с", " т", "ре", "пе", "том", " лю", "б", "ви", " пос", "мот", "ри", "т", " может", " быть", " у", "к", "ра", "д", "ко", "й", " на", " п", "е", "с", "ни", " г", "ре", "ш", "ные", " мо", "и", " у", " ", "лу", "ко", "мо", "р", "ь", "я", " ду", "б", " з", "е", "лен", "ы", "й"], "words": []} | ||
---- | ||
num threads: 2 | ||
decoding method: greedy_search | ||
Elapsed seconds: 1.775 s | ||
Real time factor (RTF): 1.775 / 11.290 = 0.157 |
Oops, something went wrong.