From d890689def447c24c2d3b31e254dcafb8d97b41e Mon Sep 17 00:00:00 2001 From: sarane Date: Mon, 24 Jun 2024 20:38:09 +0530 Subject: [PATCH 1/7] Exposing the 'stop_eou_threshold' parameter --- .gitmodules | 4 ++-- riva/client/argparse_utils.py | 6 ++++++ riva/client/asr.py | 3 +++ scripts/asr/riva_streaming_asr_client.py | 13 +++++++------ scripts/asr/transcribe_file.py | 13 +++++++------ scripts/asr/transcribe_file_offline.py | 13 +++++++------ scripts/asr/transcribe_mic.py | 13 +++++++------ 7 files changed, 39 insertions(+), 26 deletions(-) diff --git a/.gitmodules b/.gitmodules index bfe31f8..f366420 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "common"] path = common - url = https://github.com/nvidia-riva/common.git - branch = main + url = https://github.com/sarane22/common.git + branch = endpointing_stop_eou_threshold_param diff --git a/riva/client/argparse_utils.py b/riva/client/argparse_utils.py index 3d2b835..40e79b2 100644 --- a/riva/client/argparse_utils.py +++ b/riva/client/argparse_utils.py @@ -78,6 +78,12 @@ def add_asr_config_argparse_parameters( type=float, help="Threshold value for detecting the end of speech utterance", ) + parser.add_argument( + "--stop-eou-threshold", + default=-1.0, + type=float, + help="Threshold value for likelihood of blanks before detecting end of utterance", + ) return parser diff --git a/riva/client/asr.py b/riva/client/asr.py index 12e5293..ddd2bd5 100644 --- a/riva/client/asr.py +++ b/riva/client/asr.py @@ -130,6 +130,7 @@ def add_endpoint_parameters_to_config( stop_history: int, stop_history_eou: int, stop_threshold: float, + stop_eou_threshold: float, ) -> None: if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0): return @@ -146,6 +147,8 @@ def add_endpoint_parameters_to_config( endpointing_config.stop_history_eou = stop_history_eou if stop_threshold > 0: endpointing_config.stop_threshold = stop_threshold + if stop_eou_threshold > 
0: + endpointing_config.stop_eou_threshold = stop_eou_threshold inner_config.endpointing_config.CopyFrom(endpointing_config) diff --git a/scripts/asr/riva_streaming_asr_client.py b/scripts/asr/riva_streaming_asr_client.py index 8f47e6c..4711c5c 100644 --- a/scripts/asr/riva_streaming_asr_client.py +++ b/scripts/asr/riva_streaming_asr_client.py @@ -64,12 +64,13 @@ def streaming_transcription_worker( interim_results=True, ) riva.client.add_endpoint_parameters_to_config( - config, - args.start_history, - args.start_threshold, - args.stop_history, - args.stop_history_eou, - args.stop_threshold + config, + args.start_history, + args.start_threshold, + args.stop_history, + args.stop_history_eou, + args.stop_threshold, + args.stop_eou_threshold ) riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) for _ in range(args.num_iterations): diff --git a/scripts/asr/transcribe_file.py b/scripts/asr/transcribe_file.py index c6550af..b09b043 100644 --- a/scripts/asr/transcribe_file.py +++ b/scripts/asr/transcribe_file.py @@ -80,12 +80,13 @@ def main() -> None: ) riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) riva.client.add_endpoint_parameters_to_config( - config, - args.start_history, - args.start_threshold, - args.stop_history, - args.stop_history_eou, - args.stop_threshold + config, + args.start_history, + args.start_threshold, + args.stop_history, + args.stop_history_eou, + args.stop_threshold, + args.stop_eou_threshold ) sound_callback = None try: diff --git a/scripts/asr/transcribe_file_offline.py b/scripts/asr/transcribe_file_offline.py index 203a622..05ae64a 100644 --- a/scripts/asr/transcribe_file_offline.py +++ b/scripts/asr/transcribe_file_offline.py @@ -39,12 +39,13 @@ def main() -> None: riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) riva.client.add_speaker_diarization_to_config(config, args.speaker_diarization) 
riva.client.add_endpoint_parameters_to_config( - config, - args.start_history, - args.start_threshold, - args.stop_history, - args.stop_history_eou, - args.stop_threshold + config, + args.start_history, + args.start_threshold, + args.stop_history, + args.stop_history_eou, + args.stop_threshold, + args.stop_eou_threshold ) with args.input_file.open('rb') as fh: data = fh.read() diff --git a/scripts/asr/transcribe_mic.py b/scripts/asr/transcribe_mic.py index 2aa9ca4..ab3f345 100644 --- a/scripts/asr/transcribe_mic.py +++ b/scripts/asr/transcribe_mic.py @@ -58,12 +58,13 @@ def main() -> None: ) riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) riva.client.add_endpoint_parameters_to_config( - config, - args.start_history, - args.start_threshold, - args.stop_history, - args.stop_history_eou, - args.stop_threshold + config, + args.start_history, + args.start_threshold, + args.stop_history, + args.stop_history_eou, + args.stop_threshold, + args.stop_eou_threshold ) with riva.client.audio_io.MicrophoneStream( args.sample_rate_hz, From 61a0996d2a8891e5fc247fbedd36ea8e574d2265 Mon Sep 17 00:00:00 2001 From: sarane Date: Tue, 25 Jun 2024 09:48:34 +0530 Subject: [PATCH 2/7] updating submodule --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index 9dfc052..31db08d 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 9dfc052bba9a0cc2cc4b4f156e0ca8a273e9444e +Subproject commit 31db08d824d0f4fec0b244235c46e861c16fe5bb From e756ccb7565179a8eb8023b3104de19a89f4e065 Mon Sep 17 00:00:00 2001 From: sarane Date: Wed, 26 Jun 2024 12:48:30 +0530 Subject: [PATCH 3/7] Updating param name --- common | 2 +- riva/client/argparse_utils.py | 2 +- riva/client/asr.py | 6 +++--- scripts/asr/riva_streaming_asr_client.py | 2 +- scripts/asr/transcribe_file.py | 2 +- scripts/asr/transcribe_file_offline.py | 2 +- scripts/asr/transcribe_mic.py | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff 
--git a/common b/common index 31db08d..cc571a0 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit 31db08d824d0f4fec0b244235c46e861c16fe5bb +Subproject commit cc571a0219e9e3936e854bd7254774baf3b9ba08 diff --git a/riva/client/argparse_utils.py b/riva/client/argparse_utils.py index 40e79b2..e79e4df 100644 --- a/riva/client/argparse_utils.py +++ b/riva/client/argparse_utils.py @@ -79,7 +79,7 @@ def add_asr_config_argparse_parameters( help="Threshold value for detecting the end of speech utterance", ) parser.add_argument( - "--stop-eou-threshold", + "--stop-threshold-eou", default=-1.0, type=float, help="Threshold value for likelihood of blanks before detecting end of utterance", diff --git a/riva/client/asr.py b/riva/client/asr.py index ddd2bd5..7dafbbb 100644 --- a/riva/client/asr.py +++ b/riva/client/asr.py @@ -130,7 +130,7 @@ def add_endpoint_parameters_to_config( stop_history: int, stop_history_eou: int, stop_threshold: float, - stop_eou_threshold: float, + stop_threshold_eou: float, ) -> None: if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0): return @@ -147,8 +147,8 @@ def add_endpoint_parameters_to_config( endpointing_config.stop_history_eou = stop_history_eou if stop_threshold > 0: endpointing_config.stop_threshold = stop_threshold - if stop_eou_threshold > 0: - endpointing_config.stop_eou_threshold = stop_eou_threshold + if stop_threshold_eou > 0: + endpointing_config.stop_threshold_eou = stop_threshold_eou inner_config.endpointing_config.CopyFrom(endpointing_config) diff --git a/scripts/asr/riva_streaming_asr_client.py b/scripts/asr/riva_streaming_asr_client.py index 4711c5c..8d4fd9b 100644 --- a/scripts/asr/riva_streaming_asr_client.py +++ b/scripts/asr/riva_streaming_asr_client.py @@ -70,7 +70,7 @@ def streaming_transcription_worker( args.stop_history, args.stop_history_eou, args.stop_threshold, - args.stop_eou_threshold + args.stop_threshold_eou ) 
riva.client.add_word_boosting_to_config(config, args.boosted_lm_words, args.boosted_lm_score) for _ in range(args.num_iterations): diff --git a/scripts/asr/transcribe_file.py b/scripts/asr/transcribe_file.py index b09b043..976cdb9 100644 --- a/scripts/asr/transcribe_file.py +++ b/scripts/asr/transcribe_file.py @@ -86,7 +86,7 @@ def main() -> None: args.stop_history, args.stop_history_eou, args.stop_threshold, - args.stop_eou_threshold + args.stop_threshold_eou ) sound_callback = None try: diff --git a/scripts/asr/transcribe_file_offline.py b/scripts/asr/transcribe_file_offline.py index 05ae64a..5dcda00 100644 --- a/scripts/asr/transcribe_file_offline.py +++ b/scripts/asr/transcribe_file_offline.py @@ -45,7 +45,7 @@ def main() -> None: args.stop_history, args.stop_history_eou, args.stop_threshold, - args.stop_eou_threshold + args.stop_threshold_eou ) with args.input_file.open('rb') as fh: data = fh.read() diff --git a/scripts/asr/transcribe_mic.py b/scripts/asr/transcribe_mic.py index ab3f345..595563b 100644 --- a/scripts/asr/transcribe_mic.py +++ b/scripts/asr/transcribe_mic.py @@ -64,7 +64,7 @@ def main() -> None: args.stop_history, args.stop_history_eou, args.stop_threshold, - args.stop_eou_threshold + args.stop_threshold_eou ) with riva.client.audio_io.MicrophoneStream( args.sample_rate_hz, From ffb670a07d84a2c224dea85e3f5887485e7a3cf8 Mon Sep 17 00:00:00 2001 From: sarane Date: Thu, 27 Jun 2024 12:28:06 +0530 Subject: [PATCH 4/7] Updating help for VAD param --- riva/client/argparse_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/riva/client/argparse_utils.py b/riva/client/argparse_utils.py index e79e4df..b92d657 100644 --- a/riva/client/argparse_utils.py +++ b/riva/client/argparse_utils.py @@ -52,7 +52,7 @@ def add_asr_config_argparse_parameters( "--start-history", default=-1, type=int, - help="Value to detect and initiate start of speech utterance", + help="Value (in milliseconds) to detect and initiate start of 
speech utterance", ) parser.add_argument( "--start-threshold", @@ -64,13 +64,7 @@ def add_asr_config_argparse_parameters( "--stop-history", default=-1, type=int, - help="Value to reset the endpoint detection history", - ) - parser.add_argument( - "--stop-history-eou", - default=-1, - type=int, - help="Value to determine the response history for endpoint detection", + help="Value (in milliseconds) to detect end of utterance and reset decoder", ) parser.add_argument( "--stop-threshold", @@ -78,6 +72,12 @@ def add_asr_config_argparse_parameters( type=float, help="Threshold value for detecting the end of speech utterance", ) + parser.add_argument( + "--stop-history-eou", + default=-1, + type=int, + help="Value (in milliseconds) to detect end of utterance for the 1st pass and generate an intermediate final transcript", + ) parser.add_argument( "--stop-threshold-eou", default=-1.0, From 6548dfe1c5afaa9632a1488bd182138c00644822 Mon Sep 17 00:00:00 2001 From: sarane Date: Thu, 27 Jun 2024 13:31:04 +0530 Subject: [PATCH 5/7] Adding check for stop_threshold_eou --- riva/client/asr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/riva/client/asr.py b/riva/client/asr.py index 7dafbbb..5095fd6 100644 --- a/riva/client/asr.py +++ b/riva/client/asr.py @@ -132,7 +132,7 @@ def add_endpoint_parameters_to_config( stop_threshold: float, stop_threshold_eou: float, ) -> None: - if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0): + if not (start_history > 0 or start_threshold > 0 or stop_history > 0 or stop_history_eou > 0 or stop_threshold > 0 or stop_threshold_eou > 0): return inner_config: rasr.RecognitionConfig = config if isinstance(config, rasr.RecognitionConfig) else config.config From 84ae9b822be07e08490dbe7b34dd4b69def836fa Mon Sep 17 00:00:00 2001 From: sarane Date: Fri, 28 Jun 2024 00:54:58 +0530 Subject: [PATCH 6/7] Updating proto branch --- .gitmodules | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index f366420..5d0c8fe 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "common"] path = common - url = https://github.com/sarane22/common.git - branch = endpointing_stop_eou_threshold_param + url = https://github.com/nvidia-riva/common.git + branch = release/2.16.0 From 20959d675a135f84679487f62b8c221e3e11a185 Mon Sep 17 00:00:00 2001 From: sarane Date: Fri, 28 Jun 2024 01:14:25 +0530 Subject: [PATCH 7/7] updating the submodule --- common | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common b/common index cc571a0..a5707ad 160000 --- a/common +++ b/common @@ -1 +1 @@ -Subproject commit cc571a0219e9e3936e854bd7254774baf3b9ba08 +Subproject commit a5707ad2c4e3bf904905a9c5165fdecf9fab133b