Skip to content

Commit

Permalink
feat(stt): add speechBeginEvent param to recognize func
Browse files Browse the repository at this point in the history
  • Loading branch information
apaparazzi0329 committed May 20, 2024
1 parent 45ec51d commit 5cb5238
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 112 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2016, 2023.
* (C) Copyright IBM Corp. 2024.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -73,8 +73,9 @@ public Long getTotalWords() {
/**
* Gets the outOfVocabularyWords.
*
* <p>_For custom models that are based on previous-generation models_, the number of OOV words
* extracted from the corpus. The value is `0` while the corpus is being processed.
* <p>_For custom models that are based on large speech models and previous-generation models_,
* the number of OOV words extracted from the corpus. The value is `0` while the corpus is being
* processed.
*
* <p>_For custom models that are based on next-generation models_, no OOV words are extracted
* from corpora, so the value is always `0`.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2018, 2024.
* (C) Copyright IBM Corp. 2024.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -51,6 +51,8 @@ public interface Model {
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
/** de-DE_Telephony. */
String DE_DE_TELEPHONY = "de-DE_Telephony";
/** en-AU. */
String EN_AU = "en-AU";
/** en-AU_BroadbandModel. */
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
/** en-AU_Multimedia. */
Expand All @@ -59,8 +61,12 @@ public interface Model {
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
/** en-AU_Telephony. */
String EN_AU_TELEPHONY = "en-AU_Telephony";
/** en-IN. */
String EN_IN = "en-IN";
/** en-IN_Telephony. */
String EN_IN_TELEPHONY = "en-IN_Telephony";
/** en-GB. */
String EN_GB = "en-GB";
/** en-GB_BroadbandModel. */
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
/** en-GB_Multimedia. */
Expand All @@ -69,6 +75,8 @@ public interface Model {
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
/** en-GB_Telephony. */
String EN_GB_TELEPHONY = "en-GB_Telephony";
/** en-US. */
String EN_US = "en-US";
/** en-US_BroadbandModel. */
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
/** en-US_Multimedia. */
Expand Down Expand Up @@ -111,6 +119,8 @@ public interface Model {
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
/** es-PE_NarrowbandModel. */
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
/** fr-CA. */
String FR_CA = "fr-CA";
/** fr-CA_BroadbandModel. */
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
/** fr-CA_Multimedia. */
Expand All @@ -119,6 +129,8 @@ public interface Model {
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
/** fr-CA_Telephony. */
String FR_CA_TELEPHONY = "fr-CA_Telephony";
/** fr-FR. */
String FR_FR = "fr-FR";
/** fr-FR_BroadbandModel. */
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
/** fr-FR_Multimedia. */
Expand All @@ -137,6 +149,8 @@ public interface Model {
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
/** it-IT_Telephony. */
String IT_IT_TELEPHONY = "it-IT_Telephony";
/** ja-JP. */
String JA_JP = "ja-JP";
/** ja-JP_BroadbandModel. */
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
/** ja-JP_Multimedia. */
Expand Down Expand Up @@ -952,9 +966,9 @@ public String baseModelVersion() {
* custom language model compared to those from the base model for the current request.
*
* <p>Specify a value between 0.0 and 1.0. Unless a different customization weight was specified
* for the custom model when the model was trained, the default value is: * 0.3 for
* previous-generation models * 0.2 for most next-generation models * 0.1 for next-generation
* English and Japanese models
* for the custom model when the model was trained, the default value is: * 0.5 for large speech
* models * 0.3 for previous-generation models * 0.2 for most next-generation models * 0.1 for
* next-generation English and Japanese models
*
* <p>A customization weight that you specify overrides a weight that was specified when the
* custom model was trained. The default value yields the best performance in general. Assign a
Expand Down Expand Up @@ -1117,8 +1131,8 @@ public Boolean smartFormatting() {
/**
* Gets the smartFormattingVersion.
*
* <p>Smart formatting version is for next-generation models and that is supported in US English,
* Brazilian Portuguese, French and German languages.
* <p>Smart formatting version for large speech models and next-generation models is supported in
* US English, Brazilian Portuguese, French, German, Spanish and French Canadian languages.
*
* @return the smartFormattingVersion
*/
Expand All @@ -1135,8 +1149,8 @@ public Long smartFormattingVersion() {
* of whether you specify `false` for the parameter. * _For previous-generation models,_ the
* parameter can be used with Australian English, US English, German, Japanese, Korean, and
* Spanish (both broadband and narrowband models) and UK English (narrowband model) transcription
* only. * _For next-generation models,_ the parameter can be used with Czech, English
* (Australian, Indian, UK, and US), German, Japanese, Korean, and Spanish transcription only.
* only. * _For large speech models and next-generation models,_ the parameter can be used with
* all available languages.
*
* <p>See [Speaker
* labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
Expand Down Expand Up @@ -1310,8 +1324,8 @@ public Boolean splitTranscriptAtPhraseEnd() {
* <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
* (for example, `0.55`) is typically more than sufficient.
*
* <p>The parameter is supported with all next-generation models and with most previous-generation
* models. See [Speech detector
* <p>The parameter is supported with all large speech models and next-generation models, and
* with most previous-generation models. See [Speech detector
* sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
* and [Language model
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
Expand All @@ -1336,8 +1350,8 @@ public Float speechDetectorSensitivity() {
* <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
* (for example, `0.55`) is typically more than sufficient.
*
* <p>The parameter is supported with all next-generation models and with most previous-generation
* models. See [Background audio
* <p>The parameter is supported with all large speech models and next-generation models, and
* with most previous-generation models. See [Background audio
* suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression)
* and [Language model
* support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
Expand All @@ -1357,9 +1371,9 @@ public Float backgroundAudioSuppression() {
* parameter causes the models to produce results even more quickly, though the results might be
* less accurate when the parameter is used.
*
* <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
* It is available for most next-generation models. * For a list of next-generation models that
* support low latency, see [Supported next-generation language
* <p>The parameter is not available for large speech models or for previous-generation `Broadband`
* and `Narrowband` models. It is available for most next-generation models. * For a list of
* next-generation models that support low latency, see [Supported next-generation language
* models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
* * For more information about the `low_latency` parameter, see [Low
* latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
Expand All @@ -1373,9 +1387,10 @@ public Boolean lowLatency() {
/**
* Gets the characterInsertionBias.
*
* <p>For next-generation models, an indication of whether the service is biased to recognize
* shorter or longer strings of characters when developing transcription hypotheses. By default,
* the service is optimized to produce the best balance of strings of different lengths.
* <p>For large speech models and next-generation models, an indication of whether the service is
* biased to recognize shorter or longer strings of characters when developing transcription
* hypotheses. By default, the service is optimized to produce the best balance of strings of
* different lengths.
*
* <p>The default bias is 0.0. The allowable range of values is -1.0 to 1.0. * Negative values
* bias the service to favor hypotheses with shorter strings of characters. * Positive values bias
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2018, 2023.
* (C) Copyright IBM Corp. 2024.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -39,6 +39,8 @@ public interface BaseModelName {
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
/** de-DE_Telephony. */
String DE_DE_TELEPHONY = "de-DE_Telephony";
/** en-AU. */
String EN_AU = "en-AU";
/** en-AU_BroadbandModel. */
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
/** en-AU_Multimedia. */
Expand All @@ -47,6 +49,8 @@ public interface BaseModelName {
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
/** en-AU_Telephony. */
String EN_AU_TELEPHONY = "en-AU_Telephony";
/** en-GB. */
String EN_GB = "en-GB";
/** en-GB_BroadbandModel. */
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
/** en-GB_Multimedia. */
Expand All @@ -55,8 +59,12 @@ public interface BaseModelName {
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
/** en-GB_Telephony. */
String EN_GB_TELEPHONY = "en-GB_Telephony";
/** en-IN. */
String EN_IN = "en-IN";
/** en-IN_Telephony. */
String EN_IN_TELEPHONY = "en-IN_Telephony";
/** en-US. */
String EN_US = "en-US";
/** en-US_BroadbandModel. */
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
/** en-US_Multimedia. */
Expand Down Expand Up @@ -99,6 +107,8 @@ public interface BaseModelName {
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
/** es-PE_NarrowbandModel. */
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
/** fr-CA. */
String FR_CA = "fr-CA";
/** fr-CA_BroadbandModel. */
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
/** fr-CA_Multimedia. */
Expand All @@ -107,6 +117,8 @@ public interface BaseModelName {
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
/** fr-CA_Telephony. */
String FR_CA_TELEPHONY = "fr-CA_Telephony";
/** fr-FR. */
String FR_FR = "fr-FR";
/** fr-FR_BroadbandModel. */
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
/** fr-FR_Multimedia. */
Expand All @@ -125,6 +137,8 @@ public interface BaseModelName {
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
/** it-IT_Telephony. */
String IT_IT_TELEPHONY = "it-IT_Telephony";
/** ja-JP. */
String JA_JP = "ja-JP";
/** ja-JP_BroadbandModel. */
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
/** ja-JP_Multimedia. */
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2018, 2023.
* (C) Copyright IBM Corp. 2024.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -36,6 +36,8 @@ public interface ModelId {
String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
/** de-DE_Telephony. */
String DE_DE_TELEPHONY = "de-DE_Telephony";
/** en-AU. */
String EN_AU = "en-AU";
/** en-AU_BroadbandModel. */
String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
/** en-AU_Multimedia. */
Expand All @@ -44,6 +46,8 @@ public interface ModelId {
String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
/** en-AU_Telephony. */
String EN_AU_TELEPHONY = "en-AU_Telephony";
/** en-GB. */
String EN_GB = "en-GB";
/** en-GB_BroadbandModel. */
String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
/** en-GB_Multimedia. */
Expand All @@ -52,8 +56,12 @@ public interface ModelId {
String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
/** en-GB_Telephony. */
String EN_GB_TELEPHONY = "en-GB_Telephony";
/** en-IN. */
String EN_IN = "en-IN";
/** en-IN_Telephony. */
String EN_IN_TELEPHONY = "en-IN_Telephony";
/** en-US. */
String EN_US = "en-US";
/** en-US_BroadbandModel. */
String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
/** en-US_Multimedia. */
Expand Down Expand Up @@ -96,6 +104,8 @@ public interface ModelId {
String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
/** es-PE_NarrowbandModel. */
String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
/** fr-CA. */
String FR_CA = "fr-CA";
/** fr-CA_BroadbandModel. */
String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
/** fr-CA_Multimedia. */
Expand All @@ -104,6 +114,8 @@ public interface ModelId {
String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
/** fr-CA_Telephony. */
String FR_CA_TELEPHONY = "fr-CA_Telephony";
/** fr-FR. */
String FR_FR = "fr-FR";
/** fr-FR_BroadbandModel. */
String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
/** fr-FR_Multimedia. */
Expand All @@ -122,6 +134,8 @@ public interface ModelId {
String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
/** it-IT_Telephony. */
String IT_IT_TELEPHONY = "it-IT_Telephony";
/** ja-JP. */
String JA_JP = "ja-JP";
/** ja-JP_BroadbandModel. */
String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
/** ja-JP_Multimedia. */
Expand Down
Loading

0 comments on commit 5cb5238

Please sign in to comment.