feat(stt): add speechBeginEvent param to recognize func

watson-developer-cloud · May 20, 2024 · 5cb5238
1 parent 45ec51d
commit 5cb5238
Show file tree

Hide file tree

Showing 9 changed files with 260 additions and 112 deletions.
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/SpeechToText.java
diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/Corpus.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/Corpus.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2016, 2023.
+ * (C) Copyright IBM Corp. 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -73,8 +73,9 @@ public Long getTotalWords() {
   /**
    * Gets the outOfVocabularyWords.
    *
-   * <p>_For custom models that are based on previous-generation models_, the number of OOV words
-   * extracted from the corpus. The value is `0` while the corpus is being processed.
+   * <p>_For custom models that are based on large speech models and previous-generation models_,
+   * the number of OOV words extracted from the corpus. The value is `0` while the corpus is being
+   * processed.
    *
    * <p>_For custom models that are based on next-generation models_, no OOV words are extracted
    * from corpora, so the value is always `0`.

diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateJobOptions.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2018, 2024.
+ * (C) Copyright IBM Corp. 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -51,6 +51,8 @@ public interface Model {
     String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
     /** de-DE_Telephony. */
     String DE_DE_TELEPHONY = "de-DE_Telephony";
+    /** en-AU. */
+    String EN_AU = "en-AU";
     /** en-AU_BroadbandModel. */
     String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
     /** en-AU_Multimedia. */
@@ -59,8 +61,12 @@ public interface Model {
     String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
     /** en-AU_Telephony. */
     String EN_AU_TELEPHONY = "en-AU_Telephony";
+    /** en-IN. */
+    String EN_IN = "en-IN";
     /** en-IN_Telephony. */
     String EN_IN_TELEPHONY = "en-IN_Telephony";
+    /** en-GB. */
+    String EN_GB = "en-GB";
     /** en-GB_BroadbandModel. */
     String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
     /** en-GB_Multimedia. */
@@ -69,6 +75,8 @@ public interface Model {
     String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
     /** en-GB_Telephony. */
     String EN_GB_TELEPHONY = "en-GB_Telephony";
+    /** en-US. */
+    String EN_US = "en-US";
     /** en-US_BroadbandModel. */
     String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
     /** en-US_Multimedia. */
@@ -111,6 +119,8 @@ public interface Model {
     String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
     /** es-PE_NarrowbandModel. */
     String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
+    /** fr-CA. */
+    String FR_CA = "fr-CA";
     /** fr-CA_BroadbandModel. */
     String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
     /** fr-CA_Multimedia. */
@@ -119,6 +129,8 @@ public interface Model {
     String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
     /** fr-CA_Telephony. */
     String FR_CA_TELEPHONY = "fr-CA_Telephony";
+    /** fr-FR. */
+    String FR_FR = "fr-FR";
     /** fr-FR_BroadbandModel. */
     String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
     /** fr-FR_Multimedia. */
@@ -137,6 +149,8 @@ public interface Model {
     String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
     /** it-IT_Telephony. */
     String IT_IT_TELEPHONY = "it-IT_Telephony";
+    /** ja-JP. */
+    String JA_JP = "ja-JP";
     /** ja-JP_BroadbandModel. */
     String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
     /** ja-JP_Multimedia. */
@@ -952,9 +966,9 @@ public String baseModelVersion() {
    * custom language model compared to those from the base model for the current request.
    *
    * <p>Specify a value between 0.0 and 1.0. Unless a different customization weight was specified
-   * for the custom model when the model was trained, the default value is: * 0.3 for
-   * previous-generation models * 0.2 for most next-generation models * 0.1 for next-generation
-   * English and Japanese models
+   * for the custom model when the model was trained, the default value is: * 0.5 for large speech
+   * models * 0.3 for previous-generation models * 0.2 for most next-generation models * 0.1 for
+   * next-generation English and Japanese models
    *
    * <p>A customization weight that you specify overrides a weight that was specified when the
    * custom model was trained. The default value yields the best performance in general. Assign a
@@ -1117,8 +1131,8 @@ public Boolean smartFormatting() {
   /**
    * Gets the smartFormattingVersion.
    *
-   * <p>Smart formatting version is for next-generation models and that is supported in US English,
-   * Brazilian Portuguese, French and German languages.
+   * <p>Smart formatting version for large speech models and next-generation models is supported in
+   * US English, Brazilian Portuguese, French, German, Spanish and French Canadian languages.
    *
    * @return the smartFormattingVersion
    */
@@ -1135,8 +1149,8 @@ public Long smartFormattingVersion() {
    * of whether you specify `false` for the parameter. * _For previous-generation models,_ the
    * parameter can be used with Australian English, US English, German, Japanese, Korean, and
    * Spanish (both broadband and narrowband models) and UK English (narrowband model) transcription
-   * only. * _For next-generation models,_ the parameter can be used with Czech, English
-   * (Australian, Indian, UK, and US), German, Japanese, Korean, and Spanish transcription only.
+   * only. * _For large speech models and next-generation models,_ the parameter can be used with
+   * all available languages.
    *
    * <p>See [Speaker
    * labels](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-speaker-labels).
@@ -1310,8 +1324,8 @@ public Boolean splitTranscriptAtPhraseEnd() {
    * <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
    * (for example, `0.55`) is typically more than sufficient.
    *
-   * <p>The parameter is supported with all next-generation models and with most previous-generation
-   * models. See [Speech detector
+   * <p>The parameter is supported with all large speech models and next-generation models, and
+   * with most previous-generation models. See [Speech detector
    * sensitivity](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-sensitivity)
    * and [Language model
    * support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
@@ -1336,8 +1350,8 @@ public Float speechDetectorSensitivity() {
    * <p>The values increase on a monotonic curve. Specifying one or two decimal places of precision
    * (for example, `0.55`) is typically more than sufficient.
    *
-   * <p>The parameter is supported with all next-generation models and with most previous-generation
-   * models. See [Background audio
+   * <p>The parameter is supported with all large speech models and next-generation models, and
+   * with most previous-generation models. See [Background audio
    * suppression](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-parameters-suppression)
    * and [Language model
    * support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support).
@@ -1357,9 +1371,9 @@ public Float backgroundAudioSuppression() {
    * parameter causes the models to produce results even more quickly, though the results might be
    * less accurate when the parameter is used.
    *
-   * <p>The parameter is not available for previous-generation `Broadband` and `Narrowband` models.
-   * It is available for most next-generation models. * For a list of next-generation models that
-   * support low latency, see [Supported next-generation language
+   * <p>The parameter is not available for large speech models and previous-generation `Broadband`
+   * and `Narrowband` models. It is available for most next-generation models. * For a list of
+   * next-generation models that support low latency, see [Supported next-generation language
    * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported).
    * * For more information about the `low_latency` parameter, see [Low
    * latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency).
@@ -1373,9 +1387,10 @@ public Boolean lowLatency() {
   /**
    * Gets the characterInsertionBias.
    *
-   * <p>For next-generation models, an indication of whether the service is biased to recognize
-   * shorter or longer strings of characters when developing transcription hypotheses. By default,
-   * the service is optimized to produce the best balance of strings of different lengths.
+   * <p>For large speech models and next-generation models, an indication of whether the service is
+   * biased to recognize shorter or longer strings of characters when developing transcription
+   * hypotheses. By default, the service is optimized to produce the best balance of strings of
+   * different lengths.
    *
    * <p>The default bias is 0.0. The allowable range of values is -1.0 to 1.0. * Negative values
    * bias the service to favor hypotheses with shorter strings of characters. * Positive values bias

diff --git a/...text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateLanguageModelOptions.java b/...text/src/main/java/com/ibm/watson/speech_to_text/v1/model/CreateLanguageModelOptions.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2018, 2023.
+ * (C) Copyright IBM Corp. 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -39,6 +39,8 @@ public interface BaseModelName {
     String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
     /** de-DE_Telephony. */
     String DE_DE_TELEPHONY = "de-DE_Telephony";
+    /** en-AU. */
+    String EN_AU = "en-AU";
     /** en-AU_BroadbandModel. */
     String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
     /** en-AU_Multimedia. */
@@ -47,6 +49,8 @@ public interface BaseModelName {
     String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
     /** en-AU_Telephony. */
     String EN_AU_TELEPHONY = "en-AU_Telephony";
+    /** en-GB. */
+    String EN_GB = "en-GB";
     /** en-GB_BroadbandModel. */
     String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
     /** en-GB_Multimedia. */
@@ -55,8 +59,12 @@ public interface BaseModelName {
     String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
     /** en-GB_Telephony. */
     String EN_GB_TELEPHONY = "en-GB_Telephony";
+    /** en-IN. */
+    String EN_IN = "en-IN";
     /** en-IN_Telephony. */
     String EN_IN_TELEPHONY = "en-IN_Telephony";
+    /** en-US. */
+    String EN_US = "en-US";
     /** en-US_BroadbandModel. */
     String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
     /** en-US_Multimedia. */
@@ -99,6 +107,8 @@ public interface BaseModelName {
     String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
     /** es-PE_NarrowbandModel. */
     String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
+    /** fr-CA. */
+    String FR_CA = "fr-CA";
     /** fr-CA_BroadbandModel. */
     String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
     /** fr-CA_Multimedia. */
@@ -107,6 +117,8 @@ public interface BaseModelName {
     String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
     /** fr-CA_Telephony. */
     String FR_CA_TELEPHONY = "fr-CA_Telephony";
+    /** fr-FR. */
+    String FR_FR = "fr-FR";
     /** fr-FR_BroadbandModel. */
     String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
     /** fr-FR_Multimedia. */
@@ -125,6 +137,8 @@ public interface BaseModelName {
     String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
     /** it-IT_Telephony. */
     String IT_IT_TELEPHONY = "it-IT_Telephony";
+    /** ja-JP. */
+    String JA_JP = "ja-JP";
     /** ja-JP_BroadbandModel. */
     String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
     /** ja-JP_Multimedia. */

diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/GetModelOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/GetModelOptions.java
@@ -1,5 +1,5 @@
 /*
- * (C) Copyright IBM Corp. 2018, 2023.
+ * (C) Copyright IBM Corp. 2024.
  *
  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
  * the License. You may obtain a copy of the License at
@@ -36,6 +36,8 @@ public interface ModelId {
     String DE_DE_NARROWBANDMODEL = "de-DE_NarrowbandModel";
     /** de-DE_Telephony. */
     String DE_DE_TELEPHONY = "de-DE_Telephony";
+    /** en-AU. */
+    String EN_AU = "en-AU";
     /** en-AU_BroadbandModel. */
     String EN_AU_BROADBANDMODEL = "en-AU_BroadbandModel";
     /** en-AU_Multimedia. */
@@ -44,6 +46,8 @@ public interface ModelId {
     String EN_AU_NARROWBANDMODEL = "en-AU_NarrowbandModel";
     /** en-AU_Telephony. */
     String EN_AU_TELEPHONY = "en-AU_Telephony";
+    /** en-GB. */
+    String EN_GB = "en-GB";
     /** en-GB_BroadbandModel. */
     String EN_GB_BROADBANDMODEL = "en-GB_BroadbandModel";
     /** en-GB_Multimedia. */
@@ -52,8 +56,12 @@ public interface ModelId {
     String EN_GB_NARROWBANDMODEL = "en-GB_NarrowbandModel";
     /** en-GB_Telephony. */
     String EN_GB_TELEPHONY = "en-GB_Telephony";
+    /** en-IN. */
+    String EN_IN = "en-IN";
     /** en-IN_Telephony. */
     String EN_IN_TELEPHONY = "en-IN_Telephony";
+    /** en-US. */
+    String EN_US = "en-US";
     /** en-US_BroadbandModel. */
     String EN_US_BROADBANDMODEL = "en-US_BroadbandModel";
     /** en-US_Multimedia. */
@@ -96,6 +104,8 @@ public interface ModelId {
     String ES_PE_BROADBANDMODEL = "es-PE_BroadbandModel";
     /** es-PE_NarrowbandModel. */
     String ES_PE_NARROWBANDMODEL = "es-PE_NarrowbandModel";
+    /** fr-CA. */
+    String FR_CA = "fr-CA";
     /** fr-CA_BroadbandModel. */
     String FR_CA_BROADBANDMODEL = "fr-CA_BroadbandModel";
     /** fr-CA_Multimedia. */
@@ -104,6 +114,8 @@ public interface ModelId {
     String FR_CA_NARROWBANDMODEL = "fr-CA_NarrowbandModel";
     /** fr-CA_Telephony. */
     String FR_CA_TELEPHONY = "fr-CA_Telephony";
+    /** fr-FR. */
+    String FR_FR = "fr-FR";
     /** fr-FR_BroadbandModel. */
     String FR_FR_BROADBANDMODEL = "fr-FR_BroadbandModel";
     /** fr-FR_Multimedia. */
@@ -122,6 +134,8 @@ public interface ModelId {
     String IT_IT_MULTIMEDIA = "it-IT_Multimedia";
     /** it-IT_Telephony. */
     String IT_IT_TELEPHONY = "it-IT_Telephony";
+    /** ja-JP. */
+    String JA_JP = "ja-JP";
     /** ja-JP_BroadbandModel. */
     String JA_JP_BROADBANDMODEL = "ja-JP_BroadbandModel";
     /** ja-JP_Multimedia. */