From 669635661a4eb81847e448d40bb8181509beada1 Mon Sep 17 00:00:00 2001 From: Angelo Paparazzi Date: Thu, 17 Oct 2024 10:55:36 -0500 Subject: [PATCH] feat(stt): readd interimResults and lowLatency wss params --- .../model/RecognizeWithWebsocketsOptions.java | 69 +++++++++++++++++++ .../speech_to_text/v1/SpeechToTextIT.java | 1 + 2 files changed, 70 insertions(+) diff --git a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java index bcc2129eae..6d6f85a517 100644 --- a/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java +++ b/speech-to-text/src/main/java/com/ibm/watson/speech_to_text/v1/model/RecognizeWithWebsocketsOptions.java @@ -201,7 +201,9 @@ public interface Model { protected Boolean splitTranscriptAtPhraseEnd; protected Float speechDetectorSensitivity; protected Float backgroundAudioSuppression; + protected Boolean lowLatency; protected Float characterInsertionBias; + private Boolean interimResults; private Boolean processingMetrics; private Float processingMetricsInterval; @@ -232,7 +234,9 @@ public static class Builder { private Boolean splitTranscriptAtPhraseEnd; private Float speechDetectorSensitivity; private Float backgroundAudioSuppression; + private Boolean lowLatency; private Float characterInsertionBias; + private Boolean interimResults; private Boolean processingMetrics; private Float processingMetricsInterval; @@ -262,7 +266,9 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) { this.splitTranscriptAtPhraseEnd = recognizeWithWebsocketsOptions.splitTranscriptAtPhraseEnd; this.speechDetectorSensitivity = recognizeWithWebsocketsOptions.speechDetectorSensitivity; this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression; + this.lowLatency = recognizeWithWebsocketsOptions.lowLatency; 
this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias; + this.interimResults = recognizeWithWebsocketsOptions.interimResults; this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics; this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval; } @@ -578,6 +584,17 @@ public Builder backgroundAudioSuppression(Float backgroundAudioSuppression) { return this; } + /** + * Set the lowLatency. + * + * @param lowLatency the lowLatency + * @return the RecognizeWithWebsocketsOptions builder + */ + public Builder lowLatency(Boolean lowLatency) { + this.lowLatency = lowLatency; + return this; + } + /** * Set the characterInsertionBias. * @@ -589,6 +606,19 @@ public Builder characterInsertionBias(Float characterInsertionBias) { return this; } + /** + * Set the interimResults. + * + *

NOTE: This parameter only works for the `recognizeUsingWebSocket` method. + * + * @param interimResults the interimResults + * @return the RecognizeWithWebsocketsOptions builder + */ + public Builder interimResults(Boolean interimResults) { + this.interimResults = interimResults; + return this; + } + /** * Set the audio. * @@ -655,7 +685,9 @@ protected RecognizeWithWebsocketsOptions(Builder builder) { splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd; speechDetectorSensitivity = builder.speechDetectorSensitivity; backgroundAudioSuppression = builder.backgroundAudioSuppression; + lowLatency = builder.lowLatency; characterInsertionBias = builder.characterInsertionBias; + interimResults = builder.interimResults; processingMetrics = builder.processingMetrics; processingMetricsInterval = builder.processingMetricsInterval; } @@ -1091,6 +1123,28 @@ public Float backgroundAudioSuppression() { return backgroundAudioSuppression; } + /** + * Gets the lowLatency. + * + *

If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, + * directs the service to produce results even more quickly than it usually does. Next-generation + * models produce transcription results faster than previous-generation models. The `low_latency` + * parameter causes the models to produce results even more quickly, though the results might be + * less accurate when the parameter is used. + * + *

The parameter is not available for previous-generation `Broadband` and `Narrowband` models. + * It is available for most next-generation models. * For a list of next-generation models that + * support low latency, see [Supported next-generation language + * models](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-models-ng#models-ng-supported). + * * For more information about the `low_latency` parameter, see [Low + * latency](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-interim#low-latency). + * + * @return the lowLatency + */ + public Boolean lowLatency() { + return lowLatency; + } + /** * Gets the characterInsertionBias. * @@ -1122,6 +1176,21 @@ public Float characterInsertionBias() { return characterInsertionBias; } + /** + * Gets the interimResults. + * + *

If `true`, the service returns interim results as a stream of `SpeechRecognitionResults` + * objects. By default, the service returns a single `SpeechRecognitionResults` object with final + * results only. + * + *

NOTE: This parameter only works for the `recognizeUsingWebSocket` method. + * + * @return the interimResults + */ + public Boolean interimResults() { + return interimResults; + } + /** * Gets the processingMetrics. * diff --git a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java index 076c7c42a7..56068190a6 100755 --- a/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java +++ b/speech-to-text/src/test/java/com/ibm/watson/speech_to_text/v1/SpeechToTextIT.java @@ -347,6 +347,7 @@ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedEx .wordAlternativesThreshold(0.5f) .model(EN_BROADBAND16K) .contentType(HttpMediaType.AUDIO_WAV) + .interimResults(true) .processingMetrics(true) .processingMetricsInterval(0.2f) .audioMetrics(true)