From 6c704ee7d9202734d16f30df1e318e1d8fba489c Mon Sep 17 00:00:00 2001
From: Nirupa Anantha Kumar
Date: Mon, 24 Sep 2018 12:31:42 -0700
Subject: [PATCH] samples: Speech GA - library update (#1212)

* Speech GA - library update

speech_transcribe_auto_punctuation
speech_transcribe_enhanced_model

* Speech GA - library update

adding missed resources

* Vision GA - library update

removing duplicates

* Vision GA - library update

removing duplicates

* fixes after review

* Additional samples in GA - model-selection

* Additional samples in GA - model-selection

* Additional samples in GA - model-selection
---
 .../java/com/example/speech/Recognize.java   | 550 ++----
 .../java/com/example/speech/RecognizeIT.java |  87 +--
 2 files changed, 66 insertions(+), 571 deletions(-)

diff --git a/speech/snippets/src/main/java/com/example/speech/Recognize.java b/speech/snippets/src/main/java/com/example/speech/Recognize.java
index 3d6bfaecf29..de05ad84185 100644
--- a/speech/snippets/src/main/java/com/example/speech/Recognize.java
+++ b/speech/snippets/src/main/java/com/example/speech/Recognize.java
@@ -22,24 +22,20 @@
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
-import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
-import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
-import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
-import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
-import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
-import com.google.cloud.speech.v1p1beta1.RecognitionMetadata;
-import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.InteractionType;
-import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.MicrophoneDistance;
-import com.google.cloud.speech.v1p1beta1.RecognitionMetadata.RecordingDeviceType;
-import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
-import com.google.cloud.speech.v1p1beta1.SpeechClient;
-import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
-import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
-import com.google.cloud.speech.v1p1beta1.StreamingRecognitionConfig;
-import com.google.cloud.speech.v1p1beta1.StreamingRecognitionResult;
-import com.google.cloud.speech.v1p1beta1.StreamingRecognizeRequest;
-import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
-import com.google.cloud.speech.v1p1beta1.WordInfo;
+import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.SpeechRecognitionResult;
+import com.google.cloud.speech.v1.StreamingRecognitionConfig;
+import com.google.cloud.speech.v1.StreamingRecognitionResult;
+import com.google.cloud.speech.v1.StreamingRecognizeRequest;
+import com.google.cloud.speech.v1.StreamingRecognizeResponse;
+import com.google.cloud.speech.v1.WordInfo;
import com.google.common.util.concurrent.SettableFuture;
import com.google.protobuf.ByteString;
@@ -67,9 +63,8 @@ public static void main(String... args) throws Exception {
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
- + "\t| wordoffsets | model-selection | auto-punctuation | stream-punctuation \n"
- + "\t| enhanced-model| metadata | diarization | multi-channel | multi-language \n"
- + "\t | word-level-conf"
+ + "\t| wordoffsets | auto-punctuation | stream-punctuation \n"
+ + "\t| enhanced-model | model-selection\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
@@ -101,12 +96,6 @@ public static void main(String... args) throws Exception {
streamingRecognizeFile(path);
} else if (command.equals("micstreamrecognize")) {
streamingMicRecognize();
- } else if (command.equals("model-selection")) {
- if (path.startsWith("gs://")) {
- transcribeModelSelectionGcs(path);
- } else {
- transcribeModelSelection(path);
- }
} else if (command.equals("auto-punctuation")) {
if (path.startsWith("gs://")) {
transcribeGcsWithAutomaticPunctuation(path);
@@ -117,31 +106,11 @@ public static void main(String... args) throws Exception {
streamingTranscribeWithAutomaticPunctuation(path);
} else if (command.equals("enhanced-model")) {
transcribeFileWithEnhancedModel(path);
- } else if (command.equals("metadata")) {
- transcribeFileWithMetadata(path);
- } else if (command.equals("diarization")) {
- if (path.startsWith("gs://")) {
- transcribeDiarizationGcs(path);
- } else {
- transcribeDiarization(path);
- }
- } else if (command.equals("multi-channel")) {
- if (path.startsWith("gs://")) {
- transcribeMultiChannelGcs(path);
- } else {
- transcribeMultiChannel(path);
- }
- } else if (command.equals("multi-language")) {
- if (path.startsWith("gs://")) {
- transcribeMultiLanguageGcs(path);
- } else {
- transcribeMultiLanguage(path);
- }
- } else if (command.equals("word-level-conf")) {
+ } else if (command.equals("model-selection")) {
if (path.startsWith("gs://")) {
- transcribeWordLevelConfidenceGcs(path);
+ transcribeModelSelectionGcs(path);
} else {
- transcribeWordLevelConfidence(path);
+ transcribeModelSelection(path);
}
}
}
@@ -477,87 +446,6 @@ public SettableFuture<List<T>> future() {
}
// [END speech_transcribe_streaming]
- // [START speech_transcribe_model_selection_beta]
- /**
- * Performs transcription of the given audio file synchronously with the selected model.
- *
- * @param fileName the path to a audio file to transcribe
- */
- public static void transcribeModelSelection(String fileName) throws Exception {
- Path path = Paths.get(fileName);
- byte[] content = Files.readAllBytes(path);
-
- try (SpeechClient speech = SpeechClient.create()) {
- // Configure request with video media type
- RecognitionConfig recConfig =
- RecognitionConfig.newBuilder()
- // encoding may either be omitted or must match the value in the file header
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- // sample rate hertz may be either be omitted or must match the value in the file
- // header
- .setSampleRateHertz(16000)
- .setModel("video")
- .build();
-
- RecognitionAudio recognitionAudio =
- RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
-
- RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
- // Just print the first result here.
- SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
- System.out.printf("Transcript : %s\n", alternative.getTranscript());
- }
- // [END speech_transcribe_model_selection_beta]
- }
-
- // [START speech_transcribe_model_selection_gcs_beta]
- /**
- * Performs transcription of the remote audio file asynchronously with the selected model.
- *
- * @param gcsUri the path to the remote audio file to transcribe.
- */
- public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
- try (SpeechClient speech = SpeechClient.create()) {
-
- // Configure request with video media type
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- // encoding may either be omitted or must match the value in the file header
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- // sample rate hertz may be either be omitted or must match the value in the file
- // header
- .setSampleRateHertz(16000)
- .setModel("video")
- .build();
-
- RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
-
- // Use non-blocking call for getting file transcription
- OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
- speech.longRunningRecognizeAsync(config, audio);
-
- while (!response.isDone()) {
- System.out.println("Waiting for response...");
- Thread.sleep(10000);
- }
-
- List<SpeechRecognitionResult> results = response.get().getResultsList();
-
- // Just print the first result here.
- SpeechRecognitionResult result = results.get(0);
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
- System.out.printf("Transcript : %s\n", alternative.getTranscript());
- }
- // [END speech_transcribe_model_selection_gcs_beta]
- }
-
// [START speech_sync_recognize_punctuation]
/**
* Performs transcription with automatic punctuation on raw PCM audio data.
@@ -598,7 +486,7 @@ public static void transcribeFileWithAutomaticPunctuation(String fileName) throw
}
// [END speech_sync_recognize_punctuation]
- // [START speech_transcribe_auto_punctuation_beta]
+ // [START speech_transcribe_auto_punctuation]
/**
* Performs transcription on remote FLAC file and prints the transcription.
*
@@ -638,7 +526,7 @@ public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws E
System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
}
- // [END speech_transcribe_auto_punctuation_beta]
+ // [END speech_transcribe_auto_punctuation]
// [START speech_stream_recognize_punctuation]
/**
@@ -820,7 +708,7 @@ public void onError(Throwable t) {
}
// [END speech_transcribe_streaming_mic]
- // [START speech_transcribe_enhanced_model_beta]
+ // [START speech_transcribe_enhanced_model]
/**
* Transcribe the given audio file using an enhanced model.
*
@@ -860,410 +748,86 @@ public static void transcribeFileWithEnhancedModel(String fileName) throws Excep
}
}
}
- // [END speech_transcribe_enhanced_model_beta]
+ // [END speech_transcribe_enhanced_model]

- // [START speech_transcribe_recognition_metadata_beta]
+ // [START speech_transcribe_model_selection]
/**
- * Transcribe the given audio file and include recognition metadata in the request.
- *
- * @param fileName the path to an audio file.
- */
- public static void transcribeFileWithMetadata(String fileName) throws Exception {
- Path path = Paths.get(fileName);
- byte[] content = Files.readAllBytes(path);
-
- try (SpeechClient speechClient = SpeechClient.create()) {
- // Get the contents of the local audio file
- RecognitionAudio recognitionAudio =
- RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
-
- // Construct a recognition metadata object.
- // Most metadata fields are specified as enums that can be found
- // in speech.enums.RecognitionMetadata
- RecognitionMetadata metadata =
- RecognitionMetadata.newBuilder()
- .setInteractionType(InteractionType.DISCUSSION)
- .setMicrophoneDistance(MicrophoneDistance.NEARFIELD)
- .setRecordingDeviceType(RecordingDeviceType.SMARTPHONE)
- .setRecordingDeviceName("Pixel 2 XL") // Some metadata fields are free form strings
- // And some are integers, for instance the 6 digit NAICS code
- // https://www.naics.com/search/
- .setIndustryNaicsCodeOfAudio(519190)
- .build();
-
- // Configure request to enable enhanced models
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- .setSampleRateHertz(8000)
- .setMetadata(metadata) // Add the metadata to the config
- .build();
-
- // Perform the transcription request
- RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
-
- // Print out the results
- for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- System.out.format("Transcript: %s\n\n", alternative.getTranscript());
- }
- }
- }
- // [END speech_transcribe_recognition_metadata_beta]
-
- // [START speech_transcribe_diarization_beta]
- /**
- * Transcribe the given audio file using speaker diarization.
+ * Performs transcription of the given audio file synchronously with the selected model.
*
- * @param fileName the path to an audio file.
+ * @param fileName the path to an audio file to transcribe
*/
- public static void transcribeDiarization(String fileName) throws Exception {
+ public static void transcribeModelSelection(String fileName) throws Exception {
Path path = Paths.get(fileName);
byte[] content = Files.readAllBytes(path);
- try (SpeechClient speechClient = SpeechClient.create()) {
- // Get the contents of the local audio file
- RecognitionAudio recognitionAudio =
- RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
-
- // Configure request to enable Speaker diarization
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- .setSampleRateHertz(8000)
- .setEnableSpeakerDiarization(true)
- .setDiarizationSpeakerCount(2)
- .build();
-
- // Perform the transcription request
- RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
-
- // Print out the results
- for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just
- // use the first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- System.out.format("Transcript : %s\n", alternative.getTranscript());
- // The words array contains the entire transcript up until that point.
- // Referencing the last spoken word to get the associated Speaker tag
- System.out.format(
- "Speaker Tag %s: %s\n",
- alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
- alternative.getTranscript());
- }
- }
- }
- // [END speech_transcribe_diarization_beta]
-
- // [START speech_transcribe_diarization_gcs_beta]
- /**
- * Transcribe a remote audio file using speaker diarization.
- *
- * @param gcsUri the path to an audio file.
- */
- public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
- try (SpeechClient speechClient = SpeechClient.create()) {
- // Configure request to enable Speaker diarization
- RecognitionConfig config =
+ try (SpeechClient speech = SpeechClient.create()) {
+ // Configure request with video media type
+ RecognitionConfig recConfig =
RecognitionConfig.newBuilder()
+ // encoding may either be omitted or must match the value in the file header
.setEncoding(AudioEncoding.LINEAR16)
.setLanguageCode("en-US")
- .setSampleRateHertz(8000)
- .setEnableSpeakerDiarization(true)
- .setDiarizationSpeakerCount(2)
+ // sample rate hertz may either be omitted or must match the value in the file
+ // header
+ .setSampleRateHertz(16000)
+ .setModel("video")
.build();
- // Set the remote path for the audio file
- RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
-
- // Use non-blocking call for getting file transcription
- OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
- speechClient.longRunningRecognizeAsync(config, audio);
-
- while (!response.isDone()) {
- System.out.println("Waiting for response...");
- Thread.sleep(10000);
- }
-
- for (SpeechRecognitionResult result : response.get().getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just
- // use the first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- // The words array contains the entire transcript up until that point.
- // Referencing the last spoken word to get the associated Speaker tag
- System.out.format(
- "Speaker Tag %s:%s\n",
- alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
- alternative.getTranscript());
- }
- }
- }
-
- // [END speech_transcribe_diarization_gcs_beta]
-
- // [START speech_transcribe_multichannel_beta]
-
- /**
- * Transcribe a local audio file with multi-channel recognition
- *
- * @param fileName the path to local audio file
- */
- public static void transcribeMultiChannel(String fileName) throws Exception {
- Path path = Paths.get(fileName);
- byte[] content = Files.readAllBytes(path);
-
- try (SpeechClient speechClient = SpeechClient.create()) {
- // Get the contents of the local audio file
RecognitionAudio recognitionAudio =
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
- // Configure request to enable multiple channels
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- .setSampleRateHertz(44100)
- .setAudioChannelCount(2)
- .setEnableSeparateRecognitionPerChannel(true)
- .build();
-
- // Perform the transcription request
- RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
-
- // Print out the results
- for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- System.out.format("Transcript : %s\n", alternative.getTranscript());
- System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
- }
- }
- }
- // [END speech_transcribe_multichannel_beta]
-
- // [START speech_transcribe_multichannel_gcs_beta]
-
- /**
- * Transcribe a remote audio file with multi-channel recognition
- *
- * @param gcsUri the path to the audio file
- */
- public static void transcribeMultiChannelGcs(String gcsUri) throws Exception {
-
- try (SpeechClient speechClient = SpeechClient.create()) {
-
- // Configure request to enable multiple channels
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setLanguageCode("en-US")
- .setSampleRateHertz(44100)
- .setAudioChannelCount(2)
- .setEnableSeparateRecognitionPerChannel(true)
- .build();
-
- // Set the remote path for the audio file
- RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
-
- // Use non-blocking call for getting file transcription
- OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
- speechClient.longRunningRecognizeAsync(config, audio);
-
- while (!response.isDone()) {
- System.out.println("Waiting for response...");
- Thread.sleep(10000);
- }
+ RecognizeResponse recognizeResponse = speech.recognize(recConfig, recognitionAudio);
// Just print the first result here.
- for (SpeechRecognitionResult result : response.get().getResultsList()) {
-
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
-
- // Print out the result
- System.out.printf("Transcript : %s\n", alternative.getTranscript());
- System.out.printf("Channel Tag : %s\n\n", result.getChannelTag());
- }
- }
- }
- // [END speech_transcribe_multichannel_gcs_beta]
-
- // [START speech_transcribe_multilanguage_beta]
-
- /**
- * Transcribe a local audio file with multi-language recognition
- *
- * @param fileName the path to the audio file
- */
- public static void transcribeMultiLanguage(String fileName) throws Exception {
- Path path = Paths.get(fileName);
- // Get the contents of the local audio file
- byte[] content = Files.readAllBytes(path);
-
- try (SpeechClient speechClient = SpeechClient.create()) {
-
- RecognitionAudio recognitionAudio =
- RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
- ArrayList<String> languageList = new ArrayList<>();
- languageList.add("es-ES");
- languageList.add("en-US");
-
- // Configure request to enable multiple languages
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setSampleRateHertz(16000)
- .setLanguageCode("ja-JP")
- .addAllAlternativeLanguageCodes(languageList)
- .build();
- // Perform the transcription request
- RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
-
- // Print out the results
- for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- System.out.format("Transcript : %s\n\n", alternative.getTranscript());
- }
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
}
}
- // [END speech_transcribe_multilanguage_beta]
-
- // [START speech_transcribe_multilanguage_gcs_beta]
+ // [END speech_transcribe_model_selection]

+ // [START speech_transcribe_model_selection_gcs]
/**
- * Transcribe a remote audio file with multi-language recognition
+ * Performs transcription of the remote audio file asynchronously with the selected model.
*
- * @param gcsUri the path to the remote audio file
+ * @param gcsUri the path to the remote audio file to transcribe.
*/
- public static void transcribeMultiLanguageGcs(String gcsUri) throws Exception {
- try (SpeechClient speechClient = SpeechClient.create()) {
-
- ArrayList<String> languageList = new ArrayList<>();
- languageList.add("es-ES");
- languageList.add("en-US");
+ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
+ try (SpeechClient speech = SpeechClient.create()) {

- // Configure request to enable multiple languages
+ // Configure request with video media type
RecognitionConfig config =
RecognitionConfig.newBuilder()
+ // encoding may either be omitted or must match the value in the file header
.setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ // sample rate hertz may either be omitted or must match the value in the file
+ // header
.setSampleRateHertz(16000)
- .setLanguageCode("ja-JP")
- .addAllAlternativeLanguageCodes(languageList)
+ .setModel("video")
.build();
- // Set the remote path for the audio file
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
// Use non-blocking call for getting file transcription
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
- speechClient.longRunningRecognizeAsync(config, audio);
+ speech.longRunningRecognizeAsync(config, audio);
while (!response.isDone()) {
System.out.println("Waiting for response...");
Thread.sleep(10000);
}
- for (SpeechRecognitionResult result : response.get().getResultsList()) {
-
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
-
- // Print out the result
- System.out.printf("Transcript : %s\n\n", alternative.getTranscript());
- }
- }
- }
- // [END speech_transcribe_multilanguage_gcs_beta]
-
- // [START speech_transcribe_word_level_confidence_beta]
-
- /**
- * Transcribe a local audio file with word level confidence
- *
- * @param fileName the path to the local audio file
- */
- public static void transcribeWordLevelConfidence(String fileName) throws Exception {
- Path path = Paths.get(fileName);
- byte[] content = Files.readAllBytes(path);
-
- try (SpeechClient speechClient = SpeechClient.create()) {
- RecognitionAudio recognitionAudio =
- RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
- // Configure request to enable word level confidence
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.LINEAR16)
- .setSampleRateHertz(16000)
- .setLanguageCode("en-US")
- .setEnableWordConfidence(true)
- .build();
- // Perform the transcription request
- RecognizeResponse recognizeResponse = speechClient.recognize(config, recognitionAudio);
-
- // Print out the results
- for (SpeechRecognitionResult result : recognizeResponse.getResultsList()) {
- // There can be several alternative transcripts for a given chunk of speech. Just use the
- // first (most likely) one here.
- SpeechRecognitionAlternative alternative = result.getAlternatives(0);
- System.out.format("Transcript : %s\n", alternative.getTranscript());
- System.out.format(
- "First Word and Confidence : %s %s \n",
- alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
- }
- }
- }
- // [END speech_transcribe_word_level_confidence_beta]
-
- // [START speech_transcribe_word_level_confidence_gcs_beta]
-
- /**
- * Transcribe a remote audio file with word level confidence
- *
- * @param gcsUri path to the remote audio file
- */
- public static void transcribeWordLevelConfidenceGcs(String gcsUri) throws Exception {
- try (SpeechClient speechClient = SpeechClient.create()) {
-
- // Configure request to enable word level confidence
- RecognitionConfig config =
- RecognitionConfig.newBuilder()
- .setEncoding(AudioEncoding.FLAC)
- .setSampleRateHertz(16000)
- .setLanguageCode("en-US")
- .setEnableWordConfidence(true)
- .build();
-
- // Set the remote path for the audio file
- RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
-
- // Use non-blocking call for getting file transcription
- OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
- speechClient.longRunningRecognizeAsync(config, audio);
+ List<SpeechRecognitionResult> results = response.get().getResultsList();
- while (!response.isDone()) {
- System.out.println("Waiting for response...");
- Thread.sleep(10000);
- }
// Just print the first result here.
- SpeechRecognitionResult result = response.get().getResultsList().get(0);
-
+ SpeechRecognitionResult result = results.get(0);
// There can be several alternative transcripts for a given chunk of speech. Just use the
// first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
- // Print out the result
System.out.printf("Transcript : %s\n", alternative.getTranscript());
- System.out.format(
- "First Word and Confidence : %s %s \n",
- alternative.getWords(0).getWord(), alternative.getWords(0).getConfidence());
}
}
- // [END speech_transcribe_word_level_confidence_gcs_beta]
+ // [END speech_transcribe_model_selection_gcs]
}
diff --git a/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java b/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java
index 2a36ac3922a..3bc3d5f1611 100644
--- a/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java
+++ b/speech/snippets/src/test/java/com/example/speech/RecognizeIT.java
@@ -37,17 +37,13 @@ public class RecognizeIT {
// The path to the audio file to transcribe
private String audioFileName = "./resources/audio.raw";
- private String multiChannelAudioFileName = "./resources/commercial_stereo.wav";
- private String gcsMultiChannelAudioPath = "gs://" + BUCKET + "/speech/commercial_stereo.wav";
private String gcsAudioPath = "gs://" + BUCKET + "/speech/brooklyn.flac";
- private String gcsDiarizationAudioPath = "gs://" + BUCKET + "/speech/commercial_mono.wav";
+ private String recognitionAudioFile = "./resources/commercial_mono.wav";
// The path to the video file to transcribe
private String videoFileName = "./resources/Google_Gnome.wav";
private String gcsVideoPath = "gs://" + BUCKET + "/speech/Google_Gnome.wav";
- private String recognitionAudioFile = "./resources/commercial_mono.wav";
-
@Before
public void setUp() {
bout = new ByteArrayOutputStream();
@@ -111,22 +107,6 @@ public void testStreamRecognize() throws Exception {
assertThat(got).contains("how old is the Brooklyn Bridge");
}
- @Test
- public void testModelSelection() throws Exception {
- Recognize.transcribeModelSelection(videoFileName);
- String got = bout.toString();
- assertThat(got).contains("OK Google");
- assertThat(got).contains("the weather outside is sunny");
- }
-
- @Test
- public void testGcsModelSelection() throws Exception {
- Recognize.transcribeModelSelectionGcs(gcsVideoPath);
- String got = bout.toString();
- assertThat(got).contains("OK Google");
- assertThat(got).contains("the weather outside is sunny");
- }
-
@Test
public void testAutoPunctuation() throws Exception {
Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
@@ -156,67 +136,18 @@ public void testEnhancedModel() throws Exception {
}
@Test
- public void testMetadata() throws Exception {
- Recognize.transcribeFileWithMetadata(recognitionAudioFile);
- String got = bout.toString();
- assertThat(got).contains("Chrome");
- }
-
- @Test
- public void testTranscribeDiarization() throws Exception {
- Recognize.transcribeDiarization(recognitionAudioFile);
- String got = bout.toString();
- assertThat(got).contains("Speaker Tag 2:");
- }
-
- @Test
- public void testTranscribeDiarizationGcs() throws Exception {
- Recognize.transcribeDiarizationGcs(gcsDiarizationAudioPath);
- String got = bout.toString();
- assertThat(got).contains("Speaker Tag 2:");
- }
-
- @Test
- public void testTranscribeMultiChannel() throws Exception {
- Recognize.transcribeMultiChannel(multiChannelAudioFileName);
- String got = bout.toString();
- assertThat(got).contains("Channel Tag : 1");
- }
-
- @Test
- public void testTranscribeMultiChannelGcs() throws Exception {
- Recognize.transcribeMultiChannelGcs(gcsMultiChannelAudioPath);
- String got = bout.toString();
- assertThat(got).contains("Channel Tag : 1");
- }
-
- @Test
- public void testTranscribeMultiLanguage() throws Exception {
- Recognize.transcribeMultiLanguage(videoFileName);
- String got = bout.toString();
- assertThat(got).contains("Transcript : OK Google");
- }
-
- @Test
- public void testTranscribeMultiLanguageGcs() throws Exception {
- Recognize.transcribeMultiLanguageGcs(gcsVideoPath);
- String got = bout.toString();
- assertThat(got).contains("Transcript : OK Google");
- }
-
- @Test
- public void testTranscribeWordLevelConfidence() throws Exception {
- Recognize.transcribeWordLevelConfidence(audioFileName);
+ public void testModelSelection() throws Exception {
+ Recognize.transcribeModelSelection(videoFileName);
String got = bout.toString();
- assertThat(got).contains("Transcript : how old is the Brooklyn Bridge");
- assertThat(got).contains("First Word and Confidence : how");
+ assertThat(got).contains("OK Google");
+ assertThat(got).contains("the weather outside is sunny");
}
@Test
- public void testTranscribeWordLevelConfidenceGcs() throws Exception {
- Recognize.transcribeWordLevelConfidenceGcs(gcsAudioPath);
+ public void testGcsModelSelection() throws Exception {
+ Recognize.transcribeModelSelectionGcs(gcsVideoPath);
String got = bout.toString();
- assertThat(got).contains("Transcript : how old is the Brooklyn Bridge");
- assertThat(got).contains("First Word and Confidence : how");
+ assertThat(got).contains("OK Google");
+ assertThat(got).contains("the weather outside is sunny");
}
}