From 52c515a88a53dd32840b0eb986c2d810c852d2c3 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Mon, 2 Apr 2018 08:47:29 -0700 Subject: [PATCH 1/3] Add Auto-Punctuation samples to speech --- speech/cloud-client/README.md | 16 ++ speech/cloud-client/pom.xml | 2 +- .../java/com/example/speech/Recognize.java | 179 ++++++++++++++++++ .../java/com/example/speech/RecognizeIT.java | 21 ++ 4 files changed, 217 insertions(+), 1 deletion(-) diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index 42ff0729978..da2d54e08d0 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -70,3 +70,19 @@ Asynchronously transcribe a audio file hosted on GCS ``` mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav" ``` + +## Auto Punctuation +Synchronously transcribe and punctuate an audio file +``` +mvn exec:java -DRecognize -Dexec.args="auto-punctuation ./resources/audio.raw" +``` + +Asynchronously transcribe and punctuate an audio file hosted on GCS +``` +mvn exec:java -DRecognize -Dexec.args="auto-punctuation gs://cloud-samples-tests/speech/brooklyn.flac" +``` + +Perform streaming speech transcription and punctuation on an audio file +``` +mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw" +``` diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml index a93abe2f05c..182e95e5aba 100644 --- a/speech/cloud-client/pom.xml +++ b/speech/cloud-client/pom.xml @@ -40,7 +40,7 @@ com.google.cloud google-cloud-speech - 0.40.0-alpha + 0.42.0-alpha diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index f1fb3273ccd..e56b06a1e1a 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -53,6 +53,7 @@ public static void main(String... 
args) throws Exception { "\tjava %s \"\" \"\"\n" + "Commands:\n" + "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n" + + "\t| auto-punctuation | stream-punctuation\n" + "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI " + "for a Cloud Storage resource (gs://...)\n", Recognize.class.getCanonicalName()); @@ -88,6 +89,14 @@ public static void main(String... args) throws Exception { } else { transcribeModelSelection(path); } + } else if (command.equals("auto-punctuation")) { + if (path.startsWith("gs://")) { + transcribeGcsWithAutomaticPunctuation(path); + } else { + transcribeFileWithAutomaticPunctuation(path); + } + } else if (command.equals("stream-punctuation")) { + streamingTranscribeWithAutomaticPunctuation(path); } } @@ -497,4 +506,174 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception { } // [END speech_transcribe_model_selection_gcs] } + + // [START speech_sync_recognize_punctuation] + /** + * Performs transcription with automatic punctuation on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] content = Files.readAllBytes(path); + + try (SpeechClient speechClient = SpeechClient.create()) { + // Configure request with local raw PCM audio + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Get the contents of the local audio file + RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder() + .setContent(ByteString.copyFrom(content)) + .build(); + + // Perform the transcription request + RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio); + + // Just print the first result here. 
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0); + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. + SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_sync_recognize_punctuation] + + // [START speech_async_recognize_gcs_punctuation] + /** + * Transcribes a remote FLAC file with automatic punctuation and prints the transcript. + * + * @param gcsUri the path to the remote FLAC audio file to transcribe. + */ + public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception { + try (SpeechClient speechClient = SpeechClient.create()) { + // Configure request with remote FLAC audio + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.FLAC) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Set the remote path for the audio file + RecognitionAudio audio = RecognitionAudio.newBuilder() + .setUri(gcsUri) + .build(); + + // Use non-blocking call for getting file transcription + OperationFuture response = + speechClient.longRunningRecognizeAsync(config, audio); + + while (!response.isDone()) { + System.out.println("Waiting for response..."); + Thread.sleep(10000); + } + + // Just print the first result here. + SpeechRecognitionResult result = response.get().getResultsList().get(0); + + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + + // Print out the result + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + // [END speech_async_recognize_gcs_punctuation] + + /** + * Performs streaming speech recognition on raw PCM audio data. + * + * @param fileName the path to a PCM audio file to transcribe. + */ + public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception { + Path path = Paths.get(fileName); + byte[] data = Files.readAllBytes(path); + + // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS + try (SpeechClient speech = SpeechClient.create()) { + + // Configure request with local raw PCM audio + RecognitionConfig recConfig = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setLanguageCode("en-US") + .setSampleRateHertz(16000) + .setEnableAutomaticPunctuation(true) + .build(); + + // Build the streaming config with the audio config + StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder() + .setConfig(recConfig) + .build(); + + class ResponseApiStreamingObserver implements ApiStreamObserver { + private final SettableFuture> future = SettableFuture.create(); + private final List messages = new java.util.ArrayList(); + + @Override + public void onNext(T message) { + messages.add(message); + } + + @Override + public void onError(Throwable t) { + future.setException(t); + } + + @Override + public void onCompleted() { + future.set(messages); + } + + // Returns the SettableFuture object to get received messages / exceptions. 
+ public SettableFuture> future() { + return future; + } + } + + ResponseApiStreamingObserver responseObserver = + new ResponseApiStreamingObserver<>(); + + BidiStreamingCallable callable = + speech.streamingRecognizeCallable(); + + ApiStreamObserver requestObserver = + callable.bidiStreamingCall(responseObserver); + + // The first request must **only** contain the audio configuration: + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(config) + .build()); + + // Subsequent requests must **only** contain the audio data. + requestObserver.onNext(StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(data)) + .build()); + + // Mark transmission as completed after sending the data. + requestObserver.onCompleted(); + + List responses = responseObserver.future().get(); + + for (StreamingRecognizeResponse response : responses) { + // For streaming recognize, the results list has one is_final result (if available) followed + // by a number of in-progress results (if interim_results is true) for subsequent + // utterances. Just print the first result here. + StreamingRecognitionResult result = response.getResultsList().get(0); + // There can be several alternative transcripts for a given chunk of speech. Just use the + // first (most likely) one here. 
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0); + System.out.printf("Transcript : %s\n", alternative.getTranscript()); + } + } + } } diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java index f51a4c9b9a7..022e24f7d1a 100644 --- a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java +++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java @@ -124,4 +124,25 @@ public void testGcsModelSelection() throws Exception { assertThat(got).contains("OK Google"); assertThat(got).contains("the weather outside is sunny"); } + + @Test + public void testAutoPunctuation() throws Exception { + Recognize.transcribeFileWithAutomaticPunctuation(audioFileName); + String got = bout.toString(); + assertThat(got).contains("How old is the Brooklyn Bridge?"); + } + + @Test + public void testGcsAutoPunctuation() throws Exception { + Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath); + String got = bout.toString(); + assertThat(got).contains("How old is the Brooklyn Bridge?"); + } + + @Test + public void testStreamAutoPunctuation() throws Exception { + Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName); + String got = bout.toString(); + assertThat(got).contains("How old is the Brooklyn Bridge?"); + } } From e7f1eb89e9d2db865fe36f91401248fff1321205 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Tue, 3 Apr 2018 09:43:29 -0700 Subject: [PATCH 2/3] Add region tags for streaming sample --- .../src/main/java/com/example/speech/Recognize.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java index e56b06a1e1a..dab73a3aa29 100644 --- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java +++ 
b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java @@ -590,6 +590,7 @@ public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws E } // [END speech_async_recognize_gcs_punctuation] + // [START speech_stream_recognize_punctuation] /** * Performs streaming speech recognition on raw PCM audio data. * @@ -676,4 +677,5 @@ public SettableFuture> future() { } } } + // [END speech_stream_recognize_punctuation] } From 00d441664c46a113fe1deefe8082e35789573730 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Thu, 5 Apr 2018 16:29:23 -0700 Subject: [PATCH 3/3] Update README --- speech/cloud-client/README.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md index da2d54e08d0..3f6cbc4a42e 100644 --- a/speech/cloud-client/README.md +++ b/speech/cloud-client/README.md @@ -61,16 +61,21 @@ mvn exec:java -DRecognize -Dexec.args="wordoffsets gs://cloud-samples-tests/spee ``` ## Model Selection -Synchronously transcribe a audio file +Synchronously transcribe an audio file ``` mvn exec:java -DRecognize -Dexec.args="model-selection ./resources/Google_Gnome.wav" ``` -Asynchronously transcribe a audio file hosted on GCS +Asynchronously transcribe an audio file hosted on GCS ``` mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav" ``` +Perform streaming speech transcription on an audio file +``` +mvn exec:java -DRecognize -Dexec.args="streamrecognize ./resources/Google_Gnome.wav" +``` + ## Auto Punctuation Synchronously transcribe and punctuate an audio file ```