diff --git a/speech/cloud-client/README.md b/speech/cloud-client/README.md
index 42ff0729978..3f6cbc4a42e 100644
--- a/speech/cloud-client/README.md
+++ b/speech/cloud-client/README.md
@@ -61,12 +61,33 @@ mvn exec:java -DRecognize -Dexec.args="wordoffsets gs://cloud-samples-tests/spee
```
## Model Selection
-Synchronously transcribe a audio file
+Synchronously transcribe an audio file
```
mvn exec:java -DRecognize -Dexec.args="model-selection ./resources/Google_Gnome.wav"
```
-Asynchronously transcribe a audio file hosted on GCS
+Asynchronously transcribe an audio file hosted on GCS
```
mvn exec:java -DRecognize -Dexec.args="model-selection gs://cloud-samples-tests/speech/Google_Gnome.wav"
```
+
+Perform streaming speech transcription on an audio file
+```
+mvn exec:java -DRecognize -Dexec.args="streamrecognize ./resources/Google_Gnome.wav"
+```
+
+## Auto Punctuation
+Synchronously transcribe and punctuate an audio file
+```
+mvn exec:java -DRecognize -Dexec.args="auto-punctuation ./resources/audio.raw"
+```
+
+Asynchronously transcribe and punctuate an audio file hosted on GCS
+```
+mvn exec:java -DRecognize -Dexec.args="auto-punctuation gs://cloud-samples-tests/speech/brooklyn.flac"
+```
+
+Perform streaming speech transcription and punctuation on an audio file
+```
+mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
+```
diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml
index a93abe2f05c..182e95e5aba 100644
--- a/speech/cloud-client/pom.xml
+++ b/speech/cloud-client/pom.xml
@@ -40,7 +40,7 @@
com.google.cloud
google-cloud-speech
- 0.40.0-alpha
+ 0.42.0-alpha
diff --git a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
index f1fb3273ccd..dab73a3aa29 100644
--- a/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
+++ b/speech/cloud-client/src/main/java/com/example/speech/Recognize.java
@@ -53,6 +53,7 @@ public static void main(String... args) throws Exception {
"\tjava %s \"\" \"\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets | model-selection\n"
+ + "\t| auto-punctuation | stream-punctuation\n"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
@@ -88,6 +89,14 @@ public static void main(String... args) throws Exception {
} else {
transcribeModelSelection(path);
}
+ } else if (command.equals("auto-punctuation")) {
+ if (path.startsWith("gs://")) {
+ transcribeGcsWithAutomaticPunctuation(path);
+ } else {
+ transcribeFileWithAutomaticPunctuation(path);
+ }
+ } else if (command.equals("stream-punctuation")) {
+ streamingTranscribeWithAutomaticPunctuation(path);
}
}
@@ -497,4 +506,176 @@ public static void transcribeModelSelectionGcs(String gcsUri) throws Exception {
}
// [END speech_transcribe_model_selection_gcs]
}
+
+ // [START speech_sync_recognize_punctuation]
+ /**
+ * Performs transcription with automatic punctuation on raw PCM audio data.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void transcribeFileWithAutomaticPunctuation(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] content = Files.readAllBytes(path);
+
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Configure request with local raw PCM audio
+ RecognitionConfig recConfig = RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Get the contents of the local audio file
+ RecognitionAudio recognitionAudio = RecognitionAudio.newBuilder()
+ .setContent(ByteString.copyFrom(content))
+ .build();
+
+ // Perform the transcription request
+ RecognizeResponse recognizeResponse = speechClient.recognize(recConfig, recognitionAudio);
+
+ // Just print the first result here.
+ SpeechRecognitionResult result = recognizeResponse.getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_sync_recognize_punctuation]
+
+ // [START speech_async_recognize_gcs_punctuation]
+ /**
+ * Performs transcription with automatic punctuation on a remote FLAC file and prints the transcription.
+ *
+ * @param gcsUri the path to the remote FLAC audio file to transcribe.
+ */
+ public static void transcribeGcsWithAutomaticPunctuation(String gcsUri) throws Exception {
+ try (SpeechClient speechClient = SpeechClient.create()) {
+ // Configure request with FLAC audio
+ RecognitionConfig config = RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.FLAC)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Set the remote path for the audio file
+ RecognitionAudio audio = RecognitionAudio.newBuilder()
+ .setUri(gcsUri)
+ .build();
+
+ // Use non-blocking call for getting file transcription
+ OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
+ speechClient.longRunningRecognizeAsync(config, audio);
+
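+ // Poll the operation until the transcription completes, checking every ten seconds.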
+ while (!response.isDone()) {
+ System.out.println("Waiting for response...");
+ Thread.sleep(10000);
+ }
+
+ // Just print the first result here.
+ SpeechRecognitionResult result = response.get().getResultsList().get(0);
+
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+
+ // Print out the result
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ // [END speech_async_recognize_gcs_punctuation]
+
+ // [START speech_stream_recognize_punctuation]
+ /**
+ * Performs streaming speech recognition with automatic punctuation on raw PCM audio data.
+ *
+ * @param fileName the path to a PCM audio file to transcribe.
+ */
+ public static void streamingTranscribeWithAutomaticPunctuation(String fileName) throws Exception {
+ Path path = Paths.get(fileName);
+ byte[] data = Files.readAllBytes(path);
+
+ // Instantiates a client with GOOGLE_APPLICATION_CREDENTIALS
+ try (SpeechClient speech = SpeechClient.create()) {
+
+ // Configure request with local raw PCM audio
+ RecognitionConfig recConfig = RecognitionConfig.newBuilder()
+ .setEncoding(AudioEncoding.LINEAR16)
+ .setLanguageCode("en-US")
+ .setSampleRateHertz(16000)
+ .setEnableAutomaticPunctuation(true)
+ .build();
+
+ // Build the streaming config with the audio config
+ StreamingRecognitionConfig config = StreamingRecognitionConfig.newBuilder()
+ .setConfig(recConfig)
+ .build();
+
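+ // Helper observer that collects every streaming response as it arrives and exposes the
+ // full list through a SettableFuture once the stream completes (or fails with an error).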
+ class ResponseApiStreamingObserver<T> implements ApiStreamObserver<T> {
+ private final SettableFuture<List<T>> future = SettableFuture.create();
+ private final List<T> messages = new java.util.ArrayList<T>();
+
+ @Override
+ public void onNext(T message) {
+ messages.add(message);
+ }
+
+ @Override
+ public void onError(Throwable t) {
+ future.setException(t);
+ }
+
+ @Override
+ public void onCompleted() {
+ future.set(messages);
+ }
+
+ // Returns the SettableFuture object to get received messages / exceptions.
+ public SettableFuture<List<T>> future() {
+ return future;
+ }
+ }
+
+ ResponseApiStreamingObserver<StreamingRecognizeResponse> responseObserver =
+ new ResponseApiStreamingObserver<>();
+
+ BidiStreamingCallable<StreamingRecognizeRequest, StreamingRecognizeResponse> callable =
+ speech.streamingRecognizeCallable();
+
+ ApiStreamObserver<StreamingRecognizeRequest> requestObserver =
+ callable.bidiStreamingCall(responseObserver);
+
+ // The first request must **only** contain the audio configuration:
+ requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+ .setStreamingConfig(config)
+ .build());
+
+ // Subsequent requests must **only** contain the audio data.
+ requestObserver.onNext(StreamingRecognizeRequest.newBuilder()
+ .setAudioContent(ByteString.copyFrom(data))
+ .build());
+
+ // Mark transmission as completed after sending the data.
+ requestObserver.onCompleted();
+
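+ // Block until the server closes the stream and all responses have been collected.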
+ List<StreamingRecognizeResponse> responses = responseObserver.future().get();
+
+ for (StreamingRecognizeResponse response : responses) {
+ // For streaming recognize, the results list has one is_final result (if available) followed
+ // by a number of in-progress results (if interim_results is true) for subsequent utterances.
+ // Just print the first result here.
+ StreamingRecognitionResult result = response.getResultsList().get(0);
+ // There can be several alternative transcripts for a given chunk of speech. Just use the
+ // first (most likely) one here.
+ SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+ System.out.printf("Transcript : %s\n", alternative.getTranscript());
+ }
+ }
+ }
+ // [END speech_stream_recognize_punctuation]
}
diff --git a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
index f51a4c9b9a7..022e24f7d1a 100644
--- a/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
+++ b/speech/cloud-client/src/test/java/com/example/speech/RecognizeIT.java
@@ -124,4 +124,25 @@ public void testGcsModelSelection() throws Exception {
assertThat(got).contains("OK Google");
assertThat(got).contains("the weather outside is sunny");
}
+
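+ // Each test asserts on the punctuated transcript, including the question mark added by
+ // automatic punctuation.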
+ @Test
+ public void testAutoPunctuation() throws Exception {
+ Recognize.transcribeFileWithAutomaticPunctuation(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("How old is the Brooklyn Bridge?");
+ }
+
+ @Test
+ public void testGcsAutoPunctuation() throws Exception {
+ Recognize.transcribeGcsWithAutomaticPunctuation(gcsAudioPath);
+ String got = bout.toString();
+ assertThat(got).contains("How old is the Brooklyn Bridge?");
+ }
+
+ @Test
+ public void testStreamAutoPunctuation() throws Exception {
+ Recognize.streamingTranscribeWithAutomaticPunctuation(audioFileName);
+ String got = bout.toString();
+ assertThat(got).contains("How old is the Brooklyn Bridge?");
+ }
}