Skip to content

Commit

Permalink
[DO_NOT_MERGE] Microphone streaming with a 1 minute duration. (#1185)
Browse files Browse the repository at this point in the history
* Microphone streaming with a 1 minute duration.

* Fixed audit issues.

* Fixing issues after review.

* Fixing review issues.
  • Loading branch information
nirupa-kumar authored and nnegrey committed Aug 17, 2018
1 parent a73dd58 commit a7690e7
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 7 deletions.
5 changes: 5 additions & 0 deletions speech/cloud-client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ Performing streaming speech transcription and punctuation on an audio file
mvn exec:java -DRecognize -Dexec.args="stream-punctuation ./resources/audio.raw"
```

Performing microphone streaming speech recognition
```
mvn exec:java -DRecognize -Dexec.args="micstreamrecognize"
```

## Enhanced Model
Transcribe an audio file using an enhanced model
```
Expand Down
120 changes: 113 additions & 7 deletions speech/cloud-client/src/main/java/com/example/speech/Recognize.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.rpc.ApiStreamObserver;
import com.google.api.gax.rpc.BidiStreamingCallable;
import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeMetadata;
import com.google.cloud.speech.v1p1beta1.LongRunningRecognizeResponse;
import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
Expand Down Expand Up @@ -47,6 +50,13 @@
import java.util.ArrayList;
import java.util.List;

import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
import javax.sound.sampled.DataLine.Info;
import javax.sound.sampled.TargetDataLine;

public class Recognize {

/** Run speech recognition tasks. */
Expand All @@ -56,9 +66,10 @@ public static void main(String... args) throws Exception {
System.out.printf(
"\tjava %s \"<command>\" \"<path-to-image>\"\n"
+ "Commands:\n"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | wordoffsets\n"
+ "\t| model-selection | auto-punctuation | stream-punctuation | enhanced-model\n"
+ "\t| metadata | diarization | multi-channel | multi-language | word-level-conf"
+ "\tsyncrecognize | asyncrecognize | streamrecognize | micstreamrecognize \n"
+ "\t| wordoffsets | model-selection | auto-punctuation | stream-punctuation \n"
+ "\t| enhanced-model| metadata | diarization | multi-channel | multi-language \n"
+ "\t | word-level-conf"
+ "Path:\n\tA file path (ex: ./resources/audio.raw) or a URI "
+ "for a Cloud Storage resource (gs://...)\n",
Recognize.class.getCanonicalName());
Expand Down Expand Up @@ -88,6 +99,8 @@ public static void main(String... args) throws Exception {
}
} else if (command.equals("streamrecognize")) {
streamingRecognizeFile(path);
} else if (command.equals("micstreamrecognize")) {
streamingMicRecognize();
} else if (command.equals("model-selection")) {
if (path.startsWith("gs://")) {
transcribeModelSelectionGcs(path);
Expand Down Expand Up @@ -704,6 +717,97 @@ public SettableFuture<List<T>> future() {
}
// [END speech_stream_recognize_punctuation]

// [START speech_streaming_mic_recognize]
/**
 * Performs streaming speech recognition on audio captured from the microphone, for a
 * duration of one minute.
 *
 * <p>Audio is captured as LINEAR16 PCM at 16 kHz, mono, and streamed to the Cloud Speech
 * API in ~200 ms chunks. Responses are collected while streaming and the transcripts are
 * printed once capture ends.
 *
 * @throws Exception if the Speech client cannot be created or the microphone line cannot
 *     be opened
 */
public static void streamingMicRecognize() throws Exception {

  ResponseObserver<StreamingRecognizeResponse> responseObserver = null;
  try (SpeechClient client = SpeechClient.create()) {

    responseObserver =
        new ResponseObserver<StreamingRecognizeResponse>() {
          // Responses received so far; transcripts are printed in onComplete().
          final ArrayList<StreamingRecognizeResponse> responses = new ArrayList<>();

          @Override
          public void onStart(StreamController controller) {}

          @Override
          public void onResponse(StreamingRecognizeResponse response) {
            responses.add(response);
          }

          @Override
          public void onComplete() {
            for (StreamingRecognizeResponse response : responses) {
              // Use the first result and first alternative: the most likely transcription.
              StreamingRecognitionResult result = response.getResultsList().get(0);
              SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
              System.out.printf("Transcript : %s\n", alternative.getTranscript());
            }
          }

          @Override
          public void onError(Throwable t) {
            System.out.println(t);
          }
        };

    ClientStream<StreamingRecognizeRequest> clientStream =
        client.streamingRecognizeCallable().splitCall(responseObserver);

    RecognitionConfig recognitionConfig =
        RecognitionConfig.newBuilder()
            .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
            .setLanguageCode("en-US")
            .setSampleRateHertz(16000)
            .build();
    StreamingRecognitionConfig streamingRecognitionConfig =
        StreamingRecognitionConfig.newBuilder().setConfig(recognitionConfig).build();

    // The first request in a streaming call must carry only the configuration.
    StreamingRecognizeRequest request =
        StreamingRecognizeRequest.newBuilder()
            .setStreamingConfig(streamingRecognitionConfig)
            .build();
    clientStream.send(request);

    // SampleRate: 16000 Hz, SampleSizeInBits: 16, Channels: 1, Signed: true,
    // bigEndian: false -- must match the RecognitionConfig above.
    AudioFormat audioFormat = new AudioFormat(16000, 16, 1, true, false);
    // Describe the microphone line we want to read from.
    DataLine.Info targetInfo = new Info(TargetDataLine.class, audioFormat);

    if (!AudioSystem.isLineSupported(targetInfo)) {
      System.out.println("Microphone not supported");
      // Return instead of System.exit(0): don't kill the caller's JVM from a sample.
      return;
    }

    // Target data line captures the audio stream the microphone produces.
    TargetDataLine targetDataLine = (TargetDataLine) AudioSystem.getLine(targetInfo);
    targetDataLine.open(audioFormat);
    targetDataLine.start();
    System.out.println("Start speaking");
    long startTime = System.currentTimeMillis();
    try (AudioInputStream audio = new AudioInputStream(targetDataLine)) {
      byte[] data = new byte[6400]; // 6400 bytes == 200 ms of 16-bit mono 16 kHz audio
      while (true) {
        long estimatedTime = System.currentTimeMillis() - startTime;
        if (estimatedTime > 60000) { // 60 seconds
          System.out.println("Stop speaking.");
          break;
        }
        // read() may return fewer bytes than requested; send only what was actually read
        // instead of the whole (possibly stale) buffer.
        int bytesRead = audio.read(data);
        if (bytesRead < 0) {
          break; // Microphone line closed / end of stream.
        }
        request =
            StreamingRecognizeRequest.newBuilder()
                .setAudioContent(ByteString.copyFrom(data, 0, bytesRead))
                .build();
        clientStream.send(request);
      }
    } finally {
      // Always release the microphone, even if streaming failed mid-way.
      targetDataLine.stop();
      targetDataLine.close();
    }
  } catch (Exception e) {
    System.out.println(e);
  }
  // Print whatever transcripts were collected. Guard against the observer never having
  // been created (e.g. SpeechClient.create() threw), which previously caused an NPE here.
  if (responseObserver != null) {
    responseObserver.onComplete();
  }
}
// [END speech_streaming_mic_recognize]

// [START speech_transcribe_file_with_enhanced_model]
/**
* Transcribe the given audio file using an enhanced model.
Expand Down Expand Up @@ -833,8 +937,9 @@ public static void transcribeDiarization(String fileName) throws Exception {
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
System.out.format("Transcript : %s\n", alternative.getTranscript());
// The words array contains the entire transcript up until that point.
//Referencing the last spoken word to get the associated Speaker tag
System.out.format("Speaker Tag %s: %s\n",
// Referencing the last spoken word to get the associated Speaker tag
System.out.format(
"Speaker Tag %s: %s\n",
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
alternative.getTranscript());
}
Expand Down Expand Up @@ -877,8 +982,9 @@ public static void transcribeDiarizationGcs(String gcsUri) throws Exception {
// use the first (most likely) one here.
SpeechRecognitionAlternative alternative = result.getAlternatives(0);
// The words array contains the entire transcript up until that point.
//Referencing the last spoken word to get the associated Speaker tag
System.out.format("Speaker Tag %s:%s\n",
// Referencing the last spoken word to get the associated Speaker tag
System.out.format(
"Speaker Tag %s:%s\n",
alternative.getWords((alternative.getWordsCount() - 1)).getSpeakerTag(),
alternative.getTranscript());
}
Expand Down

0 comments on commit a7690e7

Please sign in to comment.