Skip to content

Commit

Permalink
samples: Added command line option class + option to pass different l…
Browse files Browse the repository at this point in the history
…ang code as argument (#1504)

* added an option to select a language other than English

* changed it to english

* timestamp change

* changed millisecond format to min:sec

* Revert "timestamp change"

This reverts commit df21c30.

* removed indent issues

* made requested changes
  • Loading branch information
munkhuushmgl authored and chingor13 committed Aug 15, 2020
1 parent d250d85 commit c3e846c
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package com.example.speech;

// [START speech_transcribe_infinite_streaming]

import com.google.api.gax.rpc.ClientStream;
import com.google.api.gax.rpc.ResponseObserver;
import com.google.api.gax.rpc.StreamController;
Expand All @@ -29,11 +30,13 @@
import com.google.cloud.speech.v1p1beta1.StreamingRecognizeResponse;
import com.google.protobuf.ByteString;
import com.google.protobuf.Duration;

import java.lang.Math;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.DataLine;
Expand All @@ -54,7 +57,7 @@ public class InfiniteStreamRecognize {
private static int BYTES_PER_BUFFER = 6400; // buffer size in bytes

private static int restartCounter = 0;
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> audioInput = new ArrayList<ByteString>();
private static ArrayList<ByteString> lastAudioInput = new ArrayList<ByteString>();
private static int resultEndTimeInMS = 0;
private static int isFinalEndTime = 0;
Expand All @@ -66,15 +69,33 @@ public class InfiniteStreamRecognize {
private static ByteString tempByteString;

/**
 * Entry point: parses the command-line flags, then runs streaming recognition.
 *
 * <p>Exits with status 1 when the flags cannot be parsed. Any exception thrown by the
 * recognition call is caught and printed to standard output.
 *
 * @param args command-line arguments, handed to InfiniteStreamRecognizeOptions.fromFlags
 */
public static void main(String... args) {
InfiniteStreamRecognizeOptions options = InfiniteStreamRecognizeOptions.fromFlags(args);
if (options == null) {
// fromFlags returned null, which this method treats as a parse failure.
System.out.println("Failed to parse options.");
System.exit(1);
}

try {
infiniteStreamingRecognize();
infiniteStreamingRecognize(options.langCode);
} catch (Exception e) {
// Report the failure on stdout; the exception is not rethrown.
System.out.println("Exception caught: " + e);
}
}

/**
 * Formats an elapsed time given in milliseconds as {@code "mm:ss /"}.
 *
 * <p>The fractional part of {@code milliSeconds} is discarded by the cast to {@code long}.
 * Minutes and seconds are each padded to at least two digits. Minutes are not wrapped into
 * hours, so a long-running stream prints e.g. {@code "1000:00 /"}.
 *
 * @param milliSeconds elapsed time in milliseconds
 * @return the minutes and seconds of the elapsed time, followed by {@code " /"}
 */
public static String convertMillisToDate(double milliSeconds) {
  long millis = (long) milliSeconds;
  long minutes = TimeUnit.MILLISECONDS.toMinutes(millis);
  // Seconds left over once the whole minutes have been taken out.
  long seconds = TimeUnit.MILLISECONDS.toSeconds(millis) - TimeUnit.MINUTES.toSeconds(minutes);

  DecimalFormat format = new DecimalFormat();
  format.setMinimumIntegerDigits(2);
  // A default DecimalFormat inserts grouping separators, which would render 1000 minutes
  // as "1,000"; a timestamp must stay a plain run of digits.
  format.setGroupingUsed(false);
  return String.format("%s:%s /", format.format(minutes), format.format(seconds));
}

/** Performs infinite streaming speech recognition */
public static void infiniteStreamingRecognize() throws Exception {
public static void infiniteStreamingRecognize(String languageCode) throws Exception {

// Microphone Input buffering
class MicBuffer implements Runnable {
Expand Down Expand Up @@ -115,45 +136,41 @@ public void onStart(StreamController controller) {
}

public void onResponse(StreamingRecognizeResponse response) {

responses.add(response);

StreamingRecognitionResult result = response.getResultsList().get(0);

Duration resultEndTime = result.getResultEndTime();

resultEndTimeInMS = (int) ((resultEndTime.getSeconds() * 1000)
+ (resultEndTime.getNanos() / 1000000));

+ (resultEndTime.getNanos() / 1000000));
double correctedTime = resultEndTimeInMS - bridgingOffset
+ (STREAMING_LIMIT * restartCounter);
DecimalFormat format = new DecimalFormat("0.#");
+ (STREAMING_LIMIT * restartCounter);

SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
if (result.getIsFinal()) {
System.out.print(GREEN);
System.out.print("\033[2K\r");
System.out.printf("%s: %s\n", format.format(correctedTime),
alternative.getTranscript());

System.out.printf("%s: %s [confidence: %.2f]\n",
convertMillisToDate(correctedTime),
alternative.getTranscript(),
alternative.getConfidence()
);
isFinalEndTime = resultEndTimeInMS;
lastTranscriptWasFinal = true;
} else {
System.out.print(RED);
System.out.print("\033[2K\r");
System.out.printf("%s: %s", format.format(correctedTime),
alternative.getTranscript());

System.out.printf("%s: %s", convertMillisToDate(correctedTime),
alternative.getTranscript()
);
lastTranscriptWasFinal = false;
}
}

public void onComplete() {}

public void onError(Throwable t) {}
public void onComplete() {
}

public void onError(Throwable t) {
}
};

clientStream = client.streamingRecognizeCallable().splitCall(responseObserver);

RecognitionConfig recognitionConfig =
Expand Down Expand Up @@ -227,8 +244,8 @@ public void onError(Throwable t) {}

request =
StreamingRecognizeRequest.newBuilder()
.setStreamingConfig(streamingRecognitionConfig)
.build();
.setStreamingConfig(streamingRecognitionConfig)
.build();

System.out.println(YELLOW);
System.out.printf("%d: RESTARTING REQUEST\n", restartCounter * STREAMING_LIMIT);
Expand All @@ -253,13 +270,12 @@ public void onError(Throwable t) {}
bridgingOffset = finalRequestEndTime;
}
int chunksFromMS = (int) Math.floor((finalRequestEndTime
- bridgingOffset) / chunkTime);
- bridgingOffset) / chunkTime);
// chunks from MS is number of chunks to resend
bridgingOffset = (int) Math.floor((lastAudioInput.size()
- chunksFromMS) * chunkTime);
- chunksFromMS) * chunkTime);
// set bridging offset for next request
for (int i = chunksFromMS; i < lastAudioInput.size(); i++) {

request =
StreamingRecognizeRequest.newBuilder()
.setAudioContent(lastAudioInput.get(i))
Expand Down Expand Up @@ -288,5 +304,6 @@ public void onError(Throwable t) {}
}
}
}

}
// [END speech_transcribe_infinite_streaming]
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Copyright 2019 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/** Command-line options for the infinite streaming recognition sample. */
public class InfiniteStreamRecognizeOptions {
  /** Long name of the flag that carries the language code. */
  private static final String LANG_CODE_FLAG = "lang_code";

  /** Language code taken from the {@code lang_code} flag; {@code "en-US"} when it is absent. */
  String langCode = "en-US";

  /**
   * Builds an InfiniteStreamRecognizeOptions from command-line flags.
   *
   * @param args raw command-line arguments
   * @return the parsed options, or {@code null} when the arguments cannot be parsed (the
   *     parser's message is written to standard error in that case)
   */
  public static InfiniteStreamRecognizeOptions fromFlags(String[] args) {
    Option langCodeOption =
        Option.builder()
            .type(String.class)
            .longOpt(LANG_CODE_FLAG)
            .hasArg()
            .desc("Language code")
            .build();
    Options flagSpec = new Options();
    flagSpec.addOption(langCodeOption);

    CommandLineParser parser = new DefaultParser();
    try {
      CommandLine commandLine = parser.parse(flagSpec, args);
      InfiniteStreamRecognizeOptions parsed = new InfiniteStreamRecognizeOptions();
      // Falls back to the field's initial value when the flag was not supplied.
      parsed.langCode = commandLine.getOptionValue(LANG_CODE_FLAG, parsed.langCode);
      return parsed;
    } catch (ParseException e) {
      System.err.println(e.getMessage());
      return null;
    }
  }
}

0 comments on commit c3e846c

Please sign in to comment.