Fix the sleep logic for streaming as per sample rate (#279)

* sleep 100ms instead of sleeping as function of rate * added sleep as inverse of sampling rate * added LINEAR16 bytes per sample constant * added few variables * added truth and log4j * added 32khz audio * added Streaming Test * added unit tests for 16 and 32 khz audio * checkstyle fixes * renamed to KHz * added comment
GoogleCloudPlatform · Aug 4, 2016 · 16407a9 · 16407a9
1 parent ebdad96
commit 16407a9
Show file tree

Hide file tree

Showing 4 changed files with 121 additions and 8 deletions.
diff --git a/speech/grpc/pom.xml b/speech/grpc/pom.xml
@@ -152,6 +152,17 @@ limitations under the License.
       <version>1.1.33.Fork14</version>
       <classifier>${tcnative.classifier}</classifier>
     </dependency>
+    <dependency>
+      <groupId>com.google.truth</groupId>
+      <artifactId>truth</artifactId>
+      <version>0.28</version>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>1.2.17</version>
+    </dependency>
   </dependencies>
   <!-- // [END dependency] -->
 

diff --git a/speech/grpc/resources/audio32KHz.raw b/speech/grpc/resources/audio32KHz.raw
diff --git a/speech/grpc/src/main/java/com/examples/cloud/speech/StreamingRecognizeClient.java b/speech/grpc/src/main/java/com/examples/cloud/speech/StreamingRecognizeClient.java
@@ -16,6 +16,8 @@
 
 package com.examples.cloud.speech;
 
+import static org.apache.log4j.ConsoleAppender.SYSTEM_OUT;
+
 import com.google.cloud.speech.v1beta1.RecognitionConfig;
 import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding;
 import com.google.cloud.speech.v1beta1.SpeechGrpc;
@@ -26,7 +28,6 @@
 import com.google.protobuf.TextFormat;
 
 import io.grpc.ManagedChannel;
-import io.grpc.Status;
 import io.grpc.stub.StreamObserver;
 
 import org.apache.commons.cli.CommandLine;
@@ -35,6 +36,10 @@
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.SimpleLayout;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -43,8 +48,7 @@
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
-import java.util.logging.Level;
-import java.util.logging.Logger;
+
 
 /**
  * Client that sends streaming audio to Speech.Recognize and returns streaming transcript.
@@ -60,6 +64,9 @@ public class StreamingRecognizeClient {
 
   private final SpeechGrpc.SpeechStub speechClient;
 
+  private static final int BYTES_PER_BUFFER = 3200; //buffer size in bytes
+  private static final int BYTES_PER_SAMPLE = 2; //bytes per sample for LINEAR16
+
   private static final List<String> OAUTH2_SCOPES =
       Arrays.asList("https://www.googleapis.com/auth/cloud-platform");
 
@@ -73,6 +80,13 @@ public StreamingRecognizeClient(ManagedChannel channel, String file, int samplin
     this.channel = channel;
 
     speechClient = SpeechGrpc.newStub(channel);
+
+    //Send log4j logs to Console
+    //If you are going to run this on GCE, you might wish to integrate with gcloud-java logging.
+    //See https://github.com/GoogleCloudPlatform/gcloud-java/blob/master/README.md#stackdriver-logging-alpha
+
+    ConsoleAppender appender = new ConsoleAppender(new SimpleLayout(), SYSTEM_OUT);
+    logger.addAppender(appender);
   }
 
   public void shutdown() throws InterruptedException {
@@ -91,8 +105,7 @@ public void onNext(StreamingRecognizeResponse response) {
 
           @Override
           public void onError(Throwable error) {
-            Status status = Status.fromThrowable(error);
-            logger.log(Level.WARNING, "recognize failed: {0}", status);
+            logger.log(Level.WARN, "recognize failed: {0}", error);
             finishLatch.countDown();
           }
 
@@ -127,9 +140,12 @@ public void onCompleted() {
       // Open audio file. Read and send sequential buffers of audio as additional RecognizeRequests.
       FileInputStream in = new FileInputStream(new File(file));
       // For LINEAR16 at 16000 Hz sample rate, 3200 bytes corresponds to 100 milliseconds of audio.
-      byte[] buffer = new byte[3200];
+      byte[] buffer = new byte[BYTES_PER_BUFFER];
       int bytesRead;
       int totalBytes = 0;
+      int samplesPerBuffer = BYTES_PER_BUFFER / BYTES_PER_SAMPLE;
+      int samplesPerMillis = samplingRate / 1000;
+
       while ((bytesRead = in.read(buffer)) != -1) {
         totalBytes += bytesRead;
         StreamingRecognizeRequest request =
@@ -138,8 +154,7 @@ public void onCompleted() {
                 .build();
         requestObserver.onNext(request);
         // To simulate real-time audio, sleep after sending each audio buffer.
-        // For 16000 Hz sample rate, sleep 100 milliseconds.
-        Thread.sleep(samplingRate / 160);
+        Thread.sleep(samplesPerBuffer / samplesPerMillis);
       }
       logger.info("Sent " + totalBytes + " bytes from audio file: " + file);
     } catch (RuntimeException e) {

diff --git a/speech/grpc/src/test/java/com/examples/cloud/speech/StreamingRecognizeClientTest.java b/speech/grpc/src/test/java/com/examples/cloud/speech/StreamingRecognizeClientTest.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2016 Google Inc. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.examples.cloud.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import io.grpc.ManagedChannel;
+import org.apache.log4j.Logger;
+import org.apache.log4j.SimpleLayout;
+import org.apache.log4j.WriterAppender;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+
+/**
+ * Unit tests for {@link StreamingRecognizeClient }.
+ */
+@RunWith(JUnit4.class)
+public class StreamingRecognizeClientTest {
+  private Writer writer;
+  private WriterAppender appender;
+
+  @Before
+  public void setUp() {
+    writer = new StringWriter();
+    appender = new WriterAppender(new SimpleLayout(), writer);
+    Logger.getRootLogger().addAppender(appender);
+  }
+
+  @After
+  public void tearDown() {
+    Logger.getRootLogger().removeAppender(appender);
+  }
+
+  @Test
+  public void test16KHzAudio() throws InterruptedException, IOException {
+    URI uri = new File("resources/audio.raw").toURI();
+    Path path = Paths.get(uri);
+
+    String host = "speech.googleapis.com";
+    int port = 443;
+    ManagedChannel channel = AsyncRecognizeClient.createChannel(host, port);
+    StreamingRecognizeClient client = new StreamingRecognizeClient(channel, path.toString(), 16000);
+
+    client.recognize();
+    assertThat(writer.toString()).contains("transcript: \"how old is the Brooklyn Bridge\"");
+  }
+
+  @Test
+  public void test32KHzAudio() throws InterruptedException, IOException {
+    URI uri = new File("resources/audio32KHz.raw").toURI();
+    Path path = Paths.get(uri);
+
+    String host = "speech.googleapis.com";
+    int port = 443;
+    ManagedChannel channel = AsyncRecognizeClient.createChannel(host, port);
+    StreamingRecognizeClient client = new StreamingRecognizeClient(channel, path.toString(), 32000);
+
+    client.recognize();
+    assertThat(writer.toString()).contains("transcript: \"how old is the Brooklyn Bridge\"");
+  }
+}