From e1e5ac73d39b689ea0f022685bcfd03196087435 Mon Sep 17 00:00:00 2001 From: puneith Date: Sun, 3 Jul 2016 03:22:40 -0700 Subject: [PATCH 01/14] renamed from recognize to streaming recognize --- .../demos/{RecognizeClient.java => StreamingRecognizeClient.java} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/{RecognizeClient.java => StreamingRecognizeClient.java} (100%) diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java similarity index 100% rename from speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognizeClient.java rename to speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java From d7367c881b773cb34526ee411e8582cd0b14744e Mon Sep 17 00:00:00 2001 From: puneith Date: Sun, 3 Jul 2016 03:24:33 -0700 Subject: [PATCH 02/14] added v1beta1 proto --- .../google/speech/v1beta1/cloud_speech.proto | 321 ++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100644 speech/grpc/src/main/proto/google/speech/v1beta1/cloud_speech.proto diff --git a/speech/grpc/src/main/proto/google/speech/v1beta1/cloud_speech.proto b/speech/grpc/src/main/proto/google/speech/v1beta1/cloud_speech.proto new file mode 100644 index 00000000000..48e069eafdc --- /dev/null +++ b/speech/grpc/src/main/proto/google/speech/v1beta1/cloud_speech.proto @@ -0,0 +1,321 @@ +syntax = "proto3"; + +package google.cloud.speech.v1beta1; + +option java_multiple_files = true; +option java_outer_classname = "SpeechProto"; +option java_package = "com.google.cloud.speech.v1beta1"; + +import "google/api/annotations.proto"; +import "google/longrunning/operations.proto"; +import "google/rpc/status.proto"; + + +// Service that implements Google Cloud Speech API. 
+service Speech { + // Perform synchronous speech-recognition: receive results after all audio + // has been sent and processed. + rpc SyncRecognize(SyncRecognizeRequest) returns (SyncRecognizeResponse) { + option (google.api.http) = + { post: "/v1beta1/speech:syncrecognize" body: "*" }; + } + + // Perform asynchronous speech-recognition: receive results via the + // google.longrunning.Operations interface. `Operation.response` returns + // `AsyncRecognizeResponse`. + rpc AsyncRecognize(AsyncRecognizeRequest) + returns (google.longrunning.Operation) { + option (google.api.http) = + { post: "/v1beta1/speech:asyncrecognize" body: "*" }; + } + + // Perform bidirectional streaming speech-recognition: receive results while + // sending audio. This method is only available via the gRPC API (not REST). + rpc StreamingRecognize(stream StreamingRecognizeRequest) + returns (stream StreamingRecognizeResponse); +} + +// `SyncRecognizeRequest` is the top-level message sent by the client for +// the `SyncRecognize` method. +message SyncRecognizeRequest { + // [Required] The `config` message provides information to the recognizer + // that specifies how to process the request. + RecognitionConfig config = 1; + + // [Required] The audio data to be recognized. + RecognitionAudio audio = 2; +} + +// `AsyncRecognizeRequest` is the top-level message sent by the client for +// the `AsyncRecognize` method. +message AsyncRecognizeRequest { + // [Required] The `config` message provides information to the recognizer + // that specifies how to process the request. + RecognitionConfig config = 1; + + // [Required] The audio data to be recognized. + RecognitionAudio audio = 2; +} + +// `StreamingRecognizeRequest` is the top-level message sent by the client for +// the `StreamingRecognize`. Multiple `StreamingRecognizeRequest` messages are +// sent. The first message must contain a `streaming_config` message and must +// not contain `audio` data. 
All subsequent messages must contain `audio` data +// and must not contain a `streaming_config` message. +message StreamingRecognizeRequest { + oneof streaming_request { + // The `streaming_config` message provides information to the recognizer + // that specifies how to process the request. + // + // The first `StreamingRecognizeRequest` message must contain a + // `streaming_config` message. + StreamingRecognitionConfig streaming_config = 1; + + // The audio data to be recognized. Sequential chunks of audio data are sent + // in sequential `StreamingRecognizeRequest` messages. The first + // `StreamingRecognizeRequest` message must not contain `audio_content` data + // and all subsequent `StreamingRecognizeRequest` messages must contain + // `audio_content` data. The audio bytes must be encoded as specified in + // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + // pure binary representation (not base64). + bytes audio_content = 2 [ctype = CORD]; + } +} + +// The `StreamingRecognitionConfig` message provides information to the +// recognizer that specifies how to process the request. +message StreamingRecognitionConfig { + // [Required] The `config` message provides information to the recognizer + // that specifies how to process the request. + RecognitionConfig config = 1; + + // [Optional] If `false` or omitted, the recognizer will perform continuous + // recognition (continuing to process audio even if the user pauses speaking) + // until the client closes the output stream (gRPC API) or when the maximum + // time limit has been reached. Multiple `SpeechRecognitionResult`s with the + // `is_final` flag set to `true` may be returned. + // + // If `true`, the recognizer will detect a single spoken utterance. When it + // detects that the user has paused or stopped speaking, it will return an + // `END_OF_UTTERANCE` event and cease recognition. 
It will return no more than + // one `SpeechRecognitionResult` with the `is_final` flag set to `true`. + bool single_utterance = 2; + + // [Optional] If `true`, interim results (tentative hypotheses) may be + // returned as they become available (these interim results are indicated with + // the `is_final=false` flag). + // If `false` or omitted, only `is_final=true` result(s) are returned. + bool interim_results = 3; +} + +// The `RecognitionConfig` message provides information to the recognizer +// that specifies how to process the request. +message RecognitionConfig { + // Audio encoding of the data sent in the audio message. All encodings support + // only 1 channel (mono) audio. Only `FLAC` includes a header that describes + // the bytes of audio that follow the header. The other encodings are raw + // audio bytes with no header. + // + // For best results, the audio source should be captured and transmitted using + // a lossless encoding (`FLAC` or `LINEAR16`). Recognition accuracy may be + // reduced if lossy codecs (such as AMR, AMR_WB and MULAW) are used to capture + // or transmit the audio, particularly if background noise is present. + enum AudioEncoding { + // Not specified. Will return result [google.rpc.Code.INVALID_ARGUMENT][]. + ENCODING_UNSPECIFIED = 0; + + // Uncompressed 16-bit signed little-endian samples. + // This is the only encoding that may be used by `AsyncRecognize`. + LINEAR16 = 1; + + // This is the recommended encoding for `SyncRecognize` and + // `StreamingRecognize` because it uses lossless compression; therefore + // recognition accuracy is not compromised by a lossy codec. + // + // The stream FLAC (Free Lossless Audio Codec) encoding is specified at: + // http://flac.sourceforge.net/documentation.html. + // Only 16-bit samples are supported. + // Not all fields in STREAMINFO are supported. + FLAC = 2; + + // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/mu-law. 
+ MULAW = 3; + + // Adaptive Multi-Rate Narrowband codec. `sample_rate` must be 8000 Hz. + AMR = 4; + + // Adaptive Multi-Rate Wideband codec. `sample_rate` must be 16000 Hz. + AMR_WB = 5; + } + + // [Required] Encoding of audio data sent in all `RecognitionAudio` messages. + AudioEncoding encoding = 1; + + // [Required] Sample rate in Hertz of the audio data sent in all + // `RecognitionAudio` messages. Valid values are: 8000-48000. + // 16000 is optimal. For best results, set the sampling rate of the audio + // source to 16000 Hz. If that's not possible, use the native sample rate of + // the audio source (instead of re-sampling). + int32 sample_rate = 2; + + // [Optional] The language of the supplied audio as a BCP-47 language tag. + // Example: "en-GB" https://www.rfc-editor.org/rfc/bcp/bcp47.txt + // If omitted, defaults to "en-US". See + // [Language Support](/speech/docs/best-practices#language_support) for + // a list of the currently supported language codes. + string language_code = 3; + + // [Optional] Maximum number of recognition hypotheses to be returned. + // Specifically, the maximum number of `SpeechRecognitionAlternative` messages + // within each `SpeechRecognitionResult`. + // The server may return fewer than `max_alternatives`. + // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of + // `1`. If omitted, defaults to `1`. + int32 max_alternatives = 4; + + // [Optional] If set to `true`, the server will attempt to filter out + // profanities, replacing all but the initial character in each filtered word + // with asterisks, e.g. "f***". If set to `false` or omitted, profanities + // won't be filtered out. + bool profanity_filter = 5; + + // [Optional] A means to provide context to assist the speech recognition. + SpeechContext speech_context = 6; +} + +// Provides "hints" to the speech recognizer to favor specific words and phrases +// in the results. 
+message SpeechContext { + // [Optional] A list of up to 50 phrases of up to 100 characters each to + // provide words and phrases "hints" to the speech recognition so that it is + // more likely to recognize them. + repeated string phrases = 1; +} + +// Contains audio data in the encoding specified in the `RecognitionConfig`. +// Either `content` or `uri` must be supplied. Supplying both or neither +// returns [google.rpc.Code.INVALID_ARGUMENT][]. +message RecognitionAudio { + oneof audio_source { + // The audio data bytes encoded as specified in + // `RecognitionConfig`. Note: as with all bytes fields, protobuffers use a + // pure binary representation, whereas JSON representations use base64. + bytes content = 1 [ctype = CORD]; + + // URI that points to a file that contains audio data bytes as specified in + // `RecognitionConfig`. Currently, only Google Cloud Storage URIs are + // supported, which must be specified in the following format: + // `gs://bucket_name/object_name` (other URI formats return + // [google.rpc.Code.INVALID_ARGUMENT][]). For more information, see + // [Request URIs](/storage/docs/reference-uris). + string uri = 2; + } +} + +// `SyncRecognizeResponse` is the only message returned to the client by +// `SyncRecognize`. It contains the result as zero or more +// sequential `SpeechRecognitionResult` messages. +message SyncRecognizeResponse { + // [Output-only] Sequential list of transcription results corresponding to + // sequential portions of audio. + repeated SpeechRecognitionResult results = 2; +} + +// `AsyncRecognizeResponse` is the only message returned to the client by +// `AsyncRecognize`. It contains the result as zero or more +// sequential `SpeechRecognitionResult` messages. +message AsyncRecognizeResponse { + // [Output-only] Sequential list of transcription results corresponding to + // sequential portions of audio. 
+ repeated SpeechRecognitionResult results = 2; +} + +// `StreamingRecognizeResponse` is the only message returned to the client by +// `StreamingRecognize`. It contains the result as zero or more +// sequential `StreamingRecognitionResult` messages. +message StreamingRecognizeResponse { + // Indicates the type of endpointer event. + enum EndpointerType { + // No endpointer event specified. + ENDPOINTER_EVENT_UNSPECIFIED = 0; + + // Speech has been detected in the audio stream. + START_OF_SPEECH = 1; + + // Speech has ceased to be detected in the audio stream. + END_OF_SPEECH = 2; + + // The end of the audio stream has been reached and it is being processed. + END_OF_AUDIO = 3; + + // This event is only sent when `single_utterance` is `true`. It indicates + // that the server has detected the end of the user's speech utterance and + // expects no additional speech. Therefore, the server will not process + // additional audio. The client should stop sending additional audio data. + END_OF_UTTERANCE = 4; + } + + // [Output-only] If set, returns a [google.rpc.Status][] message that + // specifies the error for the operation. + google.rpc.Status error = 1; + + // [Output-only] This repeated list contains zero or more results that + // correspond to consecutive portions of the audio currently being processed. + // It contains zero or one `is_final=true` result (the newly settled portion), + // followed by zero or more `is_final=false` results. + repeated StreamingRecognitionResult results = 2; + + // [Output-only] Indicates the lowest index in the `results` array that has + // changed. The repeated `StreamingRecognitionResult` results overwrite past + // results at this index and higher. + int32 result_index = 3; + + // [Output-only] Indicates the type of endpointer event. + EndpointerType endpointer_type = 4; +} + +// A speech recognition result corresponding to a portion of the audio that is +// currently being processed. 
+// TODO(gshires): add a comment describing the various repeated interim and +// alternative results fields. +message StreamingRecognitionResult { + // [Output-only] May contain one or more recognition hypotheses (up to the + // maximum specified in `max_alternatives`). + repeated SpeechRecognitionAlternative alternatives = 1; + + // [Output-only] If `false`, this `StreamingRecognitionResult` represents an + // interim result that may change. If `true`, this is the final time the + // speech service will return this particular `StreamingRecognitionResult`; + // the recognizer will not return any further hypotheses for this portion of + // the transcript and corresponding audio. + bool is_final = 2; + + // [Output-only] An estimate of the probability that the recognizer will not + // change its guess about this interim result. Values range from 0.0 + // (completely unstable) to 1.0 (completely stable). Note that this is not the + // same as `confidence`, which estimates the probability that a recognition + // result is correct. + // This field is only provided for interim results (`is_final=false`). + // The default of 0.0 is a sentinel value indicating stability was not set. + float stability = 3; +} + +// A speech recognition result corresponding to a portion of the audio. +message SpeechRecognitionResult { + // [Output-only] May contain one or more recognition hypotheses (up to the + // maximum specified in `max_alternatives`). + repeated SpeechRecognitionAlternative alternatives = 1; +} + +// Alternative hypotheses (a.k.a. n-best list). +message SpeechRecognitionAlternative { + // [Output-only] Transcript text representing the words that the user spoke. + string transcript = 1; + + // [Output-only] The confidence estimate between 0.0 and 1.0. A higher number + // means the system is more confident that the recognition is correct. + // This field is typically provided only for the top hypothesis, and only for + // `is_final=true` results. 
+ // The default of 0.0 is a sentinel value indicating confidence was not set. + float confidence = 2; +} From 981260c6f2f77f24687b506c89fc689c33e1456d Mon Sep 17 00:00:00 2001 From: puneith Date: Sun, 3 Jul 2016 22:17:55 -0700 Subject: [PATCH 03/14] added long running operation --- .../grpc/demos/StreamingRecognizeClient.java | 54 ++++--- .../proto/google/longrunning/operations.proto | 144 ++++++++++++++++++ 2 files changed, 174 insertions(+), 24 deletions(-) create mode 100644 speech/grpc/src/main/proto/google/longrunning/operations.proto diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java index 16ccc7caa65..9805e99a2f0 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/StreamingRecognizeClient.java @@ -26,12 +26,12 @@ package com.google.cloud.speech.grpc.demos; import com.google.auth.oauth2.GoogleCredentials; -import com.google.cloud.speech.v1.AudioRequest; -import com.google.cloud.speech.v1.InitialRecognizeRequest; -import com.google.cloud.speech.v1.InitialRecognizeRequest.AudioEncoding; -import com.google.cloud.speech.v1.RecognizeRequest; -import com.google.cloud.speech.v1.RecognizeResponse; -import com.google.cloud.speech.v1.SpeechGrpc; +import com.google.cloud.speech.v1beta1.RecognitionConfig; +import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1beta1.SpeechGrpc; +import com.google.cloud.speech.v1beta1.StreamingRecognitionConfig; +import com.google.cloud.speech.v1beta1.StreamingRecognizeRequest; +import com.google.cloud.speech.v1beta1.StreamingRecognizeResponse; import com.google.protobuf.ByteString; import com.google.protobuf.TextFormat; @@ -63,7 +63,7 @@ /** * Client that sends streaming audio to Speech.Recognize and returns streaming 
transcript. */ -public class RecognizeClient { +public class StreamingRecognizeClient { private final String host; private final int port; @@ -71,7 +71,7 @@ public class RecognizeClient { private final int samplingRate; private static final Logger logger = - Logger.getLogger(RecognizeClient.class.getName()); + Logger.getLogger(StreamingRecognizeClient.class.getName()); private final ManagedChannel channel; @@ -83,7 +83,8 @@ public class RecognizeClient { /** * Construct client connecting to Cloud Speech server at {@code host:port}. */ - public RecognizeClient(String host, int port, String file, int samplingRate) throws IOException { + public StreamingRecognizeClient(String host, int port, String file, int samplingRate) + throws IOException { this.host = host; this.port = port; this.file = file; @@ -106,9 +107,10 @@ public void shutdown() throws InterruptedException { /** Send streaming recognize requests to server. */ public void recognize() throws InterruptedException, IOException { final CountDownLatch finishLatch = new CountDownLatch(1); - StreamObserver responseObserver = new StreamObserver() { + StreamObserver responseObserver = new + StreamObserver() { @Override - public void onNext(RecognizeResponse response) { + public void onNext(StreamingRecognizeResponse response) { logger.info("Received response: " + TextFormat.printToString(response)); } @@ -126,18 +128,25 @@ public void onCompleted() { } }; - StreamObserver requestObserver = stub.recognize(responseObserver); + StreamObserver + requestObserver = stub.streamingRecognize(responseObserver); try { - // Build and send a RecognizeRequest containing the parameters for processing the audio. - InitialRecognizeRequest initial = InitialRecognizeRequest.newBuilder() + // Build and send a StreamingRecognizeRequest containing the parameters for + // processing the audio. 
+ RecognitionConfig config = RecognitionConfig.newBuilder() .setEncoding(AudioEncoding.LINEAR16) .setSampleRate(samplingRate) + .build(); + StreamingRecognitionConfig streamingConfig = StreamingRecognitionConfig.newBuilder() + .setConfig(config) .setInterimResults(true) + .setSingleUtterance(true) .build(); - RecognizeRequest firstRequest = RecognizeRequest.newBuilder() - .setInitialRequest(initial) + + StreamingRecognizeRequest initial = StreamingRecognizeRequest.newBuilder() + .setStreamingConfig(streamingConfig) .build(); - requestObserver.onNext(firstRequest); + requestObserver.onNext(initial); // Open audio file. Read and send sequential buffers of audio as additional RecognizeRequests. FileInputStream in = new FileInputStream(new File(file)); @@ -147,11 +156,8 @@ public void onCompleted() { int totalBytes = 0; while ((bytesRead = in.read(buffer)) != -1) { totalBytes += bytesRead; - AudioRequest audio = AudioRequest.newBuilder() - .setContent(ByteString.copyFrom(buffer, 0, bytesRead)) - .build(); - RecognizeRequest request = RecognizeRequest.newBuilder() - .setAudioRequest(audio) + StreamingRecognizeRequest request = StreamingRecognizeRequest.newBuilder() + .setAudioContent(ByteString.copyFrom(buffer, 0, bytesRead)) .build(); requestObserver.onNext(request); // To simulate real-time audio, sleep after sending each audio buffer. 
@@ -236,8 +242,8 @@ public static void main(String[] args) throws Exception { System.exit(1); } - RecognizeClient client = - new RecognizeClient(host, port, audioFile, sampling); + StreamingRecognizeClient client = + new StreamingRecognizeClient(host, port, audioFile, sampling); try { client.recognize(); } finally { diff --git a/speech/grpc/src/main/proto/google/longrunning/operations.proto b/speech/grpc/src/main/proto/google/longrunning/operations.proto new file mode 100644 index 00000000000..a358d0a3878 --- /dev/null +++ b/speech/grpc/src/main/proto/google/longrunning/operations.proto @@ -0,0 +1,144 @@ +// Copyright (c) 2015, Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.longrunning; + +import "google/api/annotations.proto"; +import "google/protobuf/any.proto"; +import "google/protobuf/empty.proto"; +import "google/rpc/status.proto"; + +option java_multiple_files = true; +option java_outer_classname = "OperationsProto"; +option java_package = "com.google.longrunning"; + + +// Manages long-running operations with an API service. 
+// +// When an API method normally takes long time to complete, it can be designed +// to return [Operation][google.longrunning.Operation] to the client, and the client can use this +// interface to receive the real response asynchronously by polling the +// operation resource, or using `google.watcher.v1.Watcher` interface to watch +// the response, or pass the operation resource to another API (such as Google +// Cloud Pub/Sub API) to receive the response. Any API service that returns +// long-running operations should implement the `Operations` interface so +// developers can have a consistent client experience. +service Operations { + // Gets the latest state of a long-running operation. Clients may use this + // method to poll the operation result at intervals as recommended by the API + // service. + rpc GetOperation(GetOperationRequest) returns (Operation) { + option (google.api.http) = { get: "/v1/{name=operations/**}" }; + } + + // Lists operations that match the specified filter in the request. If the + // server doesn't support this method, it returns + // `google.rpc.Code.UNIMPLEMENTED`. + rpc ListOperations(ListOperationsRequest) returns (ListOperationsResponse) { + option (google.api.http) = { get: "/v1/{name=operations}" }; + } + + // Starts asynchronous cancellation on a long-running operation. The server + // makes a best effort to cancel the operation, but success is not + // guaranteed. If the server doesn't support this method, it returns + // `google.rpc.Code.UNIMPLEMENTED`. Clients may use + // [Operations.GetOperation] or other methods to check whether the + // cancellation succeeded or the operation completed despite cancellation. + rpc CancelOperation(CancelOperationRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { post: "/v1/{name=operations/**}:cancel" body: "*" }; + } + + // Deletes a long-running operation. It indicates the client is no longer + // interested in the operation result. 
It does not cancel the operation. + rpc DeleteOperation(DeleteOperationRequest) returns (google.protobuf.Empty) { + option (google.api.http) = { delete: "/v1/{name=operations/**}" }; + } +} + +// This resource represents a long-running operation that is the result of a +// network API call. +message Operation { + // The name of the operation resource, which is only unique within the same + // service that originally returns it. + string name = 1; + + // Some service-specific metadata associated with the operation. It typically + // contains progress information and common metadata such as create time. + // Some services may not provide such metadata. Any method that returns a + // long-running operation should document the metadata type, if any. + google.protobuf.Any metadata = 2; + + // If the value is false, it means the operation is still in progress. + // If true, the operation is completed and the `result` is available. + bool done = 3; + + oneof result { + // The error result of the operation in case of failure. + google.rpc.Status error = 4; + + // The normal response of the operation in case of success. If the original + // method returns no data on success, such as `Delete`, the response will be + // `google.protobuf.Empty`. If the original method is standard + // `Get`/`Create`/`Update`, the response should be the resource. For other + // methods, the response should have the type `XxxResponse`, where `Xxx` + // is the original method name. For example, if the original method name + // is `TakeSnapshot()`, the inferred response type will be + // `TakeSnapshotResponse`. + google.protobuf.Any response = 5; + } +} + +// The request message for [Operations.GetOperation][google.longrunning.Operations.GetOperation]. +message GetOperationRequest { + // The name of the operation resource. + string name = 1; +} + +// The request message for [Operations.ListOperations][google.longrunning.Operations.ListOperations]. 
+message ListOperationsRequest { + // The name of the operation collection. + string name = 4; + + // The standard List filter. + string filter = 1; + + // The standard List page size. + int32 page_size = 2; + + // The standard List page token. + string page_token = 3; +} + +// The response message for [Operations.ListOperations][google.longrunning.Operations.ListOperations]. +message ListOperationsResponse { + // A list of operations that match the specified filter in the request. + repeated Operation operations = 1; + + // The standard List next-page token. + string next_page_token = 2; +} + +// The request message for [Operations.CancelOperation][google.longrunning.Operations.CancelOperation]. +message CancelOperationRequest { + // The name of the operation resource to be cancelled. + string name = 1; +} + +// The request message for [Operations.DeleteOperation][google.longrunning.Operations.DeleteOperation]. +message DeleteOperationRequest { + // The name of the operation resource to be deleted. 
+ string name = 1; +} From 9ca1c42605edbcb8a5ff8631570533ffd8d6b425 Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 02:26:21 -0700 Subject: [PATCH 04/14] made changes to the script --- speech/grpc/bin/speech-sample-streaming.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/speech/grpc/bin/speech-sample-streaming.sh b/speech/grpc/bin/speech-sample-streaming.sh index 351c17e6cd5..b536273b5fe 100755 --- a/speech/grpc/bin/speech-sample-streaming.sh +++ b/speech/grpc/bin/speech-sample-streaming.sh @@ -15,4 +15,4 @@ SRC_DIR=$(cd "$(dirname "$0")/.."; pwd) java -cp ${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar \ - com.google.cloud.speech.grpc.demos.RecognizeClient "$@" + com.google.cloud.speech.grpc.demos.StreamingRecognizeClient "$@" From 739d6cae038e0d06692824b656c331845517c1ea Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 02:31:10 -0700 Subject: [PATCH 05/14] renamed to sync recognize client --- ...{NonStreamingRecognizeClient.java => SyncRecognizeClient.java} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/{NonStreamingRecognizeClient.java => SyncRecognizeClient.java} (100%) diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java similarity index 100% rename from speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/NonStreamingRecognizeClient.java rename to speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java From 8157b253523cc7c28328d2c19da50cff6179f5ed Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 02:57:50 -0700 Subject: [PATCH 06/14] added sync changes and test --- ...tory.java => RecognitionAudioFactory.java} | 20 +++---- .../grpc/demos/SyncRecognizeClient.java | 53 ++++++++++--------- ...oryTest.java => RecognitionAudioTest.java} | 10 ++-- 
3 files changed, 42 insertions(+), 41 deletions(-) rename speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/{AudioRequestFactory.java => RecognitionAudioFactory.java} (72%) rename speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/{AudioRequestFactoryTest.java => RecognitionAudioTest.java} (83%) diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java similarity index 72% rename from speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java rename to speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java index fce35b3099c..3917441ff84 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/AudioRequestFactory.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java @@ -17,7 +17,7 @@ package com.google.cloud.speech.grpc.demos; -import com.google.cloud.speech.v1.AudioRequest; +import com.google.cloud.speech.v1beta1.RecognitionAudio; import com.google.protobuf.ByteString; import java.io.IOException; @@ -27,10 +27,10 @@ import java.nio.file.Paths; /* - * AudioRequestFactory takes a URI as an input and creates an AudioRequest. The URI can point to a + * RecognitionAudioFactory takes a URI as an input and creates a RecognitionAudio. The URI can point to a * local file or a file on Google Cloud Storage. */ -public class AudioRequestFactory { +public class RecognitionAudioFactory { private static final String FILE_SCHEME = "file"; private static final String GS_SCHEME = "gs"; @@ -39,27 +39,27 @@ public class AudioRequestFactory { * Takes an input URI of form $scheme:// and converts to audio request. 
* * @param uri input uri - * @return AudioRequest audio request + * @return RecognitionAudio recognition audio */ - public static AudioRequest createRequest(URI uri) + public static RecognitionAudio createRecognitionAudio(URI uri) throws IOException { if (uri.getScheme() == null || uri.getScheme().equals(FILE_SCHEME)) { Path path = Paths.get(uri); return audioFromBytes(Files.readAllBytes(path)); } else if (uri.getScheme().equals(GS_SCHEME)) { - return AudioRequest.newBuilder().setUri(uri.toString()).build(); + return RecognitionAudio.newBuilder().setUri(uri.toString()).build(); } throw new RuntimeException("scheme not supported " + uri.getScheme()); } /** - * Convert bytes to AudioRequest. + * Convert bytes to RecognitionAudio. * * @param bytes input bytes - * @return AudioRequest audio request + * @return RecognitionAudio recognition audio */ - private static AudioRequest audioFromBytes(byte[] bytes) { - return AudioRequest.newBuilder() + private static RecognitionAudio audioFromBytes(byte[] bytes) { + return RecognitionAudio.newBuilder() .setContent(ByteString.copyFrom(bytes)) .build(); } diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java index 3952c7f0e5a..83b0ef3b186 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java @@ -14,7 +14,7 @@ * limitations under the License. */ -// Client that sends audio to Speech.NonStreamingRecognize via gRPC and returns transcription. +// Client that sends audio to Speech.SyncRecognize via gRPC and returns transcription. 
// // Uses a service account for OAuth2 authentication, which you may obtain at // https://console.developers.google.com @@ -26,12 +26,12 @@ package com.google.cloud.speech.grpc.demos; import com.google.auth.oauth2.GoogleCredentials; -import com.google.cloud.speech.v1.AudioRequest; -import com.google.cloud.speech.v1.InitialRecognizeRequest; -import com.google.cloud.speech.v1.InitialRecognizeRequest.AudioEncoding; -import com.google.cloud.speech.v1.NonStreamingRecognizeResponse; -import com.google.cloud.speech.v1.RecognizeRequest; -import com.google.cloud.speech.v1.SpeechGrpc; +import com.google.cloud.speech.v1beta1.RecognitionAudio; +import com.google.cloud.speech.v1beta1.RecognitionConfig; +import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1beta1.SyncRecognizeRequest; +import com.google.cloud.speech.v1beta1.SyncRecognizeResponse; +import com.google.cloud.speech.v1beta1.SpeechGrpc; import com.google.protobuf.TextFormat; import io.grpc.ManagedChannel; @@ -57,12 +57,12 @@ import java.util.logging.Logger; /** - * Client that sends audio to Speech.NonStreamingRecognize and returns transcript. + * Client that sends audio to Speech.SyncRecognize and returns transcript. */ -public class NonStreamingRecognizeClient { +public class SyncRecognizeClient { private static final Logger logger = - Logger.getLogger(NonStreamingRecognizeClient.class.getName()); + Logger.getLogger(SyncRecognizeClient.class.getName()); private static final List OAUTH2_SCOPES = Arrays.asList("https://www.googleapis.com/auth/cloud-platform"); @@ -78,7 +78,7 @@ public class NonStreamingRecognizeClient { /** * Construct client connecting to Cloud Speech server at {@code host:port}. 
*/ - public NonStreamingRecognizeClient(String host, int port, URI input, int samplingRate) + public SyncRecognizeClient(String host, int port, URI input, int samplingRate) throws IOException { this.host = host; this.port = port; @@ -95,8 +95,8 @@ public NonStreamingRecognizeClient(String host, int port, URI input, int samplin logger.info("Created blockingStub for " + host + ":" + port); } - private AudioRequest createAudioRequest() throws IOException { - return AudioRequestFactory.createRequest(this.input); + private RecognitionAudio createRecognitionAudio() throws IOException { + return RecognitionAudioFactory.createRecognitionAudio(this.input); } public void shutdown() throws InterruptedException { @@ -105,25 +105,26 @@ public void shutdown() throws InterruptedException { /** Send a non-streaming-recognize request to server. */ public void recognize() { - AudioRequest audio; + RecognitionAudio audio; try { - audio = createAudioRequest(); + audio = createRecognitionAudio(); } catch (IOException e) { logger.log(Level.WARNING, "Failed to read audio uri input: " + input); return; } logger.info("Sending " + audio.getContent().size() + " bytes from audio uri input: " + input); - InitialRecognizeRequest initial = InitialRecognizeRequest.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setSampleRate(samplingRate) + RecognitionConfig config = RecognitionConfig.newBuilder() + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRate(samplingRate) + .build(); + SyncRecognizeRequest request = SyncRecognizeRequest.newBuilder() + .setConfig(config) + .setAudio(audio) .build(); - RecognizeRequest request = RecognizeRequest.newBuilder() - .setInitialRequest(initial) - .setAudioRequest(audio) - .build(); - NonStreamingRecognizeResponse response; + + SyncRecognizeResponse response; try { - response = blockingStub.nonStreamingRecognize(request); + response = blockingStub.syncRecognize(request); } catch (StatusRuntimeException e) { logger.log(Level.WARNING, "RPC failed: {0}", 
e.getStatus()); return; @@ -196,8 +197,8 @@ public static void main(String[] args) throws Exception { System.exit(1); } - NonStreamingRecognizeClient client = - new NonStreamingRecognizeClient(host, port, URI.create(audioFile), sampling); + SyncRecognizeClient client = + new SyncRecognizeClient(host, port, URI.create(audioFile), sampling); try { client.recognize(); } finally { diff --git a/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java b/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioTest.java similarity index 83% rename from speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java rename to speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioTest.java index 8e5017d53f0..928482d105e 100644 --- a/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/AudioRequestFactoryTest.java +++ b/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioTest.java @@ -18,7 +18,7 @@ import static org.junit.Assert.assertEquals; -import com.google.cloud.speech.v1.AudioRequest; +import com.google.cloud.speech.v1beta1.RecognitionAudio; import org.junit.Test; import org.junit.runner.RunWith; @@ -29,15 +29,15 @@ import java.net.URI; /** - * Unit tests for {@link AudioRequestFactory}. + * Unit tests for {@link RecognitionAudioFactory}. 
*/ @RunWith(JUnit4.class) -public class AudioRequestFactoryTest { +public class RecognitionAudioFactoryTest { @Test public void verifyBytesInSizeFromLocalFile() throws IOException { URI uri = new File("resources/audio.raw").toURI(); - AudioRequest audio = AudioRequestFactory.createRequest(uri); + RecognitionAudio audio = RecognitionAudioFactory.createRecognitionAudio(uri); int numBytes = audio.getContent().toByteArray().length; @@ -50,7 +50,7 @@ public void verifyBytesInSizeFromGoogleStorageFile() throws IOException { String audioUri = "gs://cloud-samples-tests/speech/audio.raw"; URI uri = URI.create(audioUri); - AudioRequest audio = AudioRequestFactory.createRequest(uri); + RecognitionAudio audio = RecognitionAudioFactory.createRecognitionAudio(uri); int numBytes = audio.getContent().toByteArray().length; From abc7a68846f54b1614590e375ef8531e5a0deb60 Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 02:58:57 -0700 Subject: [PATCH 07/14] added sync changes and test --- ...RecognitionAudioTest.java => RecognitionAudioFactoryTest.java} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/{RecognitionAudioTest.java => RecognitionAudioFactoryTest.java} (100%) diff --git a/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioTest.java b/speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactoryTest.java similarity index 100% rename from speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioTest.java rename to speech/grpc/src/test/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactoryTest.java From 38ddceb4623c0ef5e4a0a0163d4a1c9a8a2671a2 Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 03:07:29 -0700 Subject: [PATCH 08/14] made changes to script and tests --- speech/grpc/bin/speech-sample-sync.sh | 18 ++++++++++++++++++ .../grpc/demos/RecognitionAudioFactory.java | 4 ++-- 
.../speech/grpc/demos/SyncRecognizeClient.java | 8 ++++---- 3 files changed, 24 insertions(+), 6 deletions(-) create mode 100755 speech/grpc/bin/speech-sample-sync.sh diff --git a/speech/grpc/bin/speech-sample-sync.sh b/speech/grpc/bin/speech-sample-sync.sh new file mode 100755 index 00000000000..37cb46d7c36 --- /dev/null +++ b/speech/grpc/bin/speech-sample-sync.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SRC_DIR=$(cd "$(dirname "$0")/.."; pwd) +java -cp ${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar \ + com.google.cloud.speech.grpc.demos.SyncRecognizeClient "$@" diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java index 3917441ff84..7023c887d1f 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/RecognitionAudioFactory.java @@ -27,8 +27,8 @@ import java.nio.file.Paths; /* - * RecognitionAudioFactory takes a URI as an input and creates a RecognitionAudio. The URI can point to a - * local file or a file on Google Cloud Storage. + * RecognitionAudioFactory takes a URI as an input and creates a RecognitionAudio. + * The URI can point to a local file or a file on Google Cloud Storage. 
*/ public class RecognitionAudioFactory { diff --git a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java index 83b0ef3b186..a20a19afa35 100644 --- a/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java +++ b/speech/grpc/src/main/java/com/google/cloud/speech/grpc/demos/SyncRecognizeClient.java @@ -29,9 +29,9 @@ import com.google.cloud.speech.v1beta1.RecognitionAudio; import com.google.cloud.speech.v1beta1.RecognitionConfig; import com.google.cloud.speech.v1beta1.RecognitionConfig.AudioEncoding; +import com.google.cloud.speech.v1beta1.SpeechGrpc; import com.google.cloud.speech.v1beta1.SyncRecognizeRequest; import com.google.cloud.speech.v1beta1.SyncRecognizeResponse; -import com.google.cloud.speech.v1beta1.SpeechGrpc; import com.google.protobuf.TextFormat; import io.grpc.ManagedChannel; @@ -114,9 +114,9 @@ public void recognize() { } logger.info("Sending " + audio.getContent().size() + " bytes from audio uri input: " + input); RecognitionConfig config = RecognitionConfig.newBuilder() - .setEncoding(AudioEncoding.LINEAR16) - .setSampleRate(samplingRate) - .build(); + .setEncoding(AudioEncoding.LINEAR16) + .setSampleRate(samplingRate) + .build(); SyncRecognizeRequest request = SyncRecognizeRequest.newBuilder() .setConfig(config) .setAudio(audio) From 9a88c61ea82e7c933e2bfae476f28b7057cbf4f4 Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 03:07:47 -0700 Subject: [PATCH 09/14] deleted --- speech/grpc/bin/speech-sample-nonstreaming.sh | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100755 speech/grpc/bin/speech-sample-nonstreaming.sh diff --git a/speech/grpc/bin/speech-sample-nonstreaming.sh b/speech/grpc/bin/speech-sample-nonstreaming.sh deleted file mode 100755 index 8ee85a008f5..00000000000 --- a/speech/grpc/bin/speech-sample-nonstreaming.sh +++ /dev/null @@ -1,18 +0,0 @@ 
-#!/bin/bash -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -SRC_DIR=$(cd "$(dirname "$0")/.."; pwd) -java -cp ${SRC_DIR}/target/grpc-sample-1.0-jar-with-dependencies.jar \ - com.google.cloud.speech.grpc.demos.NonStreamingRecognizeClient "$@" From a7da37b409e011e4acc73d9dd02f6e06f8d0241a Mon Sep 17 00:00:00 2001 From: puneith Date: Mon, 4 Jul 2016 03:21:15 -0700 Subject: [PATCH 10/14] made sync work --- speech/grpc/README.md | 8 ++++---- .../cloud/speech/grpc/demos/RecognitionAudioFactory.java | 7 ++++++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/speech/grpc/README.md b/speech/grpc/README.md index 68c66975fa4..18351fc3b7d 100644 --- a/speech/grpc/README.md +++ b/speech/grpc/README.md @@ -78,15 +78,15 @@ or [homebrew](http://brew.sh/)) to convert audio files to raw format. You can run the batch client like this: ```sh -$ bin/speech-sample-nonstreaming.sh --host=speech.googleapis.com --port=443 \ ---file=