From 0873c66a1aafee343fa102045773e8ba650bbc5f Mon Sep 17 00:00:00 2001 From: Franklin Nunez <69214580+b-loved-dreamer@users.noreply.github.com> Date: Thu, 2 Dec 2021 11:31:54 -0800 Subject: [PATCH] docs(samples): refactors the export-to-gcs sample (#737) * docs(samples): refactors the export-to-gcs sample * docs(samples): refactors the export-to-gcs sample * docs(samples): fixed lint * docs(samples): updated test file * docs(samples): added imports * docs(samples): added dependency to global pom * docs(samples): added dependency to global pom * docs(samples): added dependency to global pom * docs(samples): updated pom * docs(samples): fixed null error * docs(samples): refactored test --- speech/.bash_profile | 0 speech/snippets/pom.xml | 5 ++ .../example/speech/ExportToStorageBeta.java | 55 +++++++++++++++++-- .../speech/ExportToStorageBetaTest.java | 25 +++++++-- 4 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 speech/.bash_profile diff --git a/speech/.bash_profile b/speech/.bash_profile new file mode 100644 index 00000000000..e69de29bb2d diff --git a/speech/snippets/pom.xml b/speech/snippets/pom.xml index eb16c760eae..8af552dfdf8 100644 --- a/speech/snippets/pom.xml +++ b/speech/snippets/pom.xml @@ -38,6 +38,11 @@ + + org.json + json + 20210307 + com.google.cloud google-cloud-speech diff --git a/speech/snippets/src/main/java/com/example/speech/ExportToStorageBeta.java b/speech/snippets/src/main/java/com/example/speech/ExportToStorageBeta.java index 70e1c07af7c..6f10a24a5d4 100644 --- a/speech/snippets/src/main/java/com/example/speech/ExportToStorageBeta.java +++ b/speech/snippets/src/main/java/com/example/speech/ExportToStorageBeta.java @@ -27,19 +27,34 @@ import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding; import com.google.cloud.speech.v1p1beta1.SpeechClient; import com.google.cloud.speech.v1p1beta1.TranscriptOutputConfig; +import com.google.cloud.storage.Blob; +import com.google.cloud.storage.BlobId; +import
com.google.cloud.storage.Storage; +import com.google.cloud.storage.StorageOptions; +import com.google.protobuf.util.JsonFormat; import java.io.IOException; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; +import org.json.JSONObject; public class ExportToStorageBeta { public static void main(String[] args) throws Exception { String inputUri = "gs://YOUR_BUCKET_ID/path/to/your/audio_file.wav"; String outputStorageUri = "gs://YOUR_BUCKET_ID/output_dir_prefix/"; + String objectName = "YOUR_OBJECT_NAME"; + String bucketName = "YOUR_BUCKET_ID"; String encoding = "LINEAR16"; // encoding of the audio int sampleRateHertz = 8000; String languageCode = "en-US"; // language code BCP-47_LANGUAGE_CODE_OF_AUDIO - exportToStorage(inputUri, outputStorageUri, encoding, sampleRateHertz, languageCode); + exportToStorage( + inputUri, + outputStorageUri, + encoding, + sampleRateHertz, + languageCode, + bucketName, + objectName); } // Exports the recognized output to specified GCS destination. @@ -48,7 +63,9 @@ public static void exportToStorage( String outputStorageUri, String encoding, int sampleRateHertz, - String languageCode) + String languageCode, + String bucketName, + String objectName) throws IOException, ExecutionException, InterruptedException { // Initialize client that will be used to send requests. This client only needs to be created // once, and can be reused for multiple requests. 
After completing all of your requests, call @@ -58,6 +75,9 @@ public static void exportToStorage( AudioEncoding audioEncoding = AudioEncoding.valueOf(encoding); + // Instantiates a client + Storage storage = StorageOptions.getDefaultInstance().getService(); + // Pass in the URI of the Cloud Storage bucket to hold the transcription TranscriptOutputConfig outputConfig = TranscriptOutputConfig.newBuilder().setGcsUri(outputStorageUri).build(); @@ -80,12 +100,39 @@ public static void exportToStorage( speechClient.longRunningRecognizeAsync(request); System.out.println("Waiting for operation to complete..."); - LongRunningRecognizeResponse response = future.get(); + future.get(); + + // Get blob given bucket and object name + Blob blob = storage.get(BlobId.of(bucketName, objectName)); + + // Extract byte contents from blob + byte[] bytes = blob.getContent(); + + // Get decoded representation + String decoded = new String(bytes, "UTF-8"); + + // Create json object + JSONObject jsonObject = new JSONObject(decoded); + + // Get json string + String json = jsonObject.toString(); + + // Specify the protobuf message type + LongRunningRecognizeResponse.Builder builder = LongRunningRecognizeResponse.newBuilder(); + + // Construct a parser + JsonFormat.Parser parser = JsonFormat.parser().ignoringUnknownFields(); + + // Parses from JSON into a protobuf message.
+ parser.merge(json, builder); + + // Get the converted values + LongRunningRecognizeResponse storageResponse = builder.build(); System.out.println("Results saved to specified output Cloud Storage bucket."); String output = - response.getResultsList().stream() + storageResponse.getResultsList().stream() .map(result -> String.valueOf(result.getAlternatives(0).getTranscript())) .collect(Collectors.joining("\n")); System.out.printf("Transcription: %s", output); diff --git a/speech/snippets/src/test/java/com/example/speech/ExportToStorageBetaTest.java b/speech/snippets/src/test/java/com/example/speech/ExportToStorageBetaTest.java index 64d2f5bc505..20a7f8cfc76 100644 --- a/speech/snippets/src/test/java/com/example/speech/ExportToStorageBetaTest.java +++ b/speech/snippets/src/test/java/com/example/speech/ExportToStorageBetaTest.java @@ -20,6 +20,7 @@ import com.google.api.gax.paging.Page; import com.google.cloud.storage.Blob; +import com.google.cloud.storage.BucketInfo; import com.google.cloud.storage.Storage; import com.google.cloud.storage.StorageOptions; import java.io.ByteArrayOutputStream; @@ -34,11 +35,15 @@ public class ExportToStorageBetaTest { private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT"); private static final String AUDIO_STORAGE_URI = "gs://cloud-samples-data/speech/commercial_mono.wav"; - private static final String PREFIX = "EXPORT_TEST_OUTPUTS"; + private static final String BUCKET_PREFIX = "EXPORT_TRANSCRIPT_OUTPUT"; + private static final String UNIQUE_ID = + UUID.randomUUID().toString().replace("-", "").substring(0, 8); + private static String BUCKET_NAME = String.format("speech-%s", UNIQUE_ID); private static final String OUTPUT_STORAGE_URI = - String.format("gs://%s/%s/%s/", PROJECT_ID, PREFIX, UUID.randomUUID()); + String.format("gs://%s/%s/", BUCKET_NAME, BUCKET_PREFIX); private static final String ENCODING = "LINEAR16"; private static final String LANGUAGE_CODE = "en-US"; + private static Storage storage = 
StorageOptions.getDefaultInstance().getService(); private static final int SAMPLE_RATE_HERTZ = 8000; @@ -46,13 +51,16 @@ public class ExportToStorageBetaTest { private PrintStream originalPrintStream; private PrintStream out; + private static void createBucket() { + storage.create(BucketInfo.of(BUCKET_NAME)); + } + private static void cleanUpBucket() { - Storage storage = StorageOptions.getDefaultInstance().getService(); Page blobs = storage.list( PROJECT_ID, Storage.BlobListOption.currentDirectory(), - Storage.BlobListOption.prefix(PREFIX)); + Storage.BlobListOption.prefix(BUCKET_PREFIX)); deleteDirectory(storage, blobs); } @@ -73,6 +81,7 @@ private static void deleteDirectory(Storage storage, Page blobs) { @Before public void setUp() { + createBucket(); bout = new ByteArrayOutputStream(); out = new PrintStream(bout); originalPrintStream = System.out; @@ -89,7 +98,13 @@ public void tearDown() { @Test public void testExportToStorageBeta() throws Exception { ExportToStorageBeta.exportToStorage( - AUDIO_STORAGE_URI, OUTPUT_STORAGE_URI, ENCODING, SAMPLE_RATE_HERTZ, LANGUAGE_CODE); + AUDIO_STORAGE_URI, + OUTPUT_STORAGE_URI, + ENCODING, + SAMPLE_RATE_HERTZ, + LANGUAGE_CODE, + BUCKET_NAME, + BUCKET_PREFIX); String got = bout.toString(); assertThat(got).contains("Transcription:"); }