From f9ef990b2fa31b2f798456bbc488c91637b4ff09 Mon Sep 17 00:00:00 2001 From: Franklin Nunez <69214580+b-loved-dreamer@users.noreply.github.com> Date: Thu, 18 Mar 2021 10:14:12 -0700 Subject: [PATCH] chore(samples): adds model adaptation sample (#468) --- .../speech/SpeechModelAdaptationBeta.java | 148 ++++++++++++++++++ .../speech/SpeechModelAdaptationBetaTest.java | 79 ++++++++++ 2 files changed, 227 insertions(+) create mode 100644 speech/snippets/src/main/java/com/example/speech/SpeechModelAdaptationBeta.java create mode 100644 speech/snippets/src/test/java/com/example/speech/SpeechModelAdaptationBetaTest.java diff --git a/speech/snippets/src/main/java/com/example/speech/SpeechModelAdaptationBeta.java b/speech/snippets/src/main/java/com/example/speech/SpeechModelAdaptationBeta.java new file mode 100644 index 00000000000..718f1796142 --- /dev/null +++ b/speech/snippets/src/main/java/com/example/speech/SpeechModelAdaptationBeta.java @@ -0,0 +1,148 @@ +/* + * Copyright 2021 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_with_model_adaptation_gcs_beta]
+import com.google.api.gax.rpc.ApiException;
+import com.google.cloud.speech.v1p1beta1.AdaptationClient;
+import com.google.cloud.speech.v1p1beta1.CreateCustomClassRequest;
+import com.google.cloud.speech.v1p1beta1.CreatePhraseSetRequest;
+import com.google.cloud.speech.v1p1beta1.CustomClass;
+import com.google.cloud.speech.v1p1beta1.CustomClass.ClassItem;
+import com.google.cloud.speech.v1p1beta1.LocationName;
+import com.google.cloud.speech.v1p1beta1.PhraseSet;
+import com.google.cloud.speech.v1p1beta1.PhraseSet.Phrase;
+import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
+import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
+import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
+import com.google.cloud.speech.v1p1beta1.SpeechAdaptation;
+import com.google.cloud.speech.v1p1beta1.SpeechClient;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
+import java.util.List;
+
+/** Sample: creates a custom class and phrase set, then transcribes audio with them applied. */
+public class SpeechModelAdaptationBeta {
+  public static void main(String[] args) throws Exception {
+    String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
+    String projectId = "YOUR_PROJECT_ID";
+    // Region e.g. us-west1
+    String location = "LOCATION_REGION";
+    // This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
+    String customClassId = "YOUR-CUSTOM-CLASS-ID";
+    // This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
+    String phraseSetId = "YOUR-PHRASE-SET-ID";
+    transcribeWithModelAdaptation(projectId, location, uriPath, customClassId, phraseSetId);
+  }
+
+  /**
+   * Transcribes an audio file using a newly created custom class and phrase set.
+   *
+   * @param projectId your project id
+   * @param location the region in which the custom class and phrase set are created
+   * @param gcsUri Cloud Storage URI of the audio file, e.g. gs://[BUCKET]/[FILE]
+   * @param customClassId ID for the new custom class (4-63 characters from /[a-z][0-9]-/)
+   * @param phraseSetId ID for the new phrase set (4-63 characters from /[a-z][0-9]-/)
+   * @throws Exception on failures other than ApiException, which is caught and printed to stdout
+   */
+  public static void transcribeWithModelAdaptation(
+      String projectId, String location, String gcsUri, String customClassId, String phraseSetId)
+      throws Exception {
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests. After completing all of your requests, call
+    // the "close" method on the client to safely clean up any remaining background resources.
+    try (AdaptationClient adaptationClient = AdaptationClient.create()) {
+
+      // Create `PhraseSet` and `CustomClasses` to create custom lists of similar
+      // items that are likely to occur in your input data.
+
+      // The parent resource where the custom class and phrase set will be created.
+      LocationName parent = LocationName.of(projectId, location);
+
+      // Create the custom class
+      CreateCustomClassRequest classRequest =
+          CreateCustomClassRequest.newBuilder()
+              .setParent(parent.toString())
+              .setCustomClassId(customClassId)
+              .setCustomClass(
+                  CustomClass.newBuilder()
+                      .addItems(ClassItem.newBuilder().setValue("sushido"))
+                      .addItems(ClassItem.newBuilder().setValue("altura"))
+                      .addItems(ClassItem.newBuilder().setValue("taneda"))
+                      .build())
+              .build();
+      CustomClass classResponse = adaptationClient.createCustomClass(classRequest);
+
+      // Create the phrase set; ${id} inside a phrase refers to the custom class with that ID.
+      CreatePhraseSetRequest phraseRequest =
+          CreatePhraseSetRequest.newBuilder()
+              .setParent(parent.toString())
+              .setPhraseSetId(phraseSetId)
+              .setPhraseSet(
+                  PhraseSet.newBuilder()
+                      .setBoost(10)
+                      .addPhrases(
+                          Phrase.newBuilder()
+                              .setValue(
+                                  String.format("Visit restaurants like ${%s}", customClassId)))
+                      .build())
+              .build();
+      PhraseSet phraseResponse = adaptationClient.createPhraseSet(phraseRequest);
+
+      // Next section shows how to use the newly created custom class and phrase set
+      // to send a transcription request with speech adaptation
+
+      // Speech adaptation configuration
+      SpeechAdaptation speechAdaptation =
+          SpeechAdaptation.newBuilder()
+              .addCustomClasses(classResponse)
+              .addPhraseSets(phraseResponse)
+              .build();
+
+      // Initialize client that will be used to send requests. This client only needs to be created
+      // once, and can be reused for multiple requests. After completing all of your requests, call
+      // the "close" method on the client to safely clean up any remaining background resources.
+      try (SpeechClient speechClient = SpeechClient.create()) {
+
+        // Builds the sync recognize request
+        RecognitionConfig config =
+            RecognitionConfig.newBuilder()
+                .setEncoding(AudioEncoding.FLAC)
+                .setSampleRateHertz(16000)
+                .setLanguageCode("en-US")
+                .setAdaptation(speechAdaptation) // Set the adaptation object
+                .build();
+        RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+        // Performs speech recognition on the audio file.
+        RecognizeResponse response = speechClient.recognize(config, audio);
+        List<SpeechRecognitionResult> results = response.getResultsList();
+
+        for (SpeechRecognitionResult result : results) {
+          // There can be several alternative transcripts for a given chunk of speech. Just use the
+          // first (most likely) one here.
+          SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
+          System.out.printf("Adapted Transcription: %s%n", alternative.getTranscript());
+        }
+      }
+    } catch (ApiException e) {
+      System.out.println("Client Interaction Error: \n" + e.toString());
+    }
+  }
+}
+// [END speech_transcribe_with_model_adaptation_gcs_beta]
diff --git a/speech/snippets/src/test/java/com/example/speech/SpeechModelAdaptationBetaTest.java b/speech/snippets/src/test/java/com/example/speech/SpeechModelAdaptationBetaTest.java
new file mode 100644
index 00000000000..782e2879631
--- /dev/null
+++ b/speech/snippets/src/test/java/com/example/speech/SpeechModelAdaptationBetaTest.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2021 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.cloud.speech.v1p1beta1.AdaptationClient;
+import com.google.cloud.speech.v1p1beta1.CustomClassName;
+import com.google.cloud.speech.v1p1beta1.DeleteCustomClassRequest;
+import com.google.cloud.speech.v1p1beta1.DeletePhraseSetRequest;
+import com.google.cloud.speech.v1p1beta1.PhraseSetName;
+import java.io.ByteArrayOutputStream;
+import java.io.PrintStream;
+import java.util.UUID;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+/** Exercises {@link SpeechModelAdaptationBeta} with the project ID from GOOGLE_CLOUD_PROJECT. */
+public class SpeechModelAdaptationBetaTest {
+  private static final String UNIQUE_ID = UUID.randomUUID().toString();
+  private static final String AUDIO_FILE = "gs://cloud-samples-tests/speech/brooklyn.flac";
+  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
+  private static final String LOCATION = "us-west1";
+  private static final String CUSTOM_CLASS_ID = "customClassId" + UNIQUE_ID;
+  private static final String PHRASE_SET_ID = "phraseSetId" + UNIQUE_ID;
+  private ByteArrayOutputStream bout;
+  private PrintStream stdout;
+  private PrintStream out;
+
+  @Before
+  public void setUp() {
+    bout = new ByteArrayOutputStream();
+    out = new PrintStream(bout);
+    stdout = System.out; // remembered so tearDown can restore it
+    System.setOut(out); // capture what the sample prints
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    System.setOut(stdout);
+    // Delete the custom class and phrase set that the sample created under these IDs.
+    try (AdaptationClient adaptationClient = AdaptationClient.create()) {
+      try {
+        adaptationClient.deleteCustomClass(
+            DeleteCustomClassRequest.newBuilder()
+                .setName(CustomClassName.of(PROJECT_ID, LOCATION, CUSTOM_CLASS_ID).toString())
+                .build());
+      } finally {
+        // Runs even if the custom class deletion throws, so the phrase set is not left behind.
+        adaptationClient.deletePhraseSet(
+            DeletePhraseSetRequest.newBuilder()
+                .setName(PhraseSetName.of(PROJECT_ID, LOCATION, PHRASE_SET_ID).toString())
+                .build());
+      }
+    }
+  }
+
+  @Test
+  public void testSpeechModelAdaptationBeta() throws Exception {
+    SpeechModelAdaptationBeta.transcribeWithModelAdaptation(
+        PROJECT_ID, LOCATION, AUDIO_FILE, CUSTOM_CLASS_ID, PHRASE_SET_ID);
+    assertThat(bout.toString()).contains("Adapted Transcription:");
+  }
+}