Skip to content

Commit

Permalink
chore(samples): adds model adaptation sample (#468)
Browse files Browse the repository at this point in the history
  • Loading branch information
b-loved-dreamer authored Mar 18, 2021
1 parent e5fb373 commit f9ef990
Show file tree
Hide file tree
Showing 2 changed files with 227 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

// [START speech_transcribe_with_model_adaptation_gcs_beta]
import com.google.api.gax.rpc.ApiException;
import com.google.cloud.speech.v1p1beta1.AdaptationClient;
import com.google.cloud.speech.v1p1beta1.CreateCustomClassRequest;
import com.google.cloud.speech.v1p1beta1.CreatePhraseSetRequest;
import com.google.cloud.speech.v1p1beta1.CustomClass;
import com.google.cloud.speech.v1p1beta1.CustomClass.ClassItem;
import com.google.cloud.speech.v1p1beta1.LocationName;
import com.google.cloud.speech.v1p1beta1.PhraseSet;
import com.google.cloud.speech.v1p1beta1.PhraseSet.Phrase;
import com.google.cloud.speech.v1p1beta1.RecognitionAudio;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig;
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
import com.google.cloud.speech.v1p1beta1.RecognizeResponse;
import com.google.cloud.speech.v1p1beta1.SpeechAdaptation;
import com.google.cloud.speech.v1p1beta1.SpeechClient;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionAlternative;
import com.google.cloud.speech.v1p1beta1.SpeechRecognitionResult;
import java.util.List;

/**
 * Sample showing how to create a custom class and a phrase set with the Adaptation API and then
 * use both as speech adaptation hints in a synchronous recognition request.
 */
public class SpeechModelAdaptationBeta {

  public static void main(String[] args) throws Exception {
    String uriPath = "gs://cloud-samples-tests/speech/brooklyn.flac";
    String projectId = "YOUR_PROJECT_ID";
    // Region e.g. us-west1
    String location = "LOCATION_REGION";
    // This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
    String customClassId = "YOUR-CUSTOM-CLASS-ID";
    // This value should be 4-63 characters, and valid characters are /[a-z][0-9]-/.
    String phraseSetId = "YOUR-PHRASE-SET-ID";
    transcribeWithModelAdaptation(projectId, location, uriPath, customClassId, phraseSetId);
  }

  /**
   * Transcribes an audio file stored in Cloud Storage using model adaptation.
   *
   * <p>Creates a custom class and a phrase set under the given project and location, attaches
   * both to the recognition config, and prints the first alternative of every result to standard
   * output. An {@link ApiException} raised by either client is caught and reported on standard
   * output instead of being propagated.
   *
   * @param projectId your project id
   * @param location the region
   * @param gcsUri the path to the audio file, e.g. gs://[BUCKET]/[FILE]
   * @param customClassId the id to assign to the custom class that is created
   * @param phraseSetId the id to assign to the phrase set that is created
   * @throws Exception if a client cannot be created or closed
   */
  public static void transcribeWithModelAdaptation(
      String projectId, String location, String gcsUri, String customClassId, String phraseSetId)
      throws Exception {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    try (AdaptationClient adaptationClient = AdaptationClient.create()) {

      // Create `PhraseSet` and `CustomClasses` to create custom lists of similar
      // items that are likely to occur in your input data.

      // The parent resource where the custom class and phrase set will be created.
      LocationName parent = LocationName.of(projectId, location);

      // Create the custom class
      CreateCustomClassRequest classRequest =
          CreateCustomClassRequest.newBuilder()
              .setParent(parent.toString())
              .setCustomClassId(customClassId)
              .setCustomClass(
                  CustomClass.newBuilder()
                      .addItems(ClassItem.newBuilder().setValue("sushido"))
                      .addItems(ClassItem.newBuilder().setValue("altura"))
                      .addItems(ClassItem.newBuilder().setValue("taneda"))
                      .build())
              .build();
      CustomClass classResponse = adaptationClient.createCustomClass(classRequest);

      // Create the phrase set. The custom class is referenced with the `${...}` placeholder
      // syntax so that it is treated as a class reference rather than as a literal word, and the
      // phrase deliberately carries no trailing line separator.
      CreatePhraseSetRequest phraseRequest =
          CreatePhraseSetRequest.newBuilder()
              .setParent(parent.toString())
              .setPhraseSetId(phraseSetId)
              .setPhraseSet(
                  PhraseSet.newBuilder()
                      .setBoost(10)
                      .addPhrases(
                          Phrase.newBuilder()
                              .setValue(
                                  String.format("Visit restaurants like ${%s}", customClassId)))
                      .build())
              .build();
      PhraseSet phraseResponse = adaptationClient.createPhraseSet(phraseRequest);

      // Next section shows how to use the newly created custom class and phrase set
      // to send a transcription request with speech adaptation

      // Speech adaptation configuration
      SpeechAdaptation speechAdaptation =
          SpeechAdaptation.newBuilder()
              .addCustomClasses(classResponse)
              .addPhraseSets(phraseResponse)
              .build();

      // Initialize client that will be used to send requests. This client only needs to be created
      // once, and can be reused for multiple requests. After completing all of your requests, call
      // the "close" method on the client to safely clean up any remaining background resources.
      try (SpeechClient speechClient = SpeechClient.create()) {

        // Builds the sync recognize request
        RecognitionConfig config =
            RecognitionConfig.newBuilder()
                .setEncoding(AudioEncoding.FLAC)
                .setSampleRateHertz(16000)
                .setLanguageCode("en-US")
                .setAdaptation(speechAdaptation) // Set the adaptation object
                .build();
        RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();

        // Performs speech recognition on the audio file.
        RecognizeResponse response = speechClient.recognize(config, audio);
        List<SpeechRecognitionResult> results = response.getResultsList();

        for (SpeechRecognitionResult result : results) {
          // Skip results without alternatives instead of failing on get(0).
          if (result.getAlternativesCount() == 0) {
            continue;
          }
          // There can be several alternative transcripts for a given chunk of speech. Just use the
          // first (most likely) one here.
          SpeechRecognitionAlternative alternative = result.getAlternativesList().get(0);
          System.out.printf("Adapted Transcription: %s%n", alternative.getTranscript());
        }
      }
    } catch (ApiException e) {
      System.out.println("Client Interaction Error: \n" + e.toString());
    }
  }
}
// [END speech_transcribe_with_model_adaptation_gcs_beta]
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.speech;

import static com.google.common.truth.Truth.assertThat;

import com.google.cloud.speech.v1p1beta1.AdaptationClient;
import com.google.cloud.speech.v1p1beta1.CustomClassName;
import com.google.cloud.speech.v1p1beta1.DeleteCustomClassRequest;
import com.google.cloud.speech.v1p1beta1.DeletePhraseSetRequest;
import com.google.cloud.speech.v1p1beta1.PhraseSetName;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.UUID;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Integration test for {@link SpeechModelAdaptationBeta}. Captures standard output while the
 * sample runs and deletes the custom class and phrase set the sample created afterwards.
 */
public class SpeechModelAdaptationBetaTest {
  // Random suffix so that the resource ids used by this run differ from those of other runs.
  private static final String UNIQUE_ID = UUID.randomUUID().toString();
  private static final String AUDIO_FILE = "gs://cloud-samples-tests/speech/brooklyn.flac";
  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
  private static final String LOCATION = "us-west1";
  private static final String CUSTOM_CLASS_ID = "customClassId" + UNIQUE_ID;
  private static final String PHRASE_SET_ID = "phraseSetId" + UNIQUE_ID;
  private ByteArrayOutputStream bout;
  private PrintStream stdout;
  private PrintStream out;

  /** Redirects {@code System.out} into an in-memory buffer so the test can inspect it. */
  @Before
  public void setUp() {
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    stdout = System.out;
    System.setOut(out);
  }

  /**
   * Restores {@code System.out} and deletes the custom class and the phrase set used by the test.
   *
   * @throws Exception if the client cannot be created or a delete call fails
   */
  @After
  public void tearDown() throws Exception {
    System.setOut(stdout);
    try (AdaptationClient adaptationClient = AdaptationClient.create()) {
      try {
        // clean up the custom class
        DeleteCustomClassRequest customClassDeleteRequest =
            DeleteCustomClassRequest.newBuilder()
                .setName(CustomClassName.of(PROJECT_ID, LOCATION, CUSTOM_CLASS_ID).toString())
                .build();
        adaptationClient.deleteCustomClass(customClassDeleteRequest);
      } finally {
        // Always attempt to delete the phrase set, even when deleting the custom class failed,
        // so that a single failure does not leak the other resource.
        DeletePhraseSetRequest phraseSetDeleteRequest =
            DeletePhraseSetRequest.newBuilder()
                .setName(PhraseSetName.of(PROJECT_ID, LOCATION, PHRASE_SET_ID).toString())
                .build();
        adaptationClient.deletePhraseSet(phraseSetDeleteRequest);
      }
    }
  }

  /** Runs the sample and checks that an adapted transcription line was printed. */
  @Test
  public void testSpeechModelAdaptationBeta() throws Exception {
    SpeechModelAdaptationBeta.transcribeWithModelAdaptation(
        PROJECT_ID, LOCATION, AUDIO_FILE, CUSTOM_CLASS_ID, PHRASE_SET_ID);
    String got = bout.toString();
    assertThat(got).contains("Adapted Transcription:");
  }
}

0 comments on commit f9ef990

Please sign in to comment.