Skip to content

Commit

Permalink
samples: translate: add translate text with glossary samples (#1940)
Browse files Browse the repository at this point in the history
* translate: add translate text with glossary samples

* add some clarifying comments

 explain blocks of the code
  • Loading branch information
nnegrey authored and Shabirmean committed Nov 18, 2022
1 parent b9b7dfc commit 4233374
Show file tree
Hide file tree
Showing 4 changed files with 447 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.translate;

// [START translate_v3_batch_translate_text_with_glossary]
import com.google.api.gax.longrunning.OperationFuture;
import com.google.cloud.translate.v3.BatchTranslateMetadata;
import com.google.cloud.translate.v3.BatchTranslateResponse;
import com.google.cloud.translate.v3.BatchTranslateTextRequest;
import com.google.cloud.translate.v3.GcsDestination;
import com.google.cloud.translate.v3.GcsSource;
import com.google.cloud.translate.v3.GlossaryName;
import com.google.cloud.translate.v3.InputConfig;
import com.google.cloud.translate.v3.LocationName;
import com.google.cloud.translate.v3.OutputConfig;
import com.google.cloud.translate.v3.TranslateTextGlossaryConfig;
import com.google.cloud.translate.v3.TranslationServiceClient;

import java.io.IOException;
import java.util.concurrent.ExecutionException;

public class BatchTranslateTextWithGlossary {

public static void batchTranslateTextWithGlossary()
throws InterruptedException, ExecutionException, IOException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "YOUR-PROJECT-ID";
// Supported Languages: https://cloud.google.com/translate/docs/languages
String sourceLanguage = "your-source-language";
String targetLanguage = "your-target-language";
String inputUri = "gs://your-gcs-bucket/path/to/input/file.txt";
String outputUri = "gs://your-gcs-bucket/path/to/results/";
String glossaryId = "your-glossary-display-name";
batchTranslateTextWithGlossary(
projectId, sourceLanguage, targetLanguage, inputUri, outputUri, glossaryId);
}

// Batch Translate Text with a Glossary.
public static void batchTranslateTextWithGlossary(
String projectId,
String sourceLanguage,
String targetLanguage,
String inputUri,
String outputUri,
String glossaryId)
throws IOException, ExecutionException, InterruptedException {

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (TranslationServiceClient client = TranslationServiceClient.create()) {
// Supported Locations: `global`, [glossary location], or [model location]
// Glossaries must be hosted in `us-central1`
// Custom Models must use the same location as your model. (us-central1)
String location = "us-central1";
LocationName parent = LocationName.of(projectId, location);

// Configure the source of the file from a GCS bucket
GcsSource gcsSource = GcsSource.newBuilder().setInputUri(inputUri).build();
// Supported Mime Types: https://cloud.google.com/translate/docs/supported-formats
InputConfig inputConfig =
InputConfig.newBuilder().setGcsSource(gcsSource).setMimeType("text/plain").build();

// Configure where to store the output in a GCS bucket
GcsDestination gcsDestination =
GcsDestination.newBuilder().setOutputUriPrefix(outputUri).build();
OutputConfig outputConfig =
OutputConfig.newBuilder().setGcsDestination(gcsDestination).build();

// Configure the glossary used in the request
GlossaryName glossaryName = GlossaryName.of(projectId, location, glossaryId);
TranslateTextGlossaryConfig glossaryConfig =
TranslateTextGlossaryConfig.newBuilder().setGlossary(glossaryName.toString()).build();

// Build the request that will be sent to the API
BatchTranslateTextRequest request =
BatchTranslateTextRequest.newBuilder()
.setParent(parent.toString())
.setSourceLanguageCode(sourceLanguage)
.addTargetLanguageCodes(targetLanguage)
.addInputConfigs(inputConfig)
.setOutputConfig(outputConfig)
.putGlossaries(targetLanguage, glossaryConfig)
.build();

// Start an asynchronous request
OperationFuture<BatchTranslateResponse, BatchTranslateMetadata> future =
client.batchTranslateTextAsync(request);

System.out.println("Waiting for operation to complete...");
BatchTranslateResponse response = future.get();
// Display the translation for each input text provided
System.out.printf("Total Characters: %s\n", response.getTotalCharacters());
System.out.printf("Translated Characters: %s\n", response.getTranslatedCharacters());
}
}
}
// [END translate_v3_batch_translate_text_with_glossary]
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.translate;

// [START translate_v3_translate_text_with_glossary]
import com.google.cloud.translate.v3.GlossaryName;
import com.google.cloud.translate.v3.LocationName;
import com.google.cloud.translate.v3.TranslateTextGlossaryConfig;
import com.google.cloud.translate.v3.TranslateTextRequest;
import com.google.cloud.translate.v3.TranslateTextResponse;
import com.google.cloud.translate.v3.Translation;
import com.google.cloud.translate.v3.TranslationServiceClient;

import java.io.IOException;

public class TranslateTextWithGlossary {

public static void translateTextWithGlossary() throws IOException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "YOUR-PROJECT-ID";
// Supported Languages: https://cloud.google.com/translate/docs/languages
String sourceLanguage = "your-source-language";
String targetLanguage = "your-target-language";
String text = "your-text";
String glossaryId = "your-glossary-display-name";
translateTextWithGlossary(projectId, sourceLanguage, targetLanguage, text, glossaryId);
}

// Translates a given text using a glossary.
public static void translateTextWithGlossary(
String projectId,
String sourceLanguage,
String targetLanguage,
String text,
String glossaryId)
throws IOException {

// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try (TranslationServiceClient client = TranslationServiceClient.create()) {
// Supported Locations: `global`, [glossary location], or [model location]
// Glossaries must be hosted in `us-central1`
// Custom Models must use the same location as your model. (us-central1)
String location = "us-central1";
LocationName parent = LocationName.of(projectId, location);

GlossaryName glossaryName = GlossaryName.of(projectId, location, glossaryId);
TranslateTextGlossaryConfig glossaryConfig =
TranslateTextGlossaryConfig.newBuilder().setGlossary(glossaryName.toString()).build();

// Supported Mime Types: https://cloud.google.com/translate/docs/supported-formats
TranslateTextRequest request =
TranslateTextRequest.newBuilder()
.setParent(parent.toString())
.setMimeType("text/plain")
.setSourceLanguageCode(sourceLanguage)
.setTargetLanguageCode(targetLanguage)
.addContents(text)
.setGlossaryConfig(glossaryConfig)
.build();

TranslateTextResponse response = client.translateText(request);

// Display the translation for each input text provided
for (Translation translation : response.getGlossaryTranslationsList()) {
System.out.printf("Translated text: %s\n", translation.getTranslatedText());
}
}
}
}
// [END translate_v3_translate_text_with_glossary]
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.translate;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.paging.Page;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import com.google.cloud.translate.v3.CreateGlossaryMetadata;
import com.google.cloud.translate.v3.CreateGlossaryRequest;
import com.google.cloud.translate.v3.DeleteGlossaryMetadata;
import com.google.cloud.translate.v3.DeleteGlossaryRequest;
import com.google.cloud.translate.v3.DeleteGlossaryResponse;
import com.google.cloud.translate.v3.GcsSource;
import com.google.cloud.translate.v3.Glossary;
import com.google.cloud.translate.v3.GlossaryInputConfig;
import com.google.cloud.translate.v3.GlossaryName;
import com.google.cloud.translate.v3.LocationName;
import com.google.cloud.translate.v3.TranslationServiceClient;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutionException;

import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Tests for the Batch Translate Text With Glossary sample. */
@RunWith(JUnit4.class)
@SuppressWarnings("checkstyle:abbreviationaswordinname")
public class BatchTranslateTextWithGlossaryTests {
  private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
  private static final String INPUT_URI =
      "gs://cloud-samples-data/translation/text_with_glossary.txt";
  private static final String GLOSSARY_INPUT_URI =
      "gs://cloud-samples-data/translation/glossary_ja.csv";
  // A fresh glossary id is generated each time this class is loaded.
  private static final String GLOSSARY_ID =
      String.format("test_%s", UUID.randomUUID().toString().replace("-", "_").substring(0, 26));

  private ByteArrayOutputStream bout;
  private PrintStream out;
  // The stream System.out pointed at before the test redirected it; restored in tearDown().
  private PrintStream originalOut;

  /**
   * Removes everything under the {@code BATCH_TRANSLATION_OUTPUT/} prefix.
   *
   * <p>The project id is passed as the bucket name, so this assumes a bucket named after the
   * project exists.
   */
  private static void cleanUpBucket() {
    Storage storage = StorageOptions.getDefaultInstance().getService();
    Page<Blob> blobs =
        storage.list(
            PROJECT_ID,
            Storage.BlobListOption.currentDirectory(),
            Storage.BlobListOption.prefix("BATCH_TRANSLATION_OUTPUT/"));

    deleteDirectory(storage, blobs);
  }

  /**
   * Deletes every blob in {@code blobs}. When a delete reports failure, the blob's name is treated
   * as a prefix and the blobs listed beneath it are deleted recursively.
   */
  private static void deleteDirectory(Storage storage, Page<Blob> blobs) {
    for (Blob blob : blobs.iterateAll()) {
      System.out.println(blob.getBlobId());
      if (!blob.delete()) {
        Page<Blob> subBlobs =
            storage.list(
                PROJECT_ID,
                Storage.BlobListOption.currentDirectory(),
                Storage.BlobListOption.prefix(blob.getName()));

        deleteDirectory(storage, subBlobs);
      }
    }
  }

  /** Fails with a descriptive message when the named environment variable is not set. */
  private static void requireEnvVar(String varName) {
    // String.format is static: the template must be the first argument. Invoking it through the
    // template literal would use varName as the format string and drop the message entirely.
    assertNotNull(
        String.format("Environment variable '%s' is required to perform these tests.", varName),
        System.getenv(varName));
  }

  @BeforeClass
  public static void checkRequirements() {
    requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS");
    requireEnvVar("GOOGLE_CLOUD_PROJECT");
  }

  @Before
  public void setUp() throws InterruptedException, ExecutionException, IOException {
    // Remember the real stdout first so tearDown() can always put it back.
    originalOut = System.out;

    // Create a glossary that can be used in the test; its output goes to a throwaway stream so
    // that it does not end up in the buffer the test asserts on.
    PrintStream temp = new PrintStream(new ByteArrayOutputStream());
    System.setOut(temp);
    List<String> languageCodes = new ArrayList<>();
    languageCodes.add("en");
    languageCodes.add("ja");
    CreateGlossary.createGlossary(PROJECT_ID, GLOSSARY_ID, languageCodes, GLOSSARY_INPUT_URI);

    // Capture everything the sample under test prints.
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    System.setOut(out);
  }

  @After
  public void tearDown() throws InterruptedException, ExecutionException, IOException {
    try {
      // Clean up
      cleanUpBucket();
    } finally {
      try {
        // Delete the created glossary even if the bucket cleanup failed
        DeleteGlossary.deleteGlossary(PROJECT_ID, GLOSSARY_ID);
      } finally {
        // Restore the original stream; setting System.out to null would make any later
        // System.out call throw a NullPointerException.
        System.setOut(originalOut);
      }
    }
  }

  @Test
  public void testBatchTranslateTextWithGlossary()
      throws InterruptedException, ExecutionException, IOException {
    BatchTranslateTextWithGlossary.batchTranslateTextWithGlossary(
        PROJECT_ID,
        "en",
        "ja",
        INPUT_URI,
        "gs://" + PROJECT_ID + "/BATCH_TRANSLATION_OUTPUT/",
        GLOSSARY_ID);
    String got = bout.toString();
    assertThat(got).contains("Total Characters: 9");
  }
}
Loading

0 comments on commit 4233374

Please sign in to comment.