From 81d550f4d0972b25dab816ff66122fe82fe1d2eb Mon Sep 17 00:00:00 2001 From: Gus Class Date: Mon, 28 Nov 2016 11:21:56 -0800 Subject: [PATCH 1/6] Updates to new client library and fixes broken link in README --- language/analysis/README.md | 2 +- language/analysis/pom.xml | 11 +- .../cloud/language/samples/Analyze.java | 118 ++++++------------ .../cloud/language/samples/AnalyzeIT.java | 8 +- .../cloud/language/samples/AnalyzeTest.java | 27 ++-- 5 files changed, 67 insertions(+), 99 deletions(-) diff --git a/language/analysis/README.md b/language/analysis/README.md index cc641081102..b120baf6081 100644 --- a/language/analysis/README.md +++ b/language/analysis/README.md @@ -3,7 +3,7 @@ This sample demonstrates the use of the [Google Cloud Natural Language API][NL-Docs] for entity recognition. -[NL-Docs]: https://cloud.google.com/language/docs/ +[NL-Docs]: https://cloud.google.com/natural-language/docs/ ## Java Version diff --git a/language/analysis/pom.xml b/language/analysis/pom.xml index b378ae0cc38..314192392d4 100644 --- a/language/analysis/pom.xml +++ b/language/analysis/pom.xml @@ -23,14 +23,9 @@ limitations under the License. - com.google.apis - google-api-services-language - v1-rev1-1.22.0 - - - com.google.api-client - google-api-client - 1.22.0 + com.google.cloud + google-cloud-language + 0.7.0 com.google.guava diff --git a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java index fd84f68a387..2d2a80c9467 100644 --- a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java +++ b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java @@ -16,28 +16,21 @@ package com.google.cloud.language.samples; -import com.google.api.client.googleapis.auth.oauth2.GoogleCredential; -import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport; -import com.google.api.client.http.HttpRequest; -import com.google.api.client.http.HttpRequestInitializer; -import com.google.api.client.json.JsonFactory; -import com.google.api.client.json.jackson2.JacksonFactory; -import com.google.api.services.language.v1.CloudNaturalLanguage; -import com.google.api.services.language.v1.CloudNaturalLanguageScopes; -import com.google.api.services.language.v1.model.AnalyzeEntitiesRequest; -import com.google.api.services.language.v1.model.AnalyzeEntitiesResponse; -import com.google.api.services.language.v1.model.AnalyzeSentimentRequest; -import com.google.api.services.language.v1.model.AnalyzeSentimentResponse; -import com.google.api.services.language.v1.model.AnalyzeSyntaxRequest; -import com.google.api.services.language.v1.model.AnalyzeSyntaxResponse; -import com.google.api.services.language.v1.model.AnnotateTextRequest; -import com.google.api.services.language.v1.model.AnnotateTextResponse; -import com.google.api.services.language.v1.model.Document; -import com.google.api.services.language.v1.model.Entity; -import com.google.api.services.language.v1.model.EntityMention; -import com.google.api.services.language.v1.model.Features; -import com.google.api.services.language.v1.model.Sentiment; -import com.google.api.services.language.v1.model.Token; +import com.google.cloud.language.spi.v1.LanguageServiceClient; + +import com.google.cloud.language.v1.AnalyzeEntitiesRequest; +import com.google.cloud.language.v1.AnalyzeEntitiesResponse; +import com.google.cloud.language.v1.AnalyzeSentimentResponse; +import com.google.cloud.language.v1.AnalyzeSyntaxRequest; +import com.google.cloud.language.v1.AnalyzeSyntaxResponse; +import com.google.cloud.language.v1.Document; +import com.google.cloud.language.v1.Document.Type; +import com.google.cloud.language.v1.EncodingType; +import com.google.cloud.language.v1.Entity; +import com.google.cloud.language.v1.EntityMention; +import com.google.cloud.language.v1.Sentiment; +import com.google.cloud.language.v1.Token; +import com.google.protobuf.Descriptors; import java.io.IOException; import java.io.PrintStream; @@ -49,16 +42,7 @@ * A sample application that uses the Natural Language API to perform * entity, sentiment and syntax analysis. */ -@SuppressWarnings("serial") public class Analyze { - /** - * Be sure to specify the name of your application. If the application name is {@code null} or - * blank, the application will log a warning. Suggested format is "MyCompany-ProductName/1.0". - */ - private static final String APPLICATION_NAME = "Google-LanguagAPISample/1.0"; - - private static final int MAX_RESULTS = 4; - /** * Detects entities,sentiment and syntax in a document using the Natural Language API. */ @@ -73,7 +57,7 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept String command = args[0]; String text = args[1]; - Analyze app = new Analyze(getLanguageService()); + Analyze app = new Analyze(createLanguageService()); if (command.equals("entities")) { printEntities(System.out, app.analyzeEntities(text)); @@ -97,15 +81,17 @@ public static void printEntities(PrintStream out, List entities) { out.printf("%s\n", entity.getName()); out.printf("\tSalience: %.3f\n", entity.getSalience()); out.printf("\tType: %s\n", entity.getType()); - if (entity.getMetadata() != null) { - for (Map.Entry metadata : entity.getMetadata().entrySet()) { + if (entity.getMetadataMap() != null) { + for (Map.Entry metadata : entity.getMetadataMap().entrySet()) { out.printf("\tMetadata: %s = %s\n", metadata.getKey(), metadata.getValue()); } } - if (entity.getMentions() != null) { - for (EntityMention mention : entity.getMentions()) { - for (Map.Entry mentionSetMember : mention.entrySet()) { - out.printf("\tMention: %s = %s\n", mentionSetMember.getKey(), mentionSetMember.getValue()); + if (entity.getMentionsList() != null) { + for (EntityMention mention : entity.getMentionsList()) { + for (Map.Entry mentionSetMember : + mention.getAllFields().entrySet()) { + out.printf("\tMention: %s = %s\n", mentionSetMember.getKey(), + mentionSetMember.getValue()); } } } @@ -157,29 +143,16 @@ public static void printSyntax(PrintStream out, List tokens) { /** * Connects to the Natural Language API using Application Default Credentials. */ - public static CloudNaturalLanguage getLanguageService() - throws IOException, GeneralSecurityException { - GoogleCredential credential = - GoogleCredential.getApplicationDefault().createScoped(CloudNaturalLanguageScopes.all()); - JsonFactory jsonFactory = JacksonFactory.getDefaultInstance(); - return new CloudNaturalLanguage.Builder( - GoogleNetHttpTransport.newTrustedTransport(), - jsonFactory, new HttpRequestInitializer() { - @Override - public void initialize(HttpRequest request) throws IOException { - credential.initialize(request); - } - }) - .setApplicationName(APPLICATION_NAME) - .build(); + public static LanguageServiceClient createLanguageService() throws IOException{ + return LanguageServiceClient.create(); } - private final CloudNaturalLanguage languageApi; + private final LanguageServiceClient languageApi; /** * Constructs a {@link Analyze} which connects to the Cloud Natural Language API. */ - public Analyze(CloudNaturalLanguage languageApi) { + public Analyze(LanguageServiceClient languageApi) { this.languageApi = languageApi; } @@ -188,27 +161,19 @@ public Analyze(CloudNaturalLanguage languageApi) { */ public List analyzeEntities(String text) throws IOException { AnalyzeEntitiesRequest request = - new AnalyzeEntitiesRequest() - .setDocument(new Document().setContent(text).setType("PLAIN_TEXT")) - .setEncodingType("UTF16"); - CloudNaturalLanguage.Documents.AnalyzeEntities analyze = - languageApi.documents().analyzeEntities(request); - - AnalyzeEntitiesResponse response = analyze.execute(); - return response.getEntities(); + AnalyzeEntitiesRequest.newBuilder() + .setDocument(Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT)) + .setEncodingType(EncodingType.UTF16).build(); + AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); + return response.getEntitiesList(); } /** * Gets {@link Sentiment} from the string {@code text}. */ public Sentiment analyzeSentiment(String text) throws IOException { - AnalyzeSentimentRequest request = - new AnalyzeSentimentRequest() - .setDocument(new Document().setContent(text).setType("PLAIN_TEXT")); - CloudNaturalLanguage.Documents.AnalyzeSentiment analyze = - languageApi.documents().analyzeSentiment(request); - - AnalyzeSentimentResponse response = analyze.execute(); + AnalyzeSentimentResponse response = languageApi.analyzeSentiment( + Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build()); return response.getDocumentSentiment(); } @@ -216,13 +181,10 @@ public Sentiment analyzeSentiment(String text) throws IOException { * Gets {@link Token}s from the string {@code text}. */ public List analyzeSyntax(String text) throws IOException { - AnalyzeSyntaxRequest request = - new AnalyzeSyntaxRequest() - .setDocument(new Document().setContent(text).setType("PLAIN_TEXT")) - .setEncodingType("UTF16"); - CloudNaturalLanguage.Documents.AnalyzeSyntax analyze = - languageApi.documents().analyzeSyntax(request); - AnalyzeSyntaxResponse response = analyze.execute(); - return response.getTokens(); + AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() + .setDocument(Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build()) + .setEncodingType(EncodingType.UTF16).build(); + AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); + return response.getTokensList(); } } diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index 4284dea0f58..5c37997bbed 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -18,9 +18,9 @@ import static com.google.common.truth.Truth.assertThat; -import com.google.api.services.language.v1.model.Entity; -import com.google.api.services.language.v1.model.Sentiment; -import com.google.api.services.language.v1.model.Token; +import com.google.cloud.language.v1.Entity; +import com.google.cloud.language.v1.Sentiment; +import com.google.cloud.language.v1.Token; import org.junit.Before; import org.junit.Test; @@ -40,7 +40,7 @@ public class AnalyzeIT { private Analyze analyzeApp; @Before public void setup() throws Exception { - analyzeApp = new Analyze(Analyze.getLanguageService()); + analyzeApp = new Analyze(Analyze.createLanguageService()); } @Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception { diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java index 1e52ae0639f..7c1b659c595 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java @@ -18,7 +18,9 @@ import static com.google.common.truth.Truth.assertThat; -import com.google.api.services.language.v1.model.Entity; +import com.google.cloud.language.v1.Entity; +import com.google.cloud.language.v1.Entity.Builder; +import com.google.cloud.language.v1.Entity.Type; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -69,13 +71,22 @@ public class AnalyzeTest { PrintStream out = new PrintStream(bout); ImmutableList entities = ImmutableList.of( - new Entity().setName("Larry Page").setSalience(0.426f).setType("PERSON").setMetadata( - ImmutableMap.builder() - .put("knowledge_graph_mid", "/m/0gjpq") - .put("wikipedia_url", "http://en.wikipedia.org/wiki/index.html?curid=60903") - .build()), - new Entity().setName("search engine").setSalience(0.188f).setType("CONSUMER_GOOD"), - new Entity().setName("something")); + Entity.newBuilder().setName("Larry Page") + .setSalience(0.426f) + .setType(Type.PERSON) + .putAllMetadata( + ImmutableMap.builder() + .put("knowledge_graph_mid", "/m/0gjpq") + .put("wikipedia_url", + "http://en.wikipedia.org/wiki/index.html?curid=60903") + .build()) + .build(), + Entity.newBuilder() + .setName("search engine") + .setSalience(0.188f) + .setType(Type.CONSUMER_GOOD) + .build(), + Entity.newBuilder().setName("something").build()); // Act Analyze.printEntities(out, entities); From 4baae603a70f1463bf5f2480a57bf05045b1cc1b Mon Sep 17 00:00:00 2001 From: Gus Class Date: Mon, 28 Nov 2016 12:17:30 -0800 Subject: [PATCH 2/6] Fixes error on test --- .../test/java/com/google/cloud/language/samples/AnalyzeIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index 5c37997bbed..d27eed67241 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -19,6 +19,7 @@ import static com.google.common.truth.Truth.assertThat; import com.google.cloud.language.v1.Entity; +import com.google.cloud.language.v1.PartOfSpeech.Tag; import com.google.cloud.language.v1.Sentiment; import com.google.cloud.language.v1.Token; @@ -85,7 +86,7 @@ public class AnalyzeIT { analyzeApp.analyzeSyntax( "President Obama was elected for the second term"); - List got = token.stream().map(e -> e.getPartOfSpeech().getTag()) + List got = token.stream().map(e -> e.getPartOfSpeech().getTag()) .collect(Collectors.toList()); // Assert From a5f7fc051d74bccae5333ab111099197d67256e2 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Mon, 28 Nov 2016 12:33:29 -0800 Subject: [PATCH 3/6] Fixes type in test assertion --- .../java/com/google/cloud/language/samples/AnalyzeIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index d27eed67241..62f33e0e7b9 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -90,7 +90,7 @@ public class AnalyzeIT { .collect(Collectors.toList()); // Assert - assertThat(got).containsExactly("NOUN", "NOUN", "VERB", - "VERB", "ADP", "DET", "ADJ", "NOUN").inOrder(); + assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB, + Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder(); } } From bd07872c59cbb6edd3072a341cca15f6cdf0b25c Mon Sep 17 00:00:00 2001 From: Gus Class Date: Wed, 30 Nov 2016 10:00:17 -0800 Subject: [PATCH 4/6] Removes extraneous createService method, reduces line length in integration tests, and adds verbose comments on mock entities. --- .../cloud/language/samples/Analyze.java | 28 +++++++++---------- .../cloud/language/samples/AnalyzeIT.java | 3 +- .../cloud/language/samples/AnalyzeTest.java | 6 ++-- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java index 2d2a80c9467..be89dfd6895 100644 --- a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java +++ b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java @@ -57,7 +57,7 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept String command = args[0]; String text = args[1]; - Analyze app = new Analyze(createLanguageService()); + Analyze app = new Analyze(LanguageServiceClient.create()); if (command.equals("entities")) { printEntities(System.out, app.analyzeEntities(text)); @@ -140,12 +140,6 @@ public static void printSyntax(PrintStream out, List tokens) { } } - /** - * Connects to the Natural Language API using Application Default Credentials. - */ - public static LanguageServiceClient createLanguageService() throws IOException{ - return LanguageServiceClient.create(); - } private final LanguageServiceClient languageApi; @@ -160,10 +154,11 @@ public Analyze(LanguageServiceClient languageApi) { * Gets {@link Entity}s from the string {@code text}. */ public List analyzeEntities(String text) throws IOException { - AnalyzeEntitiesRequest request = - AnalyzeEntitiesRequest.newBuilder() - .setDocument(Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT)) - .setEncodingType(EncodingType.UTF16).build(); + Document doc = Document.newBuilder() + .setContent(text).setType(Type.PLAIN_TEXT).build(); + AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() + .setDocument(doc) + .setEncodingType(EncodingType.UTF16).build(); AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); return response.getEntitiesList(); } @@ -172,8 +167,9 @@ public List analyzeEntities(String text) throws IOException { * Gets {@link Sentiment} from the string {@code text}. */ public Sentiment analyzeSentiment(String text) throws IOException { - AnalyzeSentimentResponse response = languageApi.analyzeSentiment( - Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build()); + Document doc = Document.newBuilder() + .setContent(text).setType(Type.PLAIN_TEXT).build(); + AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); return response.getDocumentSentiment(); } @@ -181,9 +177,11 @@ public Sentiment analyzeSentiment(String text) throws IOException { * Gets {@link Token}s from the string {@code text}. */ public List analyzeSyntax(String text) throws IOException { + Document doc = Document.newBuilder() + .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() - .setDocument(Document.newBuilder().setContent(text).setType(Type.PLAIN_TEXT).build()) - .setEncodingType(EncodingType.UTF16).build(); + .setDocument(doc) + .setEncodingType(EncodingType.UTF16).build(); AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); return response.getTokensList(); } diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index 62f33e0e7b9..aa1a40fddc6 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -18,6 +18,7 @@ import static com.google.common.truth.Truth.assertThat; +import com.google.cloud.language.spi.v1.LanguageServiceClient; import com.google.cloud.language.v1.Entity; import com.google.cloud.language.v1.PartOfSpeech.Tag; import com.google.cloud.language.v1.Sentiment; @@ -41,7 +42,7 @@ public class AnalyzeIT { private Analyze analyzeApp; @Before public void setup() throws Exception { - analyzeApp = new Analyze(Analyze.createLanguageService()); + analyzeApp = new Analyze(LanguageServiceClient.create()); } @Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception { diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java index 7c1b659c595..a1a13c1c032 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeTest.java @@ -69,6 +69,8 @@ public class AnalyzeTest { // Arrange ByteArrayOutputStream bout = new ByteArrayOutputStream(); PrintStream out = new PrintStream(bout); + + // Mock natural-language entities based on actual data. ImmutableList entities = ImmutableList.of( Entity.newBuilder().setName("Larry Page") @@ -88,10 +90,10 @@ public class AnalyzeTest { .build(), Entity.newBuilder().setName("something").build()); - // Act + // Act on sample code with mock data. Analyze.printEntities(out, entities); - // Assert + // Assert output from sample matches expected output. String got = bout.toString(); assertThat(got).contains("Found 3 entities."); assertThat(got).contains("Larry Page"); From c701f29f23bb8430e0b0023b1c9e82bf08075d71 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Fri, 2 Dec 2016 13:45:55 -0800 Subject: [PATCH 5/6] Adds GCS filepath snippets and tests --- language/analysis/README.md | 14 ++-- .../cloud/language/samples/Analyze.java | 60 +++++++++++++--- .../cloud/language/samples/AnalyzeIT.java | 71 +++++++++++++++++-- 3 files changed, 125 insertions(+), 20 deletions(-) diff --git a/language/analysis/README.md b/language/analysis/README.md index b120baf6081..084b3caaf9d 100644 --- a/language/analysis/README.md +++ b/language/analysis/README.md @@ -30,12 +30,13 @@ mvn clean compile assembly:single ``` We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes -three values `entities`, `sentiment` or `syntax`. +six values `entities-text`, `entities-file`, `sentiment-text`, `sentiment-file`, +`syntax-text`, or `syntax-file`. ``` MAIN_CLASS=com.google.cloud.language.samples.Analyze JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar -java -cp $JAR_FILE $MAIN_CLASS +java -cp $JAR_FILE $MAIN_CLASS ``` Example usage: @@ -46,8 +47,11 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search back exactly what you want.' Since he spoke those words Google has grown to offer products beyond search, but the spirit of what he said remains." -java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE" -java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE" -java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS entities-text "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS entities-file "gs://bucket/file.txt" +java -cp $JAR_FILE $MAIN_CLASS sentiment-text "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS sentiment-file "gs://bucket/file.txt" +java -cp $JAR_FILE $MAIN_CLASS syntax-text "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS syntax-file "gs://bucket/file.txt" ``` diff --git a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java index be89dfd6895..4a59dfbe870 100644 --- a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java +++ b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java @@ -59,12 +59,18 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept Analyze app = new Analyze(LanguageServiceClient.create()); - if (command.equals("entities")) { - printEntities(System.out, app.analyzeEntities(text)); - } else if (command.equals("sentiment")) { - printSentiment(System.out, app.analyzeSentiment(text)); - } else if (command.equals("syntax")) { - printSyntax(System.out, app.analyzeSyntax(text)); + if (command.equals("entities-text")) { + printEntities(System.out, app.analyzeEntitiesText(text)); + } else if (command.equals("entities-file")) { + printEntities(System.out, app.analyzeEntitiesFile(text)); + } else if (command.equals("sentiment-text")) { + printSentiment(System.out, app.analyzeSentimentText(text)); + } else if (command.equals("sentiment-file")) { + printSentiment(System.out, app.analyzeSentimentFile(text)); + } else if (command.equals("syntax-text")) { + printSyntax(System.out, app.analyzeSyntaxText(text)); + } else if (command.equals("syntax-file")) { + printSyntax(System.out, app.analyzeSyntaxFile(text)); } } @@ -153,7 +159,7 @@ public Analyze(LanguageServiceClient languageApi) { /** * Gets {@link Entity}s from the string {@code text}. */ - public List analyzeEntities(String text) throws IOException { + public List analyzeEntitiesText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() @@ -162,21 +168,44 @@ public List analyzeEntities(String text) throws IOException { AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); return response.getEntitiesList(); } + + /** + * Gets {@link Entity}s from the string representing the GCS {@code path}. + */ + public List analyzeEntitiesFile(String path) throws IOException { + Document doc = Document.newBuilder() + .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); + AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder() + .setDocument(doc) + .setEncodingType(EncodingType.UTF16).build(); + AnalyzeEntitiesResponse response = languageApi.analyzeEntities(request); + return response.getEntitiesList(); + } /** * Gets {@link Sentiment} from the string {@code text}. */ - public Sentiment analyzeSentiment(String text) throws IOException { + public Sentiment analyzeSentimentText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); return response.getDocumentSentiment(); } + /** + * Gets {@link Sentiment} from the string representing the GCS {@code path}. + */ + public Sentiment analyzeSentimentFile(String path) throws IOException { + Document doc = Document.newBuilder() + .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); + AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc); + return response.getDocumentSentiment(); + } + /** * Gets {@link Token}s from the string {@code text}. */ - public List analyzeSyntax(String text) throws IOException { + public List analyzeSyntaxText(String text) throws IOException { Document doc = Document.newBuilder() .setContent(text).setType(Type.PLAIN_TEXT).build(); AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() @@ -185,4 +214,17 @@ public List analyzeSyntax(String text) throws IOException { AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); return response.getTokensList(); } + + /** + * Gets {@link Token}s from the string representing the GCS {@code path}. + */ + public List analyzeSyntaxFile(String path) throws IOException { + Document doc = Document.newBuilder() + .setGcsContentUri(path).setType(Type.PLAIN_TEXT).build(); + AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder() + .setDocument(doc) + .setEncodingType(EncodingType.UTF16).build(); + AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request); + return response.getTokensList(); + } } diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index aa1a40fddc6..90a05d01638 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -48,7 +48,7 @@ public class AnalyzeIT { @Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception { // Act List entities = - analyzeApp.analyzeEntities( + analyzeApp.analyzeEntitiesText( "Larry Page, Google's co-founder, once described the 'perfect search engine' as" + " something that 'understands exactly what you mean and gives you back exactly what" + " you want.' Since he spoke those words Google has grown to offer products beyond" @@ -58,11 +58,21 @@ public class AnalyzeIT { // Assert assertThat(got).named("entity names").contains("Larry Page"); } - - @Test public void analyzeSentiment_returnPositive() throws Exception { + + @Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception { + // Act + List entities = + analyzeApp.analyzeEntitiesFile("gs://cloud-samples-tests/natural-language/gettysburg.txt"); + List got = entities.stream().map(e -> e.getName()).collect(Collectors.toList()); + + // Assert + assertThat(got).named("entity names").contains("God"); + } + + @Test public void analyzeSentimentText_returnPositive() throws Exception { // Act Sentiment sentiment = - analyzeApp.analyzeSentiment( + analyzeApp.analyzeSentimentText( "Tom Cruise is one of the finest actors in hollywood and a great star!"); // Assert @@ -70,21 +80,54 @@ public class AnalyzeIT { assertThat((double)sentiment.getScore()).isGreaterThan(0.0); } + @Test public void analyzeSentimentFile_returnPositiveFile() throws Exception { + // Act + Sentiment sentiment = + analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" + + "sentiment/bladerunner-pos.txt"); + + // Assert + assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); + assertThat((double)sentiment.getScore()).isGreaterThan(0.0); + } + @Test public void analyzeSentiment_returnNegative() throws Exception { // Act Sentiment sentiment = - analyzeApp.analyzeSentiment( + analyzeApp.analyzeSentimentText( "That was the worst performance I've seen in awhile."); // Assert assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); assertThat((double)sentiment.getScore()).isLessThan(0.0); } + + @Test public void analyzeSentiment_returnNegativeFile() throws Exception { + // Act + Sentiment sentiment = + analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" + + "sentiment/bladerunner-neg.txt"); + + // Assert + assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0); + assertThat((double)sentiment.getScore()).isLessThan(0.0); + } + + @Test public void analyzeSentiment_returnNeutralFile() throws Exception { + // Act + Sentiment sentiment = + analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/" + + "sentiment/bladerunner-neutral.txt"); + + // Assert + assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0); + assertThat((double)sentiment.getScore()).isWithin(0.0); + } @Test public void analyzeSyntax_partOfSpeech() throws Exception { // Act List token = - analyzeApp.analyzeSyntax( + analyzeApp.analyzeSyntaxText( "President Obama was elected for the second term"); List got = token.stream().map(e -> e.getPartOfSpeech().getTag()) @@ -94,4 +137,20 @@ public class AnalyzeIT { assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB, Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder(); } + + @Test public void analyzeSyntax_partOfSpeechFile() throws Exception { + // Act + List token = + analyzeApp.analyzeSyntaxFile("gs://cloud-samples-tests/natural-language/" + + "sentiment/bladerunner-neutral.txt"); + + List got = token.stream().map(e -> e.getPartOfSpeech().getTag()) + .collect(Collectors.toList()); + + // Assert + assertThat(got).containsExactly(Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB, + Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ, + Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET, + Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ,Tag.PUNCT).inOrder(); + } } From cc6ec313cd229d666c960b3108ccd312456d67d7 Mon Sep 17 00:00:00 2001 From: Gus Class Date: Fri, 2 Dec 2016 15:48:10 -0800 Subject: [PATCH 6/6] Cleaner arguments, better comments, and less brittle tests --- language/analysis/README.md | 18 +++++----- .../cloud/language/samples/Analyze.java | 35 ++++++++++++------- .../cloud/language/samples/AnalyzeIT.java | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/language/analysis/README.md b/language/analysis/README.md index 084b3caaf9d..fd00b27c000 100644 --- a/language/analysis/README.md +++ b/language/analysis/README.md @@ -30,13 +30,12 @@ mvn clean compile assembly:single ``` We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes -six values `entities-text`, `entities-file`, `sentiment-text`, `sentiment-file`, -`syntax-text`, or `syntax-file`. +three values `entities`, `sentiment`, or `syntax`. ``` MAIN_CLASS=com.google.cloud.language.samples.Analyze JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar -java -cp $JAR_FILE $MAIN_CLASS +java -cp $JAR_FILE $MAIN_CLASS ``` Example usage: @@ -47,11 +46,10 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search back exactly what you want.' Since he spoke those words Google has grown to offer products beyond search, but the spirit of what he said remains." -java -cp $JAR_FILE $MAIN_CLASS entities-text "$QUOTE" -java -cp $JAR_FILE $MAIN_CLASS entities-file "gs://bucket/file.txt" -java -cp $JAR_FILE $MAIN_CLASS sentiment-text "$QUOTE" -java -cp $JAR_FILE $MAIN_CLASS sentiment-file "gs://bucket/file.txt" -java -cp $JAR_FILE $MAIN_CLASS syntax-text "$QUOTE" -java -cp $JAR_FILE $MAIN_CLASS syntax-file "gs://bucket/file.txt" +java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS entities "gs://bucket/file.txt" +java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS sentiment "gs://bucket/file.txt" +java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE" +java -cp $JAR_FILE $MAIN_CLASS syntax "gs://bucket/file.txt" ``` - diff --git a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java index 61519e2449d..9b4adfcff96 100644 --- a/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java +++ b/language/analysis/src/main/java/com/google/cloud/language/samples/Analyze.java @@ -59,18 +59,24 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept Analyze app = new Analyze(LanguageServiceClient.create()); - if (command.equals("entities-text")) { - printEntities(System.out, app.analyzeEntitiesText(text)); - } else if (command.equals("entities-file")) { - printEntities(System.out, app.analyzeEntitiesFile(text)); - } else if (command.equals("sentiment-text")) { - printSentiment(System.out, app.analyzeSentimentText(text)); - } else if (command.equals("sentiment-file")) { + if (command.equals("entities")) { + if (text.startsWith("gs://")) { + printEntities(System.out, app.analyzeEntitiesFile(text)); + } else { + printEntities(System.out, app.analyzeEntitiesText(text)); + } + } else if (command.equals("sentiment")) { + if (text.startsWith("gs://")) { printSentiment(System.out, app.analyzeSentimentFile(text)); - } else if (command.equals("syntax-text")) { - printSyntax(System.out, app.analyzeSyntaxText(text)); - } else if (command.equals("syntax-file")) { + } else { + printSentiment(System.out, app.analyzeSentimentText(text)); + } + } else if (command.equals("syntax")) { + if (text.startsWith("gs://")) { printSyntax(System.out, app.analyzeSyntaxFile(text)); + } else { + printSyntax(System.out, app.analyzeSyntaxText(text)); + } } } @@ -117,6 +123,9 @@ public static void printSentiment(PrintStream out, Sentiment sentiment) { out.printf("\tScore: %.3f\n", sentiment.getScore()); } + /** + * Prints the Syntax for the {@code tokens}. + */ public static void printSyntax(PrintStream out, List tokens) { if (tokens == null || tokens.size() == 0) { out.println("No syntax found"); @@ -170,7 +179,7 @@ public List analyzeEntitiesText(String text) throws IOException { } /** - * Gets {@link Entity}s from the string representing the GCS {@code path}. + * Gets {@link Entity}s from the contents of the object at the given GCS {@code path}. */ public List analyzeEntitiesFile(String path) throws IOException { Document doc = Document.newBuilder() @@ -193,7 +202,7 @@ public Sentiment analyzeSentimentText(String text) throws IOException { } /** - * Gets {@link Sentiment} from the string representing the GCS {@code path}. + * Gets {@link Sentiment} from the contents of the object at the given GCS {@code path}. */ public Sentiment analyzeSentimentFile(String path) throws IOException { Document doc = Document.newBuilder() @@ -216,7 +225,7 @@ public List analyzeSyntaxText(String text) throws IOException { } /** - * Gets {@link Token}s from the string representing the GCS {@code path}. + * Gets {@link Token}s from the contents of the object at the given GCS {@code path}. */ public List analyzeSyntaxFile(String path) throws IOException { Document doc = Document.newBuilder() diff --git a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java index ff64c7c238f..faa9b0d1f06 100644 --- a/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java +++ b/language/analysis/src/test/java/com/google/cloud/language/samples/AnalyzeIT.java @@ -121,7 +121,7 @@ public class AnalyzeIT { // Assert assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0); - assertThat((double)sentiment.getScore()).isWithin(0.0); + assertThat((double)sentiment.getScore()).isWithin(0.1); } @Test public void analyzeSyntax_partOfSpeech() throws Exception {