Skip to content

Commit

Permalink
Merge pull request #432 from GoogleCloudPlatform/language-gcs
Browse files Browse the repository at this point in the history
Add GCS examples for Natural Language
  • Loading branch information
gguuss authored Dec 5, 2016
2 parents d407c61 + cc6ec31 commit 7770807
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 14 deletions.
8 changes: 5 additions & 3 deletions language/analysis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ mvn clean compile assembly:single
```

We can then run the assembled JAR file with the `java` command. The variable $COMMAND takes
three values `entities`, `sentiment` or `syntax`.
three values `entities`, `sentiment`, or `syntax`.

```
MAIN_CLASS=com.google.cloud.language.samples.Analyze
JAR_FILE=target/language-entities-1.0-jar-with-dependencies.jar
java -cp $JAR_FILE $MAIN_CLASS <sentiment|entities|syntax> <text>
java -cp $JAR_FILE $MAIN_CLASS <sentiment|entities|syntax> <text|path>
```

Example usage:
Expand All @@ -47,7 +47,9 @@ QUOTE="Larry Page, Google's co-founder, once described the 'perfect search
offer products beyond search, but the spirit of what he said remains."
java -cp $JAR_FILE $MAIN_CLASS entities "$QUOTE"
java -cp $JAR_FILE $MAIN_CLASS entities "gs://bucket/file.txt"
java -cp $JAR_FILE $MAIN_CLASS sentiment "$QUOTE"
java -cp $JAR_FILE $MAIN_CLASS sentiment "gs://bucket/file.txt"
java -cp $JAR_FILE $MAIN_CLASS syntax "$QUOTE"
java -cp $JAR_FILE $MAIN_CLASS syntax "gs://bucket/file.txt"
```

Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,23 @@ public static void main(String[] args) throws IOException, GeneralSecurityExcept
Analyze app = new Analyze(LanguageServiceClient.create());

if (command.equals("entities")) {
printEntities(System.out, app.analyzeEntities(text));
if (text.startsWith("gs://")) {
printEntities(System.out, app.analyzeEntitiesFile(text));
} else {
printEntities(System.out, app.analyzeEntitiesText(text));
}
} else if (command.equals("sentiment")) {
printSentiment(System.out, app.analyzeSentiment(text));
if (text.startsWith("gs://")) {
printSentiment(System.out, app.analyzeSentimentFile(text));
} else {
printSentiment(System.out, app.analyzeSentimentText(text));
}
} else if (command.equals("syntax")) {
printSyntax(System.out, app.analyzeSyntax(text));
if (text.startsWith("gs://")) {
printSyntax(System.out, app.analyzeSyntaxFile(text));
} else {
printSyntax(System.out, app.analyzeSyntaxText(text));
}
}
}

Expand Down Expand Up @@ -111,6 +123,9 @@ public static void printSentiment(PrintStream out, Sentiment sentiment) {
out.printf("\tScore: %.3f\n", sentiment.getScore());
}

/**
* Prints the Syntax for the {@code tokens}.
*/
public static void printSyntax(PrintStream out, List<Token> tokens) {
if (tokens == null || tokens.size() == 0) {
out.println("No syntax found");
Expand Down Expand Up @@ -153,7 +168,7 @@ public Analyze(LanguageServiceClient languageApi) {
/**
* Gets {@link Entity}s from the string {@code text}.
*/
public List<Entity> analyzeEntities(String text) throws IOException {
public List<Entity> analyzeEntitiesText(String text) throws IOException {
Document doc = Document.newBuilder()
.setContent(text).setType(Type.PLAIN_TEXT).build();
AnalyzeEntitiesRequest request = AnalyzeEntitiesRequest.newBuilder()
Expand All @@ -163,20 +178,43 @@ public List<Entity> analyzeEntities(String text) throws IOException {
return response.getEntitiesList();
}

/**
 * Runs entity analysis on the plain-text object stored at the GCS URI {@code path}
 * and returns the {@link Entity}s reported by the API.
 */
public List<Entity> analyzeEntitiesFile(String path) throws IOException {
  Document gcsDocument =
      Document.newBuilder()
          .setType(Type.PLAIN_TEXT)
          .setGcsContentUri(path)
          .build();
  AnalyzeEntitiesRequest entitiesRequest =
      AnalyzeEntitiesRequest.newBuilder()
          .setEncodingType(EncodingType.UTF16)
          .setDocument(gcsDocument)
          .build();
  return languageApi.analyzeEntities(entitiesRequest).getEntitiesList();
}

/**
* Gets {@link Sentiment} from the string {@code text}.
*/
public Sentiment analyzeSentiment(String text) throws IOException {
public Sentiment analyzeSentimentText(String text) throws IOException {
Document doc = Document.newBuilder()
.setContent(text).setType(Type.PLAIN_TEXT).build();
AnalyzeSentimentResponse response = languageApi.analyzeSentiment(doc);
return response.getDocumentSentiment();
}

/**
 * Runs sentiment analysis on the plain-text object stored at the GCS URI {@code path}
 * and returns the document-level {@link Sentiment}.
 */
public Sentiment analyzeSentimentFile(String path) throws IOException {
  Document gcsDocument =
      Document.newBuilder()
          .setType(Type.PLAIN_TEXT)
          .setGcsContentUri(path)
          .build();
  return languageApi.analyzeSentiment(gcsDocument).getDocumentSentiment();
}

/**
* Gets {@link Token}s from the string {@code text}.
*/
public List<Token> analyzeSyntax(String text) throws IOException {
public List<Token> analyzeSyntaxText(String text) throws IOException {
Document doc = Document.newBuilder()
.setContent(text).setType(Type.PLAIN_TEXT).build();
AnalyzeSyntaxRequest request = AnalyzeSyntaxRequest.newBuilder()
Expand All @@ -185,4 +223,17 @@ public List<Token> analyzeSyntax(String text) throws IOException {
AnalyzeSyntaxResponse response = languageApi.analyzeSyntax(request);
return response.getTokensList();
}

/**
 * Runs syntax analysis on the plain-text object stored at the GCS URI {@code path}
 * and returns the {@link Token}s reported by the API.
 */
public List<Token> analyzeSyntaxFile(String path) throws IOException {
  Document gcsDocument =
      Document.newBuilder()
          .setType(Type.PLAIN_TEXT)
          .setGcsContentUri(path)
          .build();
  AnalyzeSyntaxRequest syntaxRequest =
      AnalyzeSyntaxRequest.newBuilder()
          .setEncodingType(EncodingType.UTF16)
          .setDocument(gcsDocument)
          .build();
  return languageApi.analyzeSyntax(syntaxRequest).getTokensList();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public class AnalyzeIT {
@Test public void analyzeEntities_withEntities_returnsLarryPage() throws Exception {
// Act
List<Entity> entities =
analyzeApp.analyzeEntities(
analyzeApp.analyzeEntitiesText(
"Larry Page, Google's co-founder, once described the 'perfect search engine' as"
+ " something that 'understands exactly what you mean and gives you back exactly what"
+ " you want.' Since he spoke those words Google has grown to offer products beyond"
Expand All @@ -59,32 +59,75 @@ public class AnalyzeIT {
assertThat(got).named("entity names").contains("Larry Page");
}

@Test public void analyzeSentiment_returnPositive() throws Exception {
@Test public void analyzeEntities_withEntitiesFile_containsGod() throws Exception {
  // Act: analyze the Gettysburg sample stored in GCS and collect the entity names.
  String gcsPath = "gs://cloud-samples-tests/natural-language/gettysburg.txt";
  List<Entity> fileEntities = analyzeApp.analyzeEntitiesFile(gcsPath);
  List<String> names =
      fileEntities.stream()
          .map(Entity::getName)
          .collect(Collectors.toList());

  // Assert
  assertThat(names).named("entity names").contains("God");
}

@Test public void analyzeSentimentText_returnPositive() throws Exception {
// Act
Sentiment sentiment =
analyzeApp.analyzeSentiment(
analyzeApp.analyzeSentimentText(
"Tom Cruise is one of the finest actors in hollywood and a great star!");

// Assert
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
assertThat((double)sentiment.getScore()).isGreaterThan(0.0);
}

@Test public void analyzeSentimentFile_returnPositiveFile() throws Exception {
  // Act: analyze the positive review sample stored in GCS.
  String gcsPath = "gs://cloud-samples-tests/natural-language/"
      + "sentiment/bladerunner-pos.txt";
  Sentiment result = analyzeApp.analyzeSentimentFile(gcsPath);
  double magnitude = (double) result.getMagnitude();
  double score = (double) result.getScore();

  // Assert: both magnitude and score are strictly positive.
  assertThat(magnitude).isGreaterThan(0.0);
  assertThat(score).isGreaterThan(0.0);
}

@Test public void analyzeSentiment_returnNegative() throws Exception {
// Act
Sentiment sentiment =
analyzeApp.analyzeSentiment(
analyzeApp.analyzeSentimentText(
"That was the worst performance I've seen in awhile.");

// Assert
assertThat((double)sentiment.getMagnitude()).isGreaterThan(0.0);
assertThat((double)sentiment.getScore()).isLessThan(0.0);
}

@Test public void analyzeSentiment_returnNegativeFile() throws Exception {
  // Act: analyze the negative review sample stored in GCS.
  String gcsPath = "gs://cloud-samples-tests/natural-language/"
      + "sentiment/bladerunner-neg.txt";
  Sentiment result = analyzeApp.analyzeSentimentFile(gcsPath);
  double magnitude = (double) result.getMagnitude();
  double score = (double) result.getScore();

  // Assert: magnitude is positive while the score is negative.
  assertThat(magnitude).isGreaterThan(0.0);
  assertThat(score).isLessThan(0.0);
}

/**
 * Verifies that the neutral sample stored in GCS yields a magnitude above 1.0
 * together with a score within 0.1 of zero.
 */
@Test public void analyzeSentiment_returnNeutralFile() throws Exception {
  // Act
  Sentiment sentiment =
      analyzeApp.analyzeSentimentFile("gs://cloud-samples-tests/natural-language/"
          + "sentiment/bladerunner-neutral.txt");

  // Assert
  assertThat((double)sentiment.getMagnitude()).isGreaterThan(1.0);
  // isWithin(tolerance) alone only builds a comparison object and asserts nothing;
  // the check runs only once .of(expected) is called.
  assertThat((double)sentiment.getScore()).isWithin(0.1).of(0.0);
}

@Test public void analyzeSyntax_partOfSpeech() throws Exception {
// Act
List<Token> token =
analyzeApp.analyzeSyntax(
analyzeApp.analyzeSyntaxText(
"President Obama was elected for the second term");

List<Tag> got = token.stream().map(e -> e.getPartOfSpeech().getTag())
Expand All @@ -94,4 +137,20 @@ public class AnalyzeIT {
assertThat(got).containsExactly(Tag.NOUN, Tag.NOUN, Tag.VERB,
Tag.VERB, Tag.ADP, Tag.DET, Tag.ADJ, Tag.NOUN).inOrder();
}

@Test public void analyzeSyntax_partOfSpeechFile() throws Exception {
  // Act: analyze the sample stored in GCS and extract each token's part-of-speech tag.
  String gcsPath = "gs://cloud-samples-tests/natural-language/"
      + "sentiment/bladerunner-neutral.txt";
  List<Token> fileTokens = analyzeApp.analyzeSyntaxFile(gcsPath);
  List<Tag> tags =
      fileTokens.stream()
          .map(t -> t.getPartOfSpeech().getTag())
          .collect(Collectors.toList());

  // Assert: the tags match the expected sequence exactly, in order.
  assertThat(tags)
      .containsExactly(
          Tag.PRON, Tag.CONJ, Tag.VERB, Tag.CONJ, Tag.VERB,
          Tag.DET, Tag.NOUN, Tag.PUNCT, Tag.NOUN, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.CONJ,
          Tag.ADV, Tag.PRON, Tag.VERB, Tag.VERB, Tag.VERB, Tag.ADJ, Tag.PUNCT, Tag.DET,
          Tag.NOUN, Tag.VERB, Tag.ADV, Tag.ADJ, Tag.PUNCT)
      .inOrder();
}
}

0 comments on commit 7770807

Please sign in to comment.