Skip to content

Commit

Permalink
Add analyze API to high-level rest client (#31577)
Browse files Browse the repository at this point in the history
  • Loading branch information
romseygeek committed Jul 3, 2018
1 parent 85bb167 commit 439e67f
Show file tree
Hide file tree
Showing 11 changed files with 720 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
Expand Down Expand Up @@ -1182,4 +1184,32 @@ public void getTemplateAsync(GetIndexTemplatesRequest getIndexTemplatesRequest,
restHighLevelClient.performRequestAsyncAndParseEntity(getIndexTemplatesRequest, RequestConverters::getTemplates,
options, GetIndexTemplatesResponse::fromXContent, listener, emptySet());
}

/**
 * Synchronously executes an analyze request against the Analyze API.
 * <p>
 * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html">Analyze API on elastic.co</a>
 *
 * @param request the analyze request to send
 * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @return the response, parsed with {@code AnalyzeResponse::fromXContent}
 * @throws IOException propagated from {@code performRequestAndParseEntity}
 */
public AnalyzeResponse analyze(AnalyzeRequest request, RequestOptions options) throws IOException {
    return restHighLevelClient.performRequestAndParseEntity(
        request,
        RequestConverters::analyze,
        options,
        AnalyzeResponse::fromXContent,
        emptySet());
}

/**
 * Asynchronously executes an analyze request against the Analyze API, handing the outcome to the given listener.
 * <p>
 * See <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-analyze.html">Analyze API on elastic.co</a>
 *
 * @param request the analyze request to send
 * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized
 * @param listener the listener to be notified upon request completion
 */
public void analyzeAsync(AnalyzeRequest request, RequestOptions options, ActionListener<AnalyzeResponse> listener) {
    restHighLevelClient.performRequestAsyncAndParseEntity(
        request,
        RequestConverters::analyze,
        options,
        AnalyzeResponse::fromXContent,
        listener,
        emptySet());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.elasticsearch.action.admin.cluster.snapshots.delete.DeleteSnapshotRequest;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
Expand Down Expand Up @@ -1009,6 +1010,18 @@ static Request getAlias(GetAliasesRequest getAliasesRequest) {
return request;
}

/**
 * Converts an {@link AnalyzeRequest} into a low-level {@code GET} request on the {@code _analyze} endpoint.
 * If the request names an index, that index becomes the leading path part; otherwise only
 * {@code _analyze} is used. The request itself is attached as the entity.
 */
static Request analyze(AnalyzeRequest request) throws IOException {
    final EndpointBuilder endpoint = new EndpointBuilder();
    final String targetIndex = request.index();
    if (targetIndex != null) {
        // index-scoped analysis: the index name precedes the _analyze path part
        endpoint.addPathPart(targetIndex);
    }
    endpoint.addPathPartAsIs("_analyze");

    final Request httpRequest = new Request(HttpGet.METHOD_NAME, endpoint.build());
    httpRequest.setEntity(createEntity(request, REQUEST_BODY_CONTENT_TYPE));
    return httpRequest;
}

static Request getScript(GetStoredScriptRequest getStoredScriptRequest) {
String endpoint = new EndpointBuilder().addPathPartAsIs("_scripts").addPathPart(getStoredScriptRequest.id()).build();
Request request = new Request(HttpGet.METHOD_NAME, endpoint);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
Expand Down Expand Up @@ -1320,4 +1322,20 @@ public void testGetIndexTemplate() throws Exception {
new GetIndexTemplatesRequest().names("the-template-*"), client.indices()::getTemplate, client.indices()::getTemplateAsync));
assertThat(notFound.status(), equalTo(RestStatus.NOT_FOUND));
}

/**
 * Runs two analyze requests without an index through the {@code execute} helper (which is handed both
 * the blocking and the async client method): a plain one that must yield three tokens, and an
 * {@code explain} one that must yield a detail section.
 */
public void testAnalyze() throws Exception {
    final RestHighLevelClient restClient = highLevelClient();

    // plain request using the "english" analyzer, no index involved
    AnalyzeRequest plainRequest = new AnalyzeRequest();
    plainRequest.text("One two three");
    plainRequest.analyzer("english");
    AnalyzeResponse plainResponse = execute(plainRequest, restClient.indices()::analyze, restClient.indices()::analyzeAsync);
    assertThat(plainResponse.getTokens(), hasSize(3));

    // same text and analyzer, this time asking for the explain output
    AnalyzeRequest explainRequest = new AnalyzeRequest();
    explainRequest.text("One two three");
    explainRequest.analyzer("english");
    explainRequest.explain(true);
    AnalyzeResponse explainResponse = execute(explainRequest, restClient.indices()::analyze, restClient.indices()::analyzeAsync);
    assertNotNull(explainResponse.detail());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest;
Expand Down Expand Up @@ -2219,6 +2220,22 @@ public void testGetTemplateRequest() throws Exception {
assertThat(request.getEntity(), nullValue());
}

/**
 * Checks the conversion of an {@link AnalyzeRequest} by {@code RequestConverters.analyze}:
 * with an index the endpoint is {@code /test_index/_analyze}, without one it is {@code /_analyze},
 * and in both cases the request must be serialized into the entity.
 */
public void testAnalyzeRequest() throws Exception {
    // request scoped to an index
    AnalyzeRequest indexAnalyzeRequest = new AnalyzeRequest()
        .text("Here is some text")
        .index("test_index")
        .analyzer("test_analyzer");

    Request request = RequestConverters.analyze(indexAnalyzeRequest);
    assertThat(request.getEndpoint(), equalTo("/test_index/_analyze"));
    assertToXContentBody(indexAnalyzeRequest, request.getEntity());

    // request without an index: verify the body as well as the endpoint
    AnalyzeRequest analyzeRequest = new AnalyzeRequest()
        .text("more text")
        .analyzer("test_analyzer");

    Request noIndexRequest = RequestConverters.analyze(analyzeRequest);
    assertThat(noIndexRequest.getEndpoint(), equalTo("/_analyze"));
    assertToXContentBody(analyzeRequest, noIndexRequest.getEntity());
}

public void testGetScriptRequest() {
GetStoredScriptRequest getStoredScriptRequest = new GetStoredScriptRequest("x-script");
Map<String, String> expectedParams = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions;
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse;
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.DetailAnalyzeResponse;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest;
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheResponse;
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest;
Expand Down Expand Up @@ -2315,4 +2318,127 @@ public void onFailure(Exception e) {

assertTrue(latch.await(30L, TimeUnit.SECONDS));
}

/**
 * Documentation test for the analyze API.
 *
 * The regions delimited by the tag:: and end:: marker comments, including their numbered callout
 * comments, are referenced by include-tagged directives in
 * docs/java-rest/high-level/indices/analyze.asciidoc, so the code inside them should be kept
 * in step with that page. Code outside the markers is test scaffolding only.
 */
public void testAnalyze() throws IOException, InterruptedException {

RestHighLevelClient client = highLevelClient();

// the next two blocks only construct requests for the documentation; neither request is sent
{
// tag::analyze-builtin-request
AnalyzeRequest request = new AnalyzeRequest();
request.text("Some text to analyze", "Some more text to analyze"); // <1>
request.analyzer("english"); // <2>
// end::analyze-builtin-request
}

{
// tag::analyze-custom-request
AnalyzeRequest request = new AnalyzeRequest();
request.text("<b>Some text to analyze</b>");
request.addCharFilter("html_strip"); // <1>
request.tokenizer("standard"); // <2>
request.addTokenFilter("lowercase"); // <3>

Map<String, Object> stopFilter = new HashMap<>();
stopFilter.put("type", "stop");
stopFilter.put("stopwords", new String[]{ "to" }); // <4>
request.addTokenFilter(stopFilter); // <5>
// end::analyze-custom-request
}

{
// tag::analyze-custom-normalizer-request
AnalyzeRequest request = new AnalyzeRequest();
request.text("<b>BaR</b>");
request.addTokenFilter("lowercase");
// end::analyze-custom-normalizer-request

// tag::analyze-request-explain
request.explain(true); // <1>
request.attributes("keyword", "type"); // <2>
// end::analyze-request-explain

// tag::analyze-request-sync
AnalyzeResponse response = client.indices().analyze(request, RequestOptions.DEFAULT);
// end::analyze-request-sync

// tag::analyze-response-tokens
List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens(); // <1>
// end::analyze-response-tokens
// tag::analyze-response-detail
DetailAnalyzeResponse detail = response.detail(); // <1>
// end::analyze-response-detail

// explain(true) was set on this request, so the test expects no plain token list
// and a detail section carrying tokenizer information instead
assertNull(tokens);
assertNotNull(detail.tokenizer());
}

// create my_index with a text field mapped to the english analyzer; the async request
// further down is executed against this index and field
CreateIndexRequest req = new CreateIndexRequest("my_index");
CreateIndexResponse resp = client.indices().create(req, RequestOptions.DEFAULT);
assertTrue(resp.isAcknowledged());

PutMappingRequest pmReq = new PutMappingRequest()
.indices("my_index")
.type("_doc")
.source("my_field", "type=text,analyzer=english");
PutMappingResponse pmResp = client.indices().putMapping(pmReq, RequestOptions.DEFAULT);
assertTrue(pmResp.isAcknowledged());

{
// tag::analyze-index-request
AnalyzeRequest request = new AnalyzeRequest();
request.index("my_index"); // <1>
request.analyzer("my_analyzer"); // <2>
request.text("some text to analyze");
// end::analyze-index-request

// tag::analyze-execute-listener
ActionListener<AnalyzeResponse> listener = new ActionListener<AnalyzeResponse>() {
@Override
public void onResponse(AnalyzeResponse analyzeTokens) {

}

@Override
public void onFailure(Exception e) {

}
};
// end::analyze-execute-listener

// use a built-in analyzer in the test: the snippet above names "my_analyzer", which this test
// never defines on my_index, so the request that is actually sent targets my_field instead
request = new AnalyzeRequest();
request.index("my_index");
request.field("my_field");
request.text("some text to analyze");
// Use a blocking listener in the test
final CountDownLatch latch = new CountDownLatch(1);
listener = new LatchedActionListener<>(listener, latch);

// tag::analyze-request-async
client.indices().analyzeAsync(request, RequestOptions.DEFAULT, listener);
// end::analyze-request-async

assertTrue(latch.await(30L, TimeUnit.SECONDS));
}

// the remaining two blocks only construct requests for the documentation; neither request is sent
{
// tag::analyze-index-normalizer-request
AnalyzeRequest request = new AnalyzeRequest();
request.index("my_index"); // <1>
request.normalizer("my_normalizer"); // <2>
request.text("some text to analyze");
// end::analyze-index-normalizer-request
}

{
// tag::analyze-field-request
AnalyzeRequest request = new AnalyzeRequest();
request.index("my_index");
request.field("my_field");
request.text("some text to analyze");
// end::analyze-field-request
}

}
}
119 changes: 119 additions & 0 deletions docs/java-rest/high-level/indices/analyze.asciidoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
[[java-rest-high-analyze]]
=== Analyze API

[[java-rest-high-analyze-request]]
==== Analyze Request

An `AnalyzeRequest` contains the text to analyze, and one of several options to
specify how the analysis should be performed.

The simplest version uses a built-in analyzer:

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-builtin-request]
---------------------------------------------------
<1> The text to analyze. Multiple strings are treated as a multi-valued field
<2> A built-in analyzer

You can configure a custom analyzer:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-custom-request]
---------------------------------------------------
<1> Configure char filters
<2> Configure the tokenizer
<3> Add a built-in tokenfilter
<4> Configuration for a custom tokenfilter
<5> Add the custom tokenfilter

You can also build a custom normalizer, by including only charfilters and
tokenfilters:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-custom-normalizer-request]
---------------------------------------------------

You can analyze text using an analyzer defined in an existing index:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-index-request]
---------------------------------------------------
<1> The index containing the mappings
<2> The analyzer defined on this index to use

Or you can use a normalizer:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-index-normalizer-request]
---------------------------------------------------
<1> The index containing the mappings
<2> The normalizer defined on this index to use

You can analyze text using the mappings for a particular field in an index:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-field-request]
---------------------------------------------------

==== Optional arguments
The following arguments can also optionally be provided:

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-explain]
---------------------------------------------------
<1> Setting `explain` to true will add further details to the response
<2> Setting `attributes` allows you to return only token attributes that you are
interested in

[[java-rest-high-analyze-sync]]
==== Synchronous Execution

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-sync]
---------------------------------------------------

[[java-rest-high-analyze-async]]
==== Asynchronous Execution

The asynchronous execution of an analyze request requires both the `AnalyzeRequest`
instance and an `ActionListener` instance to be passed to the asynchronous method:

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-async]
---------------------------------------------------

The asynchronous method does not block and returns immediately. Once it is
completed the `ActionListener` is called back using the `onResponse` method if the
execution successfully completed or using the `onFailure` method if it failed.

A typical listener for `AnalyzeResponse` looks like:

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-execute-listener]
---------------------------------------------------

[[java-rest-high-analyze-response]]
==== Analyze Response

The returned `AnalyzeResponse` allows you to retrieve details of the analysis as
follows:
["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-response-tokens]
---------------------------------------------------
<1> `AnalyzeToken` holds information about the individual tokens produced by analysis

If `explain` was set to `true`, then information is instead returned from the `detail()`
method:

["source","java",subs="attributes,callouts,macros"]
---------------------------------------------------
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-response-detail]
---------------------------------------------------
<1> `DetailAnalyzeResponse` holds more detailed information about tokens produced by
the various substeps in the analysis chain.
1 change: 1 addition & 0 deletions docs/java-rest/high-level/supported-apis.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ Alias Management::
* <<java-rest-high-exists-alias>>
* <<java-rest-high-get-alias>>

include::indices/analyze.asciidoc[]
include::indices/create_index.asciidoc[]
include::indices/delete_index.asciidoc[]
include::indices/indices_exists.asciidoc[]
Expand Down
Loading

0 comments on commit 439e67f

Please sign in to comment.