-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add analyze API to high-level rest client #31577
Changes from 7 commits
e2f73bb
2563b4b
6332155
3bd541c
722214e
2c4ee45
c8c5008
69e1119
52d2c41
7435228
ceb6601
fc936ee
ba2cec1
d573dbb
57ec84e
070c3cb
08d2766
0e7ce01
20fc870
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,7 @@ | |
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; | ||
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions; | ||
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; | ||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; | ||
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; | ||
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest; | ||
import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; | ||
|
@@ -1950,6 +1951,22 @@ public void testGetTemplateRequest() throws Exception { | |
assertThat(request.getEntity(), nullValue()); | ||
} | ||
|
||
public void testAnalyzeRequest() throws Exception { | ||
AnalyzeRequest indexAnalyzeRequest = new AnalyzeRequest() | ||
.text("Here is some text") | ||
.index("test_index") | ||
.analyzer("test_analyzer"); | ||
|
||
Request request = RequestConverters.analyze(indexAnalyzeRequest); | ||
assertThat(request.getEndpoint(), equalTo("/test_index/_analyze")); | ||
assertThat(request.getEntity(), notNullValue()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think other folks are using |
||
|
||
AnalyzeRequest analyzeRequest = new AnalyzeRequest() | ||
.text("more text") | ||
.analyzer("test_analyzer"); | ||
assertThat(RequestConverters.analyze(analyzeRequest).getEndpoint(), equalTo("/_analyze")); | ||
} | ||
|
||
public void testGetScriptRequest() { | ||
GetStoredScriptRequest getStoredScriptRequest = new GetStoredScriptRequest("x-script"); | ||
Map<String, String> expectedParams = new HashMap<>(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,9 @@ | |
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest.AliasActions; | ||
import org.elasticsearch.action.admin.indices.alias.IndicesAliasesResponse; | ||
import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; | ||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; | ||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; | ||
import org.elasticsearch.action.admin.indices.analyze.DetailAnalyzeResponse; | ||
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheRequest; | ||
import org.elasticsearch.action.admin.indices.cache.clear.ClearIndicesCacheResponse; | ||
import org.elasticsearch.action.admin.indices.close.CloseIndexRequest; | ||
|
@@ -2211,4 +2214,127 @@ public void onFailure(Exception e) { | |
|
||
assertTrue(latch.await(30L, TimeUnit.SECONDS)); | ||
} | ||
|
||
public void testAnalyze() throws IOException, InterruptedException { | ||
|
||
RestHighLevelClient client = highLevelClient(); | ||
|
||
{ | ||
// tag::analyze-builtin-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.text("Some text to analyze", "Some more text to analyze"); // <1> | ||
request.analyzer("english"); // <2> | ||
// end::analyze-builtin-request | ||
} | ||
|
||
{ | ||
// tag::analyze-custom-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.text("<b>Some text to analyze</b>"); | ||
request.addCharFilter("html_strip"); // <1> | ||
request.tokenizer("standard"); // <2> | ||
request.addTokenFilter("lowercase"); // <3> | ||
|
||
Map<String, Object> stopFilter = new HashMap<>(); | ||
stopFilter.put("type", "stop"); | ||
stopFilter.put("stopwords", new String[]{ "to" }); // <4> | ||
request.addTokenFilter(stopFilter); // <5> | ||
// end::analyze-custom-request | ||
} | ||
|
||
{ | ||
// tag::analyze-custom-normalizer-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.text("<b>BaR</b>"); | ||
request.addCharFilter("html_strip"); | ||
request.addTokenFilter("lowercase"); | ||
// end::analyze-custom-normalizer-request | ||
|
||
// tag::analyze-request-explain | ||
request.explain(true); | ||
request.attributes("keyword", "type"); | ||
// end::analyze-request-explain | ||
|
||
// tag::analyze-request-sync | ||
AnalyzeResponse response = client.indices().analyze(request, RequestOptions.DEFAULT); | ||
// end::analyze-request-sync | ||
|
||
// tag::analyze-response | ||
List<AnalyzeResponse.AnalyzeToken> tokens = response.getTokens(); // <1> | ||
DetailAnalyzeResponse detail = response.detail(); // <2> | ||
// end::analyze-response | ||
|
||
assertEquals(tokens.size(), 1); | ||
assertEquals(tokens.get(0).getTerm(), "bar"); | ||
assertNotNull(detail.tokenizer()); | ||
} | ||
|
||
CreateIndexRequest req = new CreateIndexRequest("my_index"); | ||
CreateIndexResponse resp = client.indices().create(req, RequestOptions.DEFAULT); | ||
assertTrue(resp.isAcknowledged()); | ||
|
||
PutMappingRequest pmReq = new PutMappingRequest() | ||
.indices("my_index") | ||
.source("my_field", "type=text,analyzer=english"); | ||
PutMappingResponse pmResp = client.indices().putMapping(pmReq, RequestOptions.DEFAULT); | ||
assertTrue(pmResp.isAcknowledged()); | ||
|
||
{ | ||
// tag::analyze-index-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.index("my_index"); // <1> | ||
request.analyzer("my_analyzer"); // <2> | ||
request.text("some text to analyze"); | ||
// end::analyze-index-request | ||
|
||
// tag::analyze-execute-listener | ||
ActionListener<AnalyzeResponse> listener = new ActionListener<AnalyzeResponse>() { | ||
@Override | ||
public void onResponse(AnalyzeResponse analyzeTokens) { | ||
|
||
} | ||
|
||
@Override | ||
public void onFailure(Exception e) { | ||
|
||
} | ||
}; | ||
// end::analyze-execute-listener | ||
|
||
// Use a blocking listener in the test | ||
final CountDownLatch latch = new CountDownLatch(1); | ||
final ActionListener<AnalyzeResponse> blockingListener = new LatchedActionListener<>(listener, latch); | ||
listener = ActionListener.wrap(r -> { | ||
assertThat(r.getTokens(), hasSize(4)); | ||
}, e-> { | ||
blockingListener.onFailure(e); | ||
fail("should not fail"); | ||
}); | ||
|
||
// tag::analyze-request-async | ||
client.indices().analyzeAsync(request, RequestOptions.DEFAULT, listener); | ||
// end::analyze-request-async | ||
|
||
assertTrue(latch.await(30L, TimeUnit.SECONDS)); | ||
} | ||
|
||
{ | ||
// tag::analyze-index-normalizer-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.index("my_index"); // <1> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Make them line up? |
||
request.normalizer("my_normalizer"); // <2> | ||
request.text("some text to analyze"); | ||
// end::analyze-index-normalizer-request | ||
} | ||
|
||
{ | ||
// tag::analyze-field-request | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
request.index("my_index"); | ||
request.field("my_field"); | ||
request.text("some text to analyze"); | ||
// end::analyze-field-request | ||
} | ||
|
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
[[java-rest-high-analyze]] | ||
=== Analyze API | ||
|
||
[[java-rest-high-analyze-request]] | ||
==== Analyze Request | ||
|
||
An `AnalyzeRequest` contains the text to analyze, and one of several options to | ||
specify how the analysis should be performed. | ||
|
||
The simplest version uses a built-in analyzer: | ||
|
||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-builtin-request] | ||
--------------------------------------------------- | ||
<1> The text to include. Multiple strings are treated as a multi-valued field | ||
<2> A built-in analyzer | ||
|
||
You can configure a custom analyzer: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-custom-request] | ||
--------------------------------------------------- | ||
<1> Configure char filters | ||
<2> Configure the tokenizer | ||
<3> Add a built-in tokenfilter | ||
<4> Configuration for a custom tokenfilter | ||
<5> Add the custom tokenfilter | ||
|
||
You can also build a custom normalizer, by including only charfilters and | ||
tokenfilters: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-custom-normalizer-request] | ||
--------------------------------------------------- | ||
|
||
You can analyze text using an analyzer defined in an existing index: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-index-request] | ||
--------------------------------------------------- | ||
<1> The index containing the mappings | ||
<2> The analyzer defined on this index to use | ||
|
||
Or you can use a normalizer: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-index-normalizer-request] | ||
--------------------------------------------------- | ||
<1> The index containing the mappings | ||
<2> The normalizer defined on this index to use | ||
|
||
You can analyze text using the mappings for a particular field in an index: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-field-request] | ||
--------------------------------------------------- | ||
|
||
==== Optional arguments | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. typo :) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. argh, well spotted. Am I alright to directly commit a fix, or should I open another PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am fine with pushing this fix directly |
||
The following arguments can also optionally be provided: | ||
|
||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-explain] | ||
--------------------------------------------------- | ||
<1> Setting `explain` to true will add further details to the response | ||
<2> Setting `attributes` allows you to return only token attributes that you are | ||
interested in | ||
|
||
[[java-rest-high-analyze-sync]] | ||
==== Synchronous Execution | ||
|
||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-sync] | ||
--------------------------------------------------- | ||
|
||
[[java-rest-high-analyze-async]] | ||
==== Asynchronous Execution | ||
|
||
The asynchronous execution of an analyze request requires both the `AnalyzeRequest` | ||
instance and an `ActionListener` instance to be passed to the asynchronous method: | ||
|
||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-request-async] | ||
--------------------------------------------------- | ||
|
||
The asynchronous method does not block and returns immediately. Once it is | ||
completed the `ActionListener` is called back using the `onResponse` method if the | ||
execution successfully completed or using the `onFailure` method if it failed. | ||
|
||
A typical listener for `AnalyzeResponse` looks like: | ||
|
||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-execute-listener] | ||
--------------------------------------------------- | ||
|
||
[[java-rest-high-analyze-response]] | ||
==== Analyze Response | ||
|
||
The returned `AnalyzeResponse` allows you to retrieve details of the analysis as | ||
follows: | ||
["source","java",subs="attributes,callouts,macros"] | ||
--------------------------------------------------- | ||
include-tagged::{doc-tests}/IndicesClientDocumentationIT.java[analyze-response] | ||
--------------------------------------------------- | ||
<1> `AnalyzeToken` holds information about the individual tokens produced by analysis | ||
<2> `DetailAnalyzeResponse` holds more detailed information about tokens produced by | ||
the various substeps in the analysis chain. If `explain` was set to `false` in the | ||
request, this method will return `null` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
question: I see in the REST spec that we have also prefer_local and format. I don't see them supported in the corresponding REST action though. Can you double check? Maybe those params should be removed from the SPEC?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`prefer_local` was removed by commit cafc707 and I don't think `format` has ever been supported. I'll open a new PR to change the rest-spec, and include the typo fix in that one too.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sounds good thanks!