diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index 08f220e0199d8..b44fa67183c0d 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.action.admin.indices.analyze; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.single.shard.SingleShardRequest; import org.elasticsearch.common.Strings; @@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest { private String[] attributes = Strings.EMPTY_ARRAY; + private String normalizer; + public static class NameOrDefinition implements Writeable { // exactly one of these two members is not null public final String name; @@ -202,12 +205,27 @@ public String[] attributes() { return this.attributes; } + public String normalizer() { + return this.normalizer; + } + + public AnalyzeRequest normalizer(String normalizer) { + this.normalizer = normalizer; + return this; + } + @Override public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; if (text == null || text.length == 0) { validationException = addValidationError("text is missing", validationException); } + if ((index == null || index.length() == 0) && normalizer != null) { + validationException = addValidationError("index is required if normalizer is specified", validationException); + } + if (normalizer != null && (tokenizer != null || analyzer != null)) { + validationException = addValidationError("tokenizer/analyze should be null if normalizer is specified", validationException); + } return validationException; } @@ -222,6 +240,9 @@ public void readFrom(StreamInput in) throws IOException { field 
= in.readOptionalString(); explain = in.readBoolean(); attributes = in.readStringArray(); + if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) { + normalizer = in.readOptionalString(); + } } @Override @@ -235,5 +256,8 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(field); out.writeBoolean(explain); out.writeStringArray(attributes); + if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) { + out.writeOptionalString(normalizer); + } } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index 5070862ed69b5..3893cb25d9dbb 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -125,4 +125,13 @@ public AnalyzeRequestBuilder setText(String... texts) { request.text(texts); return this; } + + /** + * Instead of setting the analyzer and tokenizer, sets the normalizer as name + */ + public AnalyzeRequestBuilder setNormalizer(String normalizer) { + request.normalizer(normalizer); + return this; + } + } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index b7da50139bbc8..ffa4a73d87fa1 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.CustomAnalyzerProvider; import org.elasticsearch.index.analysis.IndexAnalyzers; +import org.elasticsearch.index.analysis.MultiTermAwareComponent; import 
org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; @@ -60,6 +61,7 @@ import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.indices.analysis.PreBuiltTokenizers; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; @@ -178,21 +180,46 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]"); } } - } else if (request.tokenizer() != null) { final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); Tuple tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment); - List charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment); + List charFilterFactoryList = + parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false); List tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry, - environment, tokenizerFactory, charFilterFactoryList); + environment, tokenizerFactory, charFilterFactoryList, false); analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(), charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]), tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()])); closeAnalyzer = true; + } else if (request.normalizer() != null) { + // Get normalizer from indexAnalyzers + analyzer = indexAnalyzers.getNormalizer(request.normalizer()); + if (analyzer == null) { + throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]"); + } + } else if (((request.tokenFilters() != 
null && request.tokenFilters().size() > 0) + || (request.charFilters() != null && request.charFilters().size() > 0))) { + final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); + // custom normalizer = if normalizer == null but filter or char_filter is not null and tokenizer/analyzer is null + // get charfilter and filter from request + List charFilterFactoryList = + parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true); + + final String keywordTokenizerName = "keyword"; + TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); + + List tokenFilterFactoryList = + parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true); + + analyzer = new CustomAnalyzer("keyword_for_normalizer", + keywordTokenizerFactory, + charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]), + tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()])); + closeAnalyzer = true; } else if (analyzer == null) { if (indexAnalyzers == null) { analyzer = analysisRegistry.getAnalyzer("standard"); @@ -465,7 +492,7 @@ private static Map extractExtendedAttributes(TokenStream stream, } private static List parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, - Environment environment) throws IOException { + Environment environment, boolean normalizer) throws IOException { List charFilterFactoryList = new ArrayList<>(); if (request.charFilters() != null && request.charFilters().size() > 0) { List charFilters = request.charFilters(); @@ -506,6 +533,13 @@ private static List parseCharFilterFactories(AnalyzeRequest r if (charFilterFactory == null) { throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); } + 
if (normalizer) { + if (charFilterFactory instanceof MultiTermAwareComponent == false) { + throw new IllegalArgumentException("Custom normalizer may not use char filter [" + + charFilterFactory.name() + "]"); + } + charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent(); + } charFilterFactoryList.add(charFilterFactory); } } @@ -514,7 +548,7 @@ private static List parseCharFilterFactories(AnalyzeRequest r private static List parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, Environment environment, Tuple tokenizerFactory, - List charFilterFactoryList) throws IOException { + List charFilterFactoryList, boolean normalizer) throws IOException { List tokenFilterFactoryList = new ArrayList<>(); if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { List tokenFilters = request.tokenFilters(); @@ -561,6 +595,13 @@ private static List parseTokenFilterFactories(AnalyzeRequest if (tokenFilterFactory == null) { throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]"); } + if (normalizer) { + if (tokenFilterFactory instanceof MultiTermAwareComponent == false) { + throw new IllegalArgumentException("Custom normalizer may not use filter [" + + tokenFilterFactory.name() + "]"); + } + tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent(); + } tokenFilterFactoryList.add(tokenFilterFactory); } } @@ -590,12 +631,8 @@ private static Tuple parseTokenizerFactory(AnalyzeRequ } else { AnalysisModule.AnalysisProvider tokenizerFactoryFactory; if (indexAnalzyers == null) { - tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]"); - } + tokenizerFactory = 
getTokenizerFactory(analysisRegistry, environment, tokenizer.name); name = tokenizer.name; - tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name); } else { tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings()); if (tokenizerFactoryFactory == null) { @@ -610,6 +647,17 @@ private static Tuple parseTokenizerFactory(AnalyzeRequ return new Tuple<>(name, tokenizerFactory); } + private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException { + AnalysisModule.AnalysisProvider tokenizerFactoryFactory; + TokenizerFactory tokenizerFactory; + tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); + if (tokenizerFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]"); + } + tokenizerFactory = tokenizerFactoryFactory.get(environment, name); + return tokenizerFactory; + } + private static IndexSettings getNaIndexSettings(Settings settings) { IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); return new IndexSettings(metaData, Settings.EMPTY); diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java index 44ff79c4d9fa0..62c0e97c03506 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeAction.java @@ -46,6 +46,7 @@ public static class Fields { public static final ParseField CHAR_FILTERS = new ParseField("char_filter"); public static final ParseField EXPLAIN = new ParseField("explain"); public static final ParseField ATTRIBUTES = new ParseField("attributes"); + public static final ParseField NORMALIZER = new ParseField("normalizer"); 
} public RestAnalyzeAction(Settings settings, RestController controller) { @@ -147,6 +148,12 @@ static void buildFromContent(XContentParser parser, AnalyzeRequest analyzeReques attributes.add(parser.text()); } analyzeRequest.attributes(attributes.toArray(new String[attributes.size()])); + } else if (Fields.NORMALIZER.match(currentFieldName)) { + if (token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.normalizer(parser.text()); + } else { + throw new IllegalArgumentException(currentFieldName + " should be normalizer's name"); + } } else { throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] "); diff --git a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 5dfcd102431a5..79e9975484e7e 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -72,7 +72,9 @@ public void setUp() throws Exception { .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard") - .put("index.analysis.analyzer.custom_analyzer.filter", "mock").build(); + .put("index.analysis.analyzer.custom_analyzer.filter", "mock") + .put("index.analysis.normalizer.my_normalizer.type", "custom") + .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase").build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); environment = new Environment(settings); AnalysisPlugin plugin = new AnalysisPlugin() { @@ -304,6 +306,14 @@ public void testUnknown() throws IOException { } else { assertEquals(e.getMessage(), "failed to find global char filter 
under [foobar]"); } + + e = expectThrows(IllegalArgumentException.class, + () -> TransportAnalyzeAction.analyze( + new AnalyzeRequest() + .normalizer("foobar") + .text("the qu1ck brown fox"), + AllFieldMapper.NAME, null, indexAnalyzers, registry, environment)); + assertEquals(e.getMessage(), "failed to find normalizer under [foobar]"); } public void testNonPreBuildTokenFilter() throws IOException { @@ -317,6 +327,16 @@ public void testNonPreBuildTokenFilter() throws IOException { int default_bucket_size = 512; int default_hash_set_size = 1; assertEquals(default_hash_count * default_bucket_size * default_hash_set_size, tokens.size()); + } + + public void testNormalizerWithIndex() throws IOException { + AnalyzeRequest request = new AnalyzeRequest("index"); + request.normalizer("my_normalizer"); + request.text("ABc"); + AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment); + List tokens = analyze.getTokens(); + assertEquals(1, tokens.size()); + assertEquals("abc", tokens.get(0).getTerm()); } } diff --git a/core/src/test/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestTests.java b/core/src/test/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestTests.java new file mode 100644 index 0000000000000..39a136672de55 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestTests.java @@ -0,0 +1,111 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.admin.indices.analyze; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.VersionUtils; + +import java.io.IOException; +import java.util.Base64; + + +public class AnalyzeRequestTests extends ESTestCase { + + public void testValidation() throws Exception { + AnalyzeRequest request = new AnalyzeRequest(); + + ActionRequestValidationException e = request.validate(); + assertNotNull("text validation should fail", e); + assertTrue(e.getMessage().contains("text is missing")); + + request.text(new String[0]); + e = request.validate(); + assertNotNull("text validation should fail", e); + assertTrue(e.getMessage().contains("text is missing")); + + request.text(""); + request.normalizer("some normalizer"); + e = request.validate(); + assertNotNull("normalizer validation should fail", e); + assertTrue(e.getMessage().contains("index is required if normalizer is specified")); + + request.index(""); + e = request.validate(); + assertNotNull("normalizer validation should fail", e); + assertTrue(e.getMessage().contains("index is required if normalizer is specified")); + + request.index("something"); + e = request.validate(); + assertNull("something wrong in validate", e); + + request.tokenizer("tokenizer"); + e = request.validate(); + 
assertTrue(e.getMessage().contains("tokenizer/analyze should be null if normalizer is specified")); + + AnalyzeRequest requestAnalyzer = new AnalyzeRequest("index"); + requestAnalyzer.normalizer("some normalizer"); + requestAnalyzer.text("something"); + requestAnalyzer.analyzer("analyzer"); + e = requestAnalyzer.validate(); + assertTrue(e.getMessage().contains("tokenizer/analyze should be null if normalizer is specified")); + } + + public void testSerialization() throws IOException { + AnalyzeRequest request = new AnalyzeRequest("foo"); + request.text("a", "b"); + request.tokenizer("tokenizer"); + request.addTokenFilter("tokenfilter"); + request.addCharFilter("charfilter"); + request.normalizer("normalizer"); + + try (BytesStreamOutput output = new BytesStreamOutput()) { + request.writeTo(output); + try (StreamInput in = output.bytes().streamInput()) { + AnalyzeRequest serialized = new AnalyzeRequest(); + serialized.readFrom(in); + assertArrayEquals(request.text(), serialized.text()); + assertEquals(request.tokenizer().name, serialized.tokenizer().name); + assertEquals(request.tokenFilters().get(0).name, serialized.tokenFilters().get(0).name); + assertEquals(request.charFilters().get(0).name, serialized.charFilters().get(0).name); + assertEquals(request.normalizer(), serialized.normalizer()); + } + } + } + + public void testSerializationBwc() throws IOException { + // AnalyzeRequest serializedRequest = new AnalyzeRequest("foo"); + // serializedRequest.text("text"); + // serializedRequest.normalizer("normalizer"); + // Using Version.V_6_0_0_alpha3 + final byte[] data = Base64.getDecoder().decode("AAABA2ZvbwEEdGV4dAAAAAAAAAABCm5vcm1hbGl6ZXI="); + final Version version = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_5_4_0); + try (StreamInput in = StreamInput.wrap(data)) { + in.setVersion(version); + AnalyzeRequest request = new AnalyzeRequest(); + request.readFrom(in); + assertEquals("foo", request.index()); + assertNull("normalizer support 
after 6.0.0", request.normalizer()); + } + } +} diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index 6e0c61c1544dd..d53dba67e0dc4 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -458,6 +458,7 @@ public void testAnalyzeNormalizedKeywordField() throws IOException { assertThat(token.getEndOffset(), equalTo(3)); assertThat(token.getPosition(), equalTo(0)); assertThat(token.getPositionLength(), equalTo(1)); - } + + } diff --git a/core/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java index 958b9e5222fad..66f0bd796eaef 100644 --- a/core/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java +++ b/core/src/test/java/org/elasticsearch/rest/action/admin/indices/RestAnalyzeActionTests.java @@ -75,6 +75,7 @@ public void testParseXContentForAnalyzeRequestWithCustomFilters() throws Excepti .array("mappings", "ph => f", "qu => q") .endObject() .endArray() + .field("normalizer", "normalizer") .endObject()); AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); @@ -89,6 +90,7 @@ public void testParseXContentForAnalyzeRequestWithCustomFilters() throws Excepti assertThat(analyzeRequest.tokenFilters().get(1).definition, notNullValue()); assertThat(analyzeRequest.charFilters().size(), equalTo(1)); assertThat(analyzeRequest.charFilters().get(0).definition, notNullValue()); + assertThat(analyzeRequest.normalizer(), equalTo("normalizer")); } public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() throws Exception { @@ -122,6 +124,17 @@ public void testParseXContentForAnalyzeRequestWithInvalidStringExplainParamThrow assertThat(e.getMessage(), startsWith("explain 
must be either 'true' or 'false'")); } + public void testParseXContentForAnalyzeRequestWithInvalidNormalizerThrowsException() throws Exception { + AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); + XContentParser invalidExplain = createParser(XContentFactory.jsonBuilder() + .startObject() + .field("normalizer", true) + .endObject()); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> RestAnalyzeAction.buildFromContent(invalidExplain, analyzeRequest)); + assertThat(e.getMessage(), startsWith("normalizer should be normalizer's name")); + } + public void testDeprecatedParamIn2xException() throws Exception { { XContentParser parser = createParser(XContentFactory.jsonBuilder() diff --git a/docs/build.gradle b/docs/build.gradle index 339b39c8a25d0..e0de246135505 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -457,3 +457,23 @@ buildRestTests.setups['stored_scripted_metric_script'] = ''' body: { "script": { "lang": "painless", "source": "double profit = 0;for (a in params._aggs) { profit += a; } return profit" } } - match: { acknowledged: true } ''' + +// Used by analyze api +buildRestTests.setups['analyze_sample'] = ''' + - do: + indices.create: + index: analyze_sample + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + analysis: + normalizer: + my_normalizer: + type: custom + filter: [lowercase] + mappings: + tweet: + properties: + obj1.field1: + type: text''' diff --git a/docs/reference/indices/analyze.asciidoc b/docs/reference/indices/analyze.asciidoc index de35f84489241..e29a5b2432a54 100644 --- a/docs/reference/indices/analyze.asciidoc +++ b/docs/reference/indices/analyze.asciidoc @@ -75,45 +75,70 @@ It can also run against a specific index: [source,js] -------------------------------------------------- -GET twitter/_analyze +GET analyze_sample/_analyze { "text" : "this is a test" } -------------------------------------------------- // CONSOLE -// TEST[setup:twitter] +// 
TEST[setup:analyze_sample] The above will run an analysis on the "this is a test" text, using the -default index analyzer associated with the `test` index. An `analyzer` +default index analyzer associated with the `analyze_sample` index. An `analyzer` can also be provided to use a different analyzer: [source,js] -------------------------------------------------- -GET twitter/_analyze +GET analyze_sample/_analyze { "analyzer" : "whitespace", "text" : "this is a test" } -------------------------------------------------- // CONSOLE -// TEST[setup:twitter] +// TEST[setup:analyze_sample] Also, the analyzer can be derived based on a field mapping, for example: [source,js] -------------------------------------------------- -GET twitter/_analyze +GET analyze_sample/_analyze { "field" : "obj1.field1", "text" : "this is a test" } -------------------------------------------------- // CONSOLE -// TEST[setup:twitter] +// TEST[setup:analyze_sample] Will cause the analysis to happen based on the analyzer configured in the mapping for `obj1.field1` (and if not, the default index analyzer). +A `normalizer` can be provided for a keyword field with a normalizer associated with the `analyze_sample` index. + +[source,js] +-------------------------------------------------- +GET analyze_sample/_analyze +{ + "normalizer" : "my_normalizer", + "text" : "BaR" +} +-------------------------------------------------- +// CONSOLE +// TEST[setup:analyze_sample] + +A custom transient normalizer can also be built out of token filters and char filters. + +[source,js] +-------------------------------------------------- +GET _analyze +{ + "filter" : ["lowercase"], + "text" : "BaR" +} +-------------------------------------------------- +// CONSOLE + === Explain Analyze If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token. 
diff --git a/docs/reference/migration/migrate_6_0/rest.asciidoc b/docs/reference/migration/migrate_6_0/rest.asciidoc index 5f0c88f13cfee..be87f857965b7 100644 --- a/docs/reference/migration/migrate_6_0/rest.asciidoc +++ b/docs/reference/migration/migrate_6_0/rest.asciidoc @@ -31,6 +31,13 @@ value `false` and boolean "true" as the value `true`. All other values will rais The deprecated request parameters and plain text in request body has been removed. Define parameters in request body. +==== Support custom normalizer in Analyze API + +The Analyze API can now analyze text with a normalizer or a custom normalizer. +In previous versions of Elasticsearch, the Analyze API required a `tokenizer` or `analyzer` parameter. +In Elasticsearch 6.0.0, the Analyze API can analyze text as a keyword field with a custom normalizer, +either by naming a `normalizer` or by setting `char_filter`/`filter` without `tokenizer`/`analyzer`. + ==== Indices exists API The `ignore_unavailable` and `allow_no_indices` options are no longer accepted diff --git a/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml new file mode 100644 index 0000000000000..0866dc5bc4dfd --- /dev/null +++ b/modules/analysis-common/src/test/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -0,0 +1,19 @@ +--- +"Custom normalizer with illegal filter in request": + # Tests analyze api with normalizer. 
This is in the analysis-common module + # because there are no filters that support multiTermAware + - skip: + version: " - 5.99.99" + reason: normalizer support in 6.0.0 + - do: + catch: request + indices.analyze: + body: + text: ABc + explain: true + filter: [word_delimiter] + + - match: { status: 400 } + - match: { error.type: illegal_argument_exception } + - match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" } + diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml index 358023c25afc0..3429de6ed455f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yml @@ -111,3 +111,22 @@ - length: { tokens: 2 } - match: { tokens.0.token: sha } - match: { tokens.1.token: hay } + +--- +"Custom normalizer in request": + - skip: + version: " - 5.99.99" + reason: normalizer support in 6.0.0 + - do: + indices.analyze: + body: + text: ABc + explain: true + filter: ["lowercase"] + + - length: { detail.tokenizer.tokens: 1 } + - length: { detail.tokenfilters.0.tokens: 1 } + - match: { detail.tokenizer.name: keyword_for_normalizer } + - match: { detail.tokenizer.tokens.0.token: ABc } + - match: { detail.tokenfilters.0.name: lowercase } + - match: { detail.tokenfilters.0.tokens.0.token: abc }