
[Analysis] Support normalizer in request param #24767

Merged on Jul 4, 2017 (4 commits)
File: AnalyzeRequest.java
@@ -18,6 +18,7 @@
*/
package org.elasticsearch.action.admin.indices.analyze;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;
@@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {

private String[] attributes = Strings.EMPTY_ARRAY;

private String normalizer;

public static class NameOrDefinition implements Writeable {
// exactly one of these two members is not null
public final String name;
@@ -202,12 +205,27 @@ public String[] attributes() {
return this.attributes;
}

public String normalizer() {
return this.normalizer;
}

public AnalyzeRequest normalizer(String normalizer) {
this.normalizer = normalizer;
return this;
}

@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
if (text == null || text.length == 0) {
validationException = addValidationError("text is missing", validationException);
}
if ((index == null || index.length() == 0) && normalizer != null) {
validationException = addValidationError("index is required if normalizer is specified", validationException);
}
if (normalizer != null && (tokenizer != null || analyzer != null)) {
validationException = addValidationError("tokenizer/analyze should be null if normalizer is specified", validationException);
}
return validationException;
}

@@ -222,6 +240,9 @@ public void readFrom(StreamInput in) throws IOException {
field = in.readOptionalString();
explain = in.readBoolean();
attributes = in.readStringArray();
if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
normalizer = in.readOptionalString();
}
}

@Override
@@ -235,5 +256,8 @@ public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalString(field);
out.writeBoolean(explain);
out.writeStringArray(attributes);
if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
out.writeOptionalString(normalizer);
}
}
}
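Taken together, the new validate() rules mean a normalizer always needs a concrete index (normalizers are defined in index analysis settings) and cannot be combined with an analyzer or tokenizer. A minimal sketch of the three cases; the index name "test" and normalizer name "my_normalizer" are illustrative:

// Fails: "index is required if normalizer is specified"
AnalyzeRequest noIndex = new AnalyzeRequest().normalizer("my_normalizer").text("ABc");
assert noIndex.validate() != null;

// Fails: normalizer is mutually exclusive with analyzer/tokenizer
AnalyzeRequest mixed = new AnalyzeRequest("test").normalizer("my_normalizer").analyzer("standard").text("ABc");
assert mixed.validate() != null;

// Passes: index plus normalizer only
AnalyzeRequest ok = new AnalyzeRequest("test").normalizer("my_normalizer").text("ABc");
assert ok.validate() == null;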
File: AnalyzeRequestBuilder.java
@@ -125,4 +125,13 @@ public AnalyzeRequestBuilder setText(String... texts) {
request.text(texts);
return this;
}

/**
* Sets the normalizer (by name) to use instead of an analyzer or tokenizer
*/
public AnalyzeRequestBuilder setNormalizer(String normalizer) {
request.normalizer(normalizer);
return this;
}

}
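For transport client users the new builder method composes with the existing fluent API. A hedged usage sketch, assuming client is an existing org.elasticsearch.client.Client and the target index defines a my_normalizer normalizer:

// A normalizer emits exactly one token, so "ABc" should come back
// as a single term, lowercased if the normalizer applies a lowercase filter.
AnalyzeResponse response = client.admin().indices()
        .prepareAnalyze("index", "ABc")
        .setNormalizer("my_normalizer")
        .get();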
File: TransportAnalyzeAction.java
@@ -51,6 +51,7 @@
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.CustomAnalyzerProvider;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -60,6 +61,7 @@
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;

@@ -178,21 +180,46 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
}

} else if (request.tokenizer() != null) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
analysisRegistry, environment);

List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment);
List<CharFilterFactory> charFilterFactoryList =
parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false);

List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry,
environment, tokenizerFactory, charFilterFactoryList);
environment, tokenizerFactory, charFilterFactoryList, false);

analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(),
charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
closeAnalyzer = true;
} else if (request.normalizer() != null) {
// Get normalizer from indexAnalyzers
analyzer = indexAnalyzers.getNormalizer(request.normalizer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]");
}
} else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0)
|| (request.charFilters() != null && request.charFilters().size() > 0))) {
final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
// ad-hoc normalizer: no normalizer name was given, but filter and/or char_filter are set
// while tokenizer/analyzer are null, so build one from the request's char filters and filters
List<CharFilterFactory> charFilterFactoryList =
parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true);

final String keywordTokenizerName = "keyword";
TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName);

List<TokenFilterFactory> tokenFilterFactoryList =
parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment,
    new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true);

analyzer = new CustomAnalyzer("keyword_for_normalizer",
keywordTokenizerFactory,
charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
closeAnalyzer = true;
} else if (analyzer == null) {
if (indexAnalyzers == null) {
analyzer = analysisRegistry.getAnalyzer("standard");
@@ -465,7 +492,7 @@ private static Map<String, Object> extractExtendedAttributes(TokenStream stream,
}

private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment) throws IOException {
Environment environment, boolean normalizer) throws IOException {
List<CharFilterFactory> charFilterFactoryList = new ArrayList<>();
if (request.charFilters() != null && request.charFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> charFilters = request.charFilters();
@@ -506,6 +533,13 @@ private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest r
if (charFilterFactory == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
if (normalizer) {
if (charFilterFactory instanceof MultiTermAwareComponent == false) {
throw new IllegalArgumentException("Custom normalizer may not use char filter ["
+ charFilterFactory.name() + "]");
}
charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
}
charFilterFactoryList.add(charFilterFactory);
}
}
@@ -514,7 +548,7 @@ private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest r

private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
List<CharFilterFactory> charFilterFactoryList) throws IOException {
List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
@@ -561,6 +595,13 @@ private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest
if (tokenFilterFactory == null) {
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
}
if (normalizer) {
if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
throw new IllegalArgumentException("Custom normalizer may not use filter ["
+ tokenFilterFactory.name() + "]");
}
tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
}
tokenFilterFactoryList.add(tokenFilterFactory);
}
}
@@ -590,12 +631,8 @@ private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequ
} else {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
if (indexAnalzyers == null) {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name);
name = tokenizer.name;
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
if (tokenizerFactoryFactory == null) {
@@ -610,6 +647,17 @@ private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequ
return new Tuple<>(name, tokenizerFactory);
}

private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
TokenizerFactory tokenizerFactory;
tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, name);
return tokenizerFactory;
}

private static IndexSettings getNaIndexSettings(Settings settings) {
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
return new IndexSettings(metaData, Settings.EMPTY);
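The ad-hoc path above wires a keyword tokenizer in front of the multi-term variants of the requested filters, which is why the output is a single normalized token. In plain Lucene terms the assembled chain is roughly equivalent to this sketch (Lucene 7-style imports assumed; the lowercase filter stands in for whatever filters the request names):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

// Rough equivalent of CustomAnalyzer("keyword_for_normalizer", ...): the whole
// input becomes one token, then multi-term-safe filters transform it in place.
Analyzer normalizerLike = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new KeywordTokenizer();
        TokenStream sink = new LowerCaseFilter(source);
        return new TokenStreamComponents(source, sink);
    }
};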
File: RestAnalyzeAction.java
@@ -46,6 +46,7 @@ public static class Fields {
public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
public static final ParseField EXPLAIN = new ParseField("explain");
public static final ParseField ATTRIBUTES = new ParseField("attributes");
public static final ParseField NORMALIZER = new ParseField("normalizer");
}

public RestAnalyzeAction(Settings settings, RestController controller) {
Expand Down Expand Up @@ -147,6 +148,12 @@ static void buildFromContent(XContentParser parser, AnalyzeRequest analyzeReques
attributes.add(parser.text());
}
analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
} else if (Fields.NORMALIZER.match(currentFieldName)) {
if (token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.normalizer(parser.text());
} else {
throw new IllegalArgumentException(currentFieldName + " should be normalizer's name");
}
Review comment (Member):
Can you add a test for this parsing part to RestAnalyzeActionTests?

} else {
throw new IllegalArgumentException("Unknown parameter ["
+ currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
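The review comment above asks for a parsing test; a sketch of what one could look like in RestAnalyzeActionTests, assuming the usual ESTestCase helpers (createParser, Hamcrest's equalTo) are available:

public void testNormalizerParsing() throws IOException {
    XContentParser content = createParser(JsonXContent.jsonXContent,
            "{ \"text\": \"ABc\", \"normalizer\": \"my_normalizer\" }");
    AnalyzeRequest analyzeRequest = new AnalyzeRequest("for_test");
    RestAnalyzeAction.buildFromContent(content, analyzeRequest);
    assertThat(analyzeRequest.normalizer(), equalTo("my_normalizer"));
}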
File: TransportAnalyzeActionTests.java
@@ -72,7 +72,9 @@ public void setUp() throws Exception {
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.custom_analyzer.filter", "mock").build();
.put("index.analysis.analyzer.custom_analyzer.filter", "mock")
.put("index.analysis.normalizer.my_normalizer.type", "custom")
.putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase").build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
environment = new Environment(settings);
AnalysisPlugin plugin = new AnalysisPlugin() {
@@ -304,6 +306,14 @@ public void testUnknown() throws IOException {
} else {
assertEquals(e.getMessage(), "failed to find global char filter under [foobar]");
}

e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.normalizer("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, indexAnalyzers, registry, environment));
assertEquals(e.getMessage(), "failed to find normalizer under [foobar]");
}

public void testNonPreBuildTokenFilter() throws IOException {
@@ -317,6 +327,16 @@ public void testNonPreBuildTokenFilter() throws IOException {
int default_bucket_size = 512;
int default_hash_set_size = 1;
assertEquals(default_hash_count * default_bucket_size * default_hash_set_size, tokens.size());
}

public void testNormalizerWithIndex() throws IOException {
AnalyzeRequest request = new AnalyzeRequest("index");
request.normalizer("my_normalizer");
request.text("ABc");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();

assertEquals(1, tokens.size());
assertEquals("abc", tokens.get(0).getTerm());
Review comment (Member):
Would it be possible to add a test for the second code path added in this PR (the case where normalizer == null but filter or char_filter is not null and tokenizer/analyzer is null)? I don't know if it is possible with this test setup but it might be useful

Reply (Contributor, author):
Ah, I added that test case in the REST API tests. Since we are moving filter/char_filter to the analysis-common module, I think that is a better place for it than this test class.

Reply (Member):
makes sense

}
}
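Following up on the thread above, a hedged sketch of what a test for the ad-hoc code path could look like if it stayed in this class; whether the "lowercase" filter resolves under this test setup is an open question, as the thread notes:

public void testCustomNormalizerWithTokenFilter() throws IOException {
    // No analyzer, tokenizer, or normalizer name; only a multi-term-aware
    // token filter, so the keyword_for_normalizer path should be taken.
    AnalyzeRequest request = new AnalyzeRequest("index");
    request.addTokenFilter("lowercase");
    request.text("ABc");
    AnalyzeResponse analyze = TransportAnalyzeAction.analyze(
            request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
    List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();

    assertEquals(1, tokens.size());
    assertEquals("abc", tokens.get(0).getTerm());
}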