From 1e68c3149741b2a4bec5df04f19a6b04c9b01c21 Mon Sep 17 00:00:00 2001 From: MikePieperSer <41434171+MikePieperSer@users.noreply.github.com> Date: Mon, 21 Aug 2023 20:57:28 +0200 Subject: [PATCH] Added "smartcn" analyzer. (#605) * Added "smartcn" analyzer. Signed-off-by: pieper * Adapted changelog. Signed-off-by: pieper * Fixed license headers. Signed-off-by: pieper * Fixed release number. Signed-off-by: pieper --------- Signed-off-by: pieper --- CHANGELOG.md | 1 + .../opensearch/_types/analysis/Analyzer.java | 33 +++++- .../_types/analysis/SmartcnAnalyzer.java | 101 ++++++++++++++++++ .../analysis/SmartcnStopTokenFilter.java | 98 +++++++++++++++++ .../_types/analysis/SmartcnTokenizer.java | 97 +++++++++++++++++ .../analysis/TokenFilterDefinition.java | 16 ++- .../_types/analysis/TokenizerDefinition.java | 16 ++- .../opensearch/experiments/ParsingTests.java | 40 +++++++ 8 files changed, 399 insertions(+), 3 deletions(-) create mode 100644 java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnAnalyzer.java create mode 100644 java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnStopTokenFilter.java create mode 100644 java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnTokenizer.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 60063e5852..f1458921ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [Unreleased 2.x] ### Added +- Added support for "smartcn" analyzer ([#605](https://github.com/opensearch-project/opensearch-java/pull/605)) ### Dependencies diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/Analyzer.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/Analyzer.java index 66ba4d4de4..8ef11d1086 100644 --- a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/Analyzer.java +++ 
b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/Analyzer.java @@ -90,6 +90,8 @@ public enum Kind implements JsonEnum { Whitespace("whitespace"), + Smartcn("smartcn"), + ; private final String jsonValue; @@ -373,7 +375,25 @@ public WhitespaceAnalyzer whitespace() { return TaggedUnionUtils.get(this, Kind.Whitespace); } - @Override + /** + * Is this variant instance of kind {@code smartcn}? + */ + public boolean isSmartcn() { + return _kind == Kind.Smartcn; + } + + /** + * Get the {@code smartcn} variant value. + * + * @throws IllegalStateException + * if the current variant is not of the {@code smartcn} kind. + */ + public SmartcnAnalyzer smartcn() { + return TaggedUnionUtils.get(this, Kind.Smartcn); + } + + + @Override public void serialize(JsonGenerator generator, JsonpMapper mapper) { mapper.serialize(_value, generator); @@ -530,6 +550,16 @@ public ObjectBuilder whitespace( return this.whitespace(fn.apply(new WhitespaceAnalyzer.Builder()).build()); } + public ObjectBuilder smartcn(SmartcnAnalyzer v) { + this._kind = Kind.Smartcn; + this._value = v; + return this; + } + + public ObjectBuilder smartcn() { + return this.smartcn(new SmartcnAnalyzer.Builder().build()); + } + public Analyzer build() { _checkSingleUse(); return new Analyzer(this); @@ -553,6 +583,7 @@ protected static void setupAnalyzerDeserializer(ObjectDeserializer op) op.add(Builder::standard, StandardAnalyzer._DESERIALIZER, "standard"); op.add(Builder::stop, StopAnalyzer._DESERIALIZER, "stop"); op.add(Builder::whitespace, WhitespaceAnalyzer._DESERIALIZER, "whitespace"); + op.add(Builder::smartcn, SmartcnAnalyzer._DESERIALIZER, Kind.Smartcn.jsonValue()); op.setTypeProperty("type", null); diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnAnalyzer.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnAnalyzer.java new file mode 100644 index 0000000000..11cee498cf --- /dev/null +++ 
b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnAnalyzer.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.client.opensearch._types.analysis; + +import java.util.function.Function; + +import org.opensearch.client.json.JsonpDeserializable; +import org.opensearch.client.json.JsonpDeserializer; +import org.opensearch.client.json.JsonpMapper; +import org.opensearch.client.json.JsonpSerializable; +import org.opensearch.client.json.ObjectBuilderDeserializer; +import org.opensearch.client.json.ObjectDeserializer; +import org.opensearch.client.util.ObjectBuilder; +import org.opensearch.client.util.ObjectBuilderBase; + +import jakarta.json.stream.JsonGenerator; + +// typedef: _types.analysis.LanguageAnalyzer + +@JsonpDeserializable +public class SmartcnAnalyzer implements AnalyzerVariant, JsonpSerializable { + + // --------------------------------------------------------------------------------------------- + + private SmartcnAnalyzer(Builder builder) { + + } + + public static SmartcnAnalyzer of(Function> fn) { + return fn.apply(new Builder()).build(); + } + + /** + * Analyzer variant kind; always {@link Analyzer.Kind#Smartcn}. + */ + @Override + public Analyzer.Kind _analyzerKind() { + return Analyzer.Kind.Smartcn; + } + + /** + * Serialize this object to JSON as a single object whose fields are written by {@code serializeInternal}.
+ */ + public void serialize(JsonGenerator generator, JsonpMapper mapper) { + generator.writeStartObject(); + serializeInternal(generator, mapper); + generator.writeEnd(); + } + + protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { + + generator.write("type", Analyzer.Kind.Smartcn.jsonValue()); + + } + + // --------------------------------------------------------------------------------------------- + + /** + * Builder for {@link SmartcnAnalyzer}. + */ + + public static class Builder extends ObjectBuilderBase implements ObjectBuilder { + + /** + * Builds a {@link SmartcnAnalyzer}. + * + * @throws NullPointerException + * if some required fields are null. + */ + public SmartcnAnalyzer build() { + _checkSingleUse(); + + return new SmartcnAnalyzer(this); + } + } + + // --------------------------------------------------------------------------------------------- + + /** + * Json deserializer for {@link SmartcnAnalyzer}; its setup marks the {@code type} property as ignored. + */ + public static final JsonpDeserializer _DESERIALIZER = ObjectBuilderDeserializer.lazy(Builder::new, + SmartcnAnalyzer::setupLanguageAnalyzerDeserializer); + + protected static void setupLanguageAnalyzerDeserializer(ObjectDeserializer op) { + + op.ignore("type"); + } + +} diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnStopTokenFilter.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnStopTokenFilter.java new file mode 100644 index 0000000000..7e47454ec0 --- /dev/null +++ b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnStopTokenFilter.java @@ -0,0 +1,98 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.client.opensearch._types.analysis; + +import java.util.function.Function; + +import org.opensearch.client.json.JsonpDeserializable; +import org.opensearch.client.json.JsonpDeserializer; +import org.opensearch.client.json.JsonpMapper; +import org.opensearch.client.json.ObjectBuilderDeserializer; +import org.opensearch.client.json.ObjectDeserializer; +import org.opensearch.client.util.ObjectBuilder; + +import jakarta.json.stream.JsonGenerator; + +// typedef: _types.analysis.ReverseTokenFilter + +@JsonpDeserializable +public class SmartcnStopTokenFilter extends TokenFilterBase implements TokenFilterDefinitionVariant { + // --------------------------------------------------------------------------------------------- + + private SmartcnStopTokenFilter(Builder builder) { + super(builder); + + } + + public static SmartcnStopTokenFilter of(Function> fn) { + return fn.apply(new Builder()).build(); + } + + /** + * TokenFilterDefinition variant kind; always {@link TokenFilterDefinition.Kind#SmartcnStop}. + */ + @Override + public TokenFilterDefinition.Kind _tokenFilterDefinitionKind() { + return TokenFilterDefinition.Kind.SmartcnStop; + } + + protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { + + generator.write("type", TokenFilterDefinition.Kind.SmartcnStop.jsonValue()); + super.serializeInternal(generator, mapper); + + } + + // --------------------------------------------------------------------------------------------- + + /** + * Builder for {@link SmartcnStopTokenFilter}. + */ + + public static class Builder extends AbstractBuilder + implements + ObjectBuilder { + @Override + protected Builder self() { + return this; + } + + /** + * Builds a {@link SmartcnStopTokenFilter}. + * + * @throws NullPointerException + * if some required fields are null. 
+ */ + public SmartcnStopTokenFilter build() { + _checkSingleUse(); + + return new SmartcnStopTokenFilter(this); + } + } + + // --------------------------------------------------------------------------------------------- + + /** + * Json deserializer for {@link SmartcnStopTokenFilter}; its setup marks the {@code type} property as ignored. + */ + public static final JsonpDeserializer _DESERIALIZER = ObjectBuilderDeserializer + .lazy(Builder::new, SmartcnStopTokenFilter::setupSmartcnStopTokenFilterDeserializer); + + protected static void setupSmartcnStopTokenFilterDeserializer(ObjectDeserializer op) { + setupTokenFilterBaseDeserializer(op); + + op.ignore("type"); + } + +} diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnTokenizer.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnTokenizer.java new file mode 100644 index 0000000000..f8d37e1bc7 --- /dev/null +++ b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/SmartcnTokenizer.java @@ -0,0 +1,97 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/* + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. 
+ */ + +package org.opensearch.client.opensearch._types.analysis; + +import java.util.function.Function; + +import org.opensearch.client.json.JsonpDeserializable; +import org.opensearch.client.json.JsonpDeserializer; +import org.opensearch.client.json.JsonpMapper; +import org.opensearch.client.json.ObjectBuilderDeserializer; +import org.opensearch.client.json.ObjectDeserializer; +import org.opensearch.client.util.ObjectBuilder; + +import jakarta.json.stream.JsonGenerator; + +// typedef: _types.analysis.IcuTokenizer + +@JsonpDeserializable +public class SmartcnTokenizer extends TokenizerBase implements TokenizerDefinitionVariant { + // --------------------------------------------------------------------------------------------- + + private SmartcnTokenizer(Builder builder) { + super(builder); + + } + + public static SmartcnTokenizer of(Function> fn) { + return fn.apply(new Builder()).build(); + } + + /** + * TokenizerDefinition variant kind; always {@link TokenizerDefinition.Kind#SmartcnTokenizer}. + */ + @Override + public TokenizerDefinition.Kind _tokenizerDefinitionKind() { + return TokenizerDefinition.Kind.SmartcnTokenizer; + } + + protected void serializeInternal(JsonGenerator generator, JsonpMapper mapper) { + + generator.write("type", TokenizerDefinition.Kind.SmartcnTokenizer.jsonValue()); + super.serializeInternal(generator, mapper); + + } + + // --------------------------------------------------------------------------------------------- + + /** + * Builder for {@link SmartcnTokenizer}. + */ + + public static class Builder extends AbstractBuilder implements ObjectBuilder { + + @Override + protected Builder self() { + return this; + } + + /** + * Builds a {@link SmartcnTokenizer}. + * + * @throws NullPointerException + * if some required fields are null. 
+ */ + public SmartcnTokenizer build() { + _checkSingleUse(); + + return new SmartcnTokenizer(this); + } + } + + // --------------------------------------------------------------------------------------------- + + /** + * Json deserializer for {@link SmartcnTokenizer}; its setup marks the {@code type} property as ignored. + */ + public static final JsonpDeserializer _DESERIALIZER = ObjectBuilderDeserializer.lazy(Builder::new, + SmartcnTokenizer::setupSmartcnTokenizerDeserializer); + + protected static void setupSmartcnTokenizerDeserializer(ObjectDeserializer op) { + TokenizerBase.setupTokenizerBaseDeserializer(op); + + op.ignore("type"); + } + +} diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenFilterDefinition.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenFilterDefinition.java index ee16cb10df..fd2b08558f 100644 --- a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenFilterDefinition.java +++ b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenFilterDefinition.java @@ -161,6 +161,8 @@ public enum Kind implements JsonEnum { WordDelimiter("word_delimiter"), + SmartcnStop("smartcn_stop"), + ; private final String jsonValue; @@ -1575,7 +1577,18 @@ public ObjectBuilder wordDelimiter( return this.wordDelimiter(fn.apply(new WordDelimiterTokenFilter.Builder()).build()); } - public TokenFilterDefinition build() { + public ObjectBuilder smartcn_stop(SmartcnStopTokenFilter v) { + this._kind = Kind.SmartcnStop; /* tag the union with this variant's own kind, not IcuTokenizer */ + this._value = v; + return this; + } + + public ObjectBuilder smartcn_stop( + Function> fn) { + return this.smartcn_stop(fn.apply(new SmartcnStopTokenFilter.Builder()).build()); + } + + public TokenFilterDefinition build() { _checkSingleUse(); return new TokenFilterDefinition(this); } @@ -1634,6 +1647,7 @@ protected static void setupTokenFilterDefinitionDeserializer(ObjectDeserializer< op.add(Builder::uppercase, UppercaseTokenFilter._DESERIALIZER, "uppercase"); 
op.add(Builder::wordDelimiterGraph, WordDelimiterGraphTokenFilter._DESERIALIZER, "word_delimiter_graph"); op.add(Builder::wordDelimiter, WordDelimiterTokenFilter._DESERIALIZER, "word_delimiter"); + op.add(Builder::smartcn_stop, SmartcnStopTokenFilter._DESERIALIZER, Kind.SmartcnStop.jsonValue()); op.setTypeProperty("type", null); diff --git a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenizerDefinition.java b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenizerDefinition.java index 79b541851b..2a8b2f0dde 100644 --- a/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenizerDefinition.java +++ b/java-client/src/main/java/org/opensearch/client/opensearch/_types/analysis/TokenizerDefinition.java @@ -93,6 +93,8 @@ public enum Kind implements JsonEnum { Whitespace("whitespace"), + SmartcnTokenizer("smartcn_tokenizer"), + ; private final String jsonValue; @@ -542,7 +544,18 @@ public ObjectBuilder whitespace( return this.whitespace(fn.apply(new WhitespaceTokenizer.Builder()).build()); } - public TokenizerDefinition build() { + public ObjectBuilder smartcn(SmartcnTokenizer v) { + this._kind = Kind.SmartcnTokenizer; + this._value = v; + return this; + } + + public ObjectBuilder smartcn( + Function> fn) { + return this.smartcn(fn.apply(new SmartcnTokenizer.Builder()).build()); + } + + public TokenizerDefinition build() { _checkSingleUse(); return new TokenizerDefinition(this); } @@ -565,6 +578,7 @@ protected static void setupTokenizerDefinitionDeserializer(ObjectDeserializer b).build(); + + String str = toJson(tokenizerDefinition); + assertEquals("{\"type\":\"smartcn_tokenizer\"}", str); + + TokenizerDefinition tokenizerDefinition2 = fromJson(str, TokenizerDefinition._DESERIALIZER); /* NOTE(review): the deserialized value is never asserted; this only shows fromJson completes */ + } + + @Test + public void testSmartcn_StopFilter() { + final TokenFilterDefinition analyzer = new TokenFilterDefinition.Builder() + .smartcn_stop(b -> b) + .build(); + + String str = toJson(analyzer); + 
assertEquals("{\"type\":\"smartcn_stop\"}", str); + + TokenFilterDefinition analyzer2 = fromJson(str, TokenFilterDefinition._DESERIALIZER); /* NOTE(review): the deserialized value is never asserted; this only shows fromJson completes */ + } }