Skip to content

Commit

Permalink
[Remove] Analyzer Deprecations (#1741)
Browse files Browse the repository at this point in the history
This commit removes deprecated analyzer instantiation that is no longer
permitted in OpenSearch 2.0.0.

Signed-off-by: Nicholas Walter Knize <[email protected]>
  • Loading branch information
nknize authored Dec 16, 2021
1 parent 5966cc0 commit 5550f8d
Show file tree
Hide file tree
Showing 21 changed files with 22 additions and 834 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand All @@ -63,9 +61,6 @@
* In all cases, all non-CJK input is passed through unmodified.
*/
public final class CJKBigramFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CJKBigramFilterFactory.class);

private final int flags;
private final boolean outputUnigrams;

Expand Down Expand Up @@ -110,14 +105,7 @@ public TokenStream create(TokenStream tokenStream) {
/**
 * Returns the filter to use when this factory appears in a synonym-parsing chain.
 *
 * @return this factory when {@code outputUnigrams} is {@code false}
 * @throws IllegalArgumentException if {@code outputUnigrams} is {@code true}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    if (outputUnigrams) {
        // Rejection is unconditional: the old index-version check only decided whether a
        // deprecation warning was logged before the very same exception was thrown.
        throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
    }
    return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,6 @@ public List<ScriptContext<?>> getContexts() {
public Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> analyzers = new TreeMap<>();
analyzers.put("fingerprint", FingerprintAnalyzerProvider::new);

// TODO remove in 8.0
analyzers.put("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.put("pattern", PatternAnalyzerProvider::new);
analyzers.put("snowball", SnowballAnalyzerProvider::new);

Expand Down Expand Up @@ -265,7 +262,6 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
requiresAnalysisSettings((i, e, n, s) -> new ScriptedConditionTokenFilterFactory(i, n, s, scriptService.get()))
);
filters.put("decimal_digit", DecimalDigitFilterFactory::new);
filters.put("delimited_payload_filter", LegacyDelimitedPayloadTokenFilterFactory::new);
filters.put("delimited_payload", DelimitedPayloadTokenFilterFactory::new);
filters.put("dictionary_decompounder", requiresAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
filters.put("dutch_stem", DutchStemTokenFilterFactory::new);
Expand Down Expand Up @@ -388,14 +384,6 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>();
// TODO remove in 8.0
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"standard_html_strip",
CachingStrategy.OPENSEARCH,
() -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)
)
);
analyzers.add(
new PreBuiltAnalyzerProviderFactory(
"pattern",
Expand Down Expand Up @@ -462,16 +450,6 @@ public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactorie
/**
 * Builds the list of pre-configured char filters registered by this method.
 *
 * <p>Two entries are registered, both backed by {@code HTMLStripCharFilter}:
 * {@code html_strip}, and the camel-case name {@code htmlStrip}, which logs a
 * deprecation warning when the supplied version is on or after
 * {@code LegacyESVersion.V_6_3_0}.
 *
 * @return a new mutable list holding the two char filter registrations
 */
public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
    List<PreConfiguredCharFilter> filters = new ArrayList<>();
    filters.add(PreConfiguredCharFilter.singleton("html_strip", false, HTMLStripCharFilter::new));
    filters.add(PreConfiguredCharFilter.openSearchVersion("htmlStrip", false, (reader, version) -> {
        if (version.onOrAfter(LegacyESVersion.V_6_3_0)) {
            // The message must name the filter exactly as registered ("htmlStrip") so users
            // can find the offending name in their settings.
            deprecationLogger.deprecate(
                "htmlStrip_deprecation",
                "The [htmlStrip] char filter name is deprecated and will be removed in a future version. "
                    + "Please change the filter name to [html_strip] instead."
            );
        }
        return new HTMLStripCharFilter(reader);
    }));
    return filters;
}

Expand All @@ -492,18 +470,6 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() {
);
filters.add(PreConfiguredTokenFilter.singleton("czech_stem", false, CzechStemFilter::new));
filters.add(PreConfiguredTokenFilter.singleton("decimal_digit", true, DecimalDigitFilter::new));
filters.add(PreConfiguredTokenFilter.openSearchVersion("delimited_payload_filter", false, (input, version) -> {
if (version.onOrAfter(LegacyESVersion.V_7_0_0)) {
throw new IllegalArgumentException(
"[delimited_payload_filter] is not supported for new indices, use [delimited_payload] instead"
);
}
return new DelimitedPayloadTokenFilter(
input,
DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER
);
}));
filters.add(
PreConfiguredTokenFilter.singleton(
"delimited_payload",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.commongrams.CommonGramsFilter;
import org.apache.lucene.analysis.commongrams.CommonGramsQueryFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand All @@ -46,9 +44,6 @@
import org.opensearch.index.analysis.TokenFilterFactory;

public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(CommonGramsTokenFilterFactory.class);

private final CharArraySet words;

private final boolean ignoreCase;
Expand Down Expand Up @@ -80,15 +75,6 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw: the previous body had a statement after `return this;`,
    // which is unreachable code and does not compile.
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,13 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.reverse.ReverseStringFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;

public class EdgeNGramTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(EdgeNGramTokenFilterFactory.class);

private final int minGram;

private final int maxGram;
Expand Down Expand Up @@ -102,14 +97,6 @@ public boolean breaksFastVectorHighlighter() {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw: the previous if/else exited on both branches, leaving the
    // statement after it unreachable (a compile error).
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.FingerprintFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand All @@ -46,9 +44,6 @@
import static org.opensearch.analysis.common.FingerprintAnalyzerProvider.MAX_OUTPUT_SIZE;

public class FingerprintTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(FingerprintTokenFilterFactory.class);

private final char separator;
private final int maxOutputSize;

Expand All @@ -67,15 +62,7 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw: the previous if/else exited on both branches, leaving the
    // statement after it unreachable (a compile error).
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}

}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,7 @@
import org.apache.lucene.analysis.miscellaneous.ConditionalTokenFilter;
import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilter;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.Strings;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
Expand All @@ -55,9 +53,6 @@
import java.util.function.Function;

public class MultiplexerTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(MultiplexerTokenFilterFactory.class);

private List<String> filterNames;
private final boolean preserveOriginal;

Expand All @@ -74,20 +69,7 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw. The previous body kept a version-gated path that returned
    // IDENTITY_FILTER when preserveOriginal was set, followed by a statement that could never
    // be reached (a compile error); that legacy path is dropped here.
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}

@Override
Expand Down Expand Up @@ -142,20 +124,7 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw. The previous body kept a version-gated path that returned
    // IDENTITY_FILTER when preserveOriginal was set, followed by a statement that could never
    // be reached (a compile error); that legacy path is dropped here.
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,13 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.opensearch.LegacyESVersion;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenFilterFactory;
import org.opensearch.index.analysis.TokenFilterFactory;

public class NGramTokenFilterFactory extends AbstractTokenFilterFactory {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(NGramTokenFilterFactory.class);

private final int minGram;
private final int maxGram;
private final boolean preserveOriginal;
Expand Down Expand Up @@ -88,14 +84,6 @@ public TokenStream create(TokenStream tokenStream) {

/**
 * Always rejects use of this filter in a synonym-parsing chain.
 *
 * @return never returns normally
 * @throws IllegalArgumentException always, naming this filter via {@code name()}
 */
@Override
public TokenFilterFactory getSynonymFilter() {
    // Single unconditional throw: the previous if/else exited on both branches, leaving the
    // statement after it unreachable (a compile error).
    throw new IllegalArgumentException("Token filter [" + name() + "] cannot be used to parse synonyms");
}
}
Loading

0 comments on commit 5550f8d

Please sign in to comment.