From bc9ec5a0d4f3e3485a03b74917fb295a551e59b9 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Thu, 19 Dec 2013 23:25:38 +0100 Subject: [PATCH] Add some tokenizer and analyzer tests Related to #9. --- pom.xml | 2 - .../index/analysis/PolishAnalysisTests.java | 66 ++++++++++++++ .../SimplePolishTokenFilterTests.java | 86 +++++++++++++++++++ src/test/resources/log4j.properties | 5 ++ 4 files changed, 157 insertions(+), 2 deletions(-) create mode 100644 src/test/java/org/elasticsearch/index/analysis/PolishAnalysisTests.java create mode 100644 src/test/java/org/elasticsearch/index/analysis/SimplePolishTokenFilterTests.java create mode 100644 src/test/resources/log4j.properties diff --git a/pom.xml b/pom.xml index c6ce82016765c..7fed554c457be 100644 --- a/pom.xml +++ b/pom.xml @@ -114,8 +114,6 @@ junit4 - - true 20 pipe,warn true diff --git a/src/test/java/org/elasticsearch/index/analysis/PolishAnalysisTests.java b/src/test/java/org/elasticsearch/index/analysis/PolishAnalysisTests.java new file mode 100644 index 0000000000000..cc2d2267f688a --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/PolishAnalysisTests.java @@ -0,0 +1,66 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.pl.PolishAnalyzer; +import org.elasticsearch.common.inject.Injector; +import org.elasticsearch.common.inject.ModulesBuilder; +import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.EnvironmentModule; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexNameModule; +import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor; +import org.elasticsearch.index.analysis.pl.PolishStemTokenFilterFactory; +import org.elasticsearch.index.settings.IndexSettingsModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.hamcrest.MatcherAssert; +import org.junit.Test; + +import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; +import static org.hamcrest.Matchers.instanceOf; + +/** Builds an {@link AnalysisService} whose {@link AnalysisModule} has a {@link PolishAnalysisBinderProcessor} added, and checks what the names "polish_stem" and "polish" resolve to. + */ +public class PolishAnalysisTests extends ElasticsearchTestCase { + /** With empty settings, the "polish_stem" token filter must be a {@link PolishStemTokenFilterFactory} and the "polish" analyzer a {@link PolishAnalyzer}. */ + @Test + public void testDefaultsPolishAnalysis() { + Index index = new Index("test"); + /* The parent injector carries the settings, the environment and IndicesAnalysisModule; the child injector adds the index-scoped modules and the AnalysisModule with the Polish binder processor. */ + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(EMPTY_SETTINGS), new EnvironmentModule(new Environment(EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector(); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, EMPTY_SETTINGS), + new IndexNameModule(index), + new AnalysisModule(EMPTY_SETTINGS, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor())) + .createChildInjector(parentInjector); + + AnalysisService analysisService = injector.getInstance(AnalysisService.class); + /* NOTE(review): despite its name, tokenizerFactory holds a TokenFilterFactory (the "polish_stem" filter), not a tokenizer factory. */ + TokenFilterFactory tokenizerFactory = analysisService.tokenFilter("polish_stem"); + MatcherAssert.assertThat(tokenizerFactory, 
instanceOf(PolishStemTokenFilterFactory.class)); + + Analyzer analyzer = analysisService.analyzer("polish").analyzer(); + MatcherAssert.assertThat(analyzer, instanceOf(PolishAnalyzer.class)); + } +} diff --git a/src/test/java/org/elasticsearch/index/analysis/SimplePolishTokenFilterTests.java b/src/test/java/org/elasticsearch/index/analysis/SimplePolishTokenFilterTests.java new file mode 100644 index 0000000000000..ab6d236407000 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/analysis/SimplePolishTokenFilterTests.java @@ -0,0 +1,86 @@ +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.KeywordTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.elasticsearch.common.inject.Injector; +import org.elasticsearch.common.inject.ModulesBuilder; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.settings.SettingsModule; +import org.elasticsearch.env.Environment; +import org.elasticsearch.env.EnvironmentModule; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.IndexNameModule; +import org.elasticsearch.index.analysis.pl.PolishAnalysisBinderProcessor; +import org.elasticsearch.index.settings.IndexSettingsModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisModule; +import org.elasticsearch.indices.analysis.IndicesAnalysisService; +import org.elasticsearch.test.ElasticsearchTestCase; +import org.junit.Test; + +import java.io.IOException; +import java.io.StringReader; + +import static org.hamcrest.Matchers.equalTo; +/** Runs sample Polish words through the "polish_stem" token filter and the "polish" analyzer and compares the emitted terms with the expected ones. NOTE(review): unlike PolishAnalysisTests.java in this same patch, this new file starts directly at the package statement, without the license header. */ +public class SimplePolishTokenFilterTests extends ElasticsearchTestCase { + /** Stems four single words through the token filter, then analyzes one two-word phrase. NOTE(review): "kwć" and "ć" are the terms asserted for "kwiaty" and "canona"; presumably that is what the stemmer really emits, so confirm before treating them as typos. */ + @Test + public void testBasicUsage() throws Exception { + testToken("kwiaty", "kwć"); + testToken("canona", "ć"); + testToken("wirtualna", "wirtualny"); + testToken("polska", "polski"); + 
+ + testAnalyzer("wirtualna polska", "wirtualny", "polski"); + } + /** Registers a "polish_stem" filter under the name "myStemmer", feeds source through it as a single keyword token and asserts that the first emitted term equals expected. NOTE(review): the stream is never passed to end() or close(), and nothing asserts that a second incrementToken() returns false; check this against the Lucene TokenStream consumer workflow. */ + private void testToken(String source, String expected) throws IOException { + Index index = new Index("test"); + Settings settings = ImmutableSettings.settingsBuilder() + .put("index.analysis.filter.myStemmer.type", "polish_stem") + .build(); + AnalysisService analysisService = createAnalysisService(index, settings); + + TokenFilterFactory filterFactory = analysisService.tokenFilter("myStemmer"); + + TokenStream ts = filterFactory.create(new KeywordTokenizer(new StringReader(source))); + + CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + assertThat(ts.incrementToken(), equalTo(true)); + + assertThat(term1.toString(), equalTo(expected)); + } + /** Analyzes source with the "polish" analyzer, using settings with no entries added, and asserts that the first emitted terms equal expected_terms, in order. NOTE(review): terms emitted after the expected ones are not checked, and the stream is never passed to end() or close(). */ + private void testAnalyzer(String source, String... expected_terms) throws IOException { + Index index = new Index("test"); + Settings settings = ImmutableSettings.settingsBuilder().build(); + AnalysisService analysisService = createAnalysisService(index, settings); + + Analyzer analyzer = analysisService.analyzer("polish").analyzer(); + + TokenStream ts = analyzer.tokenStream("test", source); + + CharTermAttribute term1 = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + + for (String expected : expected_terms) { + assertThat(ts.incrementToken(), equalTo(true)); + assertThat(term1.toString(), equalTo(expected)); + } + } + /** Creates the AnalysisService for the given index and settings: a parent injector (settings, environment, IndicesAnalysisModule) plus a child injector that adds IndexSettingsModule, IndexNameModule and an AnalysisModule with PolishAnalysisBinderProcessor added. */ + private AnalysisService createAnalysisService(Index index, Settings settings) { + Injector parentInjector = new ModulesBuilder().add(new SettingsModule(settings), new EnvironmentModule(new Environment(settings)), new IndicesAnalysisModule()).createInjector(); + Injector injector = new ModulesBuilder().add( + new IndexSettingsModule(index, settings), + new IndexNameModule(index), + new AnalysisModule(settings, parentInjector.getInstance(IndicesAnalysisService.class)).addProcessor(new PolishAnalysisBinderProcessor())) + .createChildInjector(parentInjector); + + return 
injector.getInstance(AnalysisService.class); + } +} diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 0000000000000..497c97f995974 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, out + +log4j.appender.out=org.apache.log4j.ConsoleAppender +log4j.appender.out.layout=org.apache.log4j.PatternLayout +log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n