From 3dbded06bcf9cc89cac1cd2107d6b6d0209b9f64 Mon Sep 17 00:00:00 2001 From: Dainius Jocas Date: Wed, 8 Dec 2021 23:11:09 +0200 Subject: [PATCH] Update to lucene 9.0.0 (#134) * chore: update to Lucene 9.0.0 * feat: add tests for Norwegian and Swedish minimal stemmers --- .github/workflows/release.yml | 6 +- .github/workflows/test.yml | 2 +- deps.edn | 29 +- graalvm/lucene-reflect-config.json | 396 +++++++++--------- resources/LMGREP_VERSION | 2 +- ...apache.lucene.analysis.TokenFilterFactory} | 0 src/lmgrep/lucene/analyzer.clj | 3 +- src/lmgrep/lucene/dictionary.clj | 5 +- src/lmgrep/lucene/predefined_analyzers.clj | 6 +- .../tokenfilters/norwegiannormalization.json | 1 + .../tokenfilters/swedishminimalstem.json | 1 + 11 files changed, 233 insertions(+), 218 deletions(-) rename resources/META-INF/services/{org.apache.lucene.analysis.util.TokenFilterFactory => org.apache.lucene.analysis.TokenFilterFactory} (100%) create mode 100644 test/resources/binary/tokenfilters/norwegiannormalization.json create mode 100644 test/resources/binary/tokenfilters/swedishminimalstem.json diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b5764e4..6f7cdc2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -67,7 +67,7 @@ jobs: - name: Install clojure tools-deps uses: DeLaGuardo/setup-clojure@master with: - tools-deps: 1.10.3.1029 + tools-deps: 1.10.3.1040 - name: Compile uberjar run: | @@ -142,8 +142,8 @@ jobs: - name: Install clojure tools-deps uses: DeLaGuardo/setup-clojure@master with: - tools-deps: 1.10.3.1029 - cli: 1.10.3.1029 + tools-deps: 1.10.3.1040 + cli: 1.10.3.1040 - name: Compile uberjar on windows if: ${{ matrix.os == 'windows-latest' }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4767503..7bd3bd9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: - name: Install clojure tools-deps uses: DeLaGuardo/setup-clojure@master with: - tools-deps: 1.10.3.1029 + tools-deps: 1.10.3.1040 - name: Unit Tests run: clojure -A:dev:test diff --git a/deps.edn b/deps.edn index 37e1ab5..16268d2 100644 --- a/deps.edn +++ b/deps.edn @@ -1,21 +1,24 @@ {:paths ["src" "resources"] :deps - {org.clojure/clojure {:mvn/version "1.10.3"} - org.clojure/tools.cli {:mvn/version "1.0.206"} - org.clojure/tools.logging {:mvn/version "1.1.0"} - org.clojure/core.async {:mvn/version "1.4.627"} - org.apache.lucene/lucene-core {:mvn/version "8.11.0"} - org.apache.lucene/lucene-monitor {:mvn/version "8.11.0"} - org.apache.lucene/lucene-analyzers-stempel {:mvn/version "8.11.0"} - metosin/jsonista {:mvn/version "0.3.4"} - lt.jocas/lucene-monitor-helpers {:mvn/version "0.1.7"} - babashka/fs {:mvn/version "0.1.1"} - io.quarkiverse.lucene/quarkus-lucene {:mvn/version "0.3"}} + {org.clojure/clojure {:mvn/version "1.10.3"} + org.clojure/tools.cli {:mvn/version "1.0.206"} + org.clojure/tools.logging {:mvn/version "1.1.0"} + org.clojure/core.async {:mvn/version "1.5.644"} + org.apache.lucene/lucene-core {:mvn/version "9.0.0"} + org.apache.lucene/lucene-monitor {:mvn/version "9.0.0"} + org.apache.lucene/lucene-analysis-stempel {:mvn/version "9.0.0"} + org.apache.lucene/lucene-queries {:mvn/version "9.0.0"} + org.apache.lucene/lucene-queryparser {:mvn/version "9.0.0"} + metosin/jsonista {:mvn/version "0.3.5"} + lt.jocas/lucene-monitor-helpers {:mvn/version "0.2.0"} + babashka/fs {:mvn/version "0.1.2"} + io.quarkiverse.lucene/quarkus-lucene {:mvn/version "0.3" + :exclusions [org.apache.lucene/lucene-analyzers-common]}} :aliases {:dev {:extra-paths ["dev" "classes" "test" "test/resources"] - :extra-deps {org.clojure/tools.deps.alpha {:mvn/version "0.12.1071" + :extra-deps {org.clojure/tools.deps.alpha {:mvn/version "0.12.1084" :exclusions [org.slf4j/slf4j-log4j12 org.slf4j/slf4j-api org.slf4j/slf4j-nop]} @@ -27,7 +30,7 @@ :main-opts ["-m" "cognitect.test-runner"]} :clj-kondo {:main-opts ["-m" "clj-kondo.main" "--lint" "src" "test"] - :extra-deps {clj-kondo/clj-kondo {:mvn/version "2021.10.19"}} + :extra-deps {clj-kondo/clj-kondo {:mvn/version "2021.12.01"}} :jvm-opts ["-Dclojure.main.report=stderr"]} :uberjar {:replace-deps {com.github.seancorfield/depstar {:mvn/version "2.1.303"}} diff --git a/graalvm/lucene-reflect-config.json b/graalvm/lucene-reflect-config.json index 9fb5d7b..479da98 100644 --- a/graalvm/lucene-reflect-config.json +++ b/graalvm/lucene-reflect-config.json @@ -18,6 +18,28 @@ ], "allDeclaredConstructors" : true }, + { + "name" : "org.apache.lucene.analysis.CharFilterFactory", + "queryAllPublicMethods" : true, + "methods" : [ + { + "name" : "availableCharFilters", + "parameterTypes" : [ ] + } + ], + "allDeclaredConstructors" : true + }, + { + "name" : "org.apache.lucene.analysis.TokenFilterFactory", + "queryAllPublicMethods" : true, + "methods" : [ + { + "name" : "availableTokenFilters", + "parameterTypes" : [ ] + } + ], + "allDeclaredConstructors" : true + }, { "name" : "org.apache.lucene.analysis.TokenStream", "queryAllPublicMethods" : true, @@ -29,6 +51,17 @@ "queryAllPublicConstructors" : true, "allDeclaredConstructors" : true }, + { + "name" : "org.apache.lucene.analysis.TokenizerFactory", + "queryAllPublicMethods" : true, + "methods" : [ + { + "name" : "availableTokenizers", + "parameterTypes" : [ ] + } + ], + "allDeclaredConstructors" : true + }, { "name" : "org.apache.lucene.analysis.ar.ArabicAnalyzer", "queryAllPublicConstructors" : true, @@ -205,6 +238,11 @@ }, { "name" : "org.apache.lucene.analysis.ca.CatalanSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -356,6 +394,51 @@ ], "allDeclaredConstructors" : true }, + { + "name" : "org.apache.lucene.analysis.classic.ClassicAnalyzer", + "queryAllPublicConstructors" : true, + "methods" : [ + { + "name" : "", + "parameterTypes" : [ ] + } + ], + "allDeclaredConstructors" : true + }, + { + "name" : "org.apache.lucene.analysis.classic.ClassicFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], + "methods" : [ + { + "name" : "", + "parameterTypes" : [ + "java.util.Map" + ] + } + ], + "allDeclaredConstructors" : true + }, + { + "name" : "org.apache.lucene.analysis.classic.ClassicTokenizerFactory", + "fields" : [ + { + "name" : "NAME" + } + ], + "methods" : [ + { + "name" : "", + "parameterTypes" : [ + "java.util.Map" + ] + } + ], + "allDeclaredConstructors" : true + }, { "name" : "org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory", "fields" : [ @@ -685,6 +768,11 @@ }, { "name" : "org.apache.lucene.analysis.da.DanishSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -819,6 +907,34 @@ ], "allDeclaredConstructors" : true }, + { + "name" : "org.apache.lucene.analysis.email.UAX29URLEmailAnalyzer", + "queryAllPublicConstructors" : true, + "methods" : [ + { + "name" : "", + "parameterTypes" : [ ] + } + ], + "allDeclaredConstructors" : true + }, + { + "name" : "org.apache.lucene.analysis.email.UAX29URLEmailTokenizerFactory", + "fields" : [ + { + "name" : "NAME" + } + ], + "methods" : [ + { + "name" : "", + "parameterTypes" : [ + "java.util.Map" + ] + } + ], + "allDeclaredConstructors" : true + }, { "name" : "org.apache.lucene.analysis.en.EnglishAnalyzer", "queryAllPublicConstructors" : true, @@ -883,6 +999,11 @@ }, { "name" : "org.apache.lucene.analysis.en.LovinsSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -968,6 +1089,11 @@ }, { "name" : "org.apache.lucene.analysis.et.EstonianSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -991,6 +1117,19 @@ }, { "name" : "org.apache.lucene.analysis.eu.BasqueSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], + "methods" : [ + { + "name" : "", + "parameterTypes" : [ + "java.util.Map" + ] + } + ], "allDeclaredConstructors" : true }, { @@ -1066,18 +1205,6 @@ ], "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.analysis.fi.RaudikkoTokenFilterFactory", - "methods" : [ - { - "name" : "", - "parameterTypes" : [ - "java.util.Map" - ] - } - ], - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.analysis.fr.FrenchAnalyzer", "queryAllPublicConstructors" : true, @@ -1153,6 +1280,11 @@ }, { "name" : "org.apache.lucene.analysis.ga.IrishSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -1311,6 +1443,11 @@ }, { "name" : "org.apache.lucene.analysis.hy.ArmenianSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -1407,6 +1544,11 @@ }, { "name" : "org.apache.lucene.analysis.lt.LithuanianSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -2017,6 +2159,11 @@ }, { "name" : "org.apache.lucene.analysis.nl.DutchSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -2029,6 +2176,11 @@ }, { "name" : "org.apache.lucene.analysis.nl.KPSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -2084,6 +2236,23 @@ ], "allDeclaredConstructors" : true }, + { + "name" : "org.apache.lucene.analysis.no.NorwegianNormalizationFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], + "methods" : [ + { + "name" : "", + "parameterTypes" : [ + "java.util.Map" + ] + } + ], + "allDeclaredConstructors" : true + }, { "name" : "org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory", "fields" : [ @@ -2391,6 +2560,11 @@ }, { "name" : "org.apache.lucene.analysis.ro.RomanianSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -2498,7 +2672,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.ClassicAnalyzer", + "name" : "org.apache.lucene.analysis.standard.StandardAnalyzer", "queryAllPublicConstructors" : true, "methods" : [ { @@ -2509,7 +2683,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.ClassicFilterFactory", + "name" : "org.apache.lucene.analysis.standard.StandardTokenizerFactory", "fields" : [ { "name" : "NAME" @@ -2526,7 +2700,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.ClassicTokenizerFactory", + "name" : "org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory", "fields" : [ { "name" : "NAME" @@ -2543,7 +2717,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.StandardAnalyzer", + "name" : "org.apache.lucene.analysis.sv.SwedishAnalyzer", "queryAllPublicConstructors" : true, "methods" : [ { @@ -2554,7 +2728,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.StandardTokenizerFactory", + "name" : "org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory", "fields" : [ { "name" : "NAME" @@ -2571,18 +2745,7 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer", - "queryAllPublicConstructors" : true, - "methods" : [ - { - "name" : "", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory", + "name" : "org.apache.lucene.analysis.sv.SwedishMinimalStemFilterFactory", "fields" : [ { "name" : "NAME" @@ -2599,35 +2762,21 @@ "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.stempel.StempelPolishStemFilterFactory", - "fields" : [ - { - "name" : "NAME" - } - ], + "name" : "org.apache.lucene.analysis.synonym.SolrSynonymParser", "methods" : [ { "name" : "", "parameterTypes" : [ - "java.util.Map" + "boolean", + "boolean", + "org.apache.lucene.analysis.Analyzer" ] } ], "allDeclaredConstructors" : true }, { - "name" : "org.apache.lucene.analysis.sv.SwedishAnalyzer", - "queryAllPublicConstructors" : true, - "methods" : [ - { - "name" : "", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory", + "name" : "org.apache.lucene.analysis.synonym.SynonymFilterFactory", "fields" : [ { "name" : "NAME" @@ -2643,29 +2792,6 @@ ], "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.analysis.synonym.SolrSynonymParser", - "methods" : [ - { - "name" : "", - "parameterTypes" : [ - "boolean", - "boolean", - "org.apache.lucene.analysis.Analyzer" - ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.analysis.synonym.SynonymFilterFactory", - "fields" : [ - { - "name" : "NAME" - } - ], - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.analysis.synonym.SynonymGraphFilterFactory", "fields" : [ @@ -2857,6 +2983,11 @@ }, { "name" : "org.apache.lucene.analysis.tr.TurkishSnowballStemTokenFilterFactory", + "fields" : [ + { + "name" : "NAME" + } + ], "methods" : [ { "name" : "", @@ -2867,17 +2998,6 @@ ], "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.analysis.util.CharFilterFactory", - "queryAllPublicMethods" : true, - "methods" : [ - { - "name" : "availableCharFilters", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.analysis.util.ElisionFilterFactory", "fields" : [ @@ -2895,28 +3015,6 @@ ], "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.analysis.util.TokenFilterFactory", - "queryAllPublicMethods" : true, - "methods" : [ - { - "name" : "availableTokenFilters", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.analysis.util.TokenizerFactory", - "queryAllPublicMethods" : true, - "methods" : [ - { - "name" : "availableTokenizers", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.analysis.wikipedia.WikipediaTokenizerFactory", "fields" : [ @@ -2934,81 +3032,6 @@ ], "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.codecs.DocValuesProducer", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.FieldsProducer", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.PostingsReaderBase", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.blocktree.FieldReader", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.compressing.FieldsIndex", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.compressing.FieldsIndexReader", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat", - "methods" : [ - { - "name" : "", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.lucene80.Lucene80DocValuesProducer", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.lucene84.Lucene84PostingsFormat", - "methods" : [ - { - "name" : "", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.lucene84.Lucene84PostingsReader", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.lucene87.Lucene87Codec", - "methods" : [ - { - "name" : "", - "parameterTypes" : [ ] - } - ], - "allDeclaredConstructors" : true - }, - { - "name" : "org.apache.lucene.codecs.perfield.PerFieldPostingsFormat$FieldsReader", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.collation.CollationKeyAnalyzer", "queryAllPublicConstructors" : true, @@ -3049,11 +3072,6 @@ "allDeclaredFields" : true, "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.index.Fields", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.index.IndexOptions", "allPublicFields" : true, @@ -3064,11 +3082,6 @@ "allDeclaredFields" : true, "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.index.Terms", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.monitor.HighlightsMatch", "queryAllPublicMethods" : true, @@ -3389,11 +3402,6 @@ "allDeclaredFields" : true, "allDeclaredConstructors" : true }, - { - "name" : "org.apache.lucene.util.fst.FST$Arc", - "allDeclaredFields" : true, - "allDeclaredConstructors" : true - }, { "name" : "org.apache.lucene.util.fst.OffHeapFSTStore", "allDeclaredFields" : true, diff --git a/resources/LMGREP_VERSION b/resources/LMGREP_VERSION index 5a2415e..cb3bc06 100644 --- a/resources/LMGREP_VERSION +++ b/resources/LMGREP_VERSION @@ -1 +1 @@ -v2021.11.08-SNAPSHOT +v2021.11.09-SNAPSHOT diff --git a/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory similarity index 100% rename from resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory rename to resources/META-INF/services/org.apache.lucene.analysis.TokenFilterFactory diff --git a/src/lmgrep/lucene/analyzer.clj b/src/lmgrep/lucene/analyzer.clj index bd9d85e..c5d98f5 100644 --- a/src/lmgrep/lucene/analyzer.clj +++ b/src/lmgrep/lucene/analyzer.clj @@ -6,8 +6,7 @@ (java.io File) (java.nio.file Path) (org.apache.lucene.analysis.custom CustomAnalyzer CustomAnalyzer$Builder) - (org.apache.lucene.analysis.util TokenizerFactory TokenFilterFactory CharFilterFactory) - (org.apache.lucene.analysis Analyzer))) + (org.apache.lucene.analysis Analyzer TokenizerFactory CharFilterFactory TokenFilterFactory))) ; https://lucene.apache.org/core/8_8_0/analyzers-common/constant-values.html#org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter.GENERATE_WORD_PARTS diff --git a/src/lmgrep/lucene/dictionary.clj b/src/lmgrep/lucene/dictionary.clj index d55676e..f3bb0b5 100644 --- a/src/lmgrep/lucene/dictionary.clj +++ b/src/lmgrep/lucene/dictionary.clj @@ -8,7 +8,8 @@ (:import (org.apache.lucene.queryparser.classic ParseException) (org.apache.lucene.monitor MonitorQuery) (org.apache.lucene.search Query) - (org.apache.lucene.analysis Analyzer))) + (org.apache.lucene.analysis Analyzer) + (clojure.lang PersistentArrayMap))) (defn prepare-metadata "Metadata must be a map String->String" @@ -84,7 +85,7 @@ (r/map (fn [questionnaire-entry] (if (get questionnaire-entry :id) questionnaire-entry - (assoc questionnaire-entry :id (str (Math/abs ^int (.hashCode questionnaire-entry))))))) + (assoc questionnaire-entry :id (str (Math/abs ^int (.hashCode ^PersistentArrayMap questionnaire-entry))))))) (r/map (fn [questionnaire-entry] (prepare-query-entry (handle-query-parser-settings questionnaire-entry options) default-type global-analysis-conf custom-analyzers))) diff --git a/src/lmgrep/lucene/predefined_analyzers.clj b/src/lmgrep/lucene/predefined_analyzers.clj index 8c690ed..e0d7362 100644 --- a/src/lmgrep/lucene/predefined_analyzers.clj +++ b/src/lmgrep/lucene/predefined_analyzers.clj @@ -34,13 +34,15 @@ (org.apache.lucene.analysis.pt PortugueseAnalyzer) (org.apache.lucene.analysis.ro RomanianAnalyzer) (org.apache.lucene.analysis.ru RussianAnalyzer) - (org.apache.lucene.analysis.standard ClassicAnalyzer UAX29URLEmailAnalyzer StandardAnalyzer) + (org.apache.lucene.analysis.standard StandardAnalyzer) (org.apache.lucene.analysis.sv SwedishAnalyzer) (org.apache.lucene.analysis.th ThaiAnalyzer) (org.apache.lucene.analysis.tr TurkishAnalyzer) (org.apache.lucene.analysis.en EnglishAnalyzer) (org.apache.lucene.analysis.pl PolishAnalyzer) - (org.apache.lucene.collation CollationKeyAnalyzer))) + (org.apache.lucene.collation CollationKeyAnalyzer) + (org.apache.lucene.analysis.classic ClassicAnalyzer) + (org.apache.lucene.analysis.email UAX29URLEmailAnalyzer))) (def analyzers {"ArabicAnalyzer" (ArabicAnalyzer.) diff --git a/test/resources/binary/tokenfilters/norwegiannormalization.json b/test/resources/binary/tokenfilters/norwegiannormalization.json new file mode 100644 index 0000000..15bcaa6 --- /dev/null +++ b/test/resources/binary/tokenfilters/norwegiannormalization.json @@ -0,0 +1 @@ +{"token-filters":[{"name":"norwegiannormalization"}]} \ No newline at end of file diff --git a/test/resources/binary/tokenfilters/swedishminimalstem.json b/test/resources/binary/tokenfilters/swedishminimalstem.json new file mode 100644 index 0000000..7c79797 --- /dev/null +++ b/test/resources/binary/tokenfilters/swedishminimalstem.json @@ -0,0 +1 @@ +{"token-filters":[{"name":"swedishminimalstem"}]} \ No newline at end of file