Skip to content

Commit

Permalink
Fix failing ICU tests (#35207)
Browse files Browse the repository at this point in the history
Fixes #35173
  • Loading branch information
romseygeek authored Nov 6, 2018
1 parent 833e0f8 commit 9f4b93f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 41 deletions.
6 changes: 0 additions & 6 deletions plugins/analysis-icu/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,3 @@ dependencyLicenses {
mapping from: /lucene-.*/, to: 'lucene'
}

// Muted: https://github.com/elastic/elasticsearch/issues/35173
integTestRunner {
systemProperty 'tests.rest.blacklist',
'analysis_icu/10_basic/Normalization with a UnicodeSet Filter,' +
'analysis_icu/10_basic/Normalization with a CamcelCase UnicodeSet Filter'
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,10 @@
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UnicodeSet for filtering.</p>
*/
public class IcuNormalizerTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

private static final DeprecationLogger deprecationLogger =
new DeprecationLogger(LogManager.getLogger(IcuNormalizerTokenFilterFactory.class));

private final Normalizer2 normalizer;

public IcuNormalizerTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
- match: { tokens.1.token: bar }
- match: { tokens.2.token: resume }
---
"Normalization with a UnicodeSet Filter":
"Normalization with unicode_set_filter":
- do:
indices.create:
index: test
Expand All @@ -70,31 +70,42 @@
index: test
body:
char_filter: ["charfilter_icu_normalizer"]
tokenizer: keyword
tokenizer: standard
text: charfilter Föo Bâr Ruß
- length: { tokens: 1 }
- match: { tokens.0.token: charfilter föo bâr ruß }
- length: { tokens: 4 }
- match: { tokens.0.token: charfilter }
- match: { tokens.1.token: föo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: ruß }

- do:
indices.analyze:
index: test
body:
tokenizer: keyword
tokenizer: standard
filter: ["tokenfilter_icu_normalizer"]
text: tokenfilter Föo Bâr Ruß
- length: { tokens: 1 }
- match: { tokens.0.token: tokenfilter föo Bâr ruß }
- length: { tokens: 4 }
- match: { tokens.0.token: tokenfilter }
- match: { tokens.1.token: föo }
- match: { tokens.2.token: Bâr }
- match: { tokens.3.token: ruß }

- do:
indices.analyze:
index: test
body:
tokenizer: keyword
tokenizer: standard
filter: ["tokenfilter_icu_folding"]
text: icufolding Föo Bâr Ruß
- length: { tokens: 1 }
- match: { tokens.0.token: icufolding foo bâr russ }
- length: { tokens: 4 }
- match: { tokens.0.token: icufolding }
- match: { tokens.1.token: foo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: russ }

---
"Normalization with a CamcelCase UnicodeSet Filter":
"Normalization with deprecated unicodeSetFilter":
- skip:
version: " - 6.99.99"
reason: unicodeSetFilter deprecated in 7.0.0, replaced by unicode_set_filter
Expand All @@ -121,6 +132,8 @@
type: icu_folding
unicodeSetFilter: "[^â]"
- do:
warnings:
- "[unicodeSetFilter] has been deprecated in favor of [unicode_set_filter]"
indices.analyze:
index: test
body:
Expand All @@ -132,27 +145,4 @@
- match: { tokens.1.token: föo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: ruß }
- do:
indices.analyze:
index: test
body:
tokenizer: standard
filter: ["tokenfilter_icu_normalizer"]
text: tokenfilter Föo Bâr Ruß
- length: { tokens: 4 }
- match: { tokens.0.token: tokenfilter }
- match: { tokens.1.token: föo }
- match: { tokens.2.token: Bâr }
- match: { tokens.3.token: ruß }
- do:
indices.analyze:
index: test
body:
tokenizer: standard
filter: ["tokenfilter_icu_folding"]
text: icufolding Föo Bâr Ruß
- length: { tokens: 4 }
- match: { tokens.0.token: icufolding }
- match: { tokens.1.token: foo }
- match: { tokens.2.token: bâr }
- match: { tokens.3.token: russ }

0 comments on commit 9f4b93f

Please sign in to comment.