From 700a316bb32bb943b2e9bd240401c66c80e165f5 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Fri, 1 Nov 2019 12:37:24 -0400 Subject: [PATCH] [DOCS] Reformat decimal digit token filter docs (#48722) --- .../decimal-digit-tokenfilter.asciidoc | 89 ++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc index 8dede54d0d264..58303185c1351 100644 --- a/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/decimal-digit-tokenfilter.asciidoc @@ -1,4 +1,89 @@ [[analysis-decimal-digit-tokenfilter]] -=== Decimal Digit Token Filter +=== Decimal digit token filter +++++ +Decimal digit +++++ -The `decimal_digit` token filter folds unicode digits to `0-9` +Converts all digits in the Unicode `Decimal_Number` General Category to `0-9`. +For example, the filter changes the Bengali numeral `৩` to `3`. + +This filter uses Lucene's +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/core/DecimalDigitFilter.html[DecimalDigitFilter]. 
+ +[[analysis-decimal-digit-tokenfilter-analyze-ex]] +==== Example + +The following <<indices-analyze,analyze API>> request uses the `decimal_digit` +filter to convert Devanagari numerals to `0-9`: + +[source,console] +-------------------------------------------------- +GET /_analyze +{ + "tokenizer" : "whitespace", + "filter" : ["decimal_digit"], + "text" : "१-one two-२ ३" +} +-------------------------------------------------- + +The filter produces the following tokens: + +[source,text] +-------------------------------------------------- +[ 1-one, two-2, 3] +-------------------------------------------------- + +///////////////////// +[source,console-result] +-------------------------------------------------- +{ + "tokens" : [ + { + "token" : "1-one", + "start_offset" : 0, + "end_offset" : 5, + "type" : "word", + "position" : 0 + }, + { + "token" : "two-2", + "start_offset" : 6, + "end_offset" : 11, + "type" : "word", + "position" : 1 + }, + { + "token" : "3", + "start_offset" : 12, + "end_offset" : 13, + "type" : "word", + "position" : 2 + } + ] +} +-------------------------------------------------- +///////////////////// + +[[analysis-decimal-digit-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the +`decimal_digit` filter to configure a new +<<analysis-custom-analyzer,custom analyzer>>. + +[source,console] +-------------------------------------------------- +PUT /decimal_digit_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "whitespace_decimal_digit" : { + "tokenizer" : "whitespace", + "filter" : ["decimal_digit"] + } + } + } + } +} +--------------------------------------------------