From a0795163a990f8c93d40309e081c8557d9dc32a4 Mon Sep 17 00:00:00 2001 From: James Rodewig Date: Wed, 23 Oct 2019 09:38:22 -0500 Subject: [PATCH] [DOCS] Reformat classic token filter docs (#48314) --- .../tokenfilters/classic-tokenfilter.asciidoc | 148 +++++++++++++++++- 1 file changed, 143 insertions(+), 5 deletions(-) diff --git a/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc index b138dabaf70ec..2b281f541d146 100644 --- a/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/classic-tokenfilter.asciidoc @@ -1,9 +1,147 @@ [[analysis-classic-tokenfilter]] -=== Classic Token Filter +=== Classic token filter +++++ +<titleabbrev>Classic</titleabbrev> +++++ -The `classic` token filter does optional post-processing of -terms that are generated by the <<analysis-classic-tokenizer,`classic` tokenizer>>. +Performs optional post-processing of terms generated by the +<<analysis-classic-tokenizer,`classic` tokenizer>>. -This filter removes the english possessive from the end of words, and -it removes dots from acronyms. +This filter removes the English possessive (`'s`) from the end of words and +removes dots from acronyms. It uses Lucene's +https://lucene.apache.org/core/{lucene_version_path}/analyzers-common/org/apache/lucene/analysis/standard/ClassicFilter.html[ClassicFilter]. + +[[analysis-classic-tokenfilter-analyze-ex]] +==== Example + +The following <<indices-analyze,analyze API>> request demonstrates how the +classic token filter works. + +[source,console] +-------------------------------------------------- +GET /_analyze +{ + "tokenizer" : "classic", + "filter" : ["classic"], + "text" : "The 2 Q.U.I.C.K. Brown-Foxes jumped over the lazy dog's bone." 
+} +-------------------------------------------------- + +The filter produces the following tokens: + +[source,text] +-------------------------------------------------- +[ The, 2, QUICK, Brown, Foxes, jumped, over, the, lazy, dog, bone ] +-------------------------------------------------- + +///////////////////// +[source,console-result] +-------------------------------------------------- +{ + "tokens" : [ + { + "token" : "The", + "start_offset" : 0, + "end_offset" : 3, + "type" : "<ALPHANUM>", + "position" : 0 + }, + { + "token" : "2", + "start_offset" : 4, + "end_offset" : 5, + "type" : "<ALPHANUM>", + "position" : 1 + }, + { + "token" : "QUICK", + "start_offset" : 6, + "end_offset" : 16, + "type" : "<ACRONYM>", + "position" : 2 + }, + { + "token" : "Brown", + "start_offset" : 17, + "end_offset" : 22, + "type" : "<ALPHANUM>", + "position" : 3 + }, + { + "token" : "Foxes", + "start_offset" : 23, + "end_offset" : 28, + "type" : "<ALPHANUM>", + "position" : 4 + }, + { + "token" : "jumped", + "start_offset" : 29, + "end_offset" : 35, + "type" : "<ALPHANUM>", + "position" : 5 + }, + { + "token" : "over", + "start_offset" : 36, + "end_offset" : 40, + "type" : "<ALPHANUM>", + "position" : 6 + }, + { + "token" : "the", + "start_offset" : 41, + "end_offset" : 44, + "type" : "<ALPHANUM>", + "position" : 7 + }, + { + "token" : "lazy", + "start_offset" : 45, + "end_offset" : 49, + "type" : "<ALPHANUM>", + "position" : 8 + }, + { + "token" : "dog", + "start_offset" : 50, + "end_offset" : 55, + "type" : "<APOSTROPHE>", + "position" : 9 + }, + { + "token" : "bone", + "start_offset" : 56, + "end_offset" : 60, + "type" : "<ALPHANUM>", + "position" : 10 + } + ] +} +-------------------------------------------------- +///////////////////// + +[[analysis-classic-tokenfilter-analyzer-ex]] +==== Add to an analyzer + +The following <<indices-create-index,create index API>> request uses the +classic token filter to configure a new +<<analysis-custom-analyzer,custom analyzer>>. 
+ +[source,console] +-------------------------------------------------- +PUT /classic_example +{ + "settings" : { + "analysis" : { + "analyzer" : { + "classic_analyzer" : { + "tokenizer" : "classic", + "filter" : ["classic"] + } + } + } + } +} +--------------------------------------------------