diff --git a/docs/reference/analysis/testing.asciidoc b/docs/reference/analysis/testing.asciidoc index a4ae7a555f271..ba3300802ac87 100644 --- a/docs/reference/analysis/testing.asciidoc +++ b/docs/reference/analysis/testing.asciidoc @@ -2,8 +2,7 @@ === Test an analyzer The <<indices-analyze,`analyze` API>> is an invaluable tool for viewing the -terms produced by an analyzer. A built-in analyzer (or combination of built-in -tokenizer, token filters, and character filters) can be specified inline in +terms produced by an analyzer. A built-in analyzer can be specified inline in the request: [source,console] @@ -13,7 +12,54 @@ POST _analyze "analyzer": "whitespace", "text": "The quick brown fox." } +------------------------------------- + +The API returns the following response: + +[source,console-result] +------------------------------------- +{ + "tokens": [ + { + "token": "The", + "start_offset": 0, + "end_offset": 3, + "type": "word", + "position": 0 + }, + { + "token": "quick", + "start_offset": 4, + "end_offset": 9, + "type": "word", + "position": 1 + }, + { + "token": "brown", + "start_offset": 10, + "end_offset": 15, + "type": "word", + "position": 2 + }, + { + "token": "fox.", + "start_offset": 16, + "end_offset": 20, + "type": "word", + "position": 3 + } + ] +} +------------------------------------- + +You can also test combinations of: +* A tokenizer +* Zero or more token filters +* Zero or more character filters + +[source,console] +------------------------------------- POST _analyze { "tokenizer": "standard", @@ -22,7 +68,43 @@ POST _analyze } ------------------------------------- +The API returns the following response: +[source,console-result] +------------------------------------- +{ + "tokens": [ + { + "token": "is", + "start_offset": 0, + "end_offset": 2, + "type": "<ALPHANUM>", + "position": 0 + }, + { + "token": "this", + "start_offset": 3, + "end_offset": 7, + "type": "<ALPHANUM>", + "position": 1 + }, + { + "token": "deja", + "start_offset": 8, + "end_offset": 12, + "type": "<ALPHANUM>", + "position": 2 + }, + { 
+ "token": "vu", + "start_offset": 13, + "end_offset": 15, + "type": "<ALPHANUM>", + "position": 3 + } + ] +} +------------------------------------- .Positions and character offsets ********************************************************* @@ -80,6 +162,44 @@ GET my_index/_analyze <3> } ------------------------------------- +The API returns the following response: + +[source,console-result] +------------------------------------- +{ + "tokens": [ + { + "token": "is", + "start_offset": 0, + "end_offset": 2, + "type": "<ALPHANUM>", + "position": 0 + }, + { + "token": "this", + "start_offset": 3, + "end_offset": 7, + "type": "<ALPHANUM>", + "position": 1 + }, + { + "token": "deja", + "start_offset": 8, + "end_offset": 12, + "type": "<ALPHANUM>", + "position": 2 + }, + { + "token": "vu", + "start_offset": 13, + "end_offset": 15, + "type": "<ALPHANUM>", + "position": 3 + } + ] +} +------------------------------------- + <1> Define a `custom` analyzer called `std_folded`. <2> The field `my_text` uses the `std_folded` analyzer. <3> To refer to this analyzer, the `analyze` API must specify the index name.