From 38ff49fde9704b4ab0340d076ae747d13658c048 Mon Sep 17 00:00:00 2001 From: Anton Rubin Date: Mon, 14 Oct 2024 15:35:50 +0100 Subject: [PATCH 1/3] add whitespace analyzer docs Signed-off-by: Anton Rubin --- _analyzers/whitespace.md | 86 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 _analyzers/whitespace.md diff --git a/_analyzers/whitespace.md b/_analyzers/whitespace.md new file mode 100644 index 0000000000..3528d31b8d --- /dev/null +++ b/_analyzers/whitespace.md @@ -0,0 +1,86 @@ +--- +layout: default +title: Whitespace analyzer +nav_order: 60 +--- + +# Whitespace analyzer + +The `whitespace` analyzer breaks text into tokens based solely on whitespace characters (spaces, tabs, etc.). It does not apply any transformations, such as lowercasing or removing stop words, therefore the case of the original text is retained and will include punctuation as part of the tokens. + +## Example configuration + +You can use the following command to create index `my_whitespace_index` with `whitespace` analyzer: + +```json +PUT /my_whitespace_index +{ + "mappings": { + "properties": { + "my_field": { + "type": "text", + "analyzer": "whitespace" + } + } + } +} +``` +{% include copy-curl.html %} + +## Configuring custom analyzer + +You can use the following command to configure index `my_custom_whitespace_index` with custom analyzer equivalent to `whitespace` analyzer but with added `lowercase` character filter: + +```json +PUT /my_custom_whitespace_index +{ + "settings": { + "analysis": { + "analyzer": { + "my_custom_whitespace_analyzer": { + "type": "custom", + "tokenizer": "whitespace", + "filter": ["lowercase"] + } + } + } + }, + "mappings": { + "properties": { + "my_field": { + "type": "text", + "analyzer": "my_custom_whitespace_analyzer" + } + } + } +} +``` +{% include copy-curl.html %} + +## Generated tokens + +Use the following request to examine the tokens generated using the created analyzer: + +```json +POST /my_custom_whitespace_index/_analyze +{ + "analyzer": "my_custom_whitespace_analyzer", + "text": "The SLOW turtle swims away! 123" +} +``` +{% include copy-curl.html %} + +The response contains the generated tokens: + +```json +{ + "tokens": [ + {"token": "the","start_offset": 0,"end_offset": 3,"type": "word","position": 0}, + {"token": "slow","start_offset": 4,"end_offset": 8,"type": "word","position": 1}, + {"token": "turtle","start_offset": 9,"end_offset": 15,"type": "word","position": 2}, + {"token": "swims","start_offset": 16,"end_offset": 21,"type": "word","position": 3}, + {"token": "away!","start_offset": 22,"end_offset": 27,"type": "word","position": 4}, + {"token": "123","start_offset": 28,"end_offset": 31,"type": "word","position": 5} + ] +} +``` From 472f738ddfb61494e3a538ac6ea5cf348798e9a9 Mon Sep 17 00:00:00 2001 From: Fanit Kolchina Date: Fri, 6 Dec 2024 13:32:59 -0500 Subject: [PATCH 2/3] Doc review Signed-off-by: Fanit Kolchina --- _analyzers/whitespace.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/_analyzers/whitespace.md b/_analyzers/whitespace.md index 3528d31b8d..6c9e9c639b 100644 --- a/_analyzers/whitespace.md +++ b/_analyzers/whitespace.md @@ -6,11 +6,11 @@ nav_order: 60 # Whitespace analyzer -The `whitespace` analyzer breaks text into tokens based solely on whitespace characters (spaces, tabs, etc.). It does not apply any transformations, such as lowercasing or removing stop words, therefore the case of the original text is retained and will include punctuation as part of the tokens. 
+The `whitespace` analyzer breaks text into tokens based only on whitespace characters (spaces, tabs, and others). It does not apply any transformations, such as lowercasing or removing stopwords, therefore the case of the original text is retained and punctuation is included as part of the tokens. -## Example configuration +## Example -You can use the following command to create index `my_whitespace_index` with `whitespace` analyzer: +Use the following command to create an index named `my_whitespace_index` with a `whitespace` analyzer: ```json PUT /my_whitespace_index @@ -27,9 +27,9 @@ PUT /my_whitespace_index ``` {% include copy-curl.html %} -## Configuring custom analyzer +## Configuring a custom analyzer -You can use the following command to configure index `my_custom_whitespace_index` with custom analyzer equivalent to `whitespace` analyzer but with added `lowercase` character filter: +Use the following command to configure an index with a custom analyzer that is equivalent to a `whitespace` analyzer with an added `lowercase` character filter: ```json PUT /my_custom_whitespace_index @@ -59,7 +59,7 @@ PUT /my_custom_whitespace_index ## Generated tokens -Use the following request to examine the tokens generated using the created analyzer: +Use the following request to examine the tokens generated using the analyzer: ```json POST /my_custom_whitespace_index/_analyze From c6a28e1cef84192f7256c300623d6a0d71c3c665 Mon Sep 17 00:00:00 2001 From: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:22:19 -0500 Subject: [PATCH 3/3] Update _analyzers/whitespace.md Co-authored-by: Nathan Bower Signed-off-by: kolchfa-aws <105444904+kolchfa-aws@users.noreply.github.com> --- _analyzers/whitespace.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_analyzers/whitespace.md b/_analyzers/whitespace.md index 6c9e9c639b..67fee61295 100644 --- a/_analyzers/whitespace.md +++ b/_analyzers/whitespace.md @@ -6,7 +6,7 @@ nav_order: 60 # Whitespace analyzer -The `whitespace` analyzer breaks text into tokens based only on whitespace characters (spaces, tabs, and others). It does not apply any transformations, such as lowercasing or removing stopwords, therefore the case of the original text is retained and punctuation is included as part of the tokens. +The `whitespace` analyzer breaks text into tokens based only on white space characters (for example, spaces and tabs). It does not apply any transformations, such as lowercasing or removing stopwords, so the original case of the text is retained and punctuation is included as part of the tokens. ## Example
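For comparison, a minimal sketch of running the built-in `whitespace` analyzer (with no `lowercase` filter) on the same sample text through the `_analyze` API; it reuses the `my_whitespace_index` index created in the first example, and the expected output is inferred from the analyzer behavior described above rather than copied from the patches:

```json
POST /my_whitespace_index/_analyze
{
  "analyzer": "whitespace",
  "text": "The SLOW turtle swims away! 123"
}
```
{% include copy-curl.html %}

Because the plain `whitespace` analyzer applies no token filters, the expected tokens keep their original case and punctuation (`The`, `SLOW`, `turtle`, `swims`, `away!`, and `123`), with the same offsets and positions as in the lowercased response shown for the custom analyzer.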