Introduce node mappings stats (#89807)

Adds node mappings stats to NodeIndicesStats, where they are visible only at the node and index (but not shard) levels. They are also visible in the _cat/nodes table. Also adds an exact-count YAML REST test.
elastic · Sep 19, 2022 · 3ed7a04 · 3ed7a04
1 parent 34471b1
commit 3ed7a04
Show file tree

Hide file tree

Showing 26 changed files with 545 additions and 52 deletions.
diff --git a/docs/changelog/89807.yaml b/docs/changelog/89807.yaml
@@ -0,0 +1,5 @@
+pr: 89807
+summary: Introduce node mappings stats
+area: "Stats"
+type: enhancement
+issues: []
diff --git a/docs/reference/cluster/nodes-stats.asciidoc b/docs/reference/cluster/nodes-stats.asciidoc
@@ -99,19 +99,22 @@ using metrics.
     metric is specified. Supported metrics are:
 +
 --
+    * `bulk`
     * `completion`
     * `docs`
     * `fielddata`
     * `flush`
     * `get`
     * `indexing`
+    * `mappings`
     * `merge`
     * `query_cache`
     * `recovery`
     * `refresh`
     * `request_cache`
     * `search`
     * `segments`
+    * `shard_stats`
     * `store`
     * `translog`
     * `warmer`
@@ -960,7 +963,7 @@ Time in milliseconds
 recovery operations were delayed due to throttling.
 =======
 
-`shards_stats`::
+`shard_stats`::
 (object)
 Contains statistics about all shards assigned to the node.
 +
@@ -971,6 +974,29 @@ Contains statistics about all shards assigned to the node.
 (integer)
 The total number of shards assigned to the node.
 =======
+
+`mappings`::
+(object)
+Contains statistics about the mappings for the node.
+This is not shown for the `shards` level, since mappings may be
+shared across the shards of an index on a node.
++
+.Properties of `mappings`
+[%collapsible%open]
+=======
+`total_count`::
+(integer)
+Number of mappings, including <<runtime,runtime>> and <<object,object>> fields.
+
+`total_estimated_overhead`::
+(<<byte-units,byte value>>)
+Estimated heap overhead of mappings on this node, calculated by allowing 1KiB of heap for every mapped field.
+
+`total_estimated_overhead_in_bytes`::
+(integer)
+Estimated heap overhead, in bytes, of mappings on this node, calculated by allowing 1KiB of heap for every mapped field.
+=======
+
 ======
 
 [[cluster-nodes-stats-api-response-body-os]]
@@ -1826,7 +1852,7 @@ The total number of kilobytes written for the device since starting {es}.
 
 `io_time_in_millis` (Linux only)::
 (integer)
-The total time in milliseconds spent performing I/O operations for the device 
+The total time in milliseconds spent performing I/O operations for the device
 since starting {es}.
 ========
 
@@ -1857,7 +1883,7 @@ since starting {es}.
 
 `io_time_in_millis` (Linux only)::
     (integer)
-    The total time in milliseconds spent performing I/O operations across all 
+    The total time in milliseconds spent performing I/O operations across all
     devices used by {es} since starting {es}.
 =======
 ======

diff --git a/docs/reference/how-to/size-your-shards.asciidoc b/docs/reference/how-to/size-your-shards.asciidoc
@@ -226,7 +226,11 @@ GET _cat/shards?v=true
 
 The exact resource usage of each mapped field depends on its type, but a rule
 of thumb is to allow for approximately 1kB of heap overhead per mapped field
-per index held by each data node. You must also allow enough heap for {es}'s
+per index held by each data node. In a running cluster, you can also consult the
+<<cluster-nodes-stats,Nodes stats API>>'s `mappings` indices statistic, which
+reports the number of field mappings and an estimate of their heap overhead.
+
+In addition, you must allow enough heap for {es}'s
 baseline usage as well as your workload such as indexing, searches and
 aggregations. 0.5GB of extra heap will suffice for many reasonable workloads,
 and you may need even less if your workload is very light while heavy workloads

diff --git a/...api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml b/...api-spec/src/yamlRestTest/resources/rest-api-spec/test/nodes.stats/11_indices_metrics.yml
@@ -110,6 +110,7 @@
   - is_false:  nodes.$node_id.indices.translog
   - is_false:  nodes.$node_id.indices.recovery
   - is_false:  nodes.$node_id.indices.shard_stats
+  - is_false:  nodes.$node_id.indices.mappings
 
 ---
 "Metric - multi":
@@ -138,7 +139,8 @@
   - is_false:  nodes.$node_id.indices.segments
   - is_false:  nodes.$node_id.indices.translog
   - is_false:  nodes.$node_id.indices.recovery
-
+  - is_false:  nodes.$node_id.indices.shard_stats
+  - is_false:  nodes.$node_id.indices.mappings
 
 ---
 "Metric - recovery":
@@ -168,6 +170,7 @@
   - is_false:  nodes.$node_id.indices.translog
   - is_true:   nodes.$node_id.indices.recovery
   - is_false:  nodes.$node_id.indices.shard_stats
+  - is_false:  nodes.$node_id.indices.mappings
 
 ---
 "Metric - _all include_segment_file_sizes":
@@ -226,6 +229,7 @@
   - is_false:  nodes.$node_id.indices.translog
   - is_false:  nodes.$node_id.indices.recovery
   - is_false:  nodes.$node_id.indices.shard_stats
+  - is_false:  nodes.$node_id.indices.mappings
   - is_true:   nodes.$node_id.indices.segments.file_sizes
 
 ---
@@ -258,6 +262,7 @@
   - is_false:  nodes.$node_id.indices.translog
   - is_false:  nodes.$node_id.indices.recovery
   - is_false:  nodes.$node_id.indices.shard_stats
+  - is_false:  nodes.$node_id.indices.mappings
 
 ---
 "Metric - _all include_unloaded_segments":
@@ -388,3 +393,151 @@
       nodes.stats: { metric: _all }
 
   - gte: { nodes.$node_id.indices.shard_stats.total_count: 1 }
+
+---
+"Metric - blank for indices mappings":
+  - skip:
+      features: [arbitrary_key]
+      version: " - 8.4.99"
+      reason:  "mappings added in version 8.5.0"
+  - do:
+      nodes.info: {}
+  - set:
+      nodes._arbitrary_key_: node_id
+
+  - do:
+      nodes.stats: {}
+
+  - is_true:  nodes.$node_id.indices.mappings
+  - match: { nodes.$node_id.indices.mappings.total_count: 0 }
+  - match: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 0 }
+
+---
+"indices mappings exact count test for indices level":
+
+  - skip:
+      features: [arbitrary_key]
+      version: " - 8.4.99"
+      reason:  "mappings added in version 8.5.0"
+
+  - do:
+      indices.create:
+        index: index1
+        body:
+          settings:
+            number_of_shards: "3"
+            number_of_replicas: "1"
+          mappings:
+            runtime:
+              a_source_field:
+                type: keyword
+            properties:
+              "@timestamp":
+                type: date
+              authors:
+                properties:
+                  age:
+                    type: long
+                  company:
+                    type: text
+                    fields:
+                      keyword:
+                        type: keyword
+                        ignore_above: 256
+                  name:
+                    properties:
+                      first_name:
+                        type: keyword
+                      full_name:
+                        type: text
+                      last_name:
+                        type: keyword
+              link:
+                type: alias
+                path: url
+              title:
+                type: text
+              url:
+                type: keyword
+
+  - do:
+      nodes.info: {}
+  - set:
+      nodes._arbitrary_key_: node_id
+
+  - do:
+      nodes.stats: { metric: _all, level: "indices", human: true }
+
+  # In the assertions below, we expect a field count of 26 because the mapping above produces the following mappers:
+  # Field mappers (incl. alias fields and object mappers' flattened leaves):
+  # 1.  _data_stream_timestamp
+  # 2.  _doc_count
+  # 3.  _feature
+  # 4. _field_names
+  # 5. _id
+  # 6. _ignored
+  # 7. _index
+  # 8. _nested_path
+  # 9. _routing
+  # 10. _seq_no
+  # 11. _source
+  # 12. _tier
+  # 13. _version
+  # 14. @timestamp
+  # 15. authors.age
+  # 16. authors.company
+  # 17. authors.company.keyword
+  # 18. authors.name.last_name
+  # 19. authors.name.first_name
+  # 20. authors.name.full_name
+  # 21. link
+  # 22. title
+  # 23. url
+  # Object mappers:
+  # 24. authors
+  # 25. authors.name
+  # Runtime field mappers:
+  # 26. a_source_field
+
+  - gte: { nodes.$node_id.indices.mappings.total_count: 26 }
+  - is_true: nodes.$node_id.indices.mappings.total_estimated_overhead
+  - gte: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 26624 }
+  - match: { nodes.$node_id.indices.indices.index1.mappings.total_count: 26 }
+  - is_true: nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead
+  - match: { nodes.$node_id.indices.indices.index1.mappings.total_estimated_overhead_in_bytes: 26624 }
+
+---
+"indices mappings does not exist in shards level":
+
+  - skip:
+      features: [arbitrary_key]
+      version: " - 8.4.99"
+      reason:  "mappings added in version 8.5.0"
+
+  - do:
+      indices.create:
+        index: index1
+        body:
+          settings:
+            number_of_shards: "3"
+            number_of_replicas: "1"
+          mappings:
+            properties:
+              prop1:
+                type: "keyword"
+              prop2:
+                type: "keyword"
+
+  - do:
+      nodes.info: {}
+  - set:
+      nodes._arbitrary_key_: node_id
+
+  - do:
+      nodes.stats: { metric: _all, level: "shards", human: true }
+
+  # We assert that there are at least two fields (for the mapping above), that each accounts for at least 1 byte of estimated overhead, and that mappings are not reported at the shards level
+  - gte: { nodes.$node_id.indices.mappings.total_count: 2 }
+  - is_true: nodes.$node_id.indices.mappings.total_estimated_overhead
+  - gte: { nodes.$node_id.indices.mappings.total_estimated_overhead_in_bytes: 2 }
+  - is_false: nodes.$node_id.indices.shards.index1.0.mappings
diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/stats/IndexStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/stats/IndexStatsIT.java
@@ -844,7 +844,7 @@ public void testAllFlags() throws Exception {
 
         client().admin().indices().prepareRefresh().execute().actionGet();
         IndicesStatsRequestBuilder builder = client().admin().indices().prepareStats();
-        Flag[] values = CommonStatsFlags.Flag.values();
+        Flag[] values = CommonStatsFlags.SHARD_LEVEL.getFlags();
         for (Flag flag : values) {
             set(flag, builder, false);
         }
@@ -953,7 +953,8 @@ public void testFlagOrdinalOrder() {
             Flag.RequestCache,
             Flag.Recovery,
             Flag.Bulk,
-            Flag.Shards };
+            Flag.Shards,
+            Flag.Mappings };
 
         assertThat(flags.length, equalTo(Flag.values().length));
         for (int i = 0; i < flags.length; i++) {
@@ -1160,6 +1161,8 @@ private static boolean isSet(Flag flag, CommonStats response) {
                 return response.getBulk() != null;
             case Shards:
                 return response.getShards() != null;
+            case Mappings:
+                return response.getNodeMappings() != null;
             default:
                 fail("new flag? " + flag);
                 return false;

diff --git a/...c/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java b/...c/main/java/org/elasticsearch/action/admin/cluster/stats/TransportClusterStatsAction.java
@@ -208,7 +208,7 @@ protected ClusterStatsNodeResponse nodeOperation(ClusterStatsNodeRequest nodeReq
                         new ShardStats(
                             indexShard.routingEntry(),
                             indexShard.shardPath(),
-                            new CommonStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS),
+                            CommonStats.getShardLevelStats(indicesService.getIndicesQueryCache(), indexShard, SHARD_STATS_FLAGS),
                             commitStats,
                             seqNoStats,
                             retentionLeaseStats