Skip to content

Commit

Permalink
JMX Scraper - YAML config and integration test for HBase (#1538)
Browse files Browse the repository at this point in the history
  • Loading branch information
robsunday authored Nov 25, 2024
1 parent 436b166 commit 1d7f482
Show file tree
Hide file tree
Showing 5 changed files with 839 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,22 +43,22 @@ void endToEnd() {
metric,
"hbase.master.region_server.count",
"The number of region servers.",
"{servers}",
"{server}",
attrs -> attrs.contains(entry("state", "dead")),
attrs -> attrs.contains(entry("state", "live"))),
metric ->
assertSum(
metric,
"hbase.master.regions_in_transition.count",
"The number of regions that are in transition.",
"{regions}",
"{region}",
/* isMonotonic= */ false),
metric ->
assertSum(
metric,
"hbase.master.regions_in_transition.over_threshold",
"The number of regions that have been in transition longer than a threshold time.",
"{regions}",
"{region}",
/* isMonotonic= */ false),
metric ->
assertGauge(
Expand All @@ -71,14 +71,14 @@ void endToEnd() {
metric,
"hbase.region_server.region.count",
"The number of regions hosted by the region server.",
"{regions}",
"{region}",
attrs -> attrs.containsKey("region_server")),
metric ->
assertSumWithAttributes(
metric,
"hbase.region_server.disk.store_file.count",
"The number of store files on disk currently managed by the region server.",
"{files}",
"{file}",
attrs -> attrs.containsKey("region_server")),
metric ->
assertSumWithAttributes(
Expand All @@ -92,22 +92,22 @@ void endToEnd() {
metric,
"hbase.region_server.write_ahead_log.count",
"The number of write ahead logs not yet archived.",
"{logs}",
"{log}",
attrs -> attrs.containsKey("region_server")),
metric ->
assertSumWithAttributes(
metric,
"hbase.region_server.request.count",
"The number of requests received.",
"{requests}",
"{request}",
attrs -> attrs.contains(entry("state", "write")),
attrs -> attrs.contains(entry("state", "read"))),
metric ->
assertSumWithAttributes(
metric,
"hbase.region_server.queue.length",
"The number of RPC handlers actively servicing requests.",
"{handlers}",
"{handler}",
attrs -> attrs.contains(entry("state", "flush")),
attrs -> attrs.contains(entry("state", "compaction"))),
metric ->
Expand All @@ -122,7 +122,7 @@ void endToEnd() {
metric,
"hbase.region_server.request.count",
"The number of requests received.",
"{requests}",
"{request}",
attrs -> attrs.contains(entry("state", "write")),
attrs -> attrs.contains(entry("state", "read"))),
metric ->
Expand Down Expand Up @@ -347,7 +347,7 @@ void endToEnd() {
metric,
"hbase.region_server.operations.slow",
"Number of operations that took over 1000ms to complete.",
"{operations}",
"{operation}",
attrs -> attrs.contains(entry("operation", "delete")),
attrs -> attrs.contains(entry("operation", "append")),
attrs -> attrs.contains(entry("operation", "get")),
Expand All @@ -358,21 +358,21 @@ void endToEnd() {
metric,
"hbase.region_server.open_connection.count",
"The number of open connections at the RPC layer.",
"{connections}",
"{connection}",
attrs -> attrs.containsKey("region_server")),
metric ->
assertSumWithAttributes(
metric,
"hbase.region_server.active_handler.count",
"The number of RPC handlers actively servicing requests.",
"{handlers}",
"{handler}",
attrs -> attrs.containsKey("region_server")),
metric ->
assertSumWithAttributes(
metric,
"hbase.region_server.queue.request.count",
"The number of currently enqueued requests.",
"{requests}",
"{request}",
attrs -> attrs.contains(entry("state", "replication")),
attrs -> attrs.contains(entry("state", "user")),
attrs -> attrs.contains(entry("state", "priority"))),
Expand All @@ -381,7 +381,7 @@ void endToEnd() {
metric,
"hbase.region_server.authentication.count",
"Number of client connection authentication failures/successes.",
"{authentication requests}",
"{authentication request}",
attrs -> attrs.contains(entry("state", "successes")),
attrs -> attrs.contains(entry("state", "failures"))),
metric ->
Expand Down
30 changes: 15 additions & 15 deletions jmx-metrics/src/main/resources/target-systems/hbase.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,45 +16,45 @@

def beanMasterServer = otel.mbeans("Hadoop:service=HBase,name=Master,sub=Server")
otel.instrument(beanMasterServer, "hbase.master.region_server.count",
"The number of region servers.", "{servers}",
"The number of region servers.", "{server}",
["numDeadRegionServers":["state" : {"dead"}], "numRegionServers": ["state" : {"live"}]],
otel.&longUpDownCounterCallback)

def beanMasterAssignmentManager = otel.mbean("Hadoop:service=HBase,name=Master,sub=AssignmentManager")
otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.count",
"The number of regions that are in transition.", "{regions}",
"The number of regions that are in transition.", "{region}",
"ritCount", otel.&longUpDownCounterCallback)
otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.over_threshold",
"The number of regions that have been in transition longer than a threshold time.", "{regions}",
"The number of regions that have been in transition longer than a threshold time.", "{region}",
"ritCountOverThreshold", otel.&longUpDownCounterCallback)
otel.instrument(beanMasterAssignmentManager, "hbase.master.regions_in_transition.oldest_age",
"The age of the longest region in transition.", "ms",
"ritOldestAge", otel.&longValueCallback)

def beanRegionServerServer = otel.mbean("Hadoop:service=HBase,name=RegionServer,sub=Server")
otel.instrument(beanRegionServerServer, "hbase.region_server.region.count",
"The number of regions hosted by the region server.", "{regions}",
"The number of regions hosted by the region server.", "{region}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"regionCount", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.disk.store_file.count",
"The number of store files on disk currently managed by the region server.", "{files}",
"The number of store files on disk currently managed by the region server.", "{file}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"storeFileCount", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.disk.store_file.size",
"Aggregate size of the store files on disk.", "By",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"storeFileSize", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.write_ahead_log.count",
"The number of write ahead logs not yet archived.", "{logs}",
"The number of write ahead logs not yet archived.", "{log}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"hlogFileCount", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.request.count",
"The number of requests received.", "{requests}",
"The number of requests received.", "{request}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
["writeRequestCount":["state" : {"write"}], "readRequestCount": ["state" : {"read"}]],
otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.queue.length",
"The number of RPC handlers actively servicing requests.", "{handlers}",
"The number of RPC handlers actively servicing requests.", "{handler}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
["flushQueueLength":["state" : {"flush"}], "compactionQueueLength": ["state" : {"compaction"}]],
otel.&longUpDownCounterCallback)
Expand All @@ -63,7 +63,7 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.blocked_update.time
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"updatesBlockedTime", otel.&longValueCallback)
otel.instrument(beanRegionServerServer, "hbase.region_server.block_cache.operation.count",
"Number of block cache hits/misses.", "{operations}",
"Number of block cache hits/misses.", "{operation}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
["blockCacheMissCount":["state" : {"miss"}], "blockCacheHitCount": ["state" : {"hit"}]],
otel.&longValueCallback)
Expand Down Expand Up @@ -199,7 +199,7 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.operation.increment
"Increment_median", otel.&longValueCallback)

otel.instrument(beanRegionServerServer, "hbase.region_server.operations.slow",
"Number of operations that took over 1000ms to complete.", "{operations}",
"Number of operations that took over 1000ms to complete.", "{operation}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
[
"slowDeleteCount":["operation" : {"delete"}],
Expand All @@ -212,15 +212,15 @@ otel.instrument(beanRegionServerServer, "hbase.region_server.operations.slow",

def beanRegionServerIPC = otel.mbean("Hadoop:service=HBase,name=RegionServer,sub=IPC")
otel.instrument(beanRegionServerIPC, "hbase.region_server.open_connection.count",
"The number of open connections at the RPC layer.", "{connections}",
"The number of open connections at the RPC layer.", "{connection}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"numOpenConnections", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerIPC, "hbase.region_server.active_handler.count",
"The number of RPC handlers actively servicing requests.", "{handlers}",
"The number of RPC handlers actively servicing requests.", "{handler}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"numActiveHandler", otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerIPC, "hbase.region_server.queue.request.count",
"The number of currently enqueued requests.", "{requests}",
"The number of currently enqueued requests.", "{request}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
[
"numCallsInReplicationQueue":["state" : {"replication"}],
Expand All @@ -229,7 +229,7 @@ otel.instrument(beanRegionServerIPC, "hbase.region_server.queue.request.count",
],
otel.&longUpDownCounterCallback)
otel.instrument(beanRegionServerIPC, "hbase.region_server.authentication.count",
"Number of client connection authentication failures/successes.", "{authentication requests}",
"Number of client connection authentication failures/successes.", "{authentication request}",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
["authenticationSuccesses":["state" : {"successes"}], "authenticationFailures": ["state" : {"failures"}]],
otel.&longUpDownCounterCallback)
Expand All @@ -246,4 +246,4 @@ otel.instrument(beanJVMMetrics, "hbase.region_server.gc.young_gen.time",
otel.instrument(beanJVMMetrics, "hbase.region_server.gc.old_gen.time",
"Time spent in garbage collection of the old generation.", "ms",
["region_server" : { mbean -> mbean.getProperty("tag.Hostname") }],
"GcTimeMillisConcurrentMarkSweep", otel.&longCounterCallback)
"GcTimeMillisConcurrentMarkSweep", otel.&longCounterCallback)
Loading

0 comments on commit 1d7f482

Please sign in to comment.