From e0ffcda81fd35e4f9e1161cc50a88e63fa633873 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Mon, 24 Sep 2018 16:05:26 -0400 Subject: [PATCH 01/26] interim commit --- docker/grafana/Dockerfile | 3 + docker/grafana/datasource.yaml | 5 + docker/m3dbnode/Dockerfile2 | 27 +++ github_token | 1 + .../prometheus/docker-compose.yml | 38 +++- .../prometheus/prometheus-integration-test.sh | 27 ++- .../prometheus/prometheus.yml | 2 +- src/dbnode/client/session.go | 2 + src/dbnode/config/m3dbnode-local-etcd.yml | 15 -- src/dbnode/config/m3dbnode-local-etcd2.yml | 197 ++++++++++++++++++ 10 files changed, 282 insertions(+), 35 deletions(-) create mode 100644 docker/grafana/Dockerfile create mode 100644 docker/grafana/datasource.yaml create mode 100644 docker/m3dbnode/Dockerfile2 create mode 100644 github_token create mode 100644 src/dbnode/config/m3dbnode-local-etcd2.yml diff --git a/docker/grafana/Dockerfile b/docker/grafana/Dockerfile new file mode 100644 index 0000000000..6303922798 --- /dev/null +++ b/docker/grafana/Dockerfile @@ -0,0 +1,3 @@ +FROM grafana/grafana:latest + +COPY ./docker/grafana/datasource.yaml /etc/grafana/provisioning/datasources/datasource.yaml \ No newline at end of file diff --git a/docker/grafana/datasource.yaml b/docker/grafana/datasource.yaml new file mode 100644 index 0000000000..5c214fe1df --- /dev/null +++ b/docker/grafana/datasource.yaml @@ -0,0 +1,5 @@ +datasources: + - name: Prometheus + type: prometheus + access: proxy + url: http://localhost:9090 diff --git a/docker/m3dbnode/Dockerfile2 b/docker/m3dbnode/Dockerfile2 new file mode 100644 index 0000000000..838f6aee8b --- /dev/null +++ b/docker/m3dbnode/Dockerfile2 @@ -0,0 +1,27 @@ +# stage 1: build +FROM golang:1.10-alpine AS builder +LABEL maintainer="The M3DB Authors " + +# Install Glide +RUN apk add --update glide git make bash + +# Add source code +RUN mkdir -p /go/src/github.com/m3db/m3 +ADD . 
/go/src/github.com/m3db/m3 + +# Build m3dbnode binary +RUN cd /go/src/github.com/m3db/m3/ && \ + git submodule update --init && \ + make m3dbnode-linux-amd64 + +# stage 2: lightweight "release" +FROM alpine:latest +LABEL maintainer="The M3DB Authors " + +EXPOSE 2381/tcp 2382/tcp 7202/tcp 9005-9009/tcp + +COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ +COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd2.yml /etc/m3dbnode/m3dbnode2.yml + +ENTRYPOINT [ "/bin/m3dbnode" ] +CMD [ "-f", "/etc/m3dbnode/m3dbnode2.yml" ] diff --git a/github_token b/github_token new file mode 100644 index 0000000000..713f1eca75 --- /dev/null +++ b/github_token @@ -0,0 +1 @@ +f6192b8d35518098fbb645ffd807a53749ffb00e \ No newline at end of file diff --git a/scripts/integration-tests/prometheus/docker-compose.yml b/scripts/integration-tests/prometheus/docker-compose.yml index 86fed27b40..0deaa614cf 100644 --- a/scripts/integration-tests/prometheus/docker-compose.yml +++ b/scripts/integration-tests/prometheus/docker-compose.yml @@ -13,6 +13,19 @@ services: context: ../../../ dockerfile: ./docker/m3dbnode/Dockerfile image: m3dbnode01:latest + dbnode02: + expose: + - "9005-9009" + - "2381-2382" + ports: + - "0.0.0.0:9005-9009:9005-9009" + - "0.0.0.0:2381-2382:2381-2382" + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3dbnode/Dockerfile2 + image: m3dbnode02:latest coordinator01: expose: - "7201" @@ -28,17 +41,20 @@ services: image: m3coordinator01:latest volumes: - "./:/etc/m3coordinator/" - prometheus01: - expose: - - "9090" - ports: - - "0.0.0.0:9090:9090" - networks: - - backend - image: prom/prometheus:latest - volumes: - - "./:/etc/prometheus/" - grafana: + # prometheus01: + # expose: + # - "9090" + # ports: + # - "0.0.0.0:9090:9090" + # networks: + # - backend + # image: prom/prometheus:latest + # volumes: + # - "./:/etc/prometheus/" + grafana2: + build: + context: ../../../ + dockerfile: 
./docker/grafana/Dockerfile expose: - "3000" ports: diff --git a/scripts/integration-tests/prometheus/prometheus-integration-test.sh b/scripts/integration-tests/prometheus/prometheus-integration-test.sh index c7cb273ae9..758c71b97e 100755 --- a/scripts/integration-tests/prometheus/prometheus-integration-test.sh +++ b/scripts/integration-tests/prometheus/prometheus-integration-test.sh @@ -9,7 +9,7 @@ PARAM_TEST_BUILD="${TEST_BUILD:-true}" PARAM_TEST_VERIFY="${TEST_VERIFY:-true}" PARAM_TEST_TEARDOWN="${TEST_TEARDOWN:-true}" -if [ $PARAM_TEST_BUILD != "true" ]; then +if [ "$PARAM_TEST_BUILD" != "true" ]; then echo "SKIP build docker images" else echo "Build docker images" @@ -19,11 +19,12 @@ fi echo "Run m3dbnode and m3coordinator containers" docker-compose -f docker-compose.yml up -d dbnode01 +docker-compose -f docker-compose.yml up -d dbnode02 docker-compose -f docker-compose.yml up -d coordinator01 echo "Sleeping for a bit to ensure db up" -sleep 10 # TODO Replace sleeps with logic to determine when to proceed +sleep 2 # TODO Replace sleeps with logic to determine when to proceed echo "Adding namespace" @@ -34,7 +35,7 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ "flushEnabled": true, "writesToCommitLog": true, "cleanupEnabled": true, - "snapshotEnabled": false, + "snapshotEnabled": true, "repairEnabled": false, "retentionOptions": { "retentionPeriodNanos": 172800000000000, @@ -53,7 +54,7 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ echo "Sleep while namespace is init'd" -sleep 10 # TODO Replace sleeps with logic to determine when to proceed +sleep 2 # TODO Replace sleeps with logic to determine when to proceed [ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.prometheus_metrics.indexOptions.enabled)" == true ] @@ -71,6 +72,15 @@ curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ "endpoint": "dbnode01:9000", "hostname": "dbnode01", "port": 9000 + }, + { + "id": "m3db_local_2", + "isolation_group": 
"rack-b", + "zone": "embedded", + "weight": 1024, + "endpoint": "dbnode02:9005", + "hostname": "dbnode02", + "port": 9005 } ] }' @@ -79,13 +89,14 @@ curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ echo "Wait for placement to fully initialize" -sleep 10 # TODO Replace sleeps with logic to determine when to proceed +sleep 2 # TODO Replace sleeps with logic to determine when to proceed -echo "Start Prometheus container" +echo "Start Prometheus and Grafana containers" docker-compose -f docker-compose.yml up -d prometheus01 +docker-compose -f docker-compose.yml up -d grafana -if [ $PARAM_TEST_VERIFY != "true" ]; then +if [ "$PARAM_TEST_VERIFY" != "true" ]; then echo "SKIP verify" else echo "Write direct test data" @@ -138,7 +149,7 @@ else [ "$(curl -sSf localhost:9090/api/v1/query?query=prometheus_remote_storage_succeeded_samples_total | jq .data.result[].value[1])" != '"0"' ] fi -if [ $PARAM_TEST_TEARDOWN != "true" ]; then +if [ "$PARAM_TEST_TEARDOWN" != "true" ]; then echo "SKIP teardown" else docker-compose -f docker-compose.yml down || echo "unable to shutdown containers" # CI fails to stop all containers sometimes diff --git a/scripts/integration-tests/prometheus/prometheus.yml b/scripts/integration-tests/prometheus/prometheus.yml index ce96174fe2..9067b12d04 100644 --- a/scripts/integration-tests/prometheus/prometheus.yml +++ b/scripts/integration-tests/prometheus/prometheus.yml @@ -36,7 +36,7 @@ scrape_configs: - job_name: 'dbnode' static_configs: - - targets: ['dbnode01:9004'] + - targets: ['dbnode01:7203', 'dbnode02:7203'] remote_read: - url: http://coordinator01:7201/api/v1/prom/remote/read diff --git a/src/dbnode/client/session.go b/src/dbnode/client/session.go index 81af153556..07cf4b48b9 100644 --- a/src/dbnode/client/session.go +++ b/src/dbnode/client/session.go @@ -763,6 +763,7 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, s.pools.multiReaderIterator = encoding.NewMultiReaderIteratorPool(poolOpts) 
s.pools.multiReaderIterator.Init(s.opts.ReaderIteratorAllocate()) } + if replicas > len(s.metrics.writeNodesRespondingErrors) { curr := len(s.metrics.writeNodesRespondingErrors) for i := curr; i < replicas; i++ { @@ -772,6 +773,7 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, append(s.metrics.writeNodesRespondingErrors, counter) } } + if replicas > len(s.metrics.fetchNodesRespondingErrors) { curr := len(s.metrics.fetchNodesRespondingErrors) for i := curr; i < replicas; i++ { diff --git a/src/dbnode/config/m3dbnode-local-etcd.yml b/src/dbnode/config/m3dbnode-local-etcd.yml index 37c1c95ff6..fb4a247408 100644 --- a/src/dbnode/config/m3dbnode-local-etcd.yml +++ b/src/dbnode/config/m3dbnode-local-etcd.yml @@ -1,18 +1,3 @@ -coordinator: - listenAddress: - type: "config" - value: "0.0.0.0:7201" - - metrics: - scope: - prefix: "coordinator" - prometheus: - handlerPath: /metrics - listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved - sanitization: prometheus - samplingRate: 1.0 - extended: none - db: logging: level: info diff --git a/src/dbnode/config/m3dbnode-local-etcd2.yml b/src/dbnode/config/m3dbnode-local-etcd2.yml new file mode 100644 index 0000000000..82d1858ead --- /dev/null +++ b/src/dbnode/config/m3dbnode-local-etcd2.yml @@ -0,0 +1,197 @@ +db: + logging: + level: info + + metrics: + prometheus: + handlerPath: /metrics + sanitization: prometheus + samplingRate: 1.0 + extended: detailed + + listenAddress: 0.0.0.0:9005 + clusterListenAddress: 0.0.0.0:9006 + httpNodeListenAddress: 0.0.0.0:9007 + httpClusterListenAddress: 0.0.0.0:9008 + debugListenAddress: 0.0.0.0:9009 + + hostID: + resolver: config + value: m3db_local_2 + + client: + writeConsistencyLevel: majority + readConsistencyLevel: unstrict_majority + writeTimeout: 10s + fetchTimeout: 15s + connectTimeout: 20s + writeRetry: + initialBackoff: 500ms + backoffFactor: 3 + maxRetries: 2 + jitter: true + fetchRetry: + initialBackoff: 500ms + 
backoffFactor: 2 + maxRetries: 3 + jitter: true + backgroundHealthCheckFailLimit: 4 + backgroundHealthCheckFailThrottleFactor: 0.5 + + gcPercentage: 100 + + writeNewSeriesAsync: true + writeNewSeriesLimitPerSecond: 1048576 + writeNewSeriesBackoffDuration: 2ms + + bootstrap: + bootstrappers: + - filesystem + - commitlog + - peers + - uninitialized_topology + fs: + numProcessorsPerCPU: 0.125 + + cache: + series: + policy: lru + + commitlog: + flushMaxBytes: 524288 + flushEvery: 1s + queue: + calculationType: fixed + size: 2097152 + blockSize: 10m + + fs: + filePathPrefix: /var/lib/m3db + writeBufferSize: 65536 + dataReadBufferSize: 65536 + infoReadBufferSize: 128 + seekReadBufferSize: 4096 + throughputLimitMbps: 100.0 + throughputCheckEvery: 128 + + repair: + enabled: false + interval: 2h + offset: 30m + jitter: 1h + throttle: 2m + checkInterval: 1m + + pooling: + blockAllocSize: 16 + type: simple + seriesPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + blockPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + encoderPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + closersPool: + size: 104857 + lowWatermark: 0.7 + highWatermark: 1.0 + contextPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + segmentReaderPool: + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + iteratorPool: + size: 2048 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlockMetadataResultsPool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlocksMetadataResultsPool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + hostBlockMetadataSlicePool: + size: 131072 + capacity: 3 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataSlicePool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataSlicePool: + size: 32 
+ capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + identifierPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + bytesPool: + buckets: + - capacity: 16 + size: 524288 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 32 + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 64 + size: 131072 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 128 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 256 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 1440 + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 4096 + size: 8192 + lowWatermark: 0.7 + highWatermark: 1.0 + + config: + service: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - dbnode01:2379 + seedNodes: + initialCluster: + - hostID: m3db_local + endpoint: http://dbnode01:2380 From 6e8a359a64a179eff3b61c1b6e54cf99f47f6b20 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 11:53:52 -0400 Subject: [PATCH 02/26] Interim commit --- docker/grafana/datasource.yaml | 2 +- docker/m3dbnode/Dockerfile | 2 +- docker/m3dbnode/Dockerfile2 | 2 +- docker/m3dbnode/Dockerfile3 | 27 +++ .../prometheus/docker-compose.yml | 37 ++- .../prometheus/prometheus.yml | 6 +- src/dbnode/client/session.go | 14 +- src/dbnode/config/m3dbnode-local-etcd.yml | 19 +- src/dbnode/config/m3dbnode-local-etcd2.yml | 17 +- src/dbnode/config/m3dbnode-local-etcd3.yml | 212 ++++++++++++++++++ 10 files changed, 311 insertions(+), 27 deletions(-) create mode 100644 docker/m3dbnode/Dockerfile3 create mode 100644 src/dbnode/config/m3dbnode-local-etcd3.yml diff --git a/docker/grafana/datasource.yaml b/docker/grafana/datasource.yaml index 5c214fe1df..6165637c01 100644 --- a/docker/grafana/datasource.yaml +++ b/docker/grafana/datasource.yaml @@ -2,4 +2,4 @@ datasources: - name: Prometheus type: prometheus access: proxy - url: http://localhost:9090 + url: 
http://prometheus01:9090 diff --git a/docker/m3dbnode/Dockerfile b/docker/m3dbnode/Dockerfile index c1f4805768..647e6003d1 100644 --- a/docker/m3dbnode/Dockerfile +++ b/docker/m3dbnode/Dockerfile @@ -18,7 +18,7 @@ RUN cd /go/src/github.com/m3db/m3/ && \ FROM alpine:latest LABEL maintainer="The M3DB Authors " -EXPOSE 2379/tcp 2380/tcp 7201/tcp 9000-9004/tcp +EXPOSE 2379/tcp 2380/tcp 7201/tcp 7203/tcp 9000-9004/tcp COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd.yml /etc/m3dbnode/m3dbnode.yml diff --git a/docker/m3dbnode/Dockerfile2 b/docker/m3dbnode/Dockerfile2 index 838f6aee8b..2b7a8f4129 100644 --- a/docker/m3dbnode/Dockerfile2 +++ b/docker/m3dbnode/Dockerfile2 @@ -18,7 +18,7 @@ RUN cd /go/src/github.com/m3db/m3/ && \ FROM alpine:latest LABEL maintainer="The M3DB Authors " -EXPOSE 2381/tcp 2382/tcp 7202/tcp 9005-9009/tcp +EXPOSE 2381/tcp 2382/tcp 7201/tcp 7203/tcp 9005-9009/tcp COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd2.yml /etc/m3dbnode/m3dbnode2.yml diff --git a/docker/m3dbnode/Dockerfile3 b/docker/m3dbnode/Dockerfile3 new file mode 100644 index 0000000000..a6ca8712d6 --- /dev/null +++ b/docker/m3dbnode/Dockerfile3 @@ -0,0 +1,27 @@ +# stage 1: build +FROM golang:1.10-alpine AS builder +LABEL maintainer="The M3DB Authors " + +# Install Glide +RUN apk add --update glide git make bash + +# Add source code +RUN mkdir -p /go/src/github.com/m3db/m3 +ADD . 
/go/src/github.com/m3db/m3 + +# Build m3dbnode binary +RUN cd /go/src/github.com/m3db/m3/ && \ + git submodule update --init && \ + make m3dbnode-linux-amd64 + +# stage 2: lightweight "release" +FROM alpine:latest +LABEL maintainer="The M3DB Authors " + +EXPOSE 2381/tcp 2382/tcp 7201/tcp 7203/tcp 9010-9014/tcp + +COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ +COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd3.yml /etc/m3dbnode/m3dbnode3.yml + +ENTRYPOINT [ "/bin/m3dbnode" ] +CMD [ "-f", "/etc/m3dbnode/m3dbnode3.yml" ] diff --git a/scripts/integration-tests/prometheus/docker-compose.yml b/scripts/integration-tests/prometheus/docker-compose.yml index 0deaa614cf..da713cd4e8 100644 --- a/scripts/integration-tests/prometheus/docker-compose.yml +++ b/scripts/integration-tests/prometheus/docker-compose.yml @@ -1,4 +1,4 @@ -version: "3.5" +version: "2.2" services: dbnode01: expose: @@ -26,13 +26,28 @@ services: context: ../../../ dockerfile: ./docker/m3dbnode/Dockerfile2 image: m3dbnode02:latest + dbnode03: + expose: + - "9010-9014" + - "2383-2384" + ports: + - "0.0.0.0:9010-9014:9010-9014" + - "0.0.0.0:2383-2384:2383-2384" + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3dbnode/Dockerfile3 + image: m3dbnode03:latest coordinator01: expose: - "7201" - "7203" + - "7208" ports: - "0.0.0.0:7201:7201" - "0.0.0.0:7203:7203" + - "0.0.0.0:7208:7208" networks: - backend build: @@ -41,16 +56,16 @@ services: image: m3coordinator01:latest volumes: - "./:/etc/m3coordinator/" - # prometheus01: - # expose: - # - "9090" - # ports: - # - "0.0.0.0:9090:9090" - # networks: - # - backend - # image: prom/prometheus:latest - # volumes: - # - "./:/etc/prometheus/" + prometheus01: + expose: + - "9090" + ports: + - "0.0.0.0:9090:9090" + networks: + - backend + image: prom/prometheus:latest + volumes: + - "./:/etc/prometheus/" grafana2: build: context: ../../../ diff --git 
a/scripts/integration-tests/prometheus/prometheus.yml b/scripts/integration-tests/prometheus/prometheus.yml index 9067b12d04..079da0039e 100644 --- a/scripts/integration-tests/prometheus/prometheus.yml +++ b/scripts/integration-tests/prometheus/prometheus.yml @@ -2,8 +2,8 @@ global: external_labels: role: "remote" - scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_interval: 1s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 1s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). # Alertmanager configuration @@ -36,7 +36,7 @@ scrape_configs: - job_name: 'dbnode' static_configs: - - targets: ['dbnode01:7203', 'dbnode02:7203'] + - targets: ['dbnode01:7203', 'dbnode02:7203', 'dbnode03:7203'] remote_read: - url: http://coordinator01:7201/api/v1/prom/remote/read diff --git a/src/dbnode/client/session.go b/src/dbnode/client/session.go index 07cf4b48b9..f6a322501c 100644 --- a/src/dbnode/client/session.go +++ b/src/dbnode/client/session.go @@ -281,7 +281,7 @@ func newSession(opts Options) (clientSession, error) { s.pools.writeAttempt.Init() fetchAttemptPoolOpts := pool.NewObjectPoolOptions(). - SetSize(opts.FetchBatchOpPoolSize()). + SetSize(1). SetInstrumentOptions(opts.InstrumentOptions().SetMetricsScope( scope.SubScope("fetch-attempt-pool"), )) @@ -289,7 +289,7 @@ func newSession(opts Options) (clientSession, error) { s.pools.fetchAttempt.Init() fetchTaggedAttemptPoolImplOpts := pool.NewObjectPoolOptions(). - SetSize(opts.FetchBatchOpPoolSize()). + SetSize(1). SetInstrumentOptions(opts.InstrumentOptions().SetMetricsScope( scope.SubScope("fetch-tagged-attempt-pool"), )) @@ -505,7 +505,7 @@ func (s *session) Open() error { s.pools.writeState.Init() fetchBatchOpPoolOpts := pool.NewObjectPoolOptions(). 
- SetSize(s.opts.FetchBatchOpPoolSize()). + SetSize(1). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-batch-op-pool"), )) @@ -513,7 +513,7 @@ func (s *session) Open() error { s.pools.fetchBatchOp.Init() fetchTaggedOpPoolOpts := pool.NewObjectPoolOptions(). - SetSize(s.opts.FetchBatchOpPoolSize()). + SetSize(1). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-tagged-op-pool"), )) @@ -521,7 +521,7 @@ func (s *session) Open() error { s.pools.fetchTaggedOp.Init() fetchStatePoolOpts := pool.NewObjectPoolOptions(). - SetSize(s.opts.FetchBatchOpPoolSize()). + SetSize(1). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-tagged-state-pool"), )) @@ -723,14 +723,14 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, // directly into the return array in fetch calls. if len(queues) != len(prevQueues) { poolOpts := pool.NewObjectPoolOptions(). - SetSize(s.opts.FetchBatchOpPoolSize()). + SetSize(1). 
SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-batch-op-array-array-pool"), )) s.pools.fetchBatchOpArrayArray = newFetchBatchOpArrayArrayPool( poolOpts, len(queues), - s.opts.FetchBatchOpPoolSize()/len(queues)) + 1/len(queues)) s.pools.fetchBatchOpArrayArray.Init() } diff --git a/src/dbnode/config/m3dbnode-local-etcd.yml b/src/dbnode/config/m3dbnode-local-etcd.yml index fb4a247408..d737026afa 100644 --- a/src/dbnode/config/m3dbnode-local-etcd.yml +++ b/src/dbnode/config/m3dbnode-local-etcd.yml @@ -1,3 +1,18 @@ +coordinator: + listenAddress: + type: "config" + value: "0.0.0.0:7201" + + metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + db: logging: level: info @@ -26,7 +41,7 @@ db: fetchTimeout: 15s connectTimeout: 20s writeRetry: - initialBackoff: 500ms + initialBackoff: 500msh backoffFactor: 3 maxRetries: 2 jitter: true @@ -47,8 +62,8 @@ db: bootstrap: bootstrappers: - filesystem - - commitlog - peers + - commitlog - uninitialized_topology fs: numProcessorsPerCPU: 0.125 diff --git a/src/dbnode/config/m3dbnode-local-etcd2.yml b/src/dbnode/config/m3dbnode-local-etcd2.yml index 82d1858ead..7bfe93f8db 100644 --- a/src/dbnode/config/m3dbnode-local-etcd2.yml +++ b/src/dbnode/config/m3dbnode-local-etcd2.yml @@ -1,3 +1,18 @@ +coordinator: + listenAddress: + type: "config" + value: "0.0.0.0:7201" + + metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + db: logging: level: info @@ -47,8 +62,8 @@ db: bootstrap: bootstrappers: - filesystem - - commitlog - peers + - commitlog - uninitialized_topology fs: numProcessorsPerCPU: 0.125 diff --git 
a/src/dbnode/config/m3dbnode-local-etcd3.yml b/src/dbnode/config/m3dbnode-local-etcd3.yml new file mode 100644 index 0000000000..028ab48dd0 --- /dev/null +++ b/src/dbnode/config/m3dbnode-local-etcd3.yml @@ -0,0 +1,212 @@ +coordinator: + listenAddress: + type: "config" + value: "0.0.0.0:7201" + + metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +db: + logging: + level: info + + metrics: + prometheus: + handlerPath: /metrics + sanitization: prometheus + samplingRate: 1.0 + extended: detailed + + listenAddress: 0.0.0.0:9010 + clusterListenAddress: 0.0.0.0:9011 + httpNodeListenAddress: 0.0.0.0:9012 + httpClusterListenAddress: 0.0.0.0:9013 + debugListenAddress: 0.0.0.0:9014 + + hostID: + resolver: config + value: m3db_local_3 + + client: + writeConsistencyLevel: majority + readConsistencyLevel: unstrict_majority + writeTimeout: 10s + fetchTimeout: 15s + connectTimeout: 20s + writeRetry: + initialBackoff: 500ms + backoffFactor: 3 + maxRetries: 2 + jitter: true + fetchRetry: + initialBackoff: 500ms + backoffFactor: 2 + maxRetries: 3 + jitter: true + backgroundHealthCheckFailLimit: 4 + backgroundHealthCheckFailThrottleFactor: 0.5 + + gcPercentage: 100 + + writeNewSeriesAsync: true + writeNewSeriesLimitPerSecond: 1048576 + writeNewSeriesBackoffDuration: 2ms + + bootstrap: + bootstrappers: + - filesystem + - peers + - commitlog + - uninitialized_topology + fs: + numProcessorsPerCPU: 0.125 + + cache: + series: + policy: lru + + commitlog: + flushMaxBytes: 524288 + flushEvery: 1s + queue: + calculationType: fixed + size: 2097152 + blockSize: 10m + + fs: + filePathPrefix: /var/lib/m3db + writeBufferSize: 65536 + dataReadBufferSize: 65536 + infoReadBufferSize: 128 + seekReadBufferSize: 4096 + throughputLimitMbps: 100.0 + throughputCheckEvery: 128 + + repair: + enabled: false + interval: 2h + 
offset: 30m + jitter: 1h + throttle: 2m + checkInterval: 1m + + pooling: + blockAllocSize: 16 + type: simple + seriesPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + blockPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + encoderPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + closersPool: + size: 104857 + lowWatermark: 0.7 + highWatermark: 1.0 + contextPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + segmentReaderPool: + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + iteratorPool: + size: 2048 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlockMetadataResultsPool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlocksMetadataResultsPool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + hostBlockMetadataSlicePool: + size: 131072 + capacity: 3 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataSlicePool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataSlicePool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + identifierPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + bytesPool: + buckets: + - capacity: 16 + size: 524288 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 32 + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 64 + size: 131072 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 128 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 256 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 1440 + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 4096 + size: 8192 + lowWatermark: 0.7 + highWatermark: 1.0 + + config: + service: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: 
embedded + endpoints: + - dbnode01:2379 + seedNodes: + initialCluster: + - hostID: m3db_local + endpoint: http://dbnode01:2380 From 302c6a20a656299278daabdc9e585a9d167d17fd Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 11:59:30 -0400 Subject: [PATCH 03/26] Add github_token to .gitignore --- .gitignore | 3 +++ docker/grafana/Dockerfile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d1a600bc91..9613e1157e 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ site/ !m3db.io/**/vendor # Automatically populated from asset sources m3db.io/openapi + +# GitHub API token +github_token diff --git a/docker/grafana/Dockerfile b/docker/grafana/Dockerfile index 6303922798..32ef6242c0 100644 --- a/docker/grafana/Dockerfile +++ b/docker/grafana/Dockerfile @@ -1,3 +1,3 @@ FROM grafana/grafana:latest -COPY ./docker/grafana/datasource.yaml /etc/grafana/provisioning/datasources/datasource.yaml \ No newline at end of file +COPY ./docker/grafana/datasource.yaml /etc/grafana/provisioning/datasources/datasource.yaml From 37b0bb4b3f3218acac574cb5ef028057574880b3 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 12:00:15 -0400 Subject: [PATCH 04/26] switch docker-compose file version --- scripts/integration-tests/prometheus/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/integration-tests/prometheus/docker-compose.yml b/scripts/integration-tests/prometheus/docker-compose.yml index da713cd4e8..a129ae56fb 100644 --- a/scripts/integration-tests/prometheus/docker-compose.yml +++ b/scripts/integration-tests/prometheus/docker-compose.yml @@ -1,4 +1,4 @@ -version: "2.2" +version: "3.5" services: dbnode01: expose: From 377f3db8c0f2147ed1e6093012ffa9bf5563399c Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 12:03:46 -0400 Subject: [PATCH 05/26] Undo change --- src/dbnode/config/m3dbnode-local-etcd.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/src/dbnode/config/m3dbnode-local-etcd.yml b/src/dbnode/config/m3dbnode-local-etcd.yml index d737026afa..a3d9d1a529 100644 --- a/src/dbnode/config/m3dbnode-local-etcd.yml +++ b/src/dbnode/config/m3dbnode-local-etcd.yml @@ -41,7 +41,7 @@ db: fetchTimeout: 15s connectTimeout: 20s writeRetry: - initialBackoff: 500msh + initialBackoff: 500ms backoffFactor: 3 maxRetries: 2 jitter: true From 1508061d1f5be7a741ec830a7781db88d7b1b69d Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 13:59:13 -0400 Subject: [PATCH 06/26] Add m3_stack --- scripts/development/m3_stack/README.md | 8 + .../development/m3_stack/dbnode_config.yml | 212 ++++++++++++++++++ .../development/m3_stack/docker-compose.yml | 75 +++++++ .../development/m3_stack/m3coordinator.yml | 47 ++++ scripts/development/m3_stack/prometheus.yml | 45 ++++ scripts/development/m3_stack/start.sh | 73 ++++++ scripts/development/m3_stack/stop.sh | 5 + 7 files changed, 465 insertions(+) create mode 100644 scripts/development/m3_stack/README.md create mode 100644 scripts/development/m3_stack/dbnode_config.yml create mode 100644 scripts/development/m3_stack/docker-compose.yml create mode 100644 scripts/development/m3_stack/m3coordinator.yml create mode 100644 scripts/development/m3_stack/prometheus.yml create mode 100755 scripts/development/m3_stack/start.sh create mode 100755 scripts/development/m3_stack/stop.sh diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md new file mode 100644 index 0000000000..dc5276f857 --- /dev/null +++ b/scripts/development/m3_stack/README.md @@ -0,0 +1,8 @@ +# Local Development + +This docker-compose file will setup the following environment: + +1. 3 M3DB nodes with a single node acting as an EtcD seed +2. 1 M3Coordinator node +3. 1 Grafana node (with a pre-configured Prometheus source) +4. 
1 Prometheus node that scrapes the M3DB/M3Coordinator nodes and writes the metrics to M3Coordinator \ No newline at end of file diff --git a/scripts/development/m3_stack/dbnode_config.yml b/scripts/development/m3_stack/dbnode_config.yml new file mode 100644 index 0000000000..5e1e470f00 --- /dev/null +++ b/scripts/development/m3_stack/dbnode_config.yml @@ -0,0 +1,212 @@ +coordinator: + listenAddress: + type: "config" + value: "0.0.0.0:7201" + + metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +db: + logging: + level: info + + metrics: + prometheus: + handlerPath: /metrics + sanitization: prometheus + samplingRate: 1.0 + extended: detailed + + listenAddress: 0.0.0.0:9000 + clusterListenAddress: 0.0.0.0:9001 + httpNodeListenAddress: 0.0.0.0:9002 + httpClusterListenAddress: 0.0.0.0:9003 + debugListenAddress: 0.0.0.0:9004 + + hostID: + resolver: environment + envVarName: M3DB_HOST_ID + + client: + writeConsistencyLevel: majority + readConsistencyLevel: unstrict_majority + writeTimeout: 10s + fetchTimeout: 15s + connectTimeout: 20s + writeRetry: + initialBackoff: 500ms + backoffFactor: 3 + maxRetries: 2 + jitter: true + fetchRetry: + initialBackoff: 500ms + backoffFactor: 2 + maxRetries: 3 + jitter: true + backgroundHealthCheckFailLimit: 4 + backgroundHealthCheckFailThrottleFactor: 0.5 + + gcPercentage: 100 + + writeNewSeriesAsync: true + writeNewSeriesLimitPerSecond: 1048576 + writeNewSeriesBackoffDuration: 2ms + + bootstrap: + bootstrappers: + - filesystem + - peers + - commitlog + - uninitialized_topology + fs: + numProcessorsPerCPU: 0.125 + + cache: + series: + policy: lru + + commitlog: + flushMaxBytes: 524288 + flushEvery: 1s + queue: + calculationType: fixed + size: 2097152 + blockSize: 10m + + fs: + filePathPrefix: /var/lib/m3db + writeBufferSize: 65536 + dataReadBufferSize: 65536 
+ infoReadBufferSize: 128 + seekReadBufferSize: 4096 + throughputLimitMbps: 100.0 + throughputCheckEvery: 128 + + repair: + enabled: false + interval: 2h + offset: 30m + jitter: 1h + throttle: 2m + checkInterval: 1m + + pooling: + blockAllocSize: 16 + type: simple + seriesPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + blockPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + encoderPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + closersPool: + size: 104857 + lowWatermark: 0.7 + highWatermark: 1.0 + contextPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + segmentReaderPool: + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + iteratorPool: + size: 2048 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlockMetadataResultsPool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + fetchBlocksMetadataResultsPool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + hostBlockMetadataSlicePool: + size: 131072 + capacity: 3 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blockMetadataSlicePool: + size: 65536 + capacity: 32 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataPool: + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + blocksMetadataSlicePool: + size: 32 + capacity: 4096 + lowWatermark: 0.7 + highWatermark: 1.0 + identifierPool: + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + bytesPool: + buckets: + - capacity: 16 + size: 524288 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 32 + size: 262144 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 64 + size: 131072 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 128 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 256 + size: 65536 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 1440 + size: 16384 + lowWatermark: 0.7 + highWatermark: 1.0 + - capacity: 4096 + size: 8192 + lowWatermark: 
0.7 + highWatermark: 1.0 + + config: + service: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - m3db_seed:2379 + seedNodes: + initialCluster: + - hostID: m3db_seed + endpoint: http://m3db_seed:2380 diff --git a/scripts/development/m3_stack/docker-compose.yml b/scripts/development/m3_stack/docker-compose.yml new file mode 100644 index 0000000000..e90fb8df0d --- /dev/null +++ b/scripts/development/m3_stack/docker-compose.yml @@ -0,0 +1,75 @@ +version: "3.5" +services: + m3db_seed: + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3dbnode/Dockerfile + image: m3dbnode01:latest + volumes: + - "./dbnode_config.yml:/etc/m3dbnode/m3dbnode.yml" + environment: + - M3DB_HOST_ID=m3db_seed + m3db_data01: + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3dbnode/Dockerfile + image: m3dbnode02:latest + volumes: + - "./dbnode_config.yml:/etc/m3dbnode/m3dbnode.yml" + environment: + - M3DB_HOST_ID=m3db_data01 + m3db_data02: + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3dbnode/Dockerfile + image: m3dbnode03:latest + volumes: + - "./dbnode_config.yml:/etc/m3dbnode/m3dbnode.yml" + environment: + - M3DB_HOST_ID=m3db_data02 + coordinator01: + expose: + - "7201" + - "7203" + - "7208" + ports: + - "0.0.0.0:7201:7201" + - "0.0.0.0:7203:7203" + - "0.0.0.0:7208:7208" + networks: + - backend + build: + context: ../../../ + dockerfile: ./docker/m3coordinator/Dockerfile + image: m3coordinator01:latest + volumes: + - "./:/etc/m3coordinator/" + prometheus01: + expose: + - "9090" + ports: + - "0.0.0.0:9090:9090" + networks: + - backend + image: prom/prometheus:latest + volumes: + - "./:/etc/prometheus/" + grafana2: + build: + context: ../../../ + dockerfile: ./docker/grafana/Dockerfile + expose: + - "3000" + ports: + - "0.0.0.0:3000:3000" + networks: + - backend + image: grafana/grafana:latest +networks: + backend: diff --git 
a/scripts/development/m3_stack/m3coordinator.yml b/scripts/development/m3_stack/m3coordinator.yml new file mode 100644 index 0000000000..f24643cdf3 --- /dev/null +++ b/scripts/development/m3_stack/m3coordinator.yml @@ -0,0 +1,47 @@ +listenAddress: + type: "config" + value: "0.0.0.0:7201" + +metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +clusters: + - namespaces: + - namespace: prometheus_metrics + type: unaggregated + retention: 48h + client: + config: + service: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - m3db_seed:2379 + writeConsistencyLevel: majority + readConsistencyLevel: unstrict_majority + writeTimeout: 10s + fetchTimeout: 15s + connectTimeout: 20s + writeRetry: + initialBackoff: 500ms + backoffFactor: 3 + maxRetries: 2 + jitter: true + fetchRetry: + initialBackoff: 500ms + backoffFactor: 2 + maxRetries: 3 + jitter: true + backgroundHealthCheckFailLimit: 4 + backgroundHealthCheckFailThrottleFactor: 0.5 diff --git a/scripts/development/m3_stack/prometheus.yml b/scripts/development/m3_stack/prometheus.yml new file mode 100644 index 0000000000..e2d5fac745 --- /dev/null +++ b/scripts/development/m3_stack/prometheus.yml @@ -0,0 +1,45 @@ +# my global config +global: + external_labels: + role: "remote" + scrape_interval: 1s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 1s # Evaluate rules every 15 seconds. The default is every 1 minute. + # scrape_timeout is set to the global default (10s). + +# Alertmanager configuration +alerting: + alertmanagers: + - static_configs: + - targets: + # - alertmanager:9093 + +# Load rules once and periodically evaluate them according to the global 'evaluation_interval'. 
+rule_files: + # - "first_rules.yml" + # - "second_rules.yml" + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'prometheus' + + # metrics_path defaults to '/metrics' + # scheme defaults to 'http'. + + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'coordinator' + static_configs: + - targets: ['coordinator01:7203'] + + - job_name: 'dbnode' + static_configs: + - targets: ['m3db_seed:7203', 'm3db_data01:7203', 'm3db_data02:7203'] + +remote_read: + - url: http://coordinator01:7201/api/v1/prom/remote/read + +remote_write: + - url: http://coordinator01:7201/api/v1/prom/remote/write diff --git a/scripts/development/m3_stack/start.sh b/scripts/development/m3_stack/start.sh new file mode 100755 index 0000000000..c94c65da50 --- /dev/null +++ b/scripts/development/m3_stack/start.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +set -xe + +echo "Bringing up nodes in the backgorund with docker compose, remember to run ./stop.sh when done" +docker-compose -f docker-compose.yml up -d --renew-anon-volumes +echo "Nodes online" + +echo "Initializing namespace" +curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ + "name": "prometheus_metrics", + "options": { + "bootstrapEnabled": true, + "flushEnabled": true, + "writesToCommitLog": true, + "cleanupEnabled": true, + "snapshotEnabled": true, + "repairEnabled": false, + "retentionOptions": { + "retentionPeriodNanos": 172800000000000, + "blockSizeNanos": 7200000000000, + "bufferFutureNanos": 600000000000, + "bufferPastNanos": 600000000000, + "blockDataExpiry": true, + "blockDataExpiryAfterNotAccessPeriodNanos": 300000000000 + }, + "indexOptions": { + "enabled": true, + "blockSizeNanos": 7200000000000 + } + } +}' +echo "Done initializing namespace" + +echo "Initializing topology" +curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ + 
"num_shards": 64, + "replication_factor": 3, + "instances": [ + { + "id": "m3db_seed", + "isolation_group": "rack-a", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3db_seed:9000", + "hostname": "m3db_seed", + "port": 9000 + }, + { + "id": "m3db_data01", + "isolation_group": "rack-b", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3db_data01:9000", + "hostname": "m3db_data01", + "port": 9000 + }, + { + "id": "m3db_data02", + "isolation_group": "rack-c", + "zone": "embedded", + "weight": 1024, + "endpoint": "m3db_data02:9000", + "hostname": "m3db_data02", + "port": 9000 + } + ] +}' +echo "Done initializing topology" + +echo "Prometheus available at localhost:9090" +echo "Grafana available at localhost:3000" +echo "Run ./stop.sh to shutdown nodes when done" \ No newline at end of file diff --git a/scripts/development/m3_stack/stop.sh b/scripts/development/m3_stack/stop.sh new file mode 100755 index 0000000000..7053865951 --- /dev/null +++ b/scripts/development/m3_stack/stop.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +set -xe + +docker-compose -f docker-compose.yml down \ No newline at end of file From e3aa7130c6b70ce7e0404308fcdfc55527719f55 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 13:59:48 -0400 Subject: [PATCH 07/26] Reset docker-compose --- .../prometheus/docker-compose.yml | 33 +------------------ 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/scripts/integration-tests/prometheus/docker-compose.yml b/scripts/integration-tests/prometheus/docker-compose.yml index a129ae56fb..86fed27b40 100644 --- a/scripts/integration-tests/prometheus/docker-compose.yml +++ b/scripts/integration-tests/prometheus/docker-compose.yml @@ -13,41 +13,13 @@ services: context: ../../../ dockerfile: ./docker/m3dbnode/Dockerfile image: m3dbnode01:latest - dbnode02: - expose: - - "9005-9009" - - "2381-2382" - ports: - - "0.0.0.0:9005-9009:9005-9009" - - "0.0.0.0:2381-2382:2381-2382" - networks: - - backend - build: - context: 
../../../ - dockerfile: ./docker/m3dbnode/Dockerfile2 - image: m3dbnode02:latest - dbnode03: - expose: - - "9010-9014" - - "2383-2384" - ports: - - "0.0.0.0:9010-9014:9010-9014" - - "0.0.0.0:2383-2384:2383-2384" - networks: - - backend - build: - context: ../../../ - dockerfile: ./docker/m3dbnode/Dockerfile3 - image: m3dbnode03:latest coordinator01: expose: - "7201" - "7203" - - "7208" ports: - "0.0.0.0:7201:7201" - "0.0.0.0:7203:7203" - - "0.0.0.0:7208:7208" networks: - backend build: @@ -66,10 +38,7 @@ services: image: prom/prometheus:latest volumes: - "./:/etc/prometheus/" - grafana2: - build: - context: ../../../ - dockerfile: ./docker/grafana/Dockerfile + grafana: expose: - "3000" ports: From c599d7028450026015e6923957c5f0401d38ad36 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:00:16 -0400 Subject: [PATCH 08/26] Reset prometheus config --- scripts/integration-tests/prometheus/prometheus.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/integration-tests/prometheus/prometheus.yml b/scripts/integration-tests/prometheus/prometheus.yml index 079da0039e..ce96174fe2 100644 --- a/scripts/integration-tests/prometheus/prometheus.yml +++ b/scripts/integration-tests/prometheus/prometheus.yml @@ -2,8 +2,8 @@ global: external_labels: role: "remote" - scrape_interval: 1s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 1s # Evaluate rules every 15 seconds. The default is every 1 minute. + scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute. + evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute. # scrape_timeout is set to the global default (10s). 
# Alertmanager configuration @@ -36,7 +36,7 @@ scrape_configs: - job_name: 'dbnode' static_configs: - - targets: ['dbnode01:7203', 'dbnode02:7203', 'dbnode03:7203'] + - targets: ['dbnode01:9004'] remote_read: - url: http://coordinator01:7201/api/v1/prom/remote/read From de0f3f399deb99396ca47c861edfe269119c327e Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:02:35 -0400 Subject: [PATCH 09/26] Remove unused files --- docker/m3dbnode/Dockerfile2 | 27 --- docker/m3dbnode/Dockerfile3 | 27 --- src/dbnode/config/m3dbnode-local-etcd2.yml | 212 --------------------- src/dbnode/config/m3dbnode-local-etcd3.yml | 212 --------------------- 4 files changed, 478 deletions(-) delete mode 100644 docker/m3dbnode/Dockerfile2 delete mode 100644 docker/m3dbnode/Dockerfile3 delete mode 100644 src/dbnode/config/m3dbnode-local-etcd2.yml delete mode 100644 src/dbnode/config/m3dbnode-local-etcd3.yml diff --git a/docker/m3dbnode/Dockerfile2 b/docker/m3dbnode/Dockerfile2 deleted file mode 100644 index 2b7a8f4129..0000000000 --- a/docker/m3dbnode/Dockerfile2 +++ /dev/null @@ -1,27 +0,0 @@ -# stage 1: build -FROM golang:1.10-alpine AS builder -LABEL maintainer="The M3DB Authors " - -# Install Glide -RUN apk add --update glide git make bash - -# Add source code -RUN mkdir -p /go/src/github.com/m3db/m3 -ADD . 
/go/src/github.com/m3db/m3 - -# Build m3dbnode binary -RUN cd /go/src/github.com/m3db/m3/ && \ - git submodule update --init && \ - make m3dbnode-linux-amd64 - -# stage 2: lightweight "release" -FROM alpine:latest -LABEL maintainer="The M3DB Authors " - -EXPOSE 2381/tcp 2382/tcp 7201/tcp 7203/tcp 9005-9009/tcp - -COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ -COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd2.yml /etc/m3dbnode/m3dbnode2.yml - -ENTRYPOINT [ "/bin/m3dbnode" ] -CMD [ "-f", "/etc/m3dbnode/m3dbnode2.yml" ] diff --git a/docker/m3dbnode/Dockerfile3 b/docker/m3dbnode/Dockerfile3 deleted file mode 100644 index a6ca8712d6..0000000000 --- a/docker/m3dbnode/Dockerfile3 +++ /dev/null @@ -1,27 +0,0 @@ -# stage 1: build -FROM golang:1.10-alpine AS builder -LABEL maintainer="The M3DB Authors " - -# Install Glide -RUN apk add --update glide git make bash - -# Add source code -RUN mkdir -p /go/src/github.com/m3db/m3 -ADD . /go/src/github.com/m3db/m3 - -# Build m3dbnode binary -RUN cd /go/src/github.com/m3db/m3/ && \ - git submodule update --init && \ - make m3dbnode-linux-amd64 - -# stage 2: lightweight "release" -FROM alpine:latest -LABEL maintainer="The M3DB Authors " - -EXPOSE 2381/tcp 2382/tcp 7201/tcp 7203/tcp 9010-9014/tcp - -COPY --from=builder /go/src/github.com/m3db/m3/bin/m3dbnode /bin/ -COPY --from=builder /go/src/github.com/m3db/m3/src/dbnode/config/m3dbnode-local-etcd3.yml /etc/m3dbnode/m3dbnode3.yml - -ENTRYPOINT [ "/bin/m3dbnode" ] -CMD [ "-f", "/etc/m3dbnode/m3dbnode3.yml" ] diff --git a/src/dbnode/config/m3dbnode-local-etcd2.yml b/src/dbnode/config/m3dbnode-local-etcd2.yml deleted file mode 100644 index 7bfe93f8db..0000000000 --- a/src/dbnode/config/m3dbnode-local-etcd2.yml +++ /dev/null @@ -1,212 +0,0 @@ -coordinator: - listenAddress: - type: "config" - value: "0.0.0.0:7201" - - metrics: - scope: - prefix: "coordinator" - prometheus: - handlerPath: /metrics - listenAddress: 0.0.0.0:7203 # until 
https://github.com/m3db/m3/issues/682 is resolved - sanitization: prometheus - samplingRate: 1.0 - extended: none - -db: - logging: - level: info - - metrics: - prometheus: - handlerPath: /metrics - sanitization: prometheus - samplingRate: 1.0 - extended: detailed - - listenAddress: 0.0.0.0:9005 - clusterListenAddress: 0.0.0.0:9006 - httpNodeListenAddress: 0.0.0.0:9007 - httpClusterListenAddress: 0.0.0.0:9008 - debugListenAddress: 0.0.0.0:9009 - - hostID: - resolver: config - value: m3db_local_2 - - client: - writeConsistencyLevel: majority - readConsistencyLevel: unstrict_majority - writeTimeout: 10s - fetchTimeout: 15s - connectTimeout: 20s - writeRetry: - initialBackoff: 500ms - backoffFactor: 3 - maxRetries: 2 - jitter: true - fetchRetry: - initialBackoff: 500ms - backoffFactor: 2 - maxRetries: 3 - jitter: true - backgroundHealthCheckFailLimit: 4 - backgroundHealthCheckFailThrottleFactor: 0.5 - - gcPercentage: 100 - - writeNewSeriesAsync: true - writeNewSeriesLimitPerSecond: 1048576 - writeNewSeriesBackoffDuration: 2ms - - bootstrap: - bootstrappers: - - filesystem - - peers - - commitlog - - uninitialized_topology - fs: - numProcessorsPerCPU: 0.125 - - cache: - series: - policy: lru - - commitlog: - flushMaxBytes: 524288 - flushEvery: 1s - queue: - calculationType: fixed - size: 2097152 - blockSize: 10m - - fs: - filePathPrefix: /var/lib/m3db - writeBufferSize: 65536 - dataReadBufferSize: 65536 - infoReadBufferSize: 128 - seekReadBufferSize: 4096 - throughputLimitMbps: 100.0 - throughputCheckEvery: 128 - - repair: - enabled: false - interval: 2h - offset: 30m - jitter: 1h - throttle: 2m - checkInterval: 1m - - pooling: - blockAllocSize: 16 - type: simple - seriesPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - blockPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - encoderPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - closersPool: - size: 104857 - lowWatermark: 0.7 - highWatermark: 1.0 - contextPool: - size: 262144 
- lowWatermark: 0.7 - highWatermark: 1.0 - segmentReaderPool: - size: 16384 - lowWatermark: 0.7 - highWatermark: 1.0 - iteratorPool: - size: 2048 - lowWatermark: 0.7 - highWatermark: 1.0 - fetchBlockMetadataResultsPool: - size: 65536 - capacity: 32 - lowWatermark: 0.7 - highWatermark: 1.0 - fetchBlocksMetadataResultsPool: - size: 32 - capacity: 4096 - lowWatermark: 0.7 - highWatermark: 1.0 - hostBlockMetadataSlicePool: - size: 131072 - capacity: 3 - lowWatermark: 0.7 - highWatermark: 1.0 - blockMetadataPool: - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - blockMetadataSlicePool: - size: 65536 - capacity: 32 - lowWatermark: 0.7 - highWatermark: 1.0 - blocksMetadataPool: - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - blocksMetadataSlicePool: - size: 32 - capacity: 4096 - lowWatermark: 0.7 - highWatermark: 1.0 - identifierPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - bytesPool: - buckets: - - capacity: 16 - size: 524288 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 32 - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 64 - size: 131072 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 128 - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 256 - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 1440 - size: 16384 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 4096 - size: 8192 - lowWatermark: 0.7 - highWatermark: 1.0 - - config: - service: - env: default_env - zone: embedded - service: m3db - cacheDir: /var/lib/m3kv - etcdClusters: - - zone: embedded - endpoints: - - dbnode01:2379 - seedNodes: - initialCluster: - - hostID: m3db_local - endpoint: http://dbnode01:2380 diff --git a/src/dbnode/config/m3dbnode-local-etcd3.yml b/src/dbnode/config/m3dbnode-local-etcd3.yml deleted file mode 100644 index 028ab48dd0..0000000000 --- a/src/dbnode/config/m3dbnode-local-etcd3.yml +++ /dev/null @@ -1,212 +0,0 @@ -coordinator: - listenAddress: - type: "config" - value: 
"0.0.0.0:7201" - - metrics: - scope: - prefix: "coordinator" - prometheus: - handlerPath: /metrics - listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved - sanitization: prometheus - samplingRate: 1.0 - extended: none - -db: - logging: - level: info - - metrics: - prometheus: - handlerPath: /metrics - sanitization: prometheus - samplingRate: 1.0 - extended: detailed - - listenAddress: 0.0.0.0:9010 - clusterListenAddress: 0.0.0.0:9011 - httpNodeListenAddress: 0.0.0.0:9012 - httpClusterListenAddress: 0.0.0.0:9013 - debugListenAddress: 0.0.0.0:9014 - - hostID: - resolver: config - value: m3db_local_3 - - client: - writeConsistencyLevel: majority - readConsistencyLevel: unstrict_majority - writeTimeout: 10s - fetchTimeout: 15s - connectTimeout: 20s - writeRetry: - initialBackoff: 500ms - backoffFactor: 3 - maxRetries: 2 - jitter: true - fetchRetry: - initialBackoff: 500ms - backoffFactor: 2 - maxRetries: 3 - jitter: true - backgroundHealthCheckFailLimit: 4 - backgroundHealthCheckFailThrottleFactor: 0.5 - - gcPercentage: 100 - - writeNewSeriesAsync: true - writeNewSeriesLimitPerSecond: 1048576 - writeNewSeriesBackoffDuration: 2ms - - bootstrap: - bootstrappers: - - filesystem - - peers - - commitlog - - uninitialized_topology - fs: - numProcessorsPerCPU: 0.125 - - cache: - series: - policy: lru - - commitlog: - flushMaxBytes: 524288 - flushEvery: 1s - queue: - calculationType: fixed - size: 2097152 - blockSize: 10m - - fs: - filePathPrefix: /var/lib/m3db - writeBufferSize: 65536 - dataReadBufferSize: 65536 - infoReadBufferSize: 128 - seekReadBufferSize: 4096 - throughputLimitMbps: 100.0 - throughputCheckEvery: 128 - - repair: - enabled: false - interval: 2h - offset: 30m - jitter: 1h - throttle: 2m - checkInterval: 1m - - pooling: - blockAllocSize: 16 - type: simple - seriesPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - blockPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - encoderPool: - size: 262144 - 
lowWatermark: 0.7 - highWatermark: 1.0 - closersPool: - size: 104857 - lowWatermark: 0.7 - highWatermark: 1.0 - contextPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - segmentReaderPool: - size: 16384 - lowWatermark: 0.7 - highWatermark: 1.0 - iteratorPool: - size: 2048 - lowWatermark: 0.7 - highWatermark: 1.0 - fetchBlockMetadataResultsPool: - size: 65536 - capacity: 32 - lowWatermark: 0.7 - highWatermark: 1.0 - fetchBlocksMetadataResultsPool: - size: 32 - capacity: 4096 - lowWatermark: 0.7 - highWatermark: 1.0 - hostBlockMetadataSlicePool: - size: 131072 - capacity: 3 - lowWatermark: 0.7 - highWatermark: 1.0 - blockMetadataPool: - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - blockMetadataSlicePool: - size: 65536 - capacity: 32 - lowWatermark: 0.7 - highWatermark: 1.0 - blocksMetadataPool: - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - blocksMetadataSlicePool: - size: 32 - capacity: 4096 - lowWatermark: 0.7 - highWatermark: 1.0 - identifierPool: - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - bytesPool: - buckets: - - capacity: 16 - size: 524288 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 32 - size: 262144 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 64 - size: 131072 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 128 - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 256 - size: 65536 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 1440 - size: 16384 - lowWatermark: 0.7 - highWatermark: 1.0 - - capacity: 4096 - size: 8192 - lowWatermark: 0.7 - highWatermark: 1.0 - - config: - service: - env: default_env - zone: embedded - service: m3db - cacheDir: /var/lib/m3kv - etcdClusters: - - zone: embedded - endpoints: - - dbnode01:2379 - seedNodes: - initialCluster: - - hostID: m3db_local - endpoint: http://dbnode01:2380 From 644561c986d1edbe0e644728d6be1b04e3eb9711 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:03:19 -0400 Subject: [PATCH 10/26] 
Update readme --- scripts/development/m3_stack/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index dc5276f857..04c747776d 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -5,4 +5,8 @@ This docker-compose file will setup the following environment: 1. 3 M3DB nodes with a single node acting as an EtcD seed 2. 1 M3Coordinator node 3. 1 Grafana node (with a pre-configured Prometheus source) -4. 1 Prometheus node that scrapes the M3DB/M3Coordinator nodes and writes the metrics to M3Coordinator \ No newline at end of file +4. 1 Prometheus node that scrapes the M3DB/M3Coordinator nodes and writes the metrics to M3Coordinator + +## Usage + +Use the `start.sh` and `stop.sh` scripts \ No newline at end of file From 9aa8a09ec267e4917c657f468eca382cf82e6b20 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:04:28 -0400 Subject: [PATCH 11/26] Update prometheus yaml --- scripts/development/m3_stack/prometheus.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/development/m3_stack/prometheus.yml b/scripts/development/m3_stack/prometheus.yml index e2d5fac745..0479de484f 100644 --- a/scripts/development/m3_stack/prometheus.yml +++ b/scripts/development/m3_stack/prometheus.yml @@ -1,10 +1,8 @@ -# my global config global: external_labels: role: "remote" - scrape_interval: 1s # Set the scrape interval to every 15 seconds. Default is every 1 minute. - evaluation_interval: 1s # Evaluate rules every 15 seconds. The default is every 1 minute. - # scrape_timeout is set to the global default (10s). 
+ scrape_interval: 1s # Set low to increase load + evaluation_interval: 1s # Set low to increase load # Alertmanager configuration alerting: From e2d5f551a7bf6101ca158ec2a5cd797515232e78 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:06:14 -0400 Subject: [PATCH 12/26] undo more stuff --- scripts/development/m3_stack/stop.sh | 2 +- .../prometheus/prometheus-integration-test.sh | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/scripts/development/m3_stack/stop.sh b/scripts/development/m3_stack/stop.sh index 7053865951..ed6e43d9f4 100755 --- a/scripts/development/m3_stack/stop.sh +++ b/scripts/development/m3_stack/stop.sh @@ -2,4 +2,4 @@ set -xe -docker-compose -f docker-compose.yml down \ No newline at end of file +docker-compose -f docker-compose.yml down diff --git a/scripts/integration-tests/prometheus/prometheus-integration-test.sh b/scripts/integration-tests/prometheus/prometheus-integration-test.sh index 758c71b97e..93f735521b 100755 --- a/scripts/integration-tests/prometheus/prometheus-integration-test.sh +++ b/scripts/integration-tests/prometheus/prometheus-integration-test.sh @@ -19,12 +19,11 @@ fi echo "Run m3dbnode and m3coordinator containers" docker-compose -f docker-compose.yml up -d dbnode01 -docker-compose -f docker-compose.yml up -d dbnode02 docker-compose -f docker-compose.yml up -d coordinator01 echo "Sleeping for a bit to ensure db up" -sleep 2 # TODO Replace sleeps with logic to determine when to proceed +sleep 10 # TODO Replace sleeps with logic to determine when to proceed echo "Adding namespace" @@ -54,7 +53,7 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ echo "Sleep while namespace is init'd" -sleep 2 # TODO Replace sleeps with logic to determine when to proceed +sleep 10 # TODO Replace sleeps with logic to determine when to proceed [ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.prometheus_metrics.indexOptions.enabled)" == true ] @@ -72,15 
+71,6 @@ curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ "endpoint": "dbnode01:9000", "hostname": "dbnode01", "port": 9000 - }, - { - "id": "m3db_local_2", - "isolation_group": "rack-b", - "zone": "embedded", - "weight": 1024, - "endpoint": "dbnode02:9005", - "hostname": "dbnode02", - "port": 9005 } ] }' From 89c6342973cfc304ba5d4fae4172311c6c370940 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:07:00 -0400 Subject: [PATCH 13/26] Undo more changes --- src/dbnode/client/session.go | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/dbnode/client/session.go b/src/dbnode/client/session.go index f6a322501c..81af153556 100644 --- a/src/dbnode/client/session.go +++ b/src/dbnode/client/session.go @@ -281,7 +281,7 @@ func newSession(opts Options) (clientSession, error) { s.pools.writeAttempt.Init() fetchAttemptPoolOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(opts.FetchBatchOpPoolSize()). SetInstrumentOptions(opts.InstrumentOptions().SetMetricsScope( scope.SubScope("fetch-attempt-pool"), )) @@ -289,7 +289,7 @@ func newSession(opts Options) (clientSession, error) { s.pools.fetchAttempt.Init() fetchTaggedAttemptPoolImplOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(opts.FetchBatchOpPoolSize()). SetInstrumentOptions(opts.InstrumentOptions().SetMetricsScope( scope.SubScope("fetch-tagged-attempt-pool"), )) @@ -505,7 +505,7 @@ func (s *session) Open() error { s.pools.writeState.Init() fetchBatchOpPoolOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(s.opts.FetchBatchOpPoolSize()). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-batch-op-pool"), )) @@ -513,7 +513,7 @@ func (s *session) Open() error { s.pools.fetchBatchOp.Init() fetchTaggedOpPoolOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(s.opts.FetchBatchOpPoolSize()). 
SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-tagged-op-pool"), )) @@ -521,7 +521,7 @@ func (s *session) Open() error { s.pools.fetchTaggedOp.Init() fetchStatePoolOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(s.opts.FetchBatchOpPoolSize()). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-tagged-state-pool"), )) @@ -723,14 +723,14 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, // directly into the return array in fetch calls. if len(queues) != len(prevQueues) { poolOpts := pool.NewObjectPoolOptions(). - SetSize(1). + SetSize(s.opts.FetchBatchOpPoolSize()). SetInstrumentOptions(s.opts.InstrumentOptions().SetMetricsScope( s.scope.SubScope("fetch-batch-op-array-array-pool"), )) s.pools.fetchBatchOpArrayArray = newFetchBatchOpArrayArrayPool( poolOpts, len(queues), - 1/len(queues)) + s.opts.FetchBatchOpPoolSize()/len(queues)) s.pools.fetchBatchOpArrayArray.Init() } @@ -763,7 +763,6 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, s.pools.multiReaderIterator = encoding.NewMultiReaderIteratorPool(poolOpts) s.pools.multiReaderIterator.Init(s.opts.ReaderIteratorAllocate()) } - if replicas > len(s.metrics.writeNodesRespondingErrors) { curr := len(s.metrics.writeNodesRespondingErrors) for i := curr; i < replicas; i++ { @@ -773,7 +772,6 @@ func (s *session) setTopologyWithLock(topoMap topology.Map, queues []hostQueue, append(s.metrics.writeNodesRespondingErrors, counter) } } - if replicas > len(s.metrics.fetchNodesRespondingErrors) { curr := len(s.metrics.fetchNodesRespondingErrors) for i := curr; i < replicas; i++ { From 75fc4f77dc75a0581341ea93c23dab00a381e447 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:07:46 -0400 Subject: [PATCH 14/26] Fix config --- src/dbnode/config/m3dbnode-local-etcd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/dbnode/config/m3dbnode-local-etcd.yml b/src/dbnode/config/m3dbnode-local-etcd.yml index a3d9d1a529..37c1c95ff6 100644 --- a/src/dbnode/config/m3dbnode-local-etcd.yml +++ b/src/dbnode/config/m3dbnode-local-etcd.yml @@ -62,8 +62,8 @@ db: bootstrap: bootstrappers: - filesystem - - peers - commitlog + - peers - uninitialized_topology fs: numProcessorsPerCPU: 0.125 From 83c77b68e6f8d12f287f83911943c3e8d5a41354 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:08:33 -0400 Subject: [PATCH 15/26] Remove github_token file --- github_token | 1 - 1 file changed, 1 deletion(-) delete mode 100644 github_token diff --git a/github_token b/github_token deleted file mode 100644 index 713f1eca75..0000000000 --- a/github_token +++ /dev/null @@ -1 +0,0 @@ -f6192b8d35518098fbb645ffd807a53749ffb00e \ No newline at end of file From 8a7c21882639cb17020a46644fc97a6cf220d649 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:09:00 -0400 Subject: [PATCH 16/26] Update README --- scripts/development/m3_stack/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index 04c747776d..e3c50cab37 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -2,7 +2,7 @@ This docker-compose file will setup the following environment: -1. 3 M3DB nodes with a single node acting as an EtcD seed +1. 3 M3DB nodes with a single node acting as an ETCD seed 2. 1 M3Coordinator node 3. 1 Grafana node (with a pre-configured Prometheus source) 4. 
1 Prometheus node that scrapes the M3DB/M3Coordinator nodes and writes the metrics to M3Coordinator From 37b1f22d86c2d1f1da3ffc3d43ffd73f2780f465 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:13:09 -0400 Subject: [PATCH 17/26] Add grafana dashboard for m3db --- integrations/grafana/m3db_dashboard.json | 6090 ++++++++++++++++++++++ 1 file changed, 6090 insertions(+) create mode 100644 integrations/grafana/m3db_dashboard.json diff --git a/integrations/grafana/m3db_dashboard.json b/integrations/grafana/m3db_dashboard.json new file mode 100644 index 0000000000..69455be2e1 --- /dev/null +++ b/integrations/grafana/m3db_dashboard.json @@ -0,0 +1,6090 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.2.4" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1537889403436, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 90, + "panels": [], + "repeat": null, + "title": "Bootstrap Node Counts", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 1 + }, + "height": "", + "hideTimeOverride": true, + "id": 51, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "Value", + "targets": [ + { + "expr": "sum(database_bootstrapped{instance=~\"$instance\"} == 1)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "key": 0.3593853225333008, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | >=1 | removeEmpty | count" + } + ], + "thresholds": "", + "timeFrom": "1m", + "timeShift": null, + "title": "Bootstrapped", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "y_formats": [ + "short", + "short" + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 1 + }, + 
"height": "", + "hideTimeOverride": true, + "id": 50, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "Value", + "targets": [ + { + "expr": "sum(database_bootstrapped{instance=~\"$instance\"} == bool 0)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "key": 0.08874521907599053, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | <1 | removeEmpty | count" + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "Bootstrapping", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "y_formats": [ + "short", + "short" + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 1 + }, + "height": "", + "hideTimeOverride": true, + "id": 55, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + 
"nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "revision", + "targets": [ + { + "expr": "count(build_information{instance=~\"$instance\"}) by (revision)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "key": 0.25234693632915994, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum revision | removeEmpty | count" + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "# Revisions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "y_formats": [ + "short", + "short" + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 1 + }, + "height": "", + "hideTimeOverride": true, + "id": 56, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 
193)", + "show": true + }, + "tableColumn": "go_version", + "targets": [ + { + "expr": "count(build_information{instance=~\"$instance\"}) by (go_version)", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "key": 0.08534584879068197, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum go-version | removeEmpty | count" + } + ], + "thresholds": "", + "timeFrom": "1m", + "title": "# Runtime Versions", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "y_formats": [ + "short", + "short" + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 91, + "panels": [], + "repeat": null, + "title": "Commitlog / Inserts", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 5 + }, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "smoothed-length", + "color": "#BF1B00", + "linewidth": 4 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "commitlog_writes_queued{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.09635984016153087, + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:commitlog.writes.queued host:$servers | aliasByTags name host", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": 
"Commit Log Queue Length", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 5 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "average", + "color": "#BF1B00", + "linewidth": 4 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": " sum(rate(commitlog_writes_success{instance=~\"$instance\"}[$step]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.7940843773344208, + "legendFormat": "writes", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:commitlog.writes.success host:$servers | \n sumSeries | \n scaleToSeconds 1 | \n aliasByTags name host", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Commit Log Writes / Second (Includes replication)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 5 + }, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + }, + { + "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(dbshard_insert_queue_inserts{instance=~\"$instance\"}[$step])) by (pending_write)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.6661769838428799, + "legendFormat": "pendingWrite-{{pending_write}}", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:dbshard.insert-queue.inserts host:$servers | scaleToSeconds 1 | sum pending-write", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "New Series 
Inserts / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 92, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 13 + }, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "smoothed-length", + "color": "#BF1B00", + "fill": 0, + "linewidth": 5 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(service_fetchBatchRaw_success{instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": "rate(service_fetchBatchRaw_errors{instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success 
host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": "rate(service_writeBatchRaw_success{instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": "rate(service_writeBatchRaw_errors{instance=~\"$instance\"}[1m])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Service Reads / Writes / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 13 + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias":
"divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + }, + { + "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "service_writeBatchRaw_latency{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.813148562053182, + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.writebatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Side Write Latency (p99)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 13 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + 
"nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + }, + { + "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "service_fetchBatchRaw_latency{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.37467787057058555, + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Side Fetch Latency (p99)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Un-Tagged RPC", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 93, + "panels": [], + "repeat": null, + "title": "Tagged RPC", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "id": 81, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "smoothed-length", + "color": "#BF1B00", + "fill": 0, + "linewidth": 5 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(service_writeTaggedBatchRaw_success{instance=~\"$instance\"}[$step]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "legendFormat": "writeTaggedSuccess", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": "sum(rate(service_writeTaggedBatchRaw_errors{instance=~\"$instance\"}[$step]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "legendFormat": "writeTaggedErrors", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": "sum(rate(service_fetchTaggedBatchRaw_success{instance=~\"$instance\"}[$step]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "legendFormat": "fetchTaggedSuccess", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + }, + { + "expr": 
"sum(rate(service_fetchTaggedBatchRaw_errors{instance=~\"$instance\"}[$step]))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.666035411738013, + "legendFormat": "fetchTaggedErrors", + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Service Reads / Writes / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 14 + }, + "id": 82, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + }, + { + "alias": 
"divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "service_writeTaggedBatchRaw_latency{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.813148562053182, + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.writetaggedbatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Side Write Latency (p99)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 14 + }, + "id": 83, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": 
"divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + }, + { + "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "service_fetchTagged_success_latency{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.37467787057058555, + "legendFormat": "{{instance}}", + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success-latency timertype:p99 host:$servers | aliasByTags host name timertype" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Side Fetch Latency (p99)", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 94, + "panels": [], + "repeat": null, + "title": "CPU and Memory Utilization", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + 
"x": 0, + "y": 22 + }, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(process_cpu_seconds_total{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "key": 0.7090064740553321, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch name:node_load1 dc:$dc host:$servers | aliasByTags host name", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Process CPU Seconds", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"process_resident_memory_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.7090064740553321, + "legendFormat": "resident-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_node_exporter name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host\n| diff (fetch name:node_memory_memavailable dc:$dc host:$servers | keepLastValue | sum host)\n| asPercent (fetch name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host) | aliasByTags host", + "textEditor": true + }, + { + "expr": "process_virtual_memory_bytes{instance=~\"$instance\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "key": 0.7090064740553321, + "legendFormat": "virtual-{{instance}}", + "refId": "B", + "target": "fetch service:statsdex_node_exporter name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host\n| diff (fetch name:node_memory_memavailable dc:$dc host:$servers | keepLastValue | sum host)\n| asPercent (fetch name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host) | aliasByTags host", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Utilization - Resident", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 22 + }, + "id": 39, + "legend": { 
+ "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "aliasByNode(sortByName(servers.$servers.disk.internal-root.available_bytes), 1)", + "textEditor": false + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Free Space - Not Implemented", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 29 + }, + "id": 61, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "process_num_fds{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.4107640408210529, + "legendFormat": 
"{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:process.num-fds host:$servers | aliasByTags host | transformNull" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Num File Descriptors", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 29 + }, + "id": 49, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "runtime_num_goroutines{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.5498548297308425, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:runtime.num-goroutines host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Num Go Routines", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 95, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_bootstrapped{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.5232381035821441, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bootstrapped", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + 
"editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 37 + }, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_tick_duration{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.045140271498789186, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.tick.duration timertype:p99 host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Tick Duration", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + 
"seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(database_tick_active_series{instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 1, + "key": 0.7552494727739296, + "legendFormat": "totalActiveSeries", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:database.status.active-series host:$servers | movingAverage 2m | sum namespace | scale 0.333333 | alias {{.namespace}} unique-series" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Ticking", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 44 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_fs_flush{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.025515486279266364, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.fs.flush host:$servers | aliasByTags 
host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Flushing", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_fs_snapshot{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.fs.snapshot host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Snapshotting", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + 
"alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 51 + }, + "id": 75, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_fs_index_flush{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.fs.index-flush host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Index Flushing", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Background Tasks", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 96, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 0, + "y": 59 + }, + "id": 38, + "legend": { + "avg": false, + "current": false, + 
"max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_tick_wired_blocks{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.6091384812937135, + "legendFormat": "wiredBlocks-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.tick.{wired,unwired}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" + }, + { + "expr": "database_tick_unwired_blocks{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.6091384812937135, + "legendFormat": "unwiredBlocks-{{instance}}", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:database.tick.{wired,unwired,open,active}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" + }, + { + "expr": "database_tick_active_blocks{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "activeBlocks-{{instance}}", + "refId": "C" + }, + { + "expr": "database_tick_open_blocks{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "openBlocks-{{instance}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Wired and Unwired Blocks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": 
{ + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 6, + "y": 59 + }, + "id": 46, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(database_tick_made_expired_blocks{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "key": 0.12716494400774314, + "legendFormat": "madeExpiredBlocks-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.tick.{made-unwired,made-wired,merged-out-of-order}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" + }, + { + "expr": "rate(database_tick_made_unwired_blocks{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "madeUnwiredBlocks-{{instance}}", + "refId": "B" + }, + { + "expr": "rate(database_tick_merged_out_of_order_blocks{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "mergedOutOfOrderBlocks-{{instance}}", + "refId": "C" + }, + { + "expr": "rate(database_tick_made_wired_blocks{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "madeWiredBlocks-{{instance}}", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Made Wired/Unwired & Merged Out of Order Blocks", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": 
"graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 59 + }, + "id": 80, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(database_series_encoder_created{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "key": 0.12716494400774314, + "legendFormat": "seriesEncoderCreated-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.series.encoder-created host:$servers | scaleToSeconds 1 | aliasByTags host name" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "New Encoder Created / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + 
"aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 59 + }, + "id": 40, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(cluster_shards_available{instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8040141995856238, + "legendFormat": "available", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" + }, + { + "expr": "sum(cluster_shards_initializing{instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8040141995856238, + "legendFormat": "initializing", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" + }, + { + "expr": "sum(cluster_shards_leaving{instance=~\"$instance\"})", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8040141995856238, + "legendFormat": "leaving", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Shard State", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format":
"short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 64 + }, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "wired_list_limit{instance=~\"$instance\"}", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:wired-list.{limit,evicted,unwireable} host:$servers | aliasByTags host name" + }, + { + "expr": "rate(wired_list_evicted{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "refId": "B" + }, + { + "expr": "wired_list_unwireable{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Wired List Capacity / Size / Evictions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 5, + "w": 10, + "x": 12, + "y": 64 + }, + "id": 63, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "wired_list_evicted_after_duration{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:wired-list.* host:$servers timertype:p99" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Time spent in Wired List - P99", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Wired & Unwired Blocks", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 97, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 39 + }, + "id": 15, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "key": 0.7090064740553321, + "refId": "A", + "target": "aliasByNode(scale(perSecond(sortByName(servers.$servers.network.eth0_rx_bytes)), 8), 1, 3)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network In", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 39 + }, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "key": 0.7090064740553321, + "refId": "A", + "target": "aliasByNode(scale(perSecond(sortByName(servers.$servers.network.eth0_tx_bytes)), 8), 1, 3)", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Out", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, 
+ "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bps", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Network In/Out - Not Implemented", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 98, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 48, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(database_write_tagged_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.05448509070742946, + "legendFormat": "{{namespace}}_tagged_success", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:database.write.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-success", + "textEditor": true + }, + { + "expr": "sum(rate(database_write_tagged_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_tagged_errors", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode 
name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + }, + { + "expr": "sum(rate(database_write_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_success", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + }, + { + "expr": "sum(rate(database_write_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_errors", + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Writes / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 61 + }, + "id": 109, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(database_read_tagged_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.05448509070742946, + "legendFormat": "{{namespace}}_tagged_success", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:database.write.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-success", + "textEditor": true + }, + { + "expr": "sum(rate(database_read_tagged_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_tagged_errors", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + }, + { + "expr": "sum(rate(database_read_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_success", + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + }, + { + "expr": "sum(rate(database_read_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_errors", + "refId": "D", + "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Reads / Second", + "tooltip": { + "shared": false, +
"sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 68 + }, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(database_queryIDs_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.05448509070742946, + "legendFormat": "{{namespace}}_query_ids_success", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:database.queryIDs.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-tagged-success", + "textEditor": true + }, + { + "expr": "sum(rate(database_queryIDs_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "key": 0.8767351594065642, + "legendFormat": "{{namespace}}_query_ids_errors", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.queryIDs.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias 
{{.namespace}}-write-tagged-errors" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "QueryIDs / Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 75 + }, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(database_flush_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.44937754638999294, + "legendFormat": "{{namespace}}_flush_success", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:database.flush.success host:$servers | scaleToSeconds 1 | sum name namespace | aliasByTags name namespace | removeEmpty", + "textEditor": true + }, + { + "expr": "sum(increase(database_flush_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.44937754638999294, + "legendFormat": "{{namespace}}_flush_errors", + 
"refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.flush.errors host:$servers | scaleToSeconds 1 | sum name namespace | aliasByTags name namespace | removeEmpty ", + "textEditor": true + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Flush", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 75 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(database_index_flush_success{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "key": 0.44937754638999294, + "legendFormat": "{{namespace}}_index_flush_success", + "refId": "C", + "target": "fetch service:statsdex_m3dbnode name:database.index-flush.success host:$servers | scaleToSeconds 1 | sum name namespace |removeEmpty| aliasByTags name namespace ", + "textEditor": true + }, + { + "expr": 
"sum(increase(database_index_flush_errors{instance=~\"$instance\"}[$step])) by (namespace)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{namespace}}_index_flush_errors", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "IndexFlush", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Namespace", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 100, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 62 + }, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "m3dbclient_stream_from_peers_fetch_blocks_inprogress{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.26636320136208314, + "legendFormat": "fetchBlocksInProgress-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-blocks-inprogress host:$servers | sum
host | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks In Progress", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 62 + }, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "m3dbclient_stream_from_peers_fetch_metadata_peers_inprogress{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.6565937560035238, + "legendFormat": "fetchBlocksMetadataInProgress-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-inprogress host:$servers | sum host | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks Metadata In Progress", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] +
}, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 62 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "m3dbclient_stream_from_peers_fetch_blocks_enqueue_channel_length{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.024380707546677316, + "legendFormat": "fetchBlocksEnqueueChannelLength-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-blocks-enqueue-channel-length host:$servers | sum host | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks Enqueue Channel Length", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 
10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 69 + }, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_metadata_peers_batch_call{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8052472520531426, + "legendFormat": "fetchBlocksMetadataBatchCalls-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-batch-call host:$servers | sum host | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks Metadata Batch Calls / Sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 69 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": 
false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_metadata_peers_received{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.359145903986988, + "legendFormat": "fetchBlocksMetadataReceived-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-received host:$servers | sum host | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks Metadata Received / Sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 69 + }, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, +
"stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_metadata_peers_peer_retry{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.359145903986988, + "legendFormat": "fetchBlocksMetadataPaginationRetry-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-peer-retry host:$servers | sum host | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Shard Blocks Metadata Pagination Retry / Sec", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Peers Bootstrapping", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 101, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 42 + }, + "id": 43, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"rate(service_overload_rejected{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "key": 0.05455062296227564, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:service.overload-rejected host:$servers | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Overloaded", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 42 + }, + "id": 44, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_fs_persist_write_duration_ms{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.8368215616799846, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.fs.persist.write-duration-ms host:$servers | aliasByTags host name" + }, + { + "expr": "database_fs_persist_throttle_duration_ms{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.2814048282536148, + "refId": "B", + "target": "fetch
service:statsdex_m3dbnode name:database.fs.persist.throttle-duration-ms host:$servers | aliasByTags host name" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Persistence", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Overload/Persistence", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 102, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 78 + }, + "id": 31, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_block_success{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.4195302108331671, + "legendFormat": "fetchBlockSuccess-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-success host:$servers | sum host | transformNull | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Fetch Block Success/Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 78 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_block_error{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.7411861401478703, + "legendFormat": "fetchBlockError-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-error host:$servers | sum host | transformNull | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Block Error/Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + 
"logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 78 + }, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_block_full_retry{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.3848186046051656, + "legendFormat": "fetchBlockFullRetry-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-full-retry host:$servers | sum host | transformNull | scaleToSeconds 1 | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Block Full-Retry/Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 78 + }, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(m3dbclient_stream_from_peers_fetch_block_final_error{instance=~\"$instance\"}[$step])) by (instance)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.3848186046051656, + "legendFormat": "fetchBlockFinalError-{{instance}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-final-error host:$servers | sum host | transformNull | aliasByTags host" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Block Final Error/Second", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Fetch Blocks Detail", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 103, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 
12, + "x": 0, + "y": 79 + }, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "[[PoolType]]_free{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "key": 0.4590639612241416, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:$PoolType.{free,put-on-full} host:$servers | aliasByTags host name bucket-capacity" + }, + { + "expr": "[[PoolType]]_put_on_full{instance=~\"$instance\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "key": 0.4590639612241416, + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:$PoolType.{free,put-on-full} host:$servers | aliasByTags host name bucket-capacity" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free & Put On Full", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 79 + }, + "id": 47, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": 
false + }, + "lines": false, + "linewidth": 2, + "links": [], + "maxDataPoints": "", + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "", + "format": "time_series", + "intervalFactor": 1, + "key": 0.3914310519012809, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:$PoolType.{total,get-on-empty} host:$servers | keepLastValue | sum bucket-capacity | diff (fetch service:statsdex_m3dbnode name:$PoolType.{put-on-full,free} host:$servers | sum bucket-capacity) | asPercent (fetch service:statsdex_m3dbnode name:$PoolType.total host:$servers | keepLastValue | sum bucket-capacity) | alias bucket-capacity-{{.bucket-capacity}}" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "% Pool Used - Not Implemented", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Pooling", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 44 + }, + "id": 106, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 57, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(build_information{}) by (go_version)", + "format": "time_series", + "intervalFactor": 1, + "key": 0.7855942131678251, + "legendFormat": "{{go_version}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | removeEmpty | sum go-version | alias {{.go-version}}" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Runtime Version", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 58, + "legend": { + "avg": false, + "current": false, + "hideEmpty": true, + "hideZero": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "6fafa81", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(build_information{}) by (revision)", + "format": "time_series", + "intervalFactor": 1, + "key": 
0.5783520603949805, + "legendFormat": "{{revision}}", + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum revision | aliasByTags revision" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Git Revision", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Build Versions", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 45 + }, + "id": 107, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 46 + }, + "id": 69, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "name:index-error error_type:async-insert | sum", + "yaxis": 2 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(dbindex_insert_queue_index_queue_num_pending{instance=~\"$instance\"}[$step])", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.insert-queue.index-queue.num-pending host:$servers | scaleToSeconds 1 | sum | alias pending-indexing" + }, + { + "expr": 
"rate(dbindex_index_error{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:dbindex.index-error host:$servers | scaleToSeconds 1 | sum name error_type" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Index Queue", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 46 + }, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dbindex_insert_end_to_end_latency{instance=~\"$instance\",quantile=\"0.99\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.insert-end-to-end-latency host:$servers timertype:{median,p99} | keepLastValue |aliasByTags host timertype" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Indexing End-to-End Latency - P99", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Index Queue", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 108, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 47 + }, + "id": 85, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_tick_index_num_docs{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.num-docs host:$servers type:gauge | sum segment_type host | alias {{.host}}-{{.segment_type}}-num-docs" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Num Docs", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } 
+ ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 47 + }, + "id": 86, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "database_tick_index_num_segments{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.num-segments host:$servers type:gauge | sum segment_type host | alias {{.host}}-{{.segment_type}}-num-segments" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Num Segments", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 54 + }, + "id": 87, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + 
"percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "dbindex_num_active_compactions{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.num-active-compactions host:$servers type:gauge | alias {{.host}}-num-active-compactions" + }, + { + "expr": "dbindex_num_segments_compacting{instance=~\"$instance\"}", + "format": "time_series", + "hide": true, + "intervalFactor": 1, + "refId": "B", + "target": "fetch service:statsdex_m3dbnode name:dbindex.num-segments-compacting host:$servers type:gauge | alias {{.host}}-num-segments-compacting" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Num Compactions", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 54 + }, + "id": 88, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": 
"dbindex_compaction_latency{instance=~\"$instance\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:dbindex.compaction-latency host:$servers timertype:{upper} | keepLastValue | alias {{.host}}-compaction-latency-{{.timertype}}" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Compaction Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 61 + }, + "id": 74, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "increase(database_tick_index_num_blocks_evicted{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "refId": "A", + "target": "fetch service:statsdex_m3dbnode name:database.tick.index.num-blocks-{evicted,sealed} host:$servers | aliasByTags host namespace name" + }, + { + "expr": "increase(database_tick_index_num_blocks_sealed{instance=~\"$instance\"}[$step])", + "format": "time_series", + "intervalFactor": 1, + "refId": "B", + "target": 
"fetch service:statsdex_m3dbnode name:database.tick.index.num-blocks-{evicted,sealed} host:$servers | aliasByTags host namespace name" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Evicted/Sealed blocks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": null, + "title": "Index Stats", + "type": "row" + } + ], + "refresh": false, + "schemaVersion": 16, + "style": "dark", + "tags": [ + "disable-sync" + ], + "templating": { + "list": [ + { + "allFormat": "glob", + "allValue": null, + "current": { + "selected": true, + "text": "bytes_pool", + "value": "bytes_pool" + }, + "datasource": "M3_meta", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "multiFormat": "glob", + "name": "PoolType", + "options": [ + { + "selected": false, + "text": "All", + "value": "$__all" + }, + { + "selected": false, + "text": "series_pool", + "value": "series_pool" + }, + { + "selected": false, + "text": "block_pool", + "value": "block_pool" + }, + { + "selected": false, + "text": "encoder_pool", + "value": "encoder_pool" + }, + { + "selected": false, + "text": "context_pool", + "value": "context_pool" + }, + { + "selected": false, + "text": "iterator_pool", + "value": "iterator_pool" + }, + { + "selected": false, + "text": "multi_iterator_pool", + "value": "multi_iterator_pool" + }, + { + "selected": false, + "text": "segment_reader_pool", + "value": "segment_reader_pool" + }, + { + "selected": true, + "text": "bytes_pool", + "value": "bytes_pool" + }, + { + "selected": false, + 
"text": "fetch_block_metadata_results_pool", + "value": "fetch_block_metadata_results_pool" + }, + { + "selected": false, + "text": "fetch_blocks_metadata_results_pool", + "value": "fetch_blocks_metadata_results_pool" + }, + { + "selected": false, + "text": "block_metadata_pool", + "value": "block_metadata_pool" + }, + { + "selected": false, + "text": "block_metadata_slice_pool", + "value": "block_metadata_slice_pool" + }, + { + "selected": false, + "text": "blocks_metadata_pool", + "value": "blocks_metadata_pool" + }, + { + "selected": false, + "text": "blocks_metadata_slice_pool", + "value": "blocks_metadata_slice_pool" + }, + { + "selected": false, + "text": "host_block_metadata_slice_pool", + "value": "host_block_metadata_slice_pool" + }, + { + "selected": false, + "text": "identifier_pool", + "value": "identifier_pool" + } + ], + "query": "series_pool,block_pool,encoder_pool,context_pool,iterator_pool,multi_iterator_pool,segment_reader_pool,bytes_pool,fetch_block_metadata_results_pool,fetch_blocks_metadata_results_pool,block_metadata_pool,block_metadata_slice_pool,blocks_metadata_pool,blocks_metadata_slice_pool,host_block_metadata_slice_pool,identifier_pool", + "refresh": 0, + "type": "custom" + }, + { + "allValue": null, + "current": { + "selected": true, + "text": "30s", + "value": "30s" + }, + "hide": 0, + "includeAll": false, + "label": "step", + "multi": false, + "name": "step", + "options": [ + { + "selected": true, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + } + ], + "query": "30s,1m,5m,10m", + "type": "custom" + }, + { + "allValue": "*", + "current": { + "selected": true, + "text": ".*", + "value": ".*" + }, + "hide": 0, + "includeAll": false, + "label": "instance", + "multi": false, + "name": "instance", + "options": [ + { + "selected": true, + "text": ".*", + "value": ".*" 
+ } + ], + "query": ".*", + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "M3DB Node Details", + "uid": "99SFck0iz", + "version": 4 +} \ No newline at end of file From ef26fa0e60ce9605980c5dc136d480d5491e0326 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:21:27 -0400 Subject: [PATCH 18/26] Add validations --- scripts/development/m3_stack/start.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/development/m3_stack/start.sh b/scripts/development/m3_stack/start.sh index c94c65da50..32dd071049 100755 --- a/scripts/development/m3_stack/start.sh +++ b/scripts/development/m3_stack/start.sh @@ -32,6 +32,10 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{ }' echo "Done initializing namespace" +echo "Validating namespace" +[ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.prometheus_metrics.indexOptions.enabled)" == true ] +echo "Done validating namespace" + echo "Initializing topology" curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ "num_shards": 64, @@ -68,6 +72,10 @@ curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{ }' echo "Done initializing topology" +echo "Validating topology" +[ "$(curl -sSf localhost:7201/api/v1/placement | jq .placement.instances.m3db_seed.id)" == '"m3db_seed"' ] +echo "Done validating topology" + echo "Prometheus available at localhost:9090" echo "Grafana available at localhost:3000" echo "Run ./stop.sh to shutdown nodes when done" \ No newline at end of file From 1948889f6dac5bc71655852ad442617bce292fe2 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:24:47 -0400 Subject: [PATCH 19/26] Remove statsdex 
stuff from grafana --- integrations/grafana/m3db_dashboard.json | 129 ----------------------- 1 file changed, 129 deletions(-) diff --git a/integrations/grafana/m3db_dashboard.json b/integrations/grafana/m3db_dashboard.json index 69455be2e1..09e383276c 100644 --- a/integrations/grafana/m3db_dashboard.json +++ b/integrations/grafana/m3db_dashboard.json @@ -140,7 +140,6 @@ "intervalFactor": 1, "key": 0.3593853225333008, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | >=1 | removeEmpty | count" } ], "thresholds": "", @@ -233,7 +232,6 @@ "intervalFactor": 1, "key": 0.08874521907599053, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | <1 | removeEmpty | count" } ], "thresholds": "", @@ -325,7 +323,6 @@ "intervalFactor": 1, "key": 0.25234693632915994, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum revision | removeEmpty | count" } ], "thresholds": "", @@ -417,7 +414,6 @@ "intervalFactor": 1, "key": 0.08534584879068197, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum go-version | removeEmpty | count" } ], "thresholds": "", @@ -504,7 +500,6 @@ "intervalFactor": 1, "key": 0.09635984016153087, "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:commitlog.writes.queued host:$servers | aliasByTags name host", "textEditor": true } ], @@ -600,7 +595,6 @@ "key": 0.7940843773344208, "legendFormat": "writes", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:commitlog.writes.success host:$servers | \n sumSeries | \n scaleToSeconds 1 | \n aliasByTags name host", "textEditor": true } ], @@ -677,16 +671,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - }, - { - "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - } - ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -699,7 +683,6 @@ "key": 0.6661769838428799, "legendFormat": "pendingWrite-{{pending_write}}", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:dbshard.insert-queue.inserts host:$servers | scaleToSeconds 1 | sum pending-write", "textEditor": true } ], @@ -805,7 +788,6 @@ "intervalFactor": 1, "key": 0.666035411738013, "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -815,7 +797,6 @@ "intervalFactor": 1, "key": 0.666035411738013, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -825,7 +806,6 @@ "intervalFactor": 1, "key": 0.666035411738013, "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -835,7 +815,6 @@ "intervalFactor": 1, "key": 0.666035411738013, "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true } ], @@ -912,16 +891,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - }, - { - "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - } - ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -932,7 +901,6 @@ "intervalFactor": 1, "key": 0.813148562053182, "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.writebatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" } ], "thresholds": [], @@ -1008,16 +976,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - }, - { - "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - } - ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -1028,7 +986,6 @@ "intervalFactor": 1, "key": 0.37467787057058555, "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.fetchbatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" } ], "thresholds": [], @@ -1143,7 +1100,6 @@ "key": 0.666035411738013, "legendFormat": "writeTaggedSuccess", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | 
scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -1154,7 +1110,6 @@ "key": 0.666035411738013, "legendFormat": "writeTaggedErrors", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -1165,7 +1120,6 @@ "key": 0.666035411738013, "legendFormat": "fetchTaggedSuccess", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true }, { @@ -1176,7 +1130,6 @@ "key": 0.666035411738013, "legendFormat": "fetchTaggedErrors", "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success host:$servers | scaleToSeconds 1 | aliasByTags host name", "textEditor": true } ], @@ -1253,16 +1206,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": "divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - }, - { - "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - } - ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -1273,7 +1216,6 @@ "intervalFactor": 1, "key": 0.813148562053182, "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.writetaggedbatchraw.latency timertype:p99 host:$servers | aliasByTags host name timertype" } ], "thresholds": [], @@ -1349,16 +1291,6 @@ "pointradius": 5, "points": false, "renderer": "flot", - "seriesOverrides": [ - { - "alias": 
"divideSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - }, - { - "alias": "divideSeries(diffSeries(sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.mismatches))),sumSeries(perSecond(statsdex.metrics.query.mquery01-sjc1.storage.m3dbshadow.matches)))", - "yaxis": 2 - } - ], "spaceLength": 10, "stack": false, "steppedLine": false, @@ -1370,7 +1302,6 @@ "key": 0.37467787057058555, "legendFormat": "{{instance}}", "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:service.fetchtagged.success-latency timertype:p99 host:$servers | aliasByTags host name timertype" } ], "thresholds": [], @@ -1561,7 +1492,6 @@ "key": 0.7090064740553321, "legendFormat": "resident-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_node_exporter name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host\n| diff (fetch name:node_memory_memavailable dc:$dc host:$servers | keepLastValue | sum host)\n| asPercent (fetch name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host) | aliasByTags host", "textEditor": true }, { @@ -1572,7 +1502,6 @@ "key": 0.7090064740553321, "legendFormat": "virtual-{{instance}}", "refId": "B", - "target": "fetch service:statsdex_node_exporter name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host\n| diff (fetch name:node_memory_memavailable dc:$dc host:$servers | keepLastValue | sum host)\n| asPercent (fetch name:node_memory_memtotal dc:$dc host:$servers | keepLastValue | sum host) | aliasByTags host", "textEditor": true } ], @@ -1746,7 +1675,6 @@ "key": 0.4107640408210529, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:process.num-fds host:$servers | aliasByTags host | transformNull" } 
], "thresholds": [], @@ -1833,7 +1761,6 @@ "key": 0.5498548297308425, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:runtime.num-goroutines host:$servers | aliasByTags host" } ], "thresholds": [], @@ -1930,7 +1857,6 @@ "key": 0.5232381035821441, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.bootstrapped host:$servers | aliasByTags host" } ], "thresholds": [], @@ -2017,7 +1943,6 @@ "key": 0.045140271498789186, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.tick.duration timertype:p99 host:$servers | aliasByTags host" } ], "thresholds": [], @@ -2104,7 +2029,6 @@ "key": 0.7552494727739296, "legendFormat": "totalActiveSeries", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:database.status.active-series host:$servers | movingAverage 2m | sum namespace | scale 0.333333 | alias {{.namespace}} unique-series" } ], "thresholds": [], @@ -2191,7 +2115,6 @@ "key": 0.025515486279266364, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.fs.flush host:$servers | aliasByTags host" } ], "thresholds": [], @@ -2274,7 +2197,6 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.fs.snapshot host:$servers | aliasByTags host" } ], "thresholds": [], @@ -2359,7 +2281,6 @@ "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.fs.index-flush host:$servers | aliasByTags host" } ], "thresholds": [], @@ -2463,7 +2384,6 @@ "key": 0.6091384812937135, "legendFormat": "wiredBlocks-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.tick.{wired,unwired}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" }, { "expr": 
"database_tick_unwired_blocks{instance=~\"$instance\"}", @@ -2472,7 +2392,6 @@ "key": 0.6091384812937135, "legendFormat": "unwiredBlocks-{{instance}}", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:database.tick.{wired,unwired,open,active}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" }, { "expr": "database_tick_active_blocks{instance=~\"$instance\"}", @@ -2573,7 +2492,6 @@ "key": 0.12716494400774314, "legendFormat": "madeExpiredBlocks-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.tick.{made-unwired,made-wired,merged-out-of-order}-blocks host:$servers | scaleToSeconds 1 | aliasByTags host namespace name" }, { "expr": "rate(database_tick_made_unwired_blocks{instance=~\"$instance\"}[$step])", @@ -2681,7 +2599,6 @@ "key": 0.12716494400774314, "legendFormat": "seriesEncoderCreated-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.series.encoder-created host:$servers | scaleToSeconds 1 | aliasByTags host name" } ], "thresholds": [], @@ -2768,7 +2685,6 @@ "key": 0.8040141995856238, "legendFormat": "available", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" }, { "expr": "sum(cluster_shards_initializing{instace=~\"$instance\"})", @@ -2777,7 +2693,6 @@ "key": 0.8040141995856238, "legendFormat": "initializing", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" }, { "expr": "sum(cluster_shards_leaving{instace=~\"$instance\"})", @@ -2786,7 +2701,6 @@ "key": 0.8040141995856238, "legendFormat": "leaving", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:cluster.shards.* host:$servers | aliasByTags host name" } ], "thresholds": [], @@ -2869,7 +2783,6 @@ "hide": false, "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:wired-list.{limit,evicted,unwireable} 
host:$servers | aliasByTags host name" }, { "expr": "rate(wired_list_evicted{instance=~\"$instance\"}[$step])", @@ -2965,7 +2878,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:wired-list.* host:$servers timertype:p99" } ], "thresholds": [], @@ -3253,7 +3165,6 @@ "key": 0.05448509070742946, "legendFormat": "{{namespace}}_tagged_success", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:database.write.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-success", "textEditor": true }, { @@ -3263,7 +3174,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_tagged_errors", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" }, { "expr": "sum(rate(database_write_success{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3272,7 +3182,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_success", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" }, { "expr": "sum(rate(database_write_errors{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3281,7 +3190,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_errors", "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" } ], "thresholds": [], @@ -3370,7 +3278,6 @@ "key": 0.05448509070742946, "legendFormat": "{{namespace}}_tagged_success", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:database.write.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-success", "textEditor": true }, { @@ -3380,7 +3287,6 @@ "key": 0.8767351594065642, "legendFormat": 
"{{namespace}}_tagged_errors", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" }, { "expr": "sum(rate(database_read_success{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3389,7 +3295,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_success", "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" }, { "expr": "sum(rate(database_reads_errors{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3398,7 +3303,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_errors", "refId": "D", - "target": "fetch service:statsdex_m3dbnode name:database.write.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-errors" } ], "thresholds": [], @@ -3487,7 +3391,6 @@ "key": 0.05448509070742946, "legendFormat": "{{namespace}}_query_ids_success", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:database.queryIDs.success host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-tagged-success", "textEditor": true }, { @@ -3498,7 +3401,6 @@ "key": 0.8767351594065642, "legendFormat": "{{namespace}}_query_ids_errors", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.queryIDs.errors host:$servers | scaleToSeconds 1 | sum name namespace | alias {{.namespace}}-write-tagged-errors" } ], "thresholds": [], @@ -3587,7 +3489,6 @@ "key": 0.44937754638999294, "legendFormat": "{{namespace}}_flush_success", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:database.flush.success host:$servers | scaleToSeconds 1 | sum name namespace | aliasByTags name namespace | removeEmpty", "textEditor": true }, { @@ -3598,7 +3499,6 @@ "key": 0.44937754638999294, "legendFormat": "{{namespace}}_flush_errors", "refId": "A", - 
"target": "fetch service:statsdex_m3dbnode name:database.flush.errors host:$servers | scaleToSeconds 1 | sum name namespace | aliasByTags name namespace | removeEmpty ", "textEditor": true } ], @@ -3688,7 +3588,6 @@ "key": 0.44937754638999294, "legendFormat": "{{namespace}}_index_flush_success", "refId": "C", - "target": "fetch service:statsdex_m3dbnode name:database.index-flush.success host:$servers | scaleToSeconds 1 | sum name namespace |removeEmpty| aliasByTags name namespace ", "textEditor": true }, { @@ -3799,7 +3698,6 @@ "key": 0.26636320136208314, "legendFormat": "fetchBlocksInProgress-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-blocks-inprogress host:$servers | sum host | aliasByTags host" } ], "thresholds": [], @@ -3886,7 +3784,6 @@ "key": 0.6565937560035238, "legendFormat": "fetchBlocksMetadataInProgress-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-inprogress host:$servers | sum host | aliasByTags host" } ], "thresholds": [], @@ -3973,7 +3870,6 @@ "key": 0.024380707546677316, "legendFormat": "fetchBlocksEnqueueChannelLength-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-blocks-enqueue-channel-length host:$servers | sum host | aliasByTags host" } ], "thresholds": [], @@ -4060,7 +3956,6 @@ "key": 0.8052472520531426, "legendFormat": "fetchBlocksMetadataBatchCalls-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-batch-call host:$servers | sum host | scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4147,7 +4042,6 @@ "key": 0.359145903986988, "legendFormat": "fetchBlocksMetadataReceived-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-received host:$servers | sum host | 
scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4234,7 +4128,6 @@ "key": 0.359145903986988, "legendFormat": "fetchBlocksMetadataPaginationRetry-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-metadata-peers-peer-retry host:$servers | sum host | scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4335,7 +4228,6 @@ "intervalFactor": 1, "key": 0.05455062296227564, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:service.overload-rejected host:$servers | aliasByTags host" } ], "thresholds": [], @@ -4421,7 +4313,6 @@ "intervalFactor": 1, "key": 0.8368215616799846, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.fs.persist.write-duration-ms host:$servers | aliasByTags host name" }, { "expr": "database_fs_persist_throttle_duration_ms{instance=~\"$instance\"}", @@ -4429,7 +4320,6 @@ "intervalFactor": 1, "key": 0.2814048282536148, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.fs.persist.throttle-duration-ms host:$servers | aliasByTags host name" } ], "thresholds": [], @@ -4531,7 +4421,6 @@ "key": 0.4195302108331671, "legendFormat": "fetchBlockSuccess-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-success host:$servers | sum host | transformNull | scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4618,7 +4507,6 @@ "key": 0.7411861401478703, "legendFormat": "fetchBlockError-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-error host:$servers | sum host | transformNull | scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4705,7 +4593,6 @@ "key": 0.3848186046051656, "legendFormat": "fetchBlockFullRetry-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-full-retry host:$servers | sum 
host | transformNull | scaleToSeconds 1 | aliasByTags host" } ], "thresholds": [], @@ -4792,7 +4679,6 @@ "key": 0.3848186046051656, "legendFormat": "fetchBlockFinalError-{{instance}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:m3dbclient.stream-from-peers.fetch-block-final-error host:$servers | sum host | transformNull | aliasByTags host" } ], "thresholds": [], @@ -4893,7 +4779,6 @@ "intervalFactor": 1, "key": 0.4590639612241416, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:$PoolType.{free,put-on-full} host:$servers | aliasByTags host name bucket-capacity" }, { "expr": "[[PoolType]]_put_on_full{instance=~\"$instance\"}", @@ -4902,7 +4787,6 @@ "intervalFactor": 1, "key": 0.4590639612241416, "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:$PoolType.{free,put-on-full} host:$servers | aliasByTags host name bucket-capacity" } ], "thresholds": [], @@ -4989,7 +4873,6 @@ "intervalFactor": 1, "key": 0.3914310519012809, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:$PoolType.{total,get-on-empty} host:$servers | keepLastValue | sum bucket-capacity | diff (fetch service:statsdex_m3dbnode name:$PoolType.{put-on-full,free} host:$servers | sum bucket-capacity) | asPercent (fetch service:statsdex_m3dbnode name:$PoolType.total host:$servers | keepLastValue | sum bucket-capacity) | alias bucket-capacity-{{.bucket-capacity}}" } ], "thresholds": [], @@ -5091,7 +4974,6 @@ "key": 0.7855942131678251, "legendFormat": "{{go_version}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | removeEmpty | sum go-version | alias {{.go-version}}" } ], "thresholds": [], @@ -5185,7 +5067,6 @@ "key": 0.5783520603949805, "legendFormat": "{{revision}}", "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:build-information host:$servers | sum revision | aliasByTags revision" } ], "thresholds": [], @@ -5288,14 +5169,12 @@ "hide": false, "intervalFactor": 1, "refId": "A", - 
"target": "fetch service:statsdex_m3dbnode name:dbindex.insert-queue.index-queue.num-pending host:$servers | scaleToSeconds 1 | sum | alias pending-indexing" }, { "expr": "rate(dbindex_index_error{instance=~\"$instance\"}[$step])", "format": "time_series", "intervalFactor": 1, "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:dbindex.index-error host:$servers | scaleToSeconds 1 | sum name error_type" } ], "thresholds": [], @@ -5379,7 +5258,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:dbindex.insert-end-to-end-latency host:$servers timertype:{median,p99} | keepLastValue |aliasByTags host timertype" } ], "thresholds": [], @@ -5478,7 +5356,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:dbindex.num-docs host:$servers type:gauge | sum segment_type host | alias {{.host}}-{{.segment_type}}-num-docs" } ], "thresholds": [], @@ -5562,7 +5439,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:dbindex.num-segments host:$servers type:gauge | sum segment_type host | alias {{.host}}-{{.segment_type}}-num-segments" } ], "thresholds": [], @@ -5646,7 +5522,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:dbindex.num-active-compactions host:$servers type:gauge | alias {{.host}}-num-active-compactions" }, { "expr": "dbindex_num_segments_compacting{instance=~\"$instance\"}", @@ -5654,7 +5529,6 @@ "hide": true, "intervalFactor": 1, "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:dbindex.num-segments-compacting host:$servers type:gauge | alias {{.host}}-num-segments-compacting" } ], "thresholds": [], @@ -5738,7 +5612,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:dbindex.compaction-latency host:$servers timertype:{upper} | 
keepLastValue | alias {{.host}}-compaction-latency-{{.timertype}}" } ], "thresholds": [], @@ -5822,14 +5695,12 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "fetch service:statsdex_m3dbnode name:database.tick.index.num-blocks-{evicted,sealed} host:$servers | aliasByTags host namespace name" }, { "expr": "increase(database_tick_index_num_blocks_sealed{instance=~\"$instance\"}[$step])", "format": "time_series", "intervalFactor": 1, "refId": "B", - "target": "fetch service:statsdex_m3dbnode name:database.tick.index.num-blocks-{evicted,sealed} host:$servers | aliasByTags host namespace name" } ], "thresholds": [], From 36504dfd1ece4f812850f1f18481229f73dbe309 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 14:28:56 -0400 Subject: [PATCH 20/26] More dashboard JSON cleanup --- integrations/grafana/m3db_dashboard.json | 125 +++++++++++------------ 1 file changed, 60 insertions(+), 65 deletions(-) diff --git a/integrations/grafana/m3db_dashboard.json b/integrations/grafana/m3db_dashboard.json index 09e383276c..5d68e8ab53 100644 --- a/integrations/grafana/m3db_dashboard.json +++ b/integrations/grafana/m3db_dashboard.json @@ -139,7 +139,7 @@ "instant": true, "intervalFactor": 1, "key": 0.3593853225333008, - "refId": "A", + "refId": "A" } ], "thresholds": "", @@ -231,7 +231,7 @@ "instant": true, "intervalFactor": 1, "key": 0.08874521907599053, - "refId": "A", + "refId": "A" } ], "thresholds": "", @@ -322,7 +322,7 @@ "instant": true, "intervalFactor": 1, "key": 0.25234693632915994, - "refId": "A", + "refId": "A" } ], "thresholds": "", @@ -413,7 +413,7 @@ "instant": true, "intervalFactor": 1, "key": 0.08534584879068197, - "refId": "A", + "refId": "A" } ], "thresholds": "", @@ -900,7 +900,7 @@ "format": "time_series", "intervalFactor": 1, "key": 0.813148562053182, - "refId": "D", + "refId": "D" } ], "thresholds": [], @@ -985,7 +985,7 @@ "format": "time_series", "intervalFactor": 1, "key": 0.37467787057058555, - "refId": "D", + 
"refId": "D" } ], "thresholds": [], @@ -1215,7 +1215,7 @@ "format": "time_series", "intervalFactor": 1, "key": 0.813148562053182, - "refId": "D", + "refId": "D" } ], "thresholds": [], @@ -1301,7 +1301,7 @@ "intervalFactor": 1, "key": 0.37467787057058555, "legendFormat": "{{instance}}", - "refId": "D", + "refId": "D" } ], "thresholds": [], @@ -1403,7 +1403,6 @@ "key": 0.7090064740553321, "legendFormat": "{{instance}}", "refId": "A", - "target": "fetch name:node_load1 dc:$dc host:$servers | aliasByTags host name", "textEditor": true } ], @@ -1587,7 +1586,6 @@ "format": "time_series", "intervalFactor": 1, "refId": "A", - "target": "aliasByNode(sortByName(servers.$servers.disk.internal-root.available_bytes), 1)", "textEditor": false } ], @@ -1674,7 +1672,7 @@ "intervalFactor": 1, "key": 0.4107640408210529, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -1760,7 +1758,7 @@ "intervalFactor": 1, "key": 0.5498548297308425, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -1856,7 +1854,7 @@ "intervalFactor": 1, "key": 0.5232381035821441, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -1942,7 +1940,7 @@ "intervalFactor": 1, "key": 0.045140271498789186, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -2028,7 +2026,7 @@ "intervalFactor": 1, "key": 0.7552494727739296, "legendFormat": "totalActiveSeries", - "refId": "B", + "refId": "B" } ], "thresholds": [], @@ -2114,7 +2112,7 @@ "intervalFactor": 1, "key": 0.025515486279266364, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -2196,7 +2194,7 @@ "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -2280,7 +2278,7 @@ "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ 
-2383,7 +2381,7 @@ "intervalFactor": 1, "key": 0.6091384812937135, "legendFormat": "wiredBlocks-{{instance}}", - "refId": "A", + "refId": "A" }, { "expr": "database_tick_unwired_blocks{instance=~\"$instance\"}", @@ -2391,7 +2389,7 @@ "intervalFactor": 1, "key": 0.6091384812937135, "legendFormat": "unwiredBlocks-{{instance}}", - "refId": "B", + "refId": "B" }, { "expr": "database_tick_active_blocks{instance=~\"$instance\"}", @@ -2491,7 +2489,7 @@ "intervalFactor": 1, "key": 0.12716494400774314, "legendFormat": "madeExpiredBlocks-{{instance}}", - "refId": "A", + "refId": "A" }, { "expr": "rate(database_tick_made_unwired_blocks{instance=~\"$instance\"}[$step])", @@ -2598,7 +2596,7 @@ "intervalFactor": 1, "key": 0.12716494400774314, "legendFormat": "seriesEncoderCreated-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -2684,7 +2682,7 @@ "intervalFactor": 1, "key": 0.8040141995856238, "legendFormat": "available", - "refId": "A", + "refId": "A" }, { "expr": "sum(cluster_shards_initializing{instace=~\"$instance\"})", @@ -2692,7 +2690,7 @@ "intervalFactor": 1, "key": 0.8040141995856238, "legendFormat": "initializing", - "refId": "B", + "refId": "B" }, { "expr": "sum(cluster_shards_leaving{instace=~\"$instance\"})", @@ -2700,7 +2698,7 @@ "intervalFactor": 1, "key": 0.8040141995856238, "legendFormat": "leaving", - "refId": "C", + "refId": "C" } ], "thresholds": [], @@ -2782,7 +2780,7 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "refId": "A", + "refId": "A" }, { "expr": "rate(wired_list_evicted{instance=~\"$instance\"}[$step])", @@ -2877,7 +2875,7 @@ "expr": "wired_list_evicted_after_duration{instance=~\"$instance\",quantile=\"0.99\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -2977,7 +2975,6 @@ { "key": 0.7090064740553321, "refId": "A", - "target": "aliasByNode(scale(perSecond(sortByName(servers.$servers.network.eth0_rx_bytes)), 8), 1, 3)", "textEditor": true } ], @@ 
-3061,7 +3058,6 @@ { "key": 0.7090064740553321, "refId": "A", - "target": "aliasByNode(scale(perSecond(sortByName(servers.$servers.network.eth0_tx_bytes)), 8), 1, 3)", "textEditor": true } ], @@ -3173,7 +3169,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_tagged_errors", - "refId": "A", + "refId": "A" }, { "expr": "sum(rate(database_write_success{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3181,7 +3177,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_success", - "refId": "B", + "refId": "B" }, { "expr": "sum(rate(database_write_errors{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3189,7 +3185,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_errors", - "refId": "D", + "refId": "D" } ], "thresholds": [], @@ -3286,7 +3282,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_tagged_errors", - "refId": "A", + "refId": "A" }, { "expr": "sum(rate(database_read_success{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3294,7 +3290,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_success", - "refId": "B", + "refId": "B" }, { "expr": "sum(rate(database_reads_errors{instance=~\"$instance\"}[$step])) by (namespace)", @@ -3302,7 +3298,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_errors", - "refId": "D", + "refId": "D" } ], "thresholds": [], @@ -3400,7 +3396,7 @@ "intervalFactor": 1, "key": 0.8767351594065642, "legendFormat": "{{namespace}}_query_ids_errors", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -3697,7 +3693,7 @@ "intervalFactor": 1, "key": 0.26636320136208314, "legendFormat": "fetchBlocksInProgress-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -3783,7 +3779,7 @@ "intervalFactor": 1, "key": 0.6565937560035238, "legendFormat": "fetchBlocksMetadataInProgress-{{instance}}", - "refId": "A", + "refId": 
"A" } ], "thresholds": [], @@ -3869,7 +3865,7 @@ "intervalFactor": 1, "key": 0.024380707546677316, "legendFormat": "fetchBlocksEnqueueChannelLength-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -3955,7 +3951,7 @@ "intervalFactor": 1, "key": 0.8052472520531426, "legendFormat": "fetchBlocksMetadataBatchCalls-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4041,7 +4037,7 @@ "intervalFactor": 1, "key": 0.359145903986988, "legendFormat": "fetchBlocksMetadataReceived-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4127,7 +4123,7 @@ "intervalFactor": 1, "key": 0.359145903986988, "legendFormat": "fetchBlocksMetadataPaginationRetry-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4227,7 +4223,7 @@ "format": "time_series", "intervalFactor": 1, "key": 0.05455062296227564, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4312,14 +4308,14 @@ "format": "time_series", "intervalFactor": 1, "key": 0.8368215616799846, - "refId": "A", + "refId": "A" }, { "expr": "database_fs_persist_throttle_duration_ms{instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 1, "key": 0.2814048282536148, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4420,7 +4416,7 @@ "intervalFactor": 1, "key": 0.4195302108331671, "legendFormat": "fetchBlockSuccess-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4506,7 +4502,7 @@ "intervalFactor": 1, "key": 0.7411861401478703, "legendFormat": "fetchBlockError-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4592,7 +4588,7 @@ "intervalFactor": 1, "key": 0.3848186046051656, "legendFormat": "fetchBlockFullRetry-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4678,7 +4674,7 @@ "intervalFactor": 1, "key": 0.3848186046051656, "legendFormat": "fetchBlockFinalError-{{instance}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4778,7 +4774,7 @@ "format": 
"time_series", "intervalFactor": 1, "key": 0.4590639612241416, - "refId": "A", + "refId": "A" }, { "expr": "[[PoolType]]_put_on_full{instance=~\"$instance\"}", @@ -4786,7 +4782,7 @@ "hide": true, "intervalFactor": 1, "key": 0.4590639612241416, - "refId": "B", + "refId": "B" } ], "thresholds": [], @@ -4872,7 +4868,7 @@ "format": "time_series", "intervalFactor": 1, "key": 0.3914310519012809, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -4973,7 +4969,7 @@ "intervalFactor": 1, "key": 0.7855942131678251, "legendFormat": "{{go_version}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5066,7 +5062,7 @@ "intervalFactor": 1, "key": 0.5783520603949805, "legendFormat": "{{revision}}", - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5168,13 +5164,13 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "refId": "A", + "refId": "A" }, { "expr": "rate(dbindex_index_error{instance=~\"$instance\"}[$step])", "format": "time_series", "intervalFactor": 1, - "refId": "B", + "refId": "B" } ], "thresholds": [], @@ -5257,7 +5253,7 @@ "expr": "dbindex_insert_end_to_end_latency{instance=~\"$instance\",quantile=\"0.99\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5355,7 +5351,7 @@ "expr": "database_tick_index_num_docs{instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5438,7 +5434,7 @@ "expr": "database_tick_index_num_segments{instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5521,14 +5517,14 @@ "expr": "dbindex_num_active_compactions{instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" }, { "expr": "dbindex_num_segments_compacting{instance=~\"$instance\"}", "format": "time_series", "hide": true, "intervalFactor": 1, - "refId": "B", + "refId": "B" } ], "thresholds": [], @@ 
-5611,7 +5607,7 @@ "expr": "dbindex_compaction_latency{instance=~\"$instance\"}", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" } ], "thresholds": [], @@ -5694,13 +5690,13 @@ "expr": "increase(database_tick_index_num_blocks_evicted{instance=~\"$instance\"}[$step])", "format": "time_series", "intervalFactor": 1, - "refId": "A", + "refId": "A" }, { "expr": "increase(database_tick_index_num_blocks_sealed{instance=~\"$instance\"}[$step])", "format": "time_series", "intervalFactor": 1, - "refId": "B", + "refId": "B" } ], "thresholds": [], @@ -5765,7 +5761,6 @@ "text": "bytes_pool", "value": "bytes_pool" }, - "datasource": "M3_meta", "hide": 0, "includeAll": true, "label": "", From 57c480d5a7c3be4108489f790c2eedbebf8be44d Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 15:28:50 -0400 Subject: [PATCH 21/26] Improvements and fix auto-import dashboard --- docker/grafana/Dockerfile | 9 +++++++++ docker/grafana/dashboards.yaml | 6 ++++++ scripts/development/m3_stack/README.md | 18 +++++++++++++++++- scripts/development/m3_stack/prometheus.yml | 4 ++-- 4 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 docker/grafana/dashboards.yaml diff --git a/docker/grafana/Dockerfile b/docker/grafana/Dockerfile index 32ef6242c0..951ee19517 100644 --- a/docker/grafana/Dockerfile +++ b/docker/grafana/Dockerfile @@ -1,3 +1,12 @@ FROM grafana/grafana:latest COPY ./docker/grafana/datasource.yaml /etc/grafana/provisioning/datasources/datasource.yaml +COPY ./docker/grafana/dashboards.yaml /etc/grafana/provisioning/dashboards/all.yaml +COPY ./integrations/grafana/m3db_dashboard.json /var/lib/grafana/dashboards/m3db_dashboard.json +# Need to replace datasource template variable with name of actual data source so auto-import +# JustWorksTM. Need to switch users because the /var/lib/grafana/dashboards directory is +# only writable by root. 
+USER root +RUN sed -i 's/${DS_PROMETHEUS}/Prometheus/g' /var/lib/grafana/dashboards/m3db_dashboard.json +# Switch back to grafana user. +USER grafana \ No newline at end of file diff --git a/docker/grafana/dashboards.yaml b/docker/grafana/dashboards.yaml new file mode 100644 index 0000000000..256c0aff38 --- /dev/null +++ b/docker/grafana/dashboards.yaml @@ -0,0 +1,6 @@ +- name: 'default' + org_id: 1 + folder: '' + type: 'file' + options: + folder: '/var/lib/grafana/dashboards' diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index e3c50cab37..05d1668cc3 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -9,4 +9,20 @@ This docker-compose file will setup the following environment: ## Usage -Use the `start.sh` and `stop.sh` scripts \ No newline at end of file +Use the `start.sh` and `stop.sh` scripts + +## Grafana + +Use Grafana by navigating to `http://localhost:3000` and using `admin` for both the username and password. The M3DB dashboard should already be populated and working. + +## Prometheus + +Use Prometheus by navigating to `http://localhost:9090` + +## Increasing Load + +Load can easily be increased by modifying the `prometheus.yml` file to reduce the scrape interval to `1s` + +## Containers Hanging / Unresponsive + +Running the entire stack can be resource intensive. If the containers are unresponsive try increasing the amount of cores and memory that the docker daemon is allowed to use. 
\ No newline at end of file diff --git a/scripts/development/m3_stack/prometheus.yml b/scripts/development/m3_stack/prometheus.yml index 0479de484f..a54b09463a 100644 --- a/scripts/development/m3_stack/prometheus.yml +++ b/scripts/development/m3_stack/prometheus.yml @@ -1,8 +1,8 @@ global: external_labels: role: "remote" - scrape_interval: 1s # Set low to increase load - evaluation_interval: 1s # Set low to increase load + scrape_interval: 15s + evaluation_interval: 15s # Alertmanager configuration alerting: From 295f106fb1778d42f494e77f4002a44070760b2c Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 15:30:03 -0400 Subject: [PATCH 22/26] Remove gitingore change --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9613e1157e..6152560af8 100644 --- a/.gitignore +++ b/.gitignore @@ -47,5 +47,3 @@ site/ # Automatically populated from asset sources m3db.io/openapi -# GitHub API token -github_token From 1eabbf5903a09a31f67e39a24f72147f41b643c1 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 15:33:24 -0400 Subject: [PATCH 23/26] Improve prom query --- integrations/grafana/m3db_dashboard.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/grafana/m3db_dashboard.json b/integrations/grafana/m3db_dashboard.json index 5d68e8ab53..4d25d03fec 100644 --- a/integrations/grafana/m3db_dashboard.json +++ b/integrations/grafana/m3db_dashboard.json @@ -134,7 +134,7 @@ "tableColumn": "Value", "targets": [ { - "expr": "sum(database_bootstrapped{instance=~\"$instance\"} == 1)", + "expr": "sum(database_bootstrapped{instance=~\"$instance\"} == bool 1)", "format": "time_series", "instant": true, "intervalFactor": 1, From 72ab0e0b4984dfc14dc06289fe6eea0b8ef46830 Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 16:05:45 -0400 Subject: [PATCH 24/26] Rename scripts --- scripts/development/m3_stack/README.md | 2 +- scripts/development/m3_stack/{start.sh => 
start_m3.sh} | 0 scripts/development/m3_stack/{stop.sh => stop_m3.sh} | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename scripts/development/m3_stack/{start.sh => start_m3.sh} (100%) rename scripts/development/m3_stack/{stop.sh => stop_m3.sh} (100%) diff --git a/scripts/development/m3_stack/README.md b/scripts/development/m3_stack/README.md index 05d1668cc3..24524a9e85 100644 --- a/scripts/development/m3_stack/README.md +++ b/scripts/development/m3_stack/README.md @@ -9,7 +9,7 @@ This docker-compose file will setup the following environment: ## Usage -Use the `start.sh` and `stop.sh` scripts +Use the `start_m3.sh` and `stop_m3.sh` scripts ## Grafana diff --git a/scripts/development/m3_stack/start.sh b/scripts/development/m3_stack/start_m3.sh similarity index 100% rename from scripts/development/m3_stack/start.sh rename to scripts/development/m3_stack/start_m3.sh diff --git a/scripts/development/m3_stack/stop.sh b/scripts/development/m3_stack/stop_m3.sh similarity index 100% rename from scripts/development/m3_stack/stop.sh rename to scripts/development/m3_stack/stop_m3.sh From 82fddcfb2ef8f347618cc14a1508328185fc60dc Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 17:08:01 -0400 Subject: [PATCH 25/26] Remove unused ports --- scripts/development/m3_stack/docker-compose.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/development/m3_stack/docker-compose.yml b/scripts/development/m3_stack/docker-compose.yml index e90fb8df0d..3e6d531f69 100644 --- a/scripts/development/m3_stack/docker-compose.yml +++ b/scripts/development/m3_stack/docker-compose.yml @@ -37,11 +37,9 @@ services: expose: - "7201" - "7203" - - "7208" ports: - "0.0.0.0:7201:7201" - "0.0.0.0:7203:7203" - - "0.0.0.0:7208:7208" networks: - backend build: From edd73a88ee5e2cd43081b99e13008545f2d59d9d Mon Sep 17 00:00:00 2001 From: Richard Artoul Date: Tue, 25 Sep 2018 17:09:20 -0400 Subject: [PATCH 26/26] Update DEVELOPER.md --- DEVELOPER.md | 5 +++++ 1 file changed, 5 
insertions(+) diff --git a/DEVELOPER.md b/DEVELOPER.md index fd2dcf0b17..52aa374148 100644 --- a/DEVELOPER.md +++ b/DEVELOPER.md @@ -1,5 +1,9 @@ # Developer Notes +## Running the M3 stack locally + +Follow the instructions in `/scripts/development/m3_stack/README.md` + ## Testing Changes M3DB has an extensive (and ever increasing) set of tests to ensure we are able to validate changes. More notes about the various testing strategies we employ can be found in `TESTING.md`. An unfortunate consequence of the number of tests is running the test suite takes too long on a developer's laptop. Here's the workflow most developers employ to be productive. Note: take this as a suggestion of something that works for some people, not as a directive. Do what makes you enjoy the development process most, including disregarding this suggestion! @@ -56,3 +60,4 @@ make docs-deploy ## M3DB Website The [M3DB website](https://m3db.io/) is hosted via netlify. It is configured to run `make site-build` and then serving the contents of the `/m3db.io` directory. The site is built and republished every time there is a push to master. +