Skip to content

Commit

Permalink
feat: add more telemetry (#2059)
Browse files Browse the repository at this point in the history
## Description

Closes #1829

This PR introduces new metrics for different TM2 modules, as outlined in
#1829.

Thank you @gfanton and @ajnavarro for helping out with the Docker issues
🙏

## How do I test this out?

Head over to `misc/telemetry` and follow the README -- you can run
everything locally in Docker 😎


![grafana-2](https://github.com/gnolang/gno/assets/16712663/8177d338-6743-480d-b4b3-b447243043d9)


cc @mazzy89 

## Metrics added
### Consensus

- [x] block interval (time between current and prev block in seconds)
- [x] number of transactions in the latest block
- [x] block size (in bytes)
- [x] number of validators
- [x] total voting power of the validator set

### Networking

- [x] number of inbound peers
- [x] number of outbound peers
- [x] number of pending peers (dialing) 

### JSON-RPC

- [x] response time for requests (http / ws)

### Mempool

- [x] number of valid txs in the mempool
- [x] number of txs in the mempool cache

### VM

- [x] gas used per execution
- [x] CPU cycles
- [x] different VM query message call frequency
- [x] different VM execution frequency (run, call, addpkg)
- [x] VM query error frequency

<details><summary>Contributors' checklist...</summary>

- [x] Added new tests, or not needed, or not feasible
- [x] Provided an example (e.g. screenshot) to aid review or the PR is
self-explanatory
- [x] Updated the official documentation or not needed
- [x] No breaking changes were made, or a `BREAKING CHANGE: xxx` message
was included in the description
- [x] Added references to related issues and PRs
- [ ] Provided any useful hints for running manual tests
- [ ] Added new benchmarks to [generated
graphs](https://gnoland.github.io/benchmarks), if any. More info
[here](https://github.com/gnolang/gno/blob/master/.benchmarks/README.md).
</details>

---------

Signed-off-by: gfanton <[email protected]>
Co-authored-by: gfanton <[email protected]>
  • Loading branch information
zivkovicmilos and gfanton authored May 24, 2024
1 parent c6cde63 commit 90aa89c
Show file tree
Hide file tree
Showing 30 changed files with 2,213 additions and 166 deletions.
6 changes: 4 additions & 2 deletions gno.land/cmd/gnoland/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,10 @@ func execStart(c *startCfg, io commands.IO) error {
// Wrap the zap logger
logger := log.ZapLoggerToSlog(zapLogger)

// Initialize telemetry
telemetry.Init(*cfg.Telemetry)
// Initialize the telemetry
if err := telemetry.Init(*cfg.Telemetry); err != nil {
return fmt.Errorf("unable to initialize telemetry, %w", err)
}

// Write genesis file if missing.
// NOTE: this will be dropped in a PR that resolves issue #1886:
Expand Down
66 changes: 47 additions & 19 deletions gno.land/pkg/sdk/vm/handler.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
package vm

import (
"context"
"fmt"
"strings"

abci "github.com/gnolang/gno/tm2/pkg/bft/abci/types"
"github.com/gnolang/gno/tm2/pkg/sdk"
"github.com/gnolang/gno/tm2/pkg/std"
"github.com/gnolang/gno/tm2/pkg/telemetry"
"github.com/gnolang/gno/tm2/pkg/telemetry/metrics"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

type vmHandler struct {
Expand Down Expand Up @@ -51,14 +56,6 @@ func (vh vmHandler) handleMsgCall(ctx sdk.Context, msg MsgCall) (res sdk.Result)
}
res.Data = []byte(resstr)
return
/* TODO handle events.
ctx.EventManager().EmitEvent(
sdk.NewEvent(
sdk.EventTypeMessage,
sdk.NewAttribute(sdk.AttributeKeyXXX, types.AttributeValueXXX),
),
)
*/
}

// Handle MsgRun.
Expand All @@ -71,7 +68,7 @@ func (vh vmHandler) handleMsgRun(ctx sdk.Context, msg MsgRun) (res sdk.Result) {
return
}

//----------------------------------------
// ----------------------------------------
// Query

// query paths
Expand All @@ -84,27 +81,58 @@ const (
QueryFile = "qfile"
)

func (vh vmHandler) Query(ctx sdk.Context, req abci.RequestQuery) (res abci.ResponseQuery) {
switch secondPart(req.Path) {
func (vh vmHandler) Query(ctx sdk.Context, req abci.RequestQuery) abci.ResponseQuery {
var (
res abci.ResponseQuery
path = secondPart(req.Path)
)

switch path {
case QueryPackage:
return vh.queryPackage(ctx, req)
res = vh.queryPackage(ctx, req)
case QueryStore:
return vh.queryStore(ctx, req)
res = vh.queryStore(ctx, req)
case QueryRender:
return vh.queryRender(ctx, req)
res = vh.queryRender(ctx, req)
case QueryFuncs:
return vh.queryFuncs(ctx, req)
res = vh.queryFuncs(ctx, req)
case QueryEval:
return vh.queryEval(ctx, req)
res = vh.queryEval(ctx, req)
case QueryFile:
return vh.queryFile(ctx, req)
res = vh.queryFile(ctx, req)
default:
res = sdk.ABCIResponseQueryFromError(
return sdk.ABCIResponseQueryFromError(
std.ErrUnknownRequest(fmt.Sprintf(
"unknown vm query endpoint %s in %s",
secondPart(req.Path), req.Path)))
}

// Log the telemetry
logQueryTelemetry(path, res.IsErr())

return res
}

// logQueryTelemetry records telemetry for a single VM query.
//
// It increments the VM query call counter, tagged with the queried path,
// and additionally increments the VM query error counter when isErr is true.
// Nothing is recorded when metrics are disabled.
func logQueryTelemetry(path string, isErr bool) {
	if !telemetry.MetricsEnabled() {
		return
	}

	ctx := context.Background()

	// Count the call, labeled by the query path that was requested
	pathAttr := attribute.String("path", path)
	metrics.VMQueryCalls.Add(ctx, 1, metric.WithAttributes(pathAttr))

	// Failed queries are also tracked in a separate counter
	if isErr {
		metrics.VMQueryErrors.Add(ctx, 1)
	}
}

// queryPackage fetch a package's files.
Expand Down Expand Up @@ -187,7 +215,7 @@ func (vh vmHandler) queryFile(ctx sdk.Context, req abci.RequestQuery) (res abci.
return
}

//----------------------------------------
// ----------------------------------------
// misc

func abciResult(err error) sdk.Result {
Expand Down
71 changes: 71 additions & 0 deletions gno.land/pkg/sdk/vm/keeper.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package vm

import (
"bytes"
"context"
"fmt"
"os"
"strings"
Expand All @@ -16,6 +17,10 @@ import (
"github.com/gnolang/gno/tm2/pkg/sdk/bank"
"github.com/gnolang/gno/tm2/pkg/std"
"github.com/gnolang/gno/tm2/pkg/store"
"github.com/gnolang/gno/tm2/pkg/telemetry"
"github.com/gnolang/gno/tm2/pkg/telemetry/metrics"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)

const (
Expand Down Expand Up @@ -215,6 +220,17 @@ func (vm *VMKeeper) AddPackage(ctx sdk.Context, msg MsgAddPackage) (err error) {
}
}()
m2.RunMemPackage(memPkg, true)

// Log the telemetry
logTelemetry(
m2.GasMeter.GasConsumed(),
m2.Cycles,
attribute.KeyValue{
Key: "operation",
Value: attribute.StringValue("m_addpkg"),
},
)

return nil
}

Expand Down Expand Up @@ -312,7 +328,19 @@ func (vm *VMKeeper) Call(ctx sdk.Context, msg MsgCall) (res string, err error) {
res += "\n"
}
}

// Log the telemetry
logTelemetry(
m.GasMeter.GasConsumed(),
m.Cycles,
attribute.KeyValue{
Key: "operation",
Value: attribute.StringValue("m_call"),
},
)

res += "\n\n" // use `\n\n` as separator to separate results for single tx with multi msgs

return res, nil
// TODO pay for gas? TODO see context?
}
Expand Down Expand Up @@ -418,6 +446,17 @@ func (vm *VMKeeper) Run(ctx sdk.Context, msg MsgRun) (res string, err error) {
}()
m2.RunMain()
res = buf.String()

// Log the telemetry
logTelemetry(
m2.GasMeter.GasConsumed(),
m2.Cycles,
attribute.KeyValue{
Key: "operation",
Value: attribute.StringValue("m_run"),
},
)

return res, nil
}

Expand Down Expand Up @@ -636,3 +675,35 @@ func (vm *VMKeeper) QueryFile(ctx sdk.Context, filepath string) (res string, err
return res, nil
}
}

// logTelemetry records the telemetry for a single VM execution.
//
// gasUsed and cpuCycles are the values recorded for the gas and CPU cycle
// instruments; attributes are attached to every measurement (callers in this
// file pass an "operation" attribute). Nothing is recorded when metrics are
// disabled.
func logTelemetry(
	gasUsed int64,
	cpuCycles int64,
	attributes ...attribute.KeyValue,
) {
	if !telemetry.MetricsEnabled() {
		return
	}

	// The same context and attribute set apply to all three measurements
	var (
		ctx   = context.Background()
		attrs = metric.WithAttributes(attributes...)
	)

	// Operation frequency: one more execution
	metrics.VMExecMsgFrequency.Add(ctx, 1, attrs)

	// CPU cycles spent by the execution
	metrics.VMCPUCycles.Record(ctx, cpuCycles, attrs)

	// Gas consumed by the execution
	metrics.VMGasUsed.Record(ctx, gasUsed, attrs)
}
11 changes: 11 additions & 0 deletions misc/telemetry/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.PHONY: up
up:
docker compose up -d --build

.PHONY: down
down:
docker compose down

.PHONY: clean
clean:
docker compose down -v
56 changes: 56 additions & 0 deletions misc/telemetry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
## Overview

This Telemetry documentation showcases the different node metrics exposed by the Gno node through
OpenTelemetry, without requiring any additional setup.

The containerized setup is the following:

- Grafana dashboard
- Prometheus
- OpenTelemetry collector (separate service that needs to run)
- Single Gnoland node, with 1s block times and configured telemetry (enabled)
- Supernova process that simulates load periodically (generates network traffic)

## Starting the containers

### Step 1: Spinning up Docker

Make sure you have Docker installed and running on your system. After that, within the `misc/telemetry` folder run the
following command:

```shell
make up
```

This will build the required Docker images for this simulation and start the services.

### Step 2: Open Grafana

When you've verified that the `telemetry` containers are up and running, head on over to http://localhost:3000 to open
the Grafana dashboard.

Default login details:

```
username: admin
password: admin
```

After you've logged in (you can skip setting a new password), on the left-hand side, click on
`Dashboards -> Gno -> Gno Node Metrics`:
![Grafana](assets/grafana-1.jpeg)

This will open up the predefined Gno Metrics dashboards (added for ease of use):
![Metrics Dashboard](assets/grafana-2.jpeg)

These metrics are updated periodically while the `supernova` process simulates network traffic.

### Step 3: Stopping the cluster

To stop the cluster, you can run:

```shell
make down
```

which will stop the Docker containers. Additionally, you can delete the Docker volumes with `make clean`.
Binary file added misc/telemetry/assets/grafana-1.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added misc/telemetry/assets/grafana-2.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
22 changes: 22 additions & 0 deletions misc/telemetry/collector/collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317

processors:
batch:

exporters:
prometheus:
endpoint: collector:8090

service:
telemetry:
logs:
level: "debug"
pipelines:
metrics:
receivers: [ otlp ]
processors: [ batch ]
exporters: [ prometheus ]
Loading

0 comments on commit 90aa89c

Please sign in to comment.