Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: multi-node metrics working #9486

Merged
merged 35 commits into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e08989f
x
ludamad Oct 25, 2024
03ff27b
x
ludamad Oct 25, 2024
fc40b0d
small ci fix; get load balancer url helper function ; allow otel inje…
ludamad Oct 25, 2024
380713d
.
ludamad Oct 25, 2024
11b41b7
.
ludamad Oct 25, 2024
c6ddaf3
.
ludamad Oct 25, 2024
92d1e4b
-
ludamad Oct 25, 2024
1ebfe9b
lead
ludamad Oct 28, 2024
1960d6b
Merge remote-tracking branch 'origin/master'
ludamad Oct 28, 2024
0738e89
deploy_spartan.sh prod changes
ludamad Oct 28, 2024
35e2a98
validator.sh
ludamad Oct 28, 2024
f477cd3
telemetry for winston
ludamad Oct 28, 2024
f83ed70
negative patterns in core logger
ludamad Oct 28, 2024
739acc7
negative patterns in core logger
ludamad Oct 28, 2024
45be95d
yarn project working
ludamad Oct 28, 2024
137b324
yarn project working
ludamad Oct 28, 2024
8fe411c
metrics
ludamad Oct 28, 2024
50b9c7a
revert
ludamad Oct 29, 2024
48067cf
missing file
ludamad Oct 29, 2024
9b7215d
missing file
ludamad Oct 29, 2024
7efb469
metrics in native testnet script
ludamad Oct 29, 2024
5fb0973
telemetry for winston
ludamad Oct 29, 2024
6cb4b46
fix metrics
ludamad Oct 30, 2024
fc299f3
Update install.sh
ludamad Oct 30, 2024
587a58e
updates
ludamad Oct 30, 2024
8638eb7
Merge remote-tracking branch 'origin/ad/metrics-working' into ad/metr…
ludamad Oct 30, 2024
ef41a4d
rev
ludamad Oct 30, 2024
0b9baed
-
ludamad Oct 30, 2024
4c138ed
Merge branch 'master' into ad/metrics-working
ludamad Oct 30, 2024
734c205
formatting
ludamad Oct 30, 2024
ec0a3d6
Merge remote-tracking branch 'origin/ad/metrics-working' into ad/metr…
ludamad Oct 30, 2024
90bf69d
cleaner
ludamad Oct 30, 2024
c900476
Update logger.ts
ludamad Oct 30, 2024
d6787cf
Update post_deploy_spartan.sh
ludamad Oct 30, 2024
d862ba4
Update post_deploy_spartan.sh
ludamad Oct 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions build-system/s3-cache-scripts/earthly-s3-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function s3_upload() {
if [ "${S3_BUILD_CACHE_UPLOAD:-true}" = "false" ] || [ "${AWS_ACCESS_KEY_ID}" == "" ] ; then
return 0 # exit silently
fi
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts || echo "WARNING: S3 upload failed!" >&2
}
function minio_download() {
if [ -z "$S3_BUILD_CACHE_MINIO_URL" ] ; then
Expand All @@ -35,7 +35,7 @@ function minio_upload() {
fi
# minio is S3-compatible
S3_BUILD_CACHE_AWS_PARAMS="--endpoint-url $S3_BUILD_CACHE_MINIO_URL" AWS_SECRET_ACCESS_KEY=minioadmin AWS_ACCESS_KEY_ID=minioadmin \
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts || echo "WARNING Minio upload failed!" >&2
}

# commands
Expand All @@ -53,5 +53,5 @@ if ! bash -c "$command" ; then
exit 1 # we have failed to build, don't continue
fi

minio_upload || echo "Minio upload failed!"
s3_upload || echo "S3 upload failed!"
minio_upload
s3_upload
25 changes: 25 additions & 0 deletions scripts/run_native_testnet_with_metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Strict mode: stop on the first failing command and on any unset variable.
set -o errexit -o nounset

# Optional first argument picks the namespace; an absent or empty value means "staging".
if [ "$#" -ge 1 ] && [ -n "$1" ]; then
  NAMESPACE=$1
else
  NAMESPACE=staging
fi

printf '%s\n' "Trying to port forward. NOTE: Must be using a production k8s context with metrics chart."

# Prints the hostname of the first load-balancer ingress of a Kubernetes service.
# Arguments: $1 - namespace to query
#            $2 - exact service name (matched against metadata.name)
# Outputs:   whatever kubectl prints for the jsonpath query (the hostname) on stdout
# Returns:   the exit status of kubectl
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # Both values are quoted so kubectl receives each as exactly one word,
  # with no word-splitting or glob expansion applied to them.
  kubectl get svc -n "$namespace" \
    -o "jsonpath={.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Build the collector base URL from the load balancer of the
# metrics-opentelemetry-collector service in the "metrics" namespace (port 4318).
OTEL_URL="http://$(get_load_balancer_url metrics metrics-opentelemetry-collector):4318"

# Per-signal OTLP/HTTP endpoints inherited by the testnet processes started below.
# The signal paths are /v1/metrics, /v1/traces and /v1/logs (note the plural "traces").
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="$OTEL_URL/v1/metrics"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="$OTEL_URL/v1/traces"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="$OTEL_URL/v1/logs"
export LOG_JSON=1

# re-enter script dir (quoted so a checkout path containing spaces still works)
cd "$(dirname "${BASH_SOURCE[0]}")"
# Forward every original argument unchanged, one word per argument.
./run_native_testnet.sh "$@"
7 changes: 7 additions & 0 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }}
{{- end -}}
{{- end -}}

{{/*
OTLP logs endpoint, used as the value of the OTEL_EXPORTER_OTLP_LOGS_ENDPOINT env var.
Emits "<telemetry.otelCollectorEndpoint>/v1/logs" when telemetry.enabled is truthy and
telemetry.otelCollectorEndpoint is non-empty; otherwise it emits nothing.
*/}}
{{- define "aztec-network.otelCollectorLogsEndpoint" -}}
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/logs
{{- end -}}
{{- end -}}
{{- end -}}

{{- define "helpers.flag" -}}
{{- $name := index . 0 -}}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ spec:
value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
ports:
- containerPort: {{ .Values.bootNode.service.nodePort }}
- containerPort: {{ .Values.bootNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ spec:
value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We also need to add the above to the prover node yaml.

value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
ports:
- containerPort: {{ .Values.validator.service.nodePort }}
- containerPort: {{ .Values.validator.service.p2pTcpPort }}
Expand Down
4 changes: 3 additions & 1 deletion spartan/metrics/install-prod.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
set -eu

helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace --atomic
cd "$(dirname "${BASH_SOURCE[0]}")"

helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace --atomic $@
2 changes: 2 additions & 0 deletions spartan/metrics/install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
set -eu

cd "$(dirname "${BASH_SOURCE[0]}")"

helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
Expand Down
5 changes: 1 addition & 4 deletions spartan/metrics/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
opentelemetry-collector:
mode: daemonset
mode: deployment

service:
enabled: true
Expand Down Expand Up @@ -28,9 +28,6 @@ opentelemetry-collector:
protocol: TCP

presets:
logsCollection:
enabled: true
includeCollectorLogs: true
kubernetesAttributes:
enabled: true
config:
Expand Down
15 changes: 9 additions & 6 deletions spartan/scripts/deploy_spartan.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#!/bin/bash
set -eux
set -eu
set -o pipefail

TAG=$1
VALUES=$2
NAMESPACE=${3:-spartan}
PROD=${4:-true}
PROD_ARGS=""
if [ "$PROD" = "true" ] ; then
PROD_ARGS="--set network.public=true --set telemetry.enabled=true --set telemetry.otelCollectorEndpoint=http://metrics-opentelemetry-collector.metrics:4318"
fi
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [ -z "$TAG" ]; then
Expand Down Expand Up @@ -46,16 +51,14 @@ function upgrade() {
helm template $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--set images.aztec.image="$IMAGE" \
--set network.public=true
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml $PROD_ARGS \
--set images.aztec.image="$IMAGE"
else
helm upgrade --install $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml $PROD_ARGS \
--set images.aztec.image="$IMAGE" \
--set network.public=true \
--wait \
--wait-for-jobs=true \
--timeout=30m 2>&1
Expand Down
41 changes: 41 additions & 0 deletions spartan/scripts/post_deploy_spartan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Bootstraps example contracts onto an already-running cluster, for testing.
set -euo pipefail

printf '%s\n' "Bootstrapping network with test contracts"

# Positional arguments, both optional:
#   $1 - namespace (default "spartan")
#   $2 - image tag (default "latest")
NAMESPACE="${1:-spartan}"
TAG="${2:-latest}"
# Absolute path of the directory that holds this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Usage guard for an empty namespace; with the default applied above NAMESPACE is never empty here.
if [[ -z "$NAMESPACE" ]]; then
  printf 'Usage: %s (optional: <namespace>)\n' "$0"
  printf 'Example: %s devnet\n' "$0"
  exit 1
fi

# Prints the hostname of the first load-balancer ingress of a Kubernetes service.
# Arguments: $1 - namespace to query
#            $2 - exact service name (matched against metadata.name)
# Outputs:   whatever kubectl prints for the jsonpath query (the hostname) on stdout
# Returns:   the exit status of kubectl
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # Both values are quoted so kubectl receives each as exactly one word,
  # with no word-splitting or glob expansion applied to them.
  kubectl get svc -n "$namespace" \
    -o "jsonpath={.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Resolve the load-balancer URLs of the boot node, PXE and Ethereum services in $NAMESPACE.
# Each value is assigned first and exported afterwards: `export VAR=...$(cmd)` returns the
# status of `export`, which would hide a failing kubectl lookup from `set -e`.
BOOTNODE_URL="http://$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-boot-node-lb-tcp"):8080"
PXE_URL="http://$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-pxe-lb"):8080"
ETHEREUM_HOST="http://$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-ethereum-lb"):8545"
export BOOTNODE_URL PXE_URL ETHEREUM_HOST

echo "BOOTNODE_URL: $BOOTNODE_URL"
echo "PXE_URL: $PXE_URL"
echo "ETHEREUM_HOST: $ETHEREUM_HOST"

echo "Bootstrapping contracts for test network. NOTE: This took one hour last run."
# Run the image's bootstrap-network command against the resolved endpoints. The JSON output
# is shown on stdout and also saved to ./basic_contracts.json in the current directory.
# NOTE(review): the chain id and private key look like the stock local dev-chain defaults;
# confirm this key is never used against a network holding real funds.
docker run "aztecprotocol/aztec:$TAG" bootstrap-network \
  --rpc-url "$BOOTNODE_URL" \
  --l1-rpc-url "$ETHEREUM_HOST" \
  --l1-chain-id 31337 \
  --l1-private-key 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 \
  --json | tee ./basic_contracts.json
15 changes: 11 additions & 4 deletions spartan/scripts/test_spartan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@ fi

echo "Note: Repo should be bootstrapped with ./bootstrap.sh fast."

# Prints the hostname of the first load-balancer ingress of a Kubernetes service.
# Arguments: $1 - namespace to query
#            $2 - exact service name (matched against metadata.name)
# Outputs:   whatever kubectl prints for the jsonpath query (the hostname) on stdout
# Returns:   the exit status of kubectl
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # Both values are quoted so kubectl receives each as exactly one word,
  # with no word-splitting or glob expansion applied to them.
  kubectl get svc -n "$namespace" \
    -o "jsonpath={.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Fetch the service URLs based on the namespace for injection in the test-transfer.sh
export BOOTNODE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-boot-node-lb-tcp')].status.loadBalancer.ingress[0].hostname}"):8080
export PXE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-pxe-lb')].status.loadBalancer.ingress[0].hostname}"):8080
export ETHEREUM_HOST=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-ethereum-lb')].status.loadBalancer.ingress[0].hostname}"):8545
export BOOTNODE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-boot-node-lb-tcp"):8080
export PXE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-pxe-lb"):8080
export ETHEREUM_HOST=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-ethereum-lb"):8545

echo "BOOTNODE_URL: $BOOTNODE_URL"
echo "PXE_URL: $PXE_URL"
echo "ETHEREUM_HOST: $ETHEREUM_HOST"

# hack to ensure L2 contracts are considered deployed
touch $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/state/l2-contracts.env
bash -x $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/test-transfer.sh
bash -x $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/test-4epochs.sh
3 changes: 2 additions & 1 deletion yarn-project/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ prover-client-test:
# Running this inside the main builder as the point is not to run this through dockerization.
network-test:
ARG test=./test-transfer.sh
ARG validators=3
FROM +build
WORKDIR /usr/src/
# Bare minimum git setup to run 'git rev-parse --show-toplevel'
Expand All @@ -299,7 +300,7 @@ network-test:
./ethereum.sh \
"./prover-node.sh 8078 false" \
./pxe.sh \
"./validators.sh 3"
"./validators.sh $validators"

publish-npm:
FROM +build
Expand Down
1 change: 1 addition & 0 deletions yarn-project/aztec/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"@aztec/telemetry-client": "workspace:^",
"@aztec/txe": "workspace:^",
"@aztec/types": "workspace:^",
"@opentelemetry/winston-transport": "^0.7.0",
"@types/chalk": "^2.2.0",
"abitype": "^0.8.11",
"chalk": "^5.3.0",
Expand Down
28 changes: 4 additions & 24 deletions yarn-project/aztec/src/logging.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { currentLevel, onLog, setLevel } from '@aztec/foundation/log';

import { OpenTelemetryTransportV3 } from '@opentelemetry/winston-transport';
import * as path from 'path';
import * as process from 'process';
import * as winston from 'winston';
Expand Down Expand Up @@ -30,36 +31,15 @@ function createWinstonLocalFileLogger() {
});
}

function extractNegativePatterns(debugString: string): string[] {
return (
debugString
.split(',')
.filter(p => p.startsWith('-'))
// Remove the leading '-' from the pattern
.map(p => p.slice(1))
// Remove any '*' from the pattern
.map(p => p.replace('*', ''))
);
}

/** Creates a winston logger that logs everything to stdout in json format */
function createWinstonJsonStdoutLogger(
debugString: string = process.env.DEBUG ??
'aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*',
) {
const ignorePatterns = extractNegativePatterns(debugString);
const ignoreAztecPattern = format(info => {
if (ignorePatterns.some(pattern => info.module.startsWith(pattern))) {
return false; // Skip logging this message
}
return info;
});
function createWinstonJsonStdoutLogger() {
return winston.createLogger({
level: currentLevel,
transports: [
new winston.transports.Console({
format: format.combine(format.timestamp(), ignoreAztecPattern(), format.json()),
format: format.combine(format.timestamp(), format.json()),
}),
new OpenTelemetryTransportV3(),
],
});
}
Expand Down
5 changes: 3 additions & 2 deletions yarn-project/end-to-end/scripts/native-network/boot-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ export P2P_TCP_ANNOUNCE_ADDR="127.0.0.1:40400"
export P2P_UDP_ANNOUNCE_ADDR="127.0.0.1:40400"
export P2P_TCP_LISTEN_ADDR="0.0.0.0:40400"
export P2P_UDP_LISTEN_ADDR="0.0.0.0:40400"
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=""
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=""
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-}"
export VALIDATOR_PRIVATE_KEY="0x47e179ec197488593b187f80a00eb0da91f1b9d0b13f8733639f19c30a34926a"
REPO=$(git rev-parse --show-toplevel)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ echo "Done waiting."
# Set environment variables
export ETHEREUM_HOST="http://127.0.0.1:8545"
export AZTEC_NODE_URL="http://127.0.0.1:8080"
export LOG_JSON="1"
export LOG_LEVEL=${LOG_LEVEL:-"debug"}
export DEBUG="aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:l2_block_stream,-aztec:world-state:*"
export BOT_PRIVATE_KEY="0xcafe"
Expand Down
3 changes: 3 additions & 0 deletions yarn-project/end-to-end/scripts/native-network/validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ export P2P_TCP_ANNOUNCE_ADDR="127.0.0.1:$P2P_PORT"
export P2P_UDP_ANNOUNCE_ADDR="127.0.0.1:$P2P_PORT"
export P2P_TCP_LISTEN_ADDR="0.0.0.0:$P2P_PORT"
export P2P_UDP_LISTEN_ADDR="0.0.0.0:$P2P_PORT"
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-}"

# Add L1 validator
# this may fail, so try 3 times
Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export type EnvVar =
| 'NOMISMATOKOPIO_CONTRACT_ADDRESS'
| 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT'
| 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT'
| 'OTEL_EXPORTER_OTLP_LOGS_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'OUTBOX_CONTRACT_ADDRESS'
| 'P2P_BLOCK_CHECK_INTERVAL_MS'
Expand Down
Loading
Loading