From be91d807c91fbd829181c8b5935f93308fef6dbb Mon Sep 17 00:00:00 2001 From: ludamad Date: Fri, 6 Dec 2024 04:12:58 -0500 Subject: [PATCH] chore: simplify otel config, 1val setup, fix pod dns, retries (#10344) There were a few things in the way of getting a devnet up - Added retries as we had 'intrinsic gas too low' before reth got a chance to sync - use cluster dns explicitly before host networking - work around an issue with broker external host --------- Co-authored-by: Alex Gherghisan --- .../files/config/deploy-l1-contracts.sh | 19 ++++--- .../files/config/setup-service-addresses.sh | 16 +++++- spartan/aztec-network/templates/_helpers.tpl | 28 ++--------- .../aztec-network/templates/boot-node.yaml | 16 +++--- .../templates/deploy-l1-verifier.yaml | 19 +++++-- .../aztec-network/templates/prover-agent.yaml | 12 ++--- .../templates/prover-broker.yaml | 15 +++--- .../aztec-network/templates/prover-node.yaml | 13 ++--- spartan/aztec-network/templates/pxe.yaml | 3 ++ spartan/aztec-network/templates/rbac.yaml | 2 +- .../templates/setup-l2-contracts.yaml | 5 +- .../templates/transaction-bot.yaml | 2 + .../aztec-network/templates/validator.yaml | 14 ++---- .../1-validator-with-proving-and-metrics.yaml | 3 +- .../values/16-validators-with-metrics.yaml | 1 - .../values/3-validators-with-metrics.yaml | 1 - .../values/4-validators-with-metrics.yaml | 1 - .../aztec-network/values/48-validators.yaml | 1 - .../values/gcp-proving-test.yaml | 1 - .../aztec-network/values/multicloud-demo.yaml | 1 - .../values/prover-node-with-agents.yaml | 1 - .../aztec-network/values/release-devnet.yaml | 50 +++++++++++++++++++ spartan/scripts/deploy_spartan.sh | 8 ++- spartan/scripts/get_service_address | 47 +++++++++++++++++ spartan/scripts/post_deploy_spartan.sh | 11 ++-- 25 files changed, 188 insertions(+), 102 deletions(-) create mode 100644 spartan/aztec-network/values/release-devnet.yaml create mode 100755 spartan/scripts/get_service_address diff --git 
a/spartan/aztec-network/files/config/deploy-l1-contracts.sh b/spartan/aztec-network/files/config/deploy-l1-contracts.sh index 529bb412e62..74f8e3c6bfc 100644 --- a/spartan/aztec-network/files/config/deploy-l1-contracts.sh +++ b/spartan/aztec-network/files/config/deploy-l1-contracts.sh @@ -6,12 +6,19 @@ CHAIN_ID=$1 # Run the deploy-l1-contracts command and capture the output output="" -# if INIT_VALIDATORS is true, then we need to pass the validators flag to the deploy-l1-contracts command -if [ "$INIT_VALIDATORS" = "true" ]; then - output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --mnemonic "$MNEMONIC" --validators $2 --l1-chain-id $CHAIN_ID) -else - output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --mnemonic "$MNEMONIC" --l1-chain-id $CHAIN_ID) -fi +MAX_RETRIES=5 +RETRY_DELAY=60 +for attempt in $(seq 1 $MAX_RETRIES); do + # if INIT_VALIDATORS is true, then we need to pass the validators flag to the deploy-l1-contracts command + if [ "${INIT_VALIDATORS:-false}" = "true" ]; then + output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --mnemonic "$MNEMONIC" --validators $2 --l1-chain-id $CHAIN_ID) && break + else + output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --mnemonic "$MNEMONIC" --l1-chain-id $CHAIN_ID) && break + fi + echo "Attempt $attempt failed. Retrying in $RETRY_DELAY seconds..." 
+ sleep "$RETRY_DELAY" +done || { echo "All l1 contract deploy attempts failed."; exit 1; } + echo "$output" diff --git a/spartan/aztec-network/files/config/setup-service-addresses.sh b/spartan/aztec-network/files/config/setup-service-addresses.sh index 5ca3bb5a248..063c84a16e5 100644 --- a/spartan/aztec-network/files/config/setup-service-addresses.sh +++ b/spartan/aztec-network/files/config/setup-service-addresses.sh @@ -81,17 +81,29 @@ fi if [ "${PROVER_BROKER_EXTERNAL_HOST}" != "" ]; then PROVER_BROKER_ADDR="${PROVER_BROKER_EXTERNAL_HOST}" -elif [ "${NETWORK_PUBLIC}" = "true" ]; then - PROVER_BROKER_ADDR=$(get_service_address "prover-broker" "${PROVER_BROKER_PORT}") else PROVER_BROKER_ADDR="http://${SERVICE_NAME}-prover-broker.${NAMESPACE}:${PROVER_BROKER_PORT}" fi +# Configure OTEL_COLLECTOR_ENDPOINT if not set in values file +if [ "${TELEMETRY:-false}" = "true" ] && [ "${OTEL_COLLECTOR_ENDPOINT}" = "" ]; then + OTEL_COLLECTOR_PORT=${OTEL_COLLECTOR_PORT:-4318} + OTEL_COLLECTOR_ENDPOINT="http://metrics-opentelemetry-collector.metrics:$OTEL_COLLECTOR_PORT" +fi # Write addresses to file for sourcing echo "export ETHEREUM_HOST=${ETHEREUM_ADDR}" >> /shared/config/service-addresses echo "export BOOT_NODE_HOST=${BOOT_NODE_ADDR}" >> /shared/config/service-addresses echo "export PROVER_NODE_HOST=${PROVER_NODE_ADDR}" >> /shared/config/service-addresses echo "export PROVER_BROKER_HOST=${PROVER_BROKER_ADDR}" >> /shared/config/service-addresses + +if [ "${OTEL_COLLECTOR_ENDPOINT}" != "" ]; then + echo "export OTEL_COLLECTOR_ENDPOINT=$OTEL_COLLECTOR_ENDPOINT" >> /shared/config/service-addresses + echo "export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT=$OTEL_COLLECTOR_ENDPOINT/v1/logs" >> /shared/config/service-addresses + echo "export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=$OTEL_COLLECTOR_ENDPOINT/v1/metrics" >> /shared/config/service-addresses + echo "export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=$OTEL_COLLECTOR_ENDPOINT/v1/traces" >> /shared/config/service-addresses +fi + + echo "Addresses 
configured:" cat /shared/config/service-addresses diff --git a/spartan/aztec-network/templates/_helpers.tpl b/spartan/aztec-network/templates/_helpers.tpl index 3db484690a0..8581bda2d4f 100644 --- a/spartan/aztec-network/templates/_helpers.tpl +++ b/spartan/aztec-network/templates/_helpers.tpl @@ -68,30 +68,6 @@ http://{{ include "aztec-network.fullname" . }}-validator.{{ .Release.Namespace http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }} {{- end -}} -{{- define "aztec-network.otelCollectorMetricsEndpoint" -}} -{{- if .Values.telemetry.enabled -}} -{{- if .Values.telemetry.otelCollectorEndpoint -}} -{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/metrics -{{- end -}} -{{- end -}} -{{- end -}} - -{{- define "aztec-network.otelCollectorTracesEndpoint" -}} -{{- if .Values.telemetry.enabled -}} -{{- if .Values.telemetry.otelCollectorEndpoint -}} -{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/traces -{{- end -}} -{{- end -}} -{{- end -}} - -{{- define "aztec-network.otelCollectorLogsEndpoint" -}} -{{- if .Values.telemetry.enabled -}} -{{- if .Values.telemetry.otelCollectorEndpoint -}} -{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/logs -{{- end -}} -{{- end -}} -{{- end -}} - {{- define "helpers.flag" -}} {{- $name := index . 0 -}} {{- $value := index . 
1 -}} @@ -153,6 +129,10 @@ Service Address Setup Container value: "{{ .Values.network.public }}" - name: NAMESPACE value: {{ .Release.Namespace }} + - name: TELEMETRY + value: "{{ .Values.telemetry.enabled }}" + - name: OTEL_COLLECTOR_ENDPOINT + value: "{{ .Values.telemetry.otelCollectorEndpoint }}" - name: EXTERNAL_ETHEREUM_HOST value: "{{ .Values.ethereum.externalHost }}" - name: ETHEREUM_PORT diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index f638d329e64..3a5d2103f9a 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -17,6 +17,7 @@ spec: {{- include "aztec-network.selectorLabels" . | nindent 8 }} app: boot-node spec: + dnsPolicy: ClusterFirstWithHostNet {{- if .Values.network.public }} hostNetwork: true {{- include "aztec-network.publicAntiAffinity" . | nindent 6 }} @@ -32,17 +33,18 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses echo "Awaiting ethereum node at ${ETHEREUM_HOST}" until curl -s -X POST -H 'Content-Type: application/json' \ -d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \ ${ETHEREUM_HOST} | grep -q reth; do - echo "Waiting for Ethereum node..." + echo "Waiting for Ethereum node ${ETHEREUM_HOST}..." sleep 5 done echo "Ethereum node is ready!" {{- if .Values.telemetry.enabled }} - until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do - echo "Waiting for OpenTelemetry collector..." + until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do + echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." sleep 5 done echo "OpenTelemetry collector is ready!" 
@@ -70,6 +72,8 @@ spec: - name: scripts mountPath: /scripts env: + - name: TELEMETRY + value: "{{ .Values.telemetry.enabled }}" - name: INIT_VALIDATORS value: "true" - name: MNEMONIC @@ -152,12 +156,6 @@ spec: value: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} - - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }} - name: PROVER_REAL_PROOFS value: "{{ .Values.aztec.realProofs }}" - name: PXE_PROVER_ENABLED diff --git a/spartan/aztec-network/templates/deploy-l1-verifier.yaml b/spartan/aztec-network/templates/deploy-l1-verifier.yaml index 4da0eda4507..c3edb5d42ff 100644 --- a/spartan/aztec-network/templates/deploy-l1-verifier.yaml +++ b/spartan/aztec-network/templates/deploy-l1-verifier.yaml @@ -44,6 +44,7 @@ spec: chmod +x /tmp/setup-service-addresses.sh /tmp/setup-service-addresses.sh source /shared/config/service-addresses + cat /shared/config/service-addresses until curl -s -X GET "$BOOT_NODE_HOST/status"; do echo "Waiting for Aztec node $BOOT_NODE_HOST..." @@ -51,13 +52,21 @@ spec: done echo "Boot node is ready!" 
- export ROLLUP_CONTRACT_ADDRESS=$(curl -X POST -H 'Content-Type: application/json' \ + l1_contracts=$(curl -X POST -H 'Content-Type: application/json' \ -d '{"jsonrpc":"2.0","method":"node_getL1ContractAddresses","params":[],"id":1}' \ - "$BOOT_NODE_HOST" \ - | jq -r '.result.rollupAddress.value') - + "$BOOT_NODE_HOST") + echo "L1 Contracts" + echo $l1_contracts + export ROLLUP_CONTRACT_ADDRESS=$(echo $l1_contracts | jq -r '.result.rollupAddress') + [ -z "$ROLLUP_CONTRACT_ADDRESS" ] && echo "Could not retrieve rollup address!" && exit 1 echo "Rollup contract address: $ROLLUP_CONTRACT_ADDRESS" - node /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-verifier --verifier real + MAX_RETRIES=5 + RETRY_DELAY=60 + for attempt in $(seq 1 $MAX_RETRIES); do + node /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-verifier --verifier real + echo "Attempt $attempt failed. Retrying in $RETRY_DELAY seconds..." + sleep "$RETRY_DELAY" + done || { echo "All L1 verifier deploy attempts failed." >&2; exit 1; } echo "L1 verifier deployed" env: - name: NODE_NO_WARNINGS diff --git a/spartan/aztec-network/templates/prover-agent.yaml b/spartan/aztec-network/templates/prover-agent.yaml index fee445c68aa..c27adb96eeb 100644 --- a/spartan/aztec-network/templates/prover-agent.yaml +++ b/spartan/aztec-network/templates/prover-agent.yaml @@ -34,6 +34,7 @@ spec: serviceAccountName: {{ include "aztec-network.fullname" . }}-node {{- if .Values.network.public }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet {{- end }} volumes: - name: config @@ -50,14 +51,15 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses until curl -s -X POST ${PROVER_BROKER_HOST}/status; do echo "Waiting for broker ${PROVER_BROKER_HOST} ..." sleep 5 done echo "Broker is ready!" {{- if .Values.telemetry.enabled }} - until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . 
}} > /dev/null; do - echo "Waiting for OpenTelemetry collector..." + until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do + echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." sleep 5 done echo "OpenTelemetry collector is ready!" @@ -95,12 +97,6 @@ spec: value: {{ join "," .Values.proverAgent.proofTypes | quote }} - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} - - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }} resources: {{- toYaml .Values.proverAgent.resources | nindent 12 }} {{- end }} diff --git a/spartan/aztec-network/templates/prover-broker.yaml b/spartan/aztec-network/templates/prover-broker.yaml index 1de2caa4fcd..850366997b2 100644 --- a/spartan/aztec-network/templates/prover-broker.yaml +++ b/spartan/aztec-network/templates/prover-broker.yaml @@ -20,6 +20,7 @@ spec: serviceAccountName: {{ include "aztec-network.fullname" . }}-node {{- if .Values.network.public }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet {{- end }} volumes: - name: config @@ -36,9 +37,10 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses {{- if .Values.telemetry.enabled }} - until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do - echo "Waiting for OpenTelemetry collector..." + until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do + echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." sleep 5 done echo "OpenTelemetry collector is ready!" 
@@ -76,15 +78,11 @@ spec: value: "{{ .Values.proverBroker.dataDirectory }}" - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} - - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }} resources: {{- toYaml .Values.proverBroker.resources | nindent 12 }} +{{- end }} --- +# Headless service for StatefulSet DNS entries apiVersion: v1 kind: Service metadata: @@ -99,4 +97,3 @@ spec: ports: - port: {{ .Values.proverBroker.service.nodePort }} name: node -{{ end }} diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index bfe9447570c..44984a2fb2a 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -19,6 +19,7 @@ spec: spec: {{- if .Values.network.public }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet {{- include "aztec-network.publicAntiAffinity" . | nindent 6 }} {{- end }} serviceAccountName: {{ include "aztec-network.fullname" . }}-node @@ -35,7 +36,7 @@ spec: until curl -s -X POST -H 'Content-Type: application/json' \ -d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \ ${ETHEREUM_HOST} | grep -q reth; do - echo "Waiting for Ethereum node..." + echo "Waiting for Ethereum node ${ETHEREUM_HOST}..." sleep 5 done echo "Ethereum node is ready!" @@ -51,8 +52,8 @@ spec: fi {{- if .Values.telemetry.enabled }} - until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . 
}} > /dev/null; do - echo "Waiting for OpenTelemetry collector..." + until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do + echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." sleep 5 done echo "OpenTelemetry collector is ready!" @@ -142,12 +143,6 @@ spec: value: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80" - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} - - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }} - name: L1_CHAIN_ID value: "{{ .Values.ethereum.chainId }}" - name: P2P_ENABLED diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index 553f6699e6d..d61df752190 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -19,6 +19,7 @@ spec: serviceAccountName: {{ include "aztec-network.fullname" . }}-node {{- if .Values.network.public }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet {{- end }} volumes: - name: config @@ -37,6 +38,7 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses until curl --head --silent ${BOOT_NODE_HOST}/status; do echo "Waiting for boot node..." 
sleep 5 @@ -68,6 +70,7 @@ spec: - "-c" - | source /shared/config/service-addresses + cat /shared/config/service-addresses {{- if .Values.network.public }} # If the network is public, we need to use the boot node URL export AZTEC_NODE_URL=${BOOT_NODE_HOST} diff --git a/spartan/aztec-network/templates/rbac.yaml b/spartan/aztec-network/templates/rbac.yaml index a0e8e68cd11..94f143f619e 100644 --- a/spartan/aztec-network/templates/rbac.yaml +++ b/spartan/aztec-network/templates/rbac.yaml @@ -55,4 +55,4 @@ roleRef: subjects: - kind: ServiceAccount name: {{ include "aztec-network.fullname" . }}-node - namespace: {{ .Release.Namespace }} + namespace: {{ .Release.Namespace }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/setup-l2-contracts.yaml b/spartan/aztec-network/templates/setup-l2-contracts.yaml index 2d4383423da..8afc65abb66 100644 --- a/spartan/aztec-network/templates/setup-l2-contracts.yaml +++ b/spartan/aztec-network/templates/setup-l2-contracts.yaml @@ -48,12 +48,13 @@ spec: chmod +x /tmp/setup-service-addresses.sh /tmp/setup-service-addresses.sh source /shared/config/service-addresses + cat /shared/config/service-addresses export AZTEC_NODE_URL=$BOOT_NODE_HOST export PXE_URL=$BOOT_NODE_HOST until curl -s -X POST -H 'content-type: application/json' \ -d '{"jsonrpc":"2.0","method":"pxe_getNodeInfo","params":[],"id":67}' \ $PXE_URL | grep -q '"enr:-'; do - echo "Waiting for PXE service..." + echo "Waiting for PXE service at ${PXE_URL}..." sleep 5 done echo "PXE service is ready!" 
@@ -61,6 +62,8 @@ spec: node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js setup-protocol-contracts --skipProofWait --l1-chain-id {{ .Values.ethereum.chainId }} echo "L2 contracts initialized" env: + - name: TELEMETRY + value: "{{ .Values.telemetry.enabled }}" - name: LOG_LEVEL value: "debug" - name: NETWORK_PUBLIC diff --git a/spartan/aztec-network/templates/transaction-bot.yaml b/spartan/aztec-network/templates/transaction-bot.yaml index 3981ad5d3f9..06c6ce7048b 100644 --- a/spartan/aztec-network/templates/transaction-bot.yaml +++ b/spartan/aztec-network/templates/transaction-bot.yaml @@ -38,6 +38,7 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses {{- if .Values.bot.nodeUrl }} export AZTEC_NODE_URL={{ .Values.bot.nodeUrl }} {{- else if .Values.network.public }} @@ -63,6 +64,7 @@ spec: - "-c" - | source /shared/config/service-addresses + cat /shared/config/service-addresses {{- if .Values.bot.nodeUrl }} export AZTEC_NODE_URL={{ .Values.bot.nodeUrl }} {{- else if .Values.network.public }} diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index 6f8aba191b2..1faa6823076 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -20,6 +20,7 @@ spec: spec: {{- if .Values.network.public }} hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet {{- include "aztec-network.publicAntiAffinity" . | nindent 6 }} {{- end }} serviceAccountName: {{ include "aztec-network.fullname" . }}-node @@ -33,18 +34,19 @@ spec: - -c - | source /shared/config/service-addresses + cat /shared/config/service-addresses # First check ethereum node until curl -s -X POST -H 'Content-Type: application/json' \ -d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \ $ETHEREUM_HOST | grep -q reth; do - echo "Waiting for Ethereum node..." + echo "Waiting for Ethereum node ${ETHEREUM_HOST}..." 
sleep 5 done echo "Ethereum node is ready!" {{- if .Values.telemetry.enabled }} - until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do - echo "Waiting for OpenTelemetry collector..." + until curl --head --silent $OTEL_COLLECTOR_ENDPOINT > /dev/null; do + echo "Waiting for OpenTelemetry collector $OTEL_COLLECTOR_ENDPOINT..." sleep 5 done echo "OpenTelemetry collector is ready!" @@ -165,12 +167,6 @@ spec: value: "{{ .Values.ethereum.chainId }}" - name: OTEL_RESOURCE_ATTRIBUTES value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }} - - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT - value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT - value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }} - - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT - value: {{ include "aztec-network.otelCollectorLogsEndpoint" . 
| quote }} - name: ETHEREUM_SLOT_DURATION value: "{{ .Values.ethereum.blockTime }}" - name: AZTEC_SLOT_DURATION diff --git a/spartan/aztec-network/values/1-validator-with-proving-and-metrics.yaml b/spartan/aztec-network/values/1-validator-with-proving-and-metrics.yaml index 95b7f0ac638..43814e98963 100644 --- a/spartan/aztec-network/values/1-validator-with-proving-and-metrics.yaml +++ b/spartan/aztec-network/values/1-validator-with-proving-and-metrics.yaml @@ -37,5 +37,4 @@ jobs: enable: true telemetry: - enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 + enabled: true \ No newline at end of file diff --git a/spartan/aztec-network/values/16-validators-with-metrics.yaml b/spartan/aztec-network/values/16-validators-with-metrics.yaml index 8bc8f2c115c..454ec8c2839 100644 --- a/spartan/aztec-network/values/16-validators-with-metrics.yaml +++ b/spartan/aztec-network/values/16-validators-with-metrics.yaml @@ -6,7 +6,6 @@ # (then `./forward.sh` if you want to see it) telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 16 diff --git a/spartan/aztec-network/values/3-validators-with-metrics.yaml b/spartan/aztec-network/values/3-validators-with-metrics.yaml index b20b34b5194..c3a57e25228 100644 --- a/spartan/aztec-network/values/3-validators-with-metrics.yaml +++ b/spartan/aztec-network/values/3-validators-with-metrics.yaml @@ -6,7 +6,6 @@ # (then `./forward.sh` if you want to see it) telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 3 diff --git a/spartan/aztec-network/values/4-validators-with-metrics.yaml b/spartan/aztec-network/values/4-validators-with-metrics.yaml index 47387cd89c1..6f59aa62708 100644 --- a/spartan/aztec-network/values/4-validators-with-metrics.yaml +++ b/spartan/aztec-network/values/4-validators-with-metrics.yaml @@ -6,7 +6,6 @@ # (then `./forward.sh` if you want to see 
it) telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 4 diff --git a/spartan/aztec-network/values/48-validators.yaml b/spartan/aztec-network/values/48-validators.yaml index 31d48095681..4659655e4d2 100644 --- a/spartan/aztec-network/values/48-validators.yaml +++ b/spartan/aztec-network/values/48-validators.yaml @@ -6,7 +6,6 @@ # (then `./forward.sh` if you want to see it) telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 48 diff --git a/spartan/aztec-network/values/gcp-proving-test.yaml b/spartan/aztec-network/values/gcp-proving-test.yaml index 765f1a2ade3..546ffc61f4c 100644 --- a/spartan/aztec-network/values/gcp-proving-test.yaml +++ b/spartan/aztec-network/values/gcp-proving-test.yaml @@ -1,6 +1,5 @@ telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 1 diff --git a/spartan/aztec-network/values/multicloud-demo.yaml b/spartan/aztec-network/values/multicloud-demo.yaml index 2c4ea379e6e..f408059d69e 100644 --- a/spartan/aztec-network/values/multicloud-demo.yaml +++ b/spartan/aztec-network/values/multicloud-demo.yaml @@ -2,7 +2,6 @@ telemetry: enabled: false - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: replicas: 1 diff --git a/spartan/aztec-network/values/prover-node-with-agents.yaml b/spartan/aztec-network/values/prover-node-with-agents.yaml index c5dbfa010de..2f1e1454325 100644 --- a/spartan/aztec-network/values/prover-node-with-agents.yaml +++ b/spartan/aztec-network/values/prover-node-with-agents.yaml @@ -6,7 +6,6 @@ # (then `./forward.sh` if you want to see it) telemetry: enabled: true - otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318 validator: external: true diff --git a/spartan/aztec-network/values/release-devnet.yaml b/spartan/aztec-network/values/release-devnet.yaml new file 
mode 100644 index 00000000000..485e6462aeb --- /dev/null +++ b/spartan/aztec-network/values/release-devnet.yaml @@ -0,0 +1,50 @@ +########## +# BEWARE # +########## +# You need to deploy the metrics helm chart before using this values file. +# head to spartan/metrics and run `./install.sh` +# (then `./forward.sh` if you want to see it) +telemetry: + enabled: true + +validator: + replicas: 1 + validatorKeys: + - 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 + validatorAddresses: + - 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266 + validator: + disabled: false + +bootNode: + validator: + disabled: true + +# use small provers to produce fake proofs +proverAgent: + replicas: 1 + resources: + requests: + memory: "4Gi" + cpu: "1" + +bot: + followChain: "PENDING" + enabled: true + txIntervalSeconds: 200 + +network: + public: true + +images: + aztec: + pullPolicy: Always + +aztec: + slotDuration: 36 + epochDuration: 32 + realProofs: false # devnet does not use real proofs + +jobs: + deployL1Verifier: + enable: false diff --git a/spartan/scripts/deploy_spartan.sh b/spartan/scripts/deploy_spartan.sh index 96a8ef2c68d..16bb8c76628 100755 --- a/spartan/scripts/deploy_spartan.sh +++ b/spartan/scripts/deploy_spartan.sh @@ -8,7 +8,7 @@ NAMESPACE=${3:-spartan} PROD=${4:-true} PROD_ARGS="" if [ "$PROD" = "true" ] ; then - PROD_ARGS="--set network.public=true --set telemetry.enabled=true --set telemetry.otelCollectorEndpoint=http://metrics-opentelemetry-collector.metrics:4318" + PROD_ARGS="--set network.public=true" fi SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -69,14 +69,12 @@ function upgrade() { if ! upgrade | tee "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then if grep 'cannot patch "'$NAMESPACE'-aztec-network-setup-l2-contracts"' "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then kubectl delete job $NAMESPACE-aztec-network-setup-l2-contracts -n $NAMESPACE - upgrade fi -fi -if ! 
upgrade | tee "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then if grep 'cannot patch "'$NAMESPACE'-aztec-network-deploy-l1-verifier"' "$SCRIPT_DIR/logs/$NAMESPACE-helm.log" ; then kubectl delete job $NAMESPACE-aztec-network-deploy-l1-verifier -n $NAMESPACE - upgrade fi + + upgrade fi diff --git a/spartan/scripts/get_service_address b/spartan/scripts/get_service_address new file mode 100755 index 00000000000..3f3634faaef --- /dev/null +++ b/spartan/scripts/get_service_address @@ -0,0 +1,47 @@ +set -eu +SERVICE_LABEL=$1 +PORT=$2 +MAX_RETRIES=30 +RETRY_INTERVAL=2 +attempt=1 + +# Get pod name +while [ $attempt -le $MAX_RETRIES ]; do + POD_NAME=$(kubectl get pods -n ${NAMESPACE} -l app=${SERVICE_LABEL} -o jsonpath='{.items[0].metadata.name}') + if [ -n "$POD_NAME" ]; then + break + fi + echo "Attempt $attempt: Waiting for ${SERVICE_LABEL} pod to be available..." >&2 + sleep $RETRY_INTERVAL + attempt=$((attempt + 1)) +done + +if [ -z "$POD_NAME" ]; then + echo "Error: Failed to get ${SERVICE_LABEL} pod name after $MAX_RETRIES attempts" >&2 + return 1 +fi +echo "Pod name: [${POD_NAME}]" >&2 + +# Get node name +attempt=1 +NODE_NAME="" +while [ $attempt -le $MAX_RETRIES ]; do + NODE_NAME=$(kubectl get pod ${POD_NAME} -n ${NAMESPACE} -o jsonpath='{.spec.nodeName}') + if [ -n "$NODE_NAME" ]; then + break + fi + echo "Attempt $attempt: Waiting for node name to be available..." 
>&2 + sleep $RETRY_INTERVAL + attempt=$((attempt + 1)) +done + +if [ -z "$NODE_NAME" ]; then + echo "Error: Failed to get node name after $MAX_RETRIES attempts" >&2 + return 1 +fi +echo "Node name: ${NODE_NAME}" >&2 + +# Get the node's external IP +NODE_IP=$(kubectl get node ${NODE_NAME} -o jsonpath='{.status.addresses[?(@.type=="ExternalIP")].address}') +echo "Node IP: ${NODE_IP}" >&2 +echo "http://${NODE_IP}:${PORT}" \ No newline at end of file diff --git a/spartan/scripts/post_deploy_spartan.sh b/spartan/scripts/post_deploy_spartan.sh index bcf66bff49b..e268174f49c 100755 --- a/spartan/scripts/post_deploy_spartan.sh +++ b/spartan/scripts/post_deploy_spartan.sh @@ -5,7 +5,7 @@ set -o pipefail echo "Bootstrapping network with test contracts" -NAMESPACE=${1:-spartan} +export NAMESPACE=${1:-spartan} TAG=${2:-latest} SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -22,10 +22,11 @@ function get_load_balancer_url() { kubectl get svc -n $namespace -o jsonpath="{.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}" } + # Fetch the service URLs based on the namespace for injection in the test-transfer.sh -export BOOTNODE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-boot-node-lb-tcp"):8080 -export PXE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-pxe-lb"):8080 -export ETHEREUM_HOST=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-ethereum-lb"):8545 +export BOOTNODE_URL=$($(dirname $0)/get_service_address boot-node 8080) +export PXE_URL=$($(dirname $0)/get_service_address pxe 8080) +export ETHEREUM_HOST=$($(dirname $0)/get_service_address ethereum 8545) echo "BOOTNODE_URL: $BOOTNODE_URL" echo "PXE_URL: $PXE_URL" @@ -36,6 +37,6 @@ echo "Bootstrapping contracts for test network. 
NOTE: This took one hour last ru docker run aztecprotocol/aztec:$TAG bootstrap-network \ --rpc-url $BOOTNODE_URL \ --l1-rpc-url $ETHEREUM_HOST \ - --l1-chain-id 31337 \ + --l1-chain-id 1337 \ --l1-private-key 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 \ --json | tee ./basic_contracts.json