Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: new proving broker #10174

Merged
merged 9 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 25 additions & 6 deletions docker-compose.provernet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ services:
ARCHIVER_POLLING_INTERVAL_MS: 1000
ARCHIVER_VIEM_POLLING_INTERVAL_MS: 1000
PROVER_VIEM_POLLING_INTERVAL_MS: 1000
PROVER_AGENT_ENABLED: false
PROVER_AGENT_COUNT: 0
PROVER_BROKER_HOST: http://aztec-prover-broker
PROVER_PUBLISHER_PRIVATE_KEY: "0xdbda1821b80551c9d65939329250298aa3472ba22feea921c0cf5d620ea67b97"
PROVER_REAL_PROOFS: "${PROVER_REAL_PROOFS:-false}"
PROVER_MINIMUM_ESCROW_AMOUNT: 1000000000
Expand All @@ -76,6 +77,8 @@ services:
depends_on:
aztec-node:
condition: service_healthy
aztec-prover-broker:
condition: service_healthy
healthcheck:
test: [ "CMD", "curl", "-fSs", "http://127.0.0.1:80/status" ]
interval: 3s
Expand All @@ -84,6 +87,21 @@ services:
command: [ "start", "--prover-node", "--archiver" ]
restart: on-failure:5

# Standalone proving-job broker service. Other services in this file reach it
# through PROVER_BROKER_HOST=http://aztec-prover-broker and wait for it to
# report healthy before starting.
aztec-prover-broker:
  image: "aztecprotocol/${IMAGE:-aztec:master}"
  ports:
    # Host port 8084 maps to container port 80 (the AZTEC_PORT set below).
    - "8084:80"
  environment:
    LOG_LEVEL: verbose
    AZTEC_PORT: 80
  healthcheck:
    # Healthy once the /status endpoint answers on the container-local port.
    test:
      - "CMD"
      - "curl"
      - "-fSs"
      - "http://127.0.0.1:80/status"
    interval: 3s
    timeout: 30s
    start_period: 120s
  command:
    - "start"
    - "--prover-broker"
  restart: on-failure:5

# Prover agent that connects to the prover broker (PROVER_BROKER_HOST) for fetching proving jobs and executing them
# Multiple instances can be run, or PROVER_AGENT_CONCURRENCY can be increased to run multiple workers in a single instance
aztec-prover-agent:
Expand All @@ -93,23 +111,24 @@ services:
environment:
LOG_LEVEL: verbose
ETHEREUM_HOST: http://ethereum:8545
AZTEC_NODE_URL: http://aztec-prover # Deprecated, use PROVER_JOB_SOURCE_URL
PROVER_JOB_SOURCE_URL: http://aztec-prover
PROVER_BROKER_HOST: http://aztec-prover-broker
L1_CHAIN_ID: 31337
AZTEC_PORT: 80
PROVER_REAL_PROOFS: "${PROVER_REAL_PROOFS:-false}"
PROVER_TEST_DELAY_MS: "${PROVER_TEST_DELAY_MS:-0}"
PROVER_AGENT_CONCURRENCY: 2
BB_SKIP_CLEANUP: "${BB_SKIP_CLEANUP:-0}" # Persist tmp dirs for debugging
PROVER_ID: "${PROVER_ID:-0x01}"
volumes:
- ./log/aztec-prover-agent/:/usr/src/yarn-project/aztec/log:rw
- ./cache/bb-crs/:/root/.bb-crs:rw
- ./workdir/bb-prover/:/usr/src/yarn-project/bb:rw
depends_on:
aztec-prover:
aztec-prover-broker:
condition: service_healthy
command: [ "start", "--prover" ]
command: [ "start", "--prover-agent" ]
deploy:
mode: replicated
replicas: 2
restart: on-failure:5
healthcheck:
test: [ "CMD", "curl", "-fSs", "http://127.0.0.1:80/status" ]
Expand Down
9 changes: 9 additions & 0 deletions spartan/aztec-network/files/config/setup-service-addresses.sh
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,19 @@ else
PROVER_NODE_ADDR="http://${SERVICE_NAME}-prover-node.${NAMESPACE}:${PROVER_NODE_PORT}"
fi

# Resolve the prover broker address, in order of precedence:
#   1. PROVER_BROKER_EXTERNAL_HOST, when it is non-empty;
#   2. whatever get_service_address returns, when NETWORK_PUBLIC is "true";
#   3. otherwise http://<SERVICE_NAME>-prover-broker.<NAMESPACE>:<PROVER_BROKER_PORT>.
if [ -n "${PROVER_BROKER_EXTERNAL_HOST}" ]; then
  PROVER_BROKER_ADDR="${PROVER_BROKER_EXTERNAL_HOST}"
elif [ "${NETWORK_PUBLIC}" = "true" ]; then
  PROVER_BROKER_ADDR=$(get_service_address "prover-broker" "${PROVER_BROKER_PORT}")
else
  PROVER_BROKER_ADDR="http://${SERVICE_NAME}-prover-broker.${NAMESPACE}:${PROVER_BROKER_PORT}"
fi


# Write addresses to file for sourcing
echo "export ETHEREUM_HOST=${ETHEREUM_ADDR}" >> /shared/config/service-addresses
echo "export BOOT_NODE_HOST=${BOOT_NODE_ADDR}" >> /shared/config/service-addresses
echo "export PROVER_NODE_HOST=${PROVER_NODE_ADDR}" >> /shared/config/service-addresses
echo "export PROVER_BROKER_HOST=${PROVER_BROKER_ADDR}" >> /shared/config/service-addresses
echo "Addresses configured:"
cat /shared/config/service-addresses
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ Service Address Setup Container
value: "{{ .Values.proverNode.externalHost }}"
- name: PROVER_NODE_PORT
value: "{{ .Values.proverNode.service.nodePort }}"
- name: PROVER_BROKER_PORT
value: "{{ .Values.proverBroker.service.nodePort }}"
- name: SERVICE_NAME
value: {{ include "aztec-network.fullname" . }}
volumeMounts:
Expand Down
21 changes: 10 additions & 11 deletions spartan/aztec-network/templates/prover-agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ spec:
- -c
- |
source /shared/config/service-addresses
until curl -s -X POST ${PROVER_NODE_HOST}/status; do
echo "Waiting for Prover node ${PROVER_NODE_HOST} ..."
until curl -s -X POST ${PROVER_BROKER_HOST}/status; do
echo "Waiting for broker ${PROVER_BROKER_HOST} ..."
sleep 5
done
echo "Prover node is ready!"
echo "Broker is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
Expand All @@ -77,8 +77,7 @@ spec:
- "-c"
- |
source /shared/config/service-addresses && \
PROVER_JOB_SOURCE_URL=${PROVER_NODE_HOST} \
node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --prover
node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --prover-agent
env:
- name: AZTEC_PORT
value: "{{ .Values.proverAgent.service.nodePort }}"
Expand All @@ -90,12 +89,12 @@ spec:
value: "{{ .Values.proverAgent.debug }}"
- name: PROVER_REAL_PROOFS
value: "{{ .Values.proverAgent.realProofs }}"
- name: PROVER_AGENT_ENABLED
value: "true"
- name: PROVER_AGENT_CONCURRENCY
value: {{ .Values.proverAgent.concurrency | quote }}
- name: HARDWARE_CONCURRENCY
value: {{ .Values.proverAgent.bb.hardwareConcurrency | quote }}
- name: PROVER_AGENT_COUNT
value: "1"
- name: PROVER_AGENT_POLL_INTERVAL_MS
value: "{{ .Values.proverAgent.pollIntervalMs }}"
- name: PROVER_AGENT_PROOF_TYPES
value: {{ join "," .Values.proverAgent.proofTypes | quote }}
- name: OTEL_RESOURCE_ATTRIBUTES
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
- name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
Expand Down
104 changes: 104 additions & 0 deletions spartan/aztec-network/templates/prover-broker.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{{- /*
Prover broker: a ReplicaSet running `start --prover-broker` plus a ClusterIP
Service in front of it. Both are rendered only when proverBroker.enabled is set.
*/ -}}
{{- if .Values.proverBroker.enabled }}
apiVersion: apps/v1
kind: ReplicaSet
metadata:
  name: {{ include "aztec-network.fullname" . }}-prover-broker
  labels:
    {{- include "aztec-network.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.proverBroker.replicas }}
  selector:
    matchLabels:
      {{- include "aztec-network.selectorLabels" . | nindent 6 }}
      app: prover-broker
  template:
    metadata:
      labels:
        {{- include "aztec-network.selectorLabels" . | nindent 8 }}
        app: prover-broker
    spec:
      # NOTE(review): values.yaml defines proverBroker.nodeSelector, but this
      # template never reads it — confirm whether pod scheduling should honour it.
      serviceAccountName: {{ include "aztec-network.fullname" . }}-node
      {{- if .Values.network.public }}
      hostNetwork: true
      {{- end }}
      volumes:
        - name: config
          emptyDir: {}
        - name: scripts
          configMap:
            name: {{ include "aztec-network.fullname" . }}-scripts
      initContainers:
        {{- include "aztec-network.serviceAddressSetupContainer" . | nindent 8 }}
        # Blocks start-up until the OpenTelemetry collector answers (only when
        # telemetry is enabled). It does not wait for the prover node, so it is
        # named for what it actually does.
        - name: wait-for-dependencies
          image: "{{ .Values.images.aztec.image }}"
          command:
            - /bin/bash
            - -c
            - |
              source /shared/config/service-addresses
              {{- if .Values.telemetry.enabled }}
              until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
                echo "Waiting for OpenTelemetry collector..."
                sleep 5
              done
              echo "OpenTelemetry collector is ready!"
              {{- end }}
          volumeMounts:
            - name: config
              mountPath: /shared/config
      containers:
        - name: prover-broker
          image: "{{ .Values.images.aztec.image }}"
          imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
          volumeMounts:
            - name: config
              mountPath: /shared/config
          command:
            - "/bin/bash"
            - "-c"
            - |
              source /shared/config/service-addresses && \
              node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --prover-broker
          env:
            # The broker listens on the same port the Service below exposes.
            - name: AZTEC_PORT
              value: "{{ .Values.proverBroker.service.nodePort }}"
            - name: LOG_LEVEL
              value: "{{ .Values.proverBroker.logLevel }}"
            - name: LOG_JSON
              value: "1"
            - name: DEBUG
              value: "{{ .Values.proverBroker.debug }}"
            - name: PROVER_BROKER_POLL_INTERVAL_MS
              value: "{{ .Values.proverBroker.pollIntervalMs }}"
            - name: PROVER_BROKER_JOB_TIMEOUT_MS
              value: "{{ .Values.proverBroker.jobTimeoutMs }}"
            - name: PROVER_BROKER_JOB_MAX_RETRIES
              value: "{{ .Values.proverBroker.jobMaxRetries }}"
            - name: PROVER_BROKER_DATA_DIRECTORY
              value: "{{ .Values.proverBroker.dataDirectory }}"
            - name: OTEL_RESOURCE_ATTRIBUTES
              value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
            - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
              value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
            - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
              value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }}
            - name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
              value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
          resources:
            {{- toYaml .Values.proverBroker.resources | nindent 12 }}
---
# Cluster-internal Service selecting the broker pods by the `app: prover-broker` label.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "aztec-network.fullname" . }}-prover-broker
  labels:
    {{- include "aztec-network.labels" . | nindent 4 }}
spec:
  type: ClusterIP
  selector:
    {{- include "aztec-network.selectorLabels" . | nindent 4 }}
    app: prover-broker
  ports:
    - port: {{ .Values.proverBroker.service.nodePort }}
      name: node
{{- end }}
36 changes: 31 additions & 5 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,17 @@ spec:
sleep 5
done
echo "Ethereum node is ready!"

if [ "${PROVER_BROKER_ENABLED}" == "false" ]; then
until curl -s -X POST ${PROVER_BROKER_HOST}/status; do
echo "Waiting for broker ${PROVER_BROKER_HOST} ..."
sleep 5
done
echo "Broker is ready!"
else
echo "Using built-in job broker"
fi

{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
Expand All @@ -54,6 +65,10 @@ spec:
volumeMounts:
- name: config
mountPath: /shared/config
env:
- name: PROVER_BROKER_ENABLED
value: "{{ .Values.proverNode.proverBroker.enabled }}"

- name: configure-prover-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand Down Expand Up @@ -107,15 +122,26 @@ spec:
value: "{{ .Values.proverNode.debug }}"
- name: PROVER_REAL_PROOFS
value: "{{ .Values.proverNode.realProofs }}"
- name: PROVER_AGENT_ENABLED
value: "{{ .Values.proverNode.proverAgentEnabled }}"
- name: PROVER_AGENT_COUNT
value: "{{ .Values.proverNode.proverAgent.count }}"
- name: PROVER_AGENT_POLL_INTERVAL_MS
value: "{{ .Values.proverNode.proverAgent.pollIntervalMs }}"
- name: PROVER_AGENT_PROOF_TYPES
value: {{ join "," .Values.proverNode.proverAgent.proofTypes | quote }}
- name: PROVER_BROKER_ENABLED
value: "{{ .Values.proverNode.proverBroker.enabled }}"
- name: PROVER_BROKER_POLL_INTERVAL_MS
value: "{{ .Values.proverNode.proverBroker.pollIntervalMs }}"
- name: PROVER_BROKER_JOB_TIMEOUT_MS
value: "{{ .Values.proverNode.proverBroker.jobTimeoutMs }}"
- name: PROVER_BROKER_JOB_MAX_RETRIES
value: "{{ .Values.proverNode.proverBroker.jobMaxRetries }}"
- name: PROVER_BROKER_DATA_DIRECTORY
value: "{{ .Values.proverNode.proverBroker.dataDirectory }}"
- name: PROVER_PUBLISHER_PRIVATE_KEY
value: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
- name: OTEL_RESOURCE_ATTRIBUTES
value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
# get private proofs from the boot node
- name: PROVER_JOB_SOURCE_URL
value: "http://$(POD_IP):{{ .Values.proverNode.service.nodePort }}"
- name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
Expand Down
28 changes: 26 additions & 2 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,16 @@ proverNode:
logLevel: "debug"
debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*"
realProofs: false
proverAgentEnabled: false
proverAgent:
count: 0
pollIntervalMs: 1000
proofTypes: []
proverBroker:
enabled: false
jobTimeoutMs: 30000
pollIntervalMs: 1000
jobMaxRetries: 3
dataDirectory: ""
resources:
requests:
memory: "2Gi"
Expand Down Expand Up @@ -206,17 +215,32 @@ proverAgent:
nodePort: 8083
enabled: true
replicas: 1
pollIntervalMs: 1000
proofTypes: ["foo", "bar", "baz"]
gke:
spotEnabled: false
logLevel: "debug"
debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*"
realProofs: false
concurrency: 1
bb:
hardwareConcurrency: ""
nodeSelector: {}
resources: {}

# Settings consumed by templates/prover-broker.yaml.
proverBroker:
  service:
    # Passed to the broker as AZTEC_PORT and used as the Service port.
    nodePort: 8084
  # Gates rendering of the broker ReplicaSet and Service.
  enabled: true
  replicas: 1
  # The next four values are passed through as PROVER_BROKER_JOB_TIMEOUT_MS,
  # PROVER_BROKER_POLL_INTERVAL_MS, PROVER_BROKER_JOB_MAX_RETRIES and
  # PROVER_BROKER_DATA_DIRECTORY respectively.
  jobTimeoutMs: 30000
  pollIntervalMs: 1000
  jobMaxRetries: 3
  dataDirectory: ''
  # Exported to the container as LOG_LEVEL and DEBUG.
  logLevel: 'debug'
  debug: 'aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*'
  nodeSelector: {}
  resources: {}

jobs:
deployL1Verifier:
enable: false
1 change: 1 addition & 0 deletions yarn-project/aztec-node/src/aztec-node/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ export class AztecNodeService implements AztecNode {
// now create the merkle trees and the world state synchronizer
const worldStateSynchronizer = await createWorldStateSynchronizer(config, archiver, telemetry);
const proofVerifier = config.realProofs ? await BBCircuitVerifier.new(config) : new TestCircuitVerifier();
log.info(`Aztec node accepting ${config.realProofs ? 'real' : 'test'} proofs`);

// create the tx pool and the p2p client, which will need the l2 block source
const p2pClient = await createP2PClient(config, archiver, proofVerifier, worldStateSynchronizer, telemetry);
Expand Down
Loading
Loading