From 30de3bccdc14fc76df5391631a4c5b9ad59bde3d Mon Sep 17 00:00:00 2001
From: Alex Gherghisan
Date: Tue, 26 Nov 2024 11:45:22 +0000
Subject: [PATCH] fix: k8s

---
Reviewer notes (this section is ignored when the patch is applied):

* Summary of the visible changes: the setup container gets PROVER_BROKER_PORT;
  the prover agent is pinned to PROVER_AGENT_COUNT=1 and gains poll-interval
  and proof-type settings; the prover broker gains poll/timeout/retry/data-dir
  settings plus a ClusterIP Service; the prover node only waits for an external
  broker when PROVER_BROKER_ENABLED is "false"; the default broker job timeout
  in prover-broker.ts drops from 60_000 to 30_000 to match values.yaml.
* prover-node.yaml: the broker check uses `=` rather than `==` inside `[ ]`,
  because `=` is the string comparison that POSIX `test` defines.
* values.yaml: proverAgent.proofTypes is `[]` instead of the placeholder list
  "foo", "bar", "baz", matching proverNode.proverAgent.proofTypes in this patch.
* NOTE(review): the line breaks and leading whitespace of the hunks below were
  reconstructed (guided by the `nindent` arguments in the templates). Confirm
  the context lines against the target tree, or apply with a
  whitespace-tolerant option.
* NOTE(review): the author e-mail on the From: line and the generic argument
  after `ConfigMappingsType` in the last hunk header look stripped - confirm
  before using this with `git am`.

 spartan/aztec-network/templates/_helpers.tpl  |  2 ++
 .../aztec-network/templates/prover-agent.yaml |  8 +++--
 .../templates/prover-broker.yaml              | 25 ++++++++++++-
 .../aztec-network/templates/prover-node.yaml  | 36 +++++++++++++++----
 spartan/aztec-network/values.yaml             | 18 ++++++++--
 .../src/interfaces/prover-broker.ts           |  2 +-
 6 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/spartan/aztec-network/templates/_helpers.tpl b/spartan/aztec-network/templates/_helpers.tpl
index 8afb0c4636d7..3db484690a06 100644
--- a/spartan/aztec-network/templates/_helpers.tpl
+++ b/spartan/aztec-network/templates/_helpers.tpl
@@ -165,6 +165,8 @@ Service Address Setup Container
       value: "{{ .Values.proverNode.externalHost }}"
     - name: PROVER_NODE_PORT
       value: "{{ .Values.proverNode.service.nodePort }}"
+    - name: PROVER_BROKER_PORT
+      value: "{{ .Values.proverBroker.service.nodePort }}"
     - name: SERVICE_NAME
       value: {{ include "aztec-network.fullname" . }}
   volumeMounts:
diff --git a/spartan/aztec-network/templates/prover-agent.yaml b/spartan/aztec-network/templates/prover-agent.yaml
index 8d56eea21add..1367340c7054 100644
--- a/spartan/aztec-network/templates/prover-agent.yaml
+++ b/spartan/aztec-network/templates/prover-agent.yaml
@@ -91,9 +91,11 @@ spec:
             - name: PROVER_REAL_PROOFS
               value: "{{ .Values.proverAgent.realProofs }}"
             - name: PROVER_AGENT_COUNT
-              value: {{ .Values.proverAgent.concurrency | quote }}
-            - name: HARDWARE_CONCURRENCY
-              value: {{ .Values.proverAgent.bb.hardwareConcurrency | quote }}
+              value: "1"
+            - name: PROVER_AGENT_POLL_INTERVAL_MS
+              value: "{{ .Values.proverAgent.pollIntervalMs }}"
+            - name: PROVER_AGENT_PROOF_TYPES
+              value: {{ join "," .Values.proverAgent.proofTypes | quote }}
             - name: OTEL_RESOURCE_ATTRIBUTES
               value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
             - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
diff --git a/spartan/aztec-network/templates/prover-broker.yaml b/spartan/aztec-network/templates/prover-broker.yaml
index e23c213d5535..214b6720fcef 100644
--- a/spartan/aztec-network/templates/prover-broker.yaml
+++ b/spartan/aztec-network/templates/prover-broker.yaml
@@ -68,6 +68,14 @@ spec:
               value: "1"
             - name: DEBUG
               value: "{{ .Values.proverBroker.debug }}"
+            - name: PROVER_BROKER_POLL_INTERVAL_MS
+              value: "{{ .Values.proverBroker.pollIntervalMs }}"
+            - name: PROVER_BROKER_JOB_TIMEOUT_MS
+              value: "{{ .Values.proverBroker.jobTimeoutMs }}"
+            - name: PROVER_BROKER_JOB_MAX_RETRIES
+              value: "{{ .Values.proverBroker.jobMaxRetries }}"
+            - name: PROVER_BROKER_DATA_DIRECTORY
+              value: "{{ .Values.proverBroker.dataDirectory }}"
             - name: OTEL_RESOURCE_ATTRIBUTES
               value: service.name={{ .Release.Name }},service.namespace={{ .Release.Namespace }},service.version={{ .Chart.AppVersion }},environment={{ .Values.environment | default "production" }}
             - name: OTEL_EXPORTER_OTLP_METRICS_ENDPOINT
@@ -78,4 +86,19 @@ spec:
               value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
           resources:
             {{- toYaml .Values.proverBroker.resources | nindent 12 }}
-{{- end }}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "aztec-network.fullname" . }}-prover-broker
+  labels:
+    {{- include "aztec-network.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  selector:
+    {{- include "aztec-network.selectorLabels" . | nindent 4 }}
+    app: prover-broker
+  ports:
+    - port: {{ .Values.proverBroker.service.nodePort }}
+      name: node
+{{ end }}
diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml
index 923644d7b730..4a3ab02bf19d 100644
--- a/spartan/aztec-network/templates/prover-node.yaml
+++ b/spartan/aztec-network/templates/prover-node.yaml
@@ -40,11 +40,15 @@ spec:
               done
               echo "Ethereum node is ready!"
 
-              until curl -s -X POST ${PROVER_BROKER_HOST}/status; do
-                echo "Waiting for broker ${PROVER_BROKER_HOST} ..."
-                sleep 5
-              done
-              echo "Broker is ready!"
+              if [ "${PROVER_BROKER_ENABLED}" = "false" ]; then
+                until curl -s -X POST ${PROVER_BROKER_HOST}/status; do
+                  echo "Waiting for broker ${PROVER_BROKER_HOST} ..."
+                  sleep 5
+                done
+                echo "Broker is ready!"
+              else
+                echo "Using built-in job broker"
+              fi
 
               {{- if .Values.telemetry.enabled }}
               until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
@@ -61,6 +65,10 @@ spec:
           volumeMounts:
             - name: config
               mountPath: /shared/config
+          env:
+            - name: PROVER_BROKER_ENABLED
+              value: "{{ .Values.proverNode.proverBroker.enabled }}"
+
         - name: configure-prover-env
           image: "{{ .Values.images.aztec.image }}"
           imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
@@ -114,8 +122,22 @@ spec:
               value: "{{ .Values.proverNode.debug }}"
             - name: PROVER_REAL_PROOFS
               value: "{{ .Values.proverNode.realProofs }}"
-            - name: PROVER_AGENT_ENABLED
-              value: "{{ .Values.proverNode.proverAgentEnabled }}"
+            - name: PROVER_AGENT_COUNT
+              value: "{{ .Values.proverNode.proverAgent.count }}"
+            - name: PROVER_AGENT_POLL_INTERVAL_MS
+              value: "{{ .Values.proverNode.proverAgent.pollIntervalMs }}"
+            - name: PROVER_AGENT_PROOF_TYPES
+              value: {{ join "," .Values.proverNode.proverAgent.proofTypes | quote }}
+            - name: PROVER_BROKER_ENABLED
+              value: "{{ .Values.proverNode.proverBroker.enabled }}"
+            - name: PROVER_BROKER_POLL_INTERVAL_MS
+              value: "{{ .Values.proverNode.proverBroker.pollIntervalMs }}"
+            - name: PROVER_BROKER_JOB_TIMEOUT_MS
+              value: "{{ .Values.proverNode.proverBroker.jobTimeoutMs }}"
+            - name: PROVER_BROKER_JOB_MAX_RETRIES
+              value: "{{ .Values.proverNode.proverBroker.jobMaxRetries }}"
+            - name: PROVER_BROKER_DATA_DIRECTORY
+              value: "{{ .Values.proverNode.proverBroker.dataDirectory }}"
             - name: PROVER_PUBLISHER_PRIVATE_KEY
               value: "0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
             - name: OTEL_RESOURCE_ATTRIBUTES
diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml
index 3bd3812d4665..245b51f9435e 100644
--- a/spartan/aztec-network/values.yaml
+++ b/spartan/aztec-network/values.yaml
@@ -118,7 +118,16 @@ proverNode:
   logLevel: "debug"
   debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*"
   realProofs: false
-  proverAgentEnabled: false
+  proverAgent:
+    count: 0
+    pollIntervalMs: 1000
+    proofTypes: []
+  proverBroker:
+    enabled: false
+    jobTimeoutMs: 30000
+    pollIntervalMs: 1000
+    jobMaxRetries: 3
+    dataDirectory: ""
   resources:
     requests:
       memory: "2Gi"
@@ -206,12 +215,13 @@ proverAgent:
     nodePort: 8083
   enabled: true
   replicas: 1
+  pollIntervalMs: 1000
+  proofTypes: []
   gke:
     spotEnabled: false
   logLevel: "debug"
   debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*"
   realProofs: false
-  concurrency: 1
   bb:
     hardwareConcurrency: ""
   nodeSelector: {}
@@ -222,6 +232,10 @@ proverBroker:
     nodePort: 8084
   enabled: true
   replicas: 1
+  jobTimeoutMs: 30000
+  pollIntervalMs: 1000
+  jobMaxRetries: 3
+  dataDirectory: ""
   logLevel: "debug"
   debug: "aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:world-state:database,-aztec:l2_block_stream*"
   nodeSelector: {}
diff --git a/yarn-project/circuit-types/src/interfaces/prover-broker.ts b/yarn-project/circuit-types/src/interfaces/prover-broker.ts
index a81ec6316adf..06457fdb1e17 100644
--- a/yarn-project/circuit-types/src/interfaces/prover-broker.ts
+++ b/yarn-project/circuit-types/src/interfaces/prover-broker.ts
@@ -34,7 +34,7 @@ export const proverBrokerConfigMappings: ConfigMappingsType
   proverBrokerJobTimeoutMs: {
     env: 'PROVER_BROKER_JOB_TIMEOUT_MS',
     description: 'Jobs are retried if not kept alive for this long',
-    ...numberConfigHelper(60_000),
+    ...numberConfigHelper(30_000),
   },
   proverBrokerPollIntervalMs: {
     env: 'PROVER_BROKER_POLL_INTERVAL_MS',