From c70b45d6fcbc1d9a9db42351122135bf0bd291e9 Mon Sep 17 00:00:00 2001 From: "Steven Platt, PhD" <31355889+stevenplatt@users.noreply.github.com> Date: Thu, 26 Sep 2024 12:49:55 -0400 Subject: [PATCH] feat: Adding CPU / RAM configurations to helm network deployments (#8786) # Change 1: CPU/RAM Limits for node deployments This PR assigns resource configurations to nodes that are part of helm network deployments. Adding such resource configurations helps Kubernetes balance and deploy aztec nodes. These initial values are chosen based on historical usage of the currently deployed `devnet` environment in AWS ( [Grafana Dashboard](https://grafana.aztec.network/d/cdtxao66xa1ogc/aztec-dashboard?orgId=1&refresh=1m&var-network=devnet&var-instance=All&var-protocol_circuit=All&var-min_block_build=20m&var-system_res_interval=$__auto_interval_system_res_interval&var-sequencer=All&var-prover=All&from=now-7d&to=now) ). **Definitions** `requests:` This is the minimum amount of a resource that must be available on the underlying server before Kubernetes can deploy the component. `limits:` After deployment, the component is allowed to flex up and down, but never above this set limit. Using a limit keeps the shared infra stable when there are memory leaks or unexpected application behavior. A component that exceeds its memory limit is terminated and restarted; CPU usage above the CPU limit is throttled. # Change 2: Options for bots and public networks Additionally, this PR adds configuration to turn bots as well as public access on or off at the time of the helm deployment. This can be used with the following helm syntax: ``` helm upgrade --install <release-name> . -n <namespace> \ --set network.public=true --set network.enableBots=true ``` By default, `network.public` is `false` since enabling this deploys load balancers, which are not available when running a Kubernetes cluster on a local machine or within CI environments. --- These resource configurations have been tested by deploying the parent helm chart to the spartan Kubernetes cluster in AWS. 
--- .../aztec-network/templates/boot-node.yaml | 2 +- spartan/aztec-network/templates/metrics.yaml | 2 +- .../aztec-network/templates/prover-node.yaml | 2 +- spartan/aztec-network/templates/pxe.yaml | 6 +- .../templates/transaction-bot.yaml | 4 +- .../aztec-network/templates/validator.yaml | 2 +- spartan/aztec-network/values.yaml | 61 +++++++++++++++---- 7 files changed, 60 insertions(+), 19 deletions(-) diff --git a/spartan/aztec-network/templates/boot-node.yaml b/spartan/aztec-network/templates/boot-node.yaml index 779d90ee86f8..6ea619b00593 100644 --- a/spartan/aztec-network/templates/boot-node.yaml +++ b/spartan/aztec-network/templates/boot-node.yaml @@ -136,7 +136,7 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - clusterIP: None + type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} selector: {{- include "aztec-network.selectorLabels" . | nindent 4 }} app: boot-node diff --git a/spartan/aztec-network/templates/metrics.yaml b/spartan/aztec-network/templates/metrics.yaml index 8af3aa2f1ab3..ff62fad5f2c9 100644 --- a/spartan/aztec-network/templates/metrics.yaml +++ b/spartan/aztec-network/templates/metrics.yaml @@ -78,7 +78,7 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{ .Values.metrics.service.type }} + type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} ports: - port: {{ .Values.metrics.ports.otlp }} targetPort: otlp diff --git a/spartan/aztec-network/templates/prover-node.yaml b/spartan/aztec-network/templates/prover-node.yaml index 13cb47627195..ccc0a92c96af 100644 --- a/spartan/aztec-network/templates/prover-node.yaml +++ b/spartan/aztec-network/templates/prover-node.yaml @@ -106,7 +106,7 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - clusterIP: None + type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} selector: {{- include "aztec-network.selectorLabels" . 
| nindent 4 }} app: prover-node diff --git a/spartan/aztec-network/templates/pxe.yaml b/spartan/aztec-network/templates/pxe.yaml index 709a0a190e34..52f1b8aed6ad 100644 --- a/spartan/aztec-network/templates/pxe.yaml +++ b/spartan/aztec-network/templates/pxe.yaml @@ -1,3 +1,4 @@ +{{- if .Values.network.enableBots }} apiVersion: apps/v1 kind: Deployment metadata: @@ -65,7 +66,7 @@ metadata: labels: {{- include "aztec-network.labels" . | nindent 4 }} spec: - type: {{ .Values.pxe.service.type }} + type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} selector: {{- include "aztec-network.selectorLabels" . | nindent 4 }} app: pxe @@ -75,4 +76,5 @@ spec: targetPort: {{ .Values.pxe.service.targetPort }} {{- if and (eq .Values.pxe.service.type "NodePort") .Values.pxe.service.nodePort }} nodePort: {{ .Values.pxe.service.nodePort }} - {{- end }} \ No newline at end of file + {{- end }} +{{- end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/transaction-bot.yaml b/spartan/aztec-network/templates/transaction-bot.yaml index e1d191271a41..798a28de5717 100644 --- a/spartan/aztec-network/templates/transaction-bot.yaml +++ b/spartan/aztec-network/templates/transaction-bot.yaml @@ -1,3 +1,4 @@ +{{- if .Values.network.enableBots }} apiVersion: apps/v1 kind: Deployment metadata: @@ -99,4 +100,5 @@ spec: targetPort: {{ .Values.bot.service.targetPort }} {{- if and (eq .Values.bot.service.type "NodePort") .Values.bot.service.nodePort }} nodePort: {{ .Values.bot.service.nodePort }} - {{- end }} \ No newline at end of file + {{- end }} +{{- end }} \ No newline at end of file diff --git a/spartan/aztec-network/templates/validator.yaml b/spartan/aztec-network/templates/validator.yaml index d6de0f2d5f42..4f7785efd893 100644 --- a/spartan/aztec-network/templates/validator.yaml +++ b/spartan/aztec-network/templates/validator.yaml @@ -115,7 +115,7 @@ metadata: labels: {{- include "aztec-network.labels" . 
| nindent 4 }} spec: - clusterIP: None + type: {{if .Values.network.public }}"LoadBalancer"{{ else }}"ClusterIP"{{ end }} selector: {{- include "aztec-network.selectorLabels" . | nindent 4 }} app: validator diff --git a/spartan/aztec-network/values.yaml b/spartan/aztec-network/values.yaml index c422508b7ffd..1f1c8146c768 100644 --- a/spartan/aztec-network/values.yaml +++ b/spartan/aztec-network/values.yaml @@ -1,3 +1,10 @@ +test: "sample" +scenario: "default" + +network: + public: false + enableBots: true + images: test: image: aztecprotocol/end-to-end @@ -38,7 +45,13 @@ bootNode: disabled: true p2p: enabled: "true" - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "4Gi" + cpu: "4" validator: replicas: 0 @@ -54,7 +67,13 @@ validator: disabled: false p2p: enabled: "true" - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "28Gi" + cpu: "7" proverNode: replicas: 1 @@ -64,14 +83,19 @@ proverNode: debug: "aztec:*" realProofs: false proverAgentEnabled: true - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "120Gi" + cpu: "15" pxe: logLevel: "debug" debug: "aztec:*" replicas: 1 service: - type: ClusterIP port: 8080 targetPort: 8080 readinessProbe: @@ -80,7 +104,13 @@ pxe: timeoutSeconds: 5 successThreshold: 1 failureThreshold: 3 - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "4Gi" + cpu: "1" bot: logLevel: "debug" @@ -104,7 +134,13 @@ bot: timeoutSeconds: 5 successThreshold: 1 failureThreshold: 3 - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "4Gi" + cpu: "1" ethereum: replicas: 1 @@ -126,11 +162,15 @@ ethereum: timeoutSeconds: 5 successThreshold: 1 failureThreshold: 3 - resources: {} + resources: + requests: + memory: "2Gi" + cpu: "200m" + limits: + memory: "4Gi" + cpu: "1" metrics: - service: - type: ClusterIP ports: otlp: 4316 metrics: 4315 @@ -143,6 +183,3 @@ 
metrics: timeoutSeconds: 5 successThreshold: 1 failureThreshold: 3 - -test: "sample" -scenario: "default"