Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HAproxy] Fixup number of connections #4650

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 144 additions & 30 deletions terraform/helm/aptos-node/files/haproxy.cfg
Original file line number Diff line number Diff line change
@@ -1,51 +1,165 @@
global
log stdout len 10240 format raw local0
maxconn 500000
nbthread 16

# Config manual: https://cbonte.github.io/haproxy-dconv/2.5/configuration.html
# magic values : terraform/helm/aptos-node/values.yaml

# 256 connections/sec * upgrade_to(30 sec)
maxconn 8192
# This limits the whole HA Proxy impacting both validators and other frontends
# maxconnrate 128
nbthread 4

# 4MB for client-facing sndbuf/rcvbuf -- covers 100Mb/s at 300 ms latency (e.g., US-Asia)
tune.rcvbuf.client {{ $.Values.haproxy.limits.validator.tcpBufSize }}

user nobody

## TCP port defaults
defaults
log global
option tcplog
maxconn 500000
timeout queue 1s
timeout connect 10s
timeout server 60s
timeout client 60s
timeout client-fin 5s

frontend validator
mode tcp
#option tcplog
option dontlog-normal
log-format "%ci:%cp - %sp[%rt] [%t] %ft %Tw/%Tc/%Tt %B [%ts] %ac/%fc/%bc/%sc/%rc %sq/%bq"
maxconn 8192 #Validator network mesh + FN x2
retries 3
timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/
timeout connect 5s
# enough for 1 successful + 5 unsuccessful HB (10 sec interval) + 20 sec timeout
timeout server 80s
timeout client 80s

timeout client-fin 3s #How long to hold an interrupted client connection.
timeout server-fin 1s

frontend fe-{{ include "aptos-validator.fullname" $ }}-validator
bind :6180
default_backend validator
default_backend {{ include "aptos-validator.fullname" $ }}-validator

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips }

acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }}

stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory
tcp-request connection track-sc0 src #update table with src ip as key, store in sc0

# We count the rate limit manually -- this is more CPU intensive, but it lets whitelisted IPs in and admits up to rateLimitSession non-blacklisted IPs.
tcp-request connection track-sc1 int(1) table CONN_RATE

#tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with:
#parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug!
#<1> Mark Blacklist
tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate

# A blacklisted connection is silently dropped, so there is no reason to count it toward rateLimitSession
tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 }

# An IP that was blacklisted due to too many unsuccessful TCP attempts
#-1- Enforce Blacklist
tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 }

# Limit to N TCP connections per minute per source IP
stick-table type ip size 500k expire 1m store gpc0_rate(1m)
tcp-request connection track-sc0 src
# TODO: Reject at content phase for now so we get logs, but this should be
# done at connection phase for higher efficiency
tcp-request content reject if { sc_gpc0_rate(0) ge {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }} }
tcp-request content sc-inc-gpc0(0) unless { nbsrv(validator) eq 0 }
# An IP that had a successful connection.
#-2- Allow Whitelist
tcp-request connection accept if { sc0_get_gpc1() ge 1 }

backend validator
default-server maxconn 1024 {{ if $.Values.haproxy.config.send_proxy_protocol }}send-proxy-v2{{ end }}
#-3- Enforce RateLimit
tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} }

# This is a successful connection, i.e., it was sent more than 4 KB in the last 30 min
#tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 }
#<2> Mark Whitelist
tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 }

# -4- Break a long high rate connection
tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} }

backend {{ include "aptos-validator.fullname" $ }}-validator
default-server maxconn 8192
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6180

frontend fe-{{ include "aptos-validator.fullname" $ }}-validator-fn
bind :6181
default_backend {{ include "aptos-validator.fullname" $ }}-validator-fn

# Deny requests from blocked IPs
tcp-request connection silent-drop if { src -n -f /usr/local/etc/haproxy/blocked.ips }

acl ip_high_conn_rate sc0_conn_rate gt {{ $.Values.haproxy.limits.validator.connectionsPerIPPerMin }}

stick-table type ip size 10m expire 30m store gpc0,gpc1,conn_rate(1m),bytes_out_rate(10s),bytes_out_cnt ##about 500MB of memory
tcp-request connection track-sc0 src #update table with src ip as key, store in sc0

# We count the rate limit manually -- this is more CPU intensive, but it lets whitelisted IPs in and admits up to rateLimitSession non-blacklisted IPs.
tcp-request connection track-sc1 int(1) table CONN_RATE

#tcp-request connection sc-set-gpt0(0) int(...) if ip_high_conn_rate is better but dies with:
#parsing [/usr/local/etc/haproxy/haproxy.cfg:53] : internal error, unexpected rule->from=0, please report this bug!
#<1> Mark Blacklist
tcp-request connection sc-inc-gpc0(0) if ip_high_conn_rate

# A blacklisted connection is silently dropped, so there is no reason to count it toward rateLimitSession
tcp-request connection sc-inc-gpc1(1) unless { sc0_get_gpc0() ge 1 }

# An IP that was blacklisted due to too many unsuccessful TCP attempts
#-1- Enforce Blacklist
tcp-request connection silent-drop if { sc0_get_gpc0() ge 1 }

# An IP that had a successful connection.
#-2- Allow Whitelist
tcp-request connection accept if { sc0_get_gpc1() ge 1 }

#-3- Enforce RateLimit
tcp-request connection reject if { sc1_gpc1_rate(CONN_RATE) gt {{ $.Values.haproxy.limits.validator.rateLimitSession }} }

# This is a successful connection, i.e., it was sent more than 4 KB in the last 30 min
#tcp-request session sc-set-gpt0(0) int(...) if { sc0_kbytes_out gt 16 }
#<2> Mark Whitelist
tcp-request session sc-inc-gpc1(0) if { sc0_kbytes_out gt 4 }

# -4- Break a long high rate connection
tcp-request session reject if { sc0_bytes_out_rate gt {{ $.Values.haproxy.limits.validator.maxBytesOutRate10sec }} }

backend {{ include "aptos-validator.fullname" $ }}-validator-fn
default-server maxconn 8192
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:6181


# CONN_RATE holds a single entry with key 1; it is used to determine the global connection rate
backend CONN_RATE
stick-table type integer size 1 expire 10m store gpc1,gpc1_rate(1s)

################## HTTP: metrics & API
defaults
mode http
retries 3
timeout queue 5s #limits num of concurrent connections. Not clear if t/o connect is needed. #https://www.papertrail.com/solution/tips/haproxy-logging-how-to-tune-timeouts-for-performance/
timeout connect 5s
timeout server 60s #what makes sense? for silence between nodes?
timeout client 60s

timeout client-fin 3s #How long to hold an interrupted client connection.
timeout server-fin 1s

timeout http-request 60s #len of http request
timeout http-keep-alive 2s

rate-limit sessions 256

frontend validator-metrics
mode http
option httplog
bind :9102
default_backend validator-metrics
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend validator-metrics
mode http
default-server maxconn 1024
default-server maxconn 16
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:9101

# Exposes the validator's own REST API
Expand All @@ -55,14 +169,14 @@ frontend validator-api
option httplog
bind :8180
default_backend validator-api
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend validator-api
mode http
default-server maxconn 1024
default-server maxconn 16
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-validator:8080
{{- end }}

Expand All @@ -87,29 +201,29 @@ frontend {{ $config.name }}-api
default_backend {{ $config.name }}-api
# add Forwarded header, which behaves differently than X-Forwarded-For
# see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Forwarded
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend {{ $config.name }}-api
mode http
default-server maxconn 1024
default-server maxconn 16
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-{{ $config.name }} {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-{{ $config.name }}:8080

frontend {{ $config.name }}-metrics
mode http
option httplog
bind :{{ add 9103 $index }}
default_backend {{ $config.name }}-metrics
http-request add-header Forwarded "for=%ci"

# Deny requests from blocked IPs
tcp-request connection reject if { src -n -f /usr/local/etc/haproxy/blocked.ips }
http-request add-header Forwarded "for=%ci"

backend {{ $config.name }}-metrics
mode http
default-server maxconn 1024
default-server maxconn 16
server {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-{{ $config.name }} {{ include "aptos-validator.fullname" $ }}-{{ $.Values.i }}-{{ $config.name }}:9101

{{- end }}
Expand Down
8 changes: 3 additions & 5 deletions testsuite/forge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1186,8 +1186,7 @@ def create_forge_command(
"--duration-secs", forge_runner_duration_secs
])

if forge_num_validators:
forge_args.extend(["--num-validators", forge_num_validators])
forge_args.extend(["--num-validators", "100"])
if forge_num_validator_fullnodes:
forge_args.extend([
"--num-validator-fullnodes",
Expand All @@ -1213,8 +1212,7 @@ def create_forge_command(
forge_args.append("--reuse")
if forge_namespace_keep == "true":
forge_args.append("--keep")
if forge_enable_haproxy == "true":
forge_args.append("--enable-haproxy")
forge_args.append("--enable-haproxy")

if test_args:
forge_args.extend(test_args)
Expand Down Expand Up @@ -1535,7 +1533,7 @@ def test(
forge_cli_args=forge_cli_args,
test_args=test_args,
)

print(f"Using cluster: {forge_cluster_name}")
temp = context.filesystem.mkstemp()
forge_cluster = ForgeCluster(forge_cluster_name, temp)
Expand Down