Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

op-conductor fixes + bootstrap script #322

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/localnet-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ jobs:
- name: "run localnet"
run: docker compose -f ./e2e/docker-compose.yml up -d

- name: "kill an op-node after a minute"
run: sleep 60 && docker compose -f ./e2e/docker-compose.yml down op-node
- name: "kill an op-node after 15 seconds, then wait 3 minutes (the healthcheck interval + time for another sequencer to take over)"
run: sleep 15 && docker compose -f ./e2e/docker-compose.yml down op-node && sleep 180

- name: "get localnet stats"
working-directory: ./e2e/monitor
Expand Down
68 changes: 49 additions & 19 deletions e2e/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,10 @@ services:
depends_on:
- "geth-l1"
healthcheck:
test: ["CMD-SHELL", "ls /l2configs/rollup.json"]
timeout: 60s
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 30303"]
timeout: 1s
retries: 300
interval: 1s
environment:
ADMIN_PRIVATE_KEY: "${ADMIN_PRIVATE_KEY}"
OP_GETH_L1_RPC: "http://geth-l1:8545"
Expand Down Expand Up @@ -240,8 +242,10 @@ services:
op-geth-l2:
condition: "service_healthy"
healthcheck:
test: ["CMD-SHELL", "ls /l2configs/rollup.json"]
timeout: 60s
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 30303"]
timeout: 1s
retries: 300
interval: 1s
environment:
ADMIN_PRIVATE_KEY: "${ADMIN_PRIVATE_KEY}"
OP_GETH_L1_RPC: "http://geth-l1:8545"
Expand Down Expand Up @@ -273,8 +277,10 @@ services:
op-geth-l2:
condition: "service_healthy"
healthcheck:
test: ["CMD-SHELL", "ls /l2configs/rollup.json"]
timeout: 60s
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 30303"]
timeout: 1s
retries: 300
interval: 1s
environment:
ADMIN_PRIVATE_KEY: "${ADMIN_PRIVATE_KEY}"
OP_GETH_L1_RPC: "http://geth-l1:8545"
Expand Down Expand Up @@ -310,6 +316,11 @@ services:
condition: "service_started"
op-geth-l2:
condition: "service_healthy"
healthcheck:
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 9222"]
timeout: 1s
retries: 300
interval: 1s
environment:
OP_NODE_BSS_WS: "http://bssd:8081/v1/ws"
command:
Expand All @@ -329,7 +340,7 @@ services:
- "--l1.trustrpc"
- "--log.level=info"
- "--l1.trustrpc=true"
- "--l1.http-poll-interval=6s"
- "--l1.http-poll-interval=1s"
- "--p2p.no-discovery"
- "--p2p.priv.path=/tmp/op-node-priv-key.txt"
- "--p2p.sequencer.key=${ADMIN_PRIVATE_KEY}"
Expand Down Expand Up @@ -364,6 +375,11 @@ services:
condition: "service_started"
op-geth-l2-2:
condition: "service_healthy"
healthcheck:
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 9222"]
timeout: 1s
retries: 300
interval: 1s
environment:
OP_NODE_BSS_WS: "http://bssd:8081/v1/ws"
command:
Expand Down Expand Up @@ -415,6 +431,11 @@ services:
condition: "service_started"
op-geth-l2-3:
condition: "service_healthy"
healthcheck:
test: ["CMD-SHELL", "nc -w 1 -vz 0.0.0.0 9222"]
timeout: 1s
retries: 300
interval: 1s
environment:
OP_NODE_BSS_WS: "http://bssd:8081/v1/ws"
command:
Expand Down Expand Up @@ -575,6 +596,8 @@ services:
- "--l2oo-address=${L2OO_ADDRESS}"
- "--private-key=${ADMIN_PRIVATE_KEY}"
- "--l1-eth-rpc=http://geth-l1:8545"
networks:
e2e:

op-proposer-2:
build:
Expand Down Expand Up @@ -634,19 +657,20 @@ services:
- "op-conductor/bin/op-conductor"
- "--consensus.addr=op-conductor"
- "--consensus.port=50050"
- "--raft.server.id=op-conductor-1"
- "--raft.server.id=op-conductor-1:50050"
- "--raft.storage.dir=/tmp/raft"
- "--raft.bootstrap"
- "--node.rpc=http://op-node:8547"
- "--execution.rpc=http://op-geth-l2:8546"
- "--healthcheck.unsafe-interval=10"
- "--healthcheck.unsafe-interval=12"
- "--healthcheck.safe-interval=200"
- "--healthcheck.min-peer-count=1"
- "--healthcheck.interval=60"
- "--healthcheck.interval=120"
- "--rollup.config=/l2configs/rollup.json"
- "--log.format=terminal"
- "--rpc.addr=0.0.0.0"
- "--rpc.port=8547"
- "--paused"
volumes:
- "l2configs:/l2configs"
- "./jwt.txt:/tmp/jwt.txt"
Expand All @@ -655,7 +679,7 @@ services:
e2e:
depends_on:
op-node:
condition: "service_started"
condition: "service_healthy"
op-geth-l2:
condition: "service_healthy"
ports:
Expand All @@ -669,18 +693,19 @@ services:
- "op-conductor/bin/op-conductor"
- "--consensus.addr=op-conductor-2"
- "--consensus.port=50051"
- "--raft.server.id=op-conductor-2"
- "--raft.server.id=op-conductor-2:50051"
- "--raft.storage.dir=/tmp/raft"
- "--node.rpc=http://op-node-2:8547"
- "--execution.rpc=http://op-geth-l2-2:8546"
- "--healthcheck.unsafe-interval=10"
- "--healthcheck.unsafe-interval=12"
- "--healthcheck.safe-interval=200"
- "--healthcheck.min-peer-count=1"
- "--healthcheck.interval=60"
- "--healthcheck.interval=120"
- "--rollup.config=/l2configs/rollup.json"
- "--log.format=terminal"
- "--rpc.addr=0.0.0.0"
- "--rpc.port=8547"
- "--paused"
volumes:
- "l2configs:/l2configs"
- "./jwt.txt:/tmp/jwt.txt"
Expand All @@ -689,7 +714,7 @@ services:
e2e:
depends_on:
op-node-2:
condition: "service_started"
condition: "service_healthy"
op-geth-l2-2:
condition: "service_healthy"
ports:
Expand All @@ -703,18 +728,19 @@ services:
- "op-conductor/bin/op-conductor"
- "--consensus.addr=op-conductor-3"
- "--consensus.port=50052"
- "--raft.server.id=op-conductor-3"
- "--raft.server.id=op-conductor-3:50052"
- "--raft.storage.dir=/tmp/raft"
- "--node.rpc=http://op-node-3:8547"
- "--execution.rpc=http://op-geth-l2-3:8546"
- "--healthcheck.unsafe-interval=10"
- "--healthcheck.unsafe-interval=12"
- "--healthcheck.safe-interval=200"
- "--healthcheck.min-peer-count=1"
- "--healthcheck.interval=60"
- "--healthcheck.interval=120"
- "--rollup.config=/l2configs/rollup.json"
- "--log.format=terminal"
- "--rpc.addr=0.0.0.0"
- "--rpc.port=8547"
- "--paused"
volumes:
- "l2configs:/l2configs"
- "./jwt.txt:/tmp/jwt.txt"
Expand All @@ -723,7 +749,7 @@ services:
e2e:
depends_on:
op-node-3:
condition: "service_started"
condition: "service_healthy"
op-geth-l2-3:
condition: "service_healthy"
ports:
Expand All @@ -735,6 +761,10 @@ services:
context: "."
entrypoint:
- "bash"
environment:
OPNODE_RPCS: 'http://op-node:8547,http://op-node-2:8547,http://op-node-3:8547'
OPCONDUCTOR_RPCS: 'http://op-conductor:8547,http://op-conductor-2:8547,http://op-conductor-3:8547'
OPCONDUCTOR_RAFT_VOTERS: 'op-conductor:50050,op-conductor-2:50051,op-conductor-3:50052'
command:
- "/tmp/setup-raft.bash"
depends_on:
Expand Down
1 change: 0 additions & 1 deletion e2e/entrypointl2.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ fi
--authrpc.addr=0.0.0.0 \
--authrpc.port=8551 \
--authrpc.jwtsecret=/tmp/jwt.txt \
--verbosity=5 \
--gpo.maxprice=1 \
--tbc.network=localnet \
--tbc.initheight=1 \
Expand Down
2 changes: 2 additions & 0 deletions e2e/optimism-stack.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,5 @@ RUN forge build
WORKDIR /git/optimism

RUN make devnet-allocs

# Install netcat, which provides the `nc` binary (presumably for the `nc`-based
# healthchecks in e2e/docker-compose.yml -- confirm which services use this image).
# The package index is refreshed in the same layer so the install does not
# depend on a stale or missing apt cache from an earlier layer, and the index
# is removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends netcat-openbsd \
    && rm -rf /var/lib/apt/lists/*
53 changes: 40 additions & 13 deletions e2e/setup-raft.bash
Original file line number Diff line number Diff line change
@@ -1,20 +1,47 @@
#! /bin/bash

set -ev
set -evx

curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_pause","params":[],"id":4}' http://op-conductor:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_pause","params":[],"id":4}' http://op-conductor-2:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_pause","params":[],"id":4}' http://op-conductor-3:8547
curl -X POST -H "Content-Type: application/json" --data "{\"jsonrpc\":\"2.0\",\"method\":\"admin_stopSequencer\",\"params\":[],\"id\":3}" http://op-node:8547
# Split the comma-separated endpoint lists from the environment into arrays.
# The loop below pairs conductor_rpcs[n] with opnode_rpcs[n], so the lists
# are expected to be in the same node order.
IFS=',' read -ra conductor_rpcs <<< "$OPCONDUCTOR_RPCS"
IFS=',' read -ra conductor_rafts <<< "$OPCONDUCTOR_RAFT_VOTERS"
IFS=',' read -ra opnode_rpcs <<< "$OPNODE_RPCS"

# Stays empty unless some conductor reports itself as leader.
opnode_rpc=

# Ask every conductor whether it is the leader and remember the op-node RPC
# at the same position. If several answer 'true', the last one wins.
for ((idx = 0; idx < ${#conductor_rpcs[@]}; idx++)); do
  leader_reply=$(curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_leader","params":[],"id":4}' "${conductor_rpcs[$idx]}" | jq '.result')
  if [[ $leader_reply == 'true' ]]; then
    opnode_rpc=${opnode_rpcs[$idx]}
  fi
done

curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"optimism_syncStatus","params":[],"id":1}' http://op-node:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_addServerAsVoter","params":["op-conductor-2", "op-conductor-2:50051"],"id":4}' http://op-conductor:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_addServerAsVoter","params":["op-conductor-3", "op-conductor-3:50052"],"id":4}' http://op-conductor:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_resume","params":[],"id":4}' http://op-conductor:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_resume","params":[],"id":4}' http://op-conductor-2:8547
curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"conductor_resume","params":[],"id":4}' http://op-conductor-3:8547

unsafe_head=$(curl -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"optimism_syncStatus","params":[],"id":2}' http://op-node:8547 | jq '.result.unsafe_l2.hash' )
# POST a JSON-RPC payload to an endpoint.
#   $1 - endpoint URL
#   $2 - JSON request body
post_json() {
  curl -X POST -H "Content-Type: application/json" --data "$2" "$1"
}

# Abort if no conductor reported itself as leader.
if [ -z "$opnode_rpc" ]; then
  echo "could not find leader, aborting" >&2
  exit 1
fi

# Pause each conductor so we can modify state.
for rpc in "${conductor_rpcs[@]}"; do
  post_json "$rpc" '{"jsonrpc":"2.0","method":"conductor_pause","params":[],"id":4}'
done

# Stop sequencing on every op-node.
for rpc in "${opnode_rpcs[@]}"; do
  post_json "$rpc" '{"jsonrpc":"2.0","method":"admin_stopSequencer","params":[],"id":3}'
done

# Register every raft address as a voter, using the address as both the
# server id and the server address. The request for the leader's own address
# may return an error; that is fine as long as the others succeed. curl runs
# without --fail, so an error response does not abort the script under set -e.
# NOTE(review): requests go to the first conductor in OPCONDUCTOR_RPCS, which
# is assumed to be the leader -- confirm if the list order ever changes.
for raft in "${conductor_rafts[@]}"; do
  post_json "${conductor_rpcs[0]}" "{\"jsonrpc\":\"2.0\",\"method\":\"conductor_addServerAsVoter\",\"params\":[\"$raft\", \"$raft\"],\"id\":4}"
done

# Resume the conductors.
for rpc in "${conductor_rpcs[@]}"; do
  post_json "$rpc" '{"jsonrpc":"2.0","method":"conductor_resume","params":[],"id":4}'
done

# Restart the sequencer using the unsafe head from the leader's sync status.
# jq runs without -r on purpose: the hash keeps its surrounding double quotes
# so it can be spliced directly into the JSON params of admin_startSequencer.
unsafe_head=$(post_json "$opnode_rpc" '{"jsonrpc":"2.0","method":"optimism_syncStatus","params":[],"id":2}' | jq '.result.unsafe_l2.hash')
if [ -z "$unsafe_head" ] || [ "$unsafe_head" = 'null' ]; then
  echo "could not read unsafe head from $opnode_rpc, aborting" >&2
  exit 1
fi
echo "unsafe_head=$unsafe_head"
curl -X POST -H "Content-Type: application/json" --data "{\"jsonrpc\":\"2.0\",\"method\":\"admin_startSequencer\",\"params\":[$unsafe_head],\"id\":3}" http://op-node:8547
# Start sequencing on the leader's op-node at the unsafe head fetched above.
# $unsafe_head already carries its own JSON double quotes (jq output without -r).
curl -X POST -H "Content-Type: application/json" --data "{\"jsonrpc\":\"2.0\",\"method\":\"admin_startSequencer\",\"params\":[$unsafe_head],\"id\":3}" "$opnode_rpc"