-
Notifications
You must be signed in to change notification settings - Fork 3.7k
218 lines (200 loc) · 7.74 KB
/
workflow-run-replay-verify.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
name: "*run replay-verify reusable workflow"
on:
# This allows the workflow to be triggered from another workflow
workflow_call:
inputs:
GIT_SHA:
required: true
type: string
description: The git SHA1 to test.
# replay-verify config
BUCKET:
required: true
type: string
description: The bucket to use for the backup. If not specified, it will use the default bucket.
SUB_DIR:
required: true
type: string
description: The subdirectory to use for the backup. If not specified, it will use the default subdirectory.
HISTORY_START:
required: true
type: string
description: The history start to use for the backup. If not specified, it will use the default history start.
TXNS_TO_SKIP:
required: false
type: string
description: The list of transaction versions to skip. If not specified, it will use the default list.
RANGES_TO_SKIP:
required: false
type: string
description: The optional list of transaction ranges to skip..
BACKUP_CONFIG_TEMPLATE_PATH:
description: "The path to the backup config template to use."
type: string
required: true
# GHA job config
RUNS_ON:
description: "The runner to use for the job."
type: string
required: true
default: "high-perf-docker-with-local-ssd"
TIMEOUT_MINUTES:
description: "Github job timeout in minutes"
type: number
required: true
default: 180
# This allows the workflow to be triggered manually from the Github UI or CLI
# NOTE: because the "number" type is not supported, we default to 720 minute timeout
workflow_dispatch:
inputs:
GIT_SHA:
required: true
type: string
description: The git SHA1 to test.
# replay-verify config
BUCKET:
required: true
type: string
description: The bucket to use for the backup. If not specified, it will use the default bucket.
SUB_DIR:
required: true
type: string
description: The subdirectory to use for the backup. If not specified, it will use the default subdirectory.
HISTORY_START:
required: true
type: string
description: The history start to use for the backup. If not specified, it will use the default history start.
TXNS_TO_SKIP:
required: false
type: string
description: The list of transaction versions to skip. If not specified, it will use the default list.
RANGES_TO_SKIP:
required: false
type: string
description: The optional list of transaction ranges to skip..
BACKUP_CONFIG_TEMPLATE_PATH:
description: "The path to the backup config template to use."
type: string
required: true
# GHA job config
RUNS_ON:
description: "The runner to use for the job."
type: string
required: true
default: "high-perf-docker-with-local-ssd"
jobs:
prepare:
runs-on: ${{ inputs.RUNS_ON }}
outputs:
jobs_ids: ${{ steps.gen-jobs.outputs.job_ids }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
ref: ${{ inputs.GIT_SHA }}
- name: Load cached aptos-debugger binary
id: cache-aptos-debugger-binary
uses: actions/cache@v4
with:
# copy the binary to the root of the repo and cache it there, because rust-setup calls a cache-rust action
# which cleans up the target directory in its post action
path: |
aptos-debugger
testsuite/replay_verify.py
key: aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
- name: Prepare for build if not cached
if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
uses: aptos-debugger-ee4ecab92c937d27426acce2c8e9e3da88f94c53 #aptos-labs/aptos-core/.github/actions/rust-setup@main
with:
GIT_CREDENTIALS: ${{ inputs.GIT_CREDENTIALS }}
- name: Build and strip aptos-debugger binary if not cached
if: steps.cache-aptos-debugger-binary.outputs.cache-hit != 'true'
shell: bash
run: |
cargo build --release -p aptos-debugger
strip -s target/release/aptos-debugger
cp target/release/aptos-debugger .
- name: Install GCloud SDK
uses: "google-github-actions/setup-gcloud@v2"
with:
version: ">= 418.0.0"
install_components: "kubectl,gke-gcloud-auth-plugin"
- name: get timestamp to use in cache key
id: get-timestamp
run: echo "ts=$(date +%s)" >> $GITHUB_OUTPUT
- name: Load cached backup storage metadata cache dir (and save back afterwards)
uses: actions/cache@v4
with:
path: metadata_cache
key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ steps.get-timestamp.outputs.ts }}
restore-keys: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-
- name: Generate job ranges
id: gen-jobs
env:
BUCKET: ${{ inputs.BUCKET }}
SUB_DIR: ${{ inputs.SUB_DIR }}
run: |
./aptos-debugger aptos-db gen-replay-verify-jobs \
--metadata-cache-dir ./metadata_cache \
--command-adapter-config ${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }} \
--start-version ${{ inputs.HISTORY_START }} \
--ranges-to-skip "${{ inputs.RANGES_TO_SKIP }}" \
\
--max-ranges-per-job 16 \
--output-json-file jobs.json \
JOB_IDS=`jq 'length as $N | [range(0; $N)]' jobs.json`
echo "jobs_ids=$JOB_IDS" >> $GITHUB_OUTPUT
- name: Cache backup storage config and job definition
uses: actions/cache/save@v4
with:
path: |
${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
jobs.json
key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}
replay-verify:
needs: prepare
timeout-minutes: ${{ inputs.TIMEOUT_MINUTES || 180 }}
runs-on: ${{ inputs.RUNS_ON }}
strategy:
fail-fast: false
matrix:
job_id: ${{ fromJson(needs.prepare.outputs.job_ids) }}
steps:
- name: Load cached aptos-debugger binary and replay_verify.py script
uses: actions/cache/restore@v4
with:
path: |
aptos-debugger
testsuite/replay_verify.py
key: aptos-debugger-ee4ecab92c937d27426acce2c8e9e3da88f94c53 #aptos-debugger-${{ inputs.GIT_SHA || github.sha }}
fail-on-cache-miss: true
- name: Load cached backup storage metadata cache dir
uses: actions/cache/restore@v4
with:
path: metadata_cache
key: metadata-cache-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-
fail-on-cache-miss: true
- name: Load cached backup storage config and job definitions
uses: actions/cache/restore@v4
with:
path: |
${{ inputs.BACKUP_CONFIG_TEMPLATE_PATH }}
jobs.json
key: backup-config-${{ inputs.BUCKET }}/${{ inputs.SUB_DIR }}-${{ github.run_id }}
fail-on-cache-miss: true
- name: Install GCloud SDK
uses: "google-github-actions/setup-gcloud@v2"
with:
version: ">= 418.0.0"
install_components: "kubectl,gke-gcloud-auth-plugin"
- name: Run replay-verify in parallel
shell: bash
run: |
# extract job by job_id
jq '.[${{ matrix.job_id }}].[]' jobs.json | while read _desc begin end msg; do
echo $begin-$end: $msg
sleep 30&
done
echo "start waiting"
wait
echo "done waiting"