Skip to content

Commit

Permalink
fix: Try fix e2e epochs in CI (#10804)
Browse files Browse the repository at this point in the history
Attempts two fixes at e2e epochs test. First, it increases the L1 block
time, to account for general CI slowness. Second, it adds more retries
to the L1 gas utils getTx, since the e2e epochs test works using the tx
delayer, which artificially introduces a delay between a tx being sent
and it being available in anvil, so it triggered a timeout in the utils.

**Update**: Increasing the retries caused the error to change, we were
getting a timeout in teardown. This was caused because the sequencer got
stuck in waiting for the tx to be mined for way too long (more than 5
minutes, the afterAll hook timeout), and the `node.stop()` waits for the
current loops to end before returning.

But what's interesting is _why_ the sequencer got stuck in waiting for
its tx to be mined. The tx was being delayed by the tx-delayer, which
intercepts txs, manually computes their tx hash to return it to the
caller immediately, and holds on the tx to submit it to anvil at a later
point in time. What came up is that the tx hash we were manually
computing over txs with blobs did not match the tx hash returned by
anvil. This has been logged as #10824. However, we can sidestep this
issue by just choosing a reasonable value for max attempts so teardown
doesn't timeout.

---------

Co-authored-by: ludamad <[email protected]>
  • Loading branch information
spalladino and ludamad authored Dec 17, 2024
1 parent b8bdb52 commit ba28788
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 20 deletions.
34 changes: 20 additions & 14 deletions yarn-project/end-to-end/src/e2e_epochs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ describe('e2e_epochs', () => {
let l1BlockNumber: number;
let handle: NodeJS.Timeout;

const EPOCH_DURATION = 4;
const L1_BLOCK_TIME = 5;
const L2_SLOT_DURATION_IN_L1_BLOCKS = 2;
const EPOCH_DURATION_IN_L2_SLOTS = 4;
const L2_SLOT_DURATION_IN_L1_SLOTS = 2;
const L1_BLOCK_TIME_IN_S = 8;

beforeAll(async () => {
// Set up system without any account nor protocol contracts
Expand All @@ -35,13 +35,16 @@ describe('e2e_epochs', () => {
assumeProvenThrough: undefined,
skipProtocolContracts: true,
salt: 1,
aztecEpochDuration: EPOCH_DURATION,
aztecSlotDuration: L1_BLOCK_TIME * L2_SLOT_DURATION_IN_L1_BLOCKS,
ethereumSlotDuration: L1_BLOCK_TIME,
aztecEpochProofClaimWindowInL2Slots: EPOCH_DURATION / 2,
aztecEpochDuration: EPOCH_DURATION_IN_L2_SLOTS,
aztecSlotDuration: L1_BLOCK_TIME_IN_S * L2_SLOT_DURATION_IN_L1_SLOTS,
ethereumSlotDuration: L1_BLOCK_TIME_IN_S,
aztecEpochProofClaimWindowInL2Slots: EPOCH_DURATION_IN_L2_SLOTS / 2,
minTxsPerBlock: 0,
realProofs: false,
startProverNode: true,
// This must be enough so that the tx from the prover is delayed properly,
// but not so much to hang the sequencer and timeout the teardown
txPropagationMaxQueryAttempts: 12,
});

logger = context.logger;
Expand Down Expand Up @@ -87,8 +90,8 @@ describe('e2e_epochs', () => {

// Constants used for time calculation
constants = {
epochDuration: EPOCH_DURATION,
slotDuration: L1_BLOCK_TIME * L2_SLOT_DURATION_IN_L1_BLOCKS,
epochDuration: EPOCH_DURATION_IN_L2_SLOTS,
slotDuration: L1_BLOCK_TIME_IN_S * L2_SLOT_DURATION_IN_L1_SLOTS,
l1GenesisBlock: await rollup.getL1StartBlock(),
l1GenesisTime: await rollup.getL1GenesisTime(),
};
Expand All @@ -105,7 +108,7 @@ describe('e2e_epochs', () => {
const waitUntilEpochStarts = async (epoch: number) => {
const [start] = getTimestampRangeForEpoch(BigInt(epoch), constants);
logger.info(`Waiting until L1 timestamp ${start} is reached as the start of epoch ${epoch}`);
await waitUntilL1Timestamp(l1Client, start - BigInt(L1_BLOCK_TIME));
await waitUntilL1Timestamp(l1Client, start - BigInt(L1_BLOCK_TIME_IN_S));
return start;
};

Expand All @@ -124,12 +127,14 @@ describe('e2e_epochs', () => {
proverDelayer.pauseNextTxUntilTimestamp(epoch2Start);
logger.info(`Delayed prover tx until epoch 2 starts at ${epoch2Start}`);

// Wait until the last block of epoch 1 is published and then hold off the sequencer
await waitUntilL2BlockNumber(blockNumberAtEndOfEpoch0 + EPOCH_DURATION);
sequencerDelayer.pauseNextTxUntilTimestamp(epoch2Start + BigInt(L1_BLOCK_TIME));
// Wait until the last block of epoch 1 is published and then hold off the sequencer.
// Note that the tx below will block the sequencer until it times out
// the txPropagationMaxQueryAttempts until #10824 is fixed.
await waitUntilL2BlockNumber(blockNumberAtEndOfEpoch0 + EPOCH_DURATION_IN_L2_SLOTS);
sequencerDelayer.pauseNextTxUntilTimestamp(epoch2Start + BigInt(L1_BLOCK_TIME_IN_S));

// Next sequencer to publish a block should trigger a rollback to block 1
await waitUntilL1Timestamp(l1Client, epoch2Start + BigInt(L1_BLOCK_TIME));
await waitUntilL1Timestamp(l1Client, epoch2Start + BigInt(L1_BLOCK_TIME_IN_S));
expect(await rollup.getBlockNumber()).toEqual(1n);
expect(await rollup.getSlotNumber()).toEqual(8n);

Expand All @@ -142,5 +147,6 @@ describe('e2e_epochs', () => {
const lastL2BlockTxReceipt = await l1Client.getTransactionReceipt({ hash: lastL2BlockTxHash! });
expect(lastL2BlockTxReceipt.status).toEqual('success');
expect(lastL2BlockTxReceipt.blockNumber).toBeGreaterThan(lastProverTxReceipt!.blockNumber);
logger.info(`Test succeeded`);
});
});
17 changes: 15 additions & 2 deletions yarn-project/ethereum/src/l1_tx_utils.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { times } from '@aztec/foundation/collection';
import {
type ConfigMappingsType,
bigintConfigHelper,
Expand Down Expand Up @@ -71,6 +72,11 @@ export interface L1TxUtilsConfig {
* How long to wait for a tx to be mined before giving up
*/
txTimeoutMs?: number;
/**
* How many attempts will be done to get a tx after it was sent?
* First attempt is done at 1s, second at 2s, third at 3s, etc.
*/
txPropagationMaxQueryAttempts?: number;
}

export const l1TxUtilsConfigMappings: ConfigMappingsType<L1TxUtilsConfig> = {
Expand Down Expand Up @@ -119,6 +125,11 @@ export const l1TxUtilsConfigMappings: ConfigMappingsType<L1TxUtilsConfig> = {
env: 'L1_TX_MONITOR_TX_TIMEOUT_MS',
...numberConfigHelper(300_000), // 5 mins
},
txPropagationMaxQueryAttempts: {
description: 'How many attempts will be done to get a tx after it was sent',
env: 'L1_TX_PROPAGATION_MAX_QUERY_ATTEMPTS',
...numberConfigHelper(3),
},
};

export const defaultL1TxUtilsConfig = getDefaultConfig<L1TxUtilsConfig>(l1TxUtilsConfigMappings);
Expand Down Expand Up @@ -213,12 +224,14 @@ export class L1TxUtils {
const gasConfig = { ...this.config, ..._gasConfig };
const account = this.walletClient.account;
const blobInputs = _blobInputs || {};
const makeGetTransactionBackoff = () =>
makeBackoff(times(gasConfig.txPropagationMaxQueryAttempts ?? 3, i => i + 1));

// Retry a few times, in case the tx is not yet propagated.
const tx = await retry<GetTransactionReturnType>(
() => this.publicClient.getTransaction({ hash: initialTxHash }),
`Getting L1 transaction ${initialTxHash}`,
makeBackoff([1, 2, 3]),
makeGetTransactionBackoff(),
this.logger,
true,
);
Expand Down Expand Up @@ -261,7 +274,7 @@ export class L1TxUtils {
const tx = await retry<GetTransactionReturnType>(
() => this.publicClient.getTransaction({ hash: currentTxHash }),
`Getting L1 transaction ${currentTxHash}`,
makeBackoff([1, 2, 3]),
makeGetTransactionBackoff(),
this.logger,
true,
);
Expand Down
20 changes: 16 additions & 4 deletions yarn-project/ethereum/src/test/tx_delayer.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { omit } from '@aztec/foundation/collection';
import { type Logger, createLogger } from '@aztec/foundation/log';
import { retryUntil } from '@aztec/foundation/retry';

Expand All @@ -8,6 +9,7 @@ import {
type PublicClient,
type WalletClient,
keccak256,
parseTransaction,
publicActions,
walletActions,
} from 'viem';
Expand Down Expand Up @@ -89,6 +91,7 @@ class DelayerImpl implements Delayer {
/**
* Returns a new client (without modifying the one passed in) with an injected tx delayer.
* The delayer can be used to hold off the next tx to be sent until a given block number.
* TODO(#10824): This doesn't play along well with blob txs for some reason.
*/
export function withDelayer<T extends WalletClient>(
client: T,
Expand Down Expand Up @@ -116,16 +119,25 @@ export function withDelayer<T extends WalletClient>(
// Compute the tx hash manually so we emulate sendRawTransaction response
const { serializedTransaction } = args[0];
const txHash = keccak256(serializedTransaction);
logger.info(`Delaying tx ${txHash} until ${inspect(waitUntil)}`);
logger.info(`Delaying tx ${txHash} until ${inspect(waitUntil)}`, {
argsLen: args.length,
...omit(parseTransaction(serializedTransaction), 'data', 'sidecars'),
});

// Do not await here so we can return the tx hash immediately as if it had been sent on the spot.
// Instead, delay it so it lands on the desired block number or timestamp, assuming anvil will
// mine it immediately.
void wait
.then(async () => {
const txHash = await client.sendRawTransaction(...args);
logger.info(`Sent previously delayed tx ${txHash} to land on ${inspect(waitUntil)}`);
delayer.txs.push(txHash);
const clientTxHash = await client.sendRawTransaction(...args);
if (clientTxHash !== txHash) {
logger.error(`Tx hash returned by the client does not match computed one`, {
clientTxHash,
computedTxHash: txHash,
});
}
logger.info(`Sent previously delayed tx ${clientTxHash} to land on ${inspect(waitUntil)}`);
delayer.txs.push(clientTxHash);
})
.catch(err => logger.error(`Error sending tx after delay`, err));

Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ export type EnvVar =
| 'L1_TX_MONITOR_CHECK_INTERVAL_MS'
| 'L1_TX_MONITOR_STALL_TIME_MS'
| 'L1_TX_MONITOR_TX_TIMEOUT_MS'
| 'L1_TX_PROPAGATION_MAX_QUERY_ATTEMPTS'
| 'FAUCET_MNEMONIC_ACCOUNT_INDEX'
| 'FAUCET_ETH_AMOUNT'
| 'FAUCET_INTERVAL_MS'
Expand Down

0 comments on commit ba28788

Please sign in to comment.