more fork-choice fixes #1388

Merged · 4 commits · Jul 30, 2020
20 changes: 17 additions & 3 deletions beacon_chain/attestation_aggregation.nim
@@ -169,9 +169,16 @@ proc isValidAttestation*(
     # Not in spec - check that rewinding to the state is sane
     return false

+  let tgtBlck = pool.blockPool.getRef(attestation.data.target.root)
+  if tgtBlck.isNil:
+    debug "Target block not found"
+    pool.blockPool.addMissing(attestation.data.beacon_block_root)
+    return
+
+  # TODO this could be any state in the target epoch
   pool.blockPool.withState(
     pool.blockPool.tmpState,
-    BlockSlot(blck: attestationBlck, slot: attestation.data.slot)):
+    tgtBlck.atSlot(attestation.data.target.epoch.compute_start_slot_at_epoch)):
     # https://github.com/ethereum/eth2.0-specs/blob/v0.12.1/specs/phase0/p2p-interface.md#attestation-subnets
     # [REJECT] The attestation is for the correct subnet (i.e.
     # compute_subnet_for_attestation(state, attestation) == subnet_id).
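Both checks now rewind to a state from the attestation's target epoch rather than from the attestation's own slot. The slot arithmetic this relies on comes from the phase0 spec; a minimal sketch of the two helpers, assuming mainnet's SLOTS_PER_EPOCH of 32 and plain uint64 in place of the Slot and Epoch types:

const SLOTS_PER_EPOCH = 32'u64

func compute_start_slot_at_epoch(epoch: uint64): uint64 =
  ## First slot of the given epoch, per the phase0 spec
  epoch * SLOTS_PER_EPOCH

func compute_epoch_at_slot(slot: uint64): uint64 =
  ## Epoch that contains the given slot
  slot div SLOTS_PER_EPOCH

doAssert compute_start_slot_at_epoch(3) == 96
doAssert compute_epoch_at_slot(96) == 3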
@@ -274,10 +281,17 @@ proc isValidAggregatedAttestation*(
   # [REJECT] aggregate_and_proof.selection_proof selects the validator as an
   # aggregator for the slot -- i.e. is_aggregator(state, aggregate.data.slot,
   # aggregate.data.index, aggregate_and_proof.selection_proof) returns True.
-  # TODO use withEpochState when it works more reliably
+
+  let tgtBlck = pool.blockPool.getRef(aggregate.data.target.root)
+  if tgtBlck.isNil:
+    debug "Target block not found"
+    pool.blockPool.addMissing(aggregate.data.beacon_block_root)
+    return
+
+  # TODO this could be any state in the target epoch
   pool.blockPool.withState(
     pool.blockPool.tmpState,
-    BlockSlot(blck: attestationBlck, slot: aggregate.data.slot)):
+    tgtBlck.atSlot(aggregate.data.target.epoch.compute_start_slot_at_epoch)):
     var cache = getEpochCache(blck, state)
     if not is_aggregator(
       state, aggregate.data.slot, aggregate.data.index.CommitteeIndex,
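For reference, the is_aggregator rule cited in the comment above is the spec's hash-based sampling of committee members. A simplified sketch of just the selection arithmetic, with the committee lookup and BLS signature hashing elided and the first 8 bytes of the hash passed in as a ready-made uint64 (TARGET_AGGREGATORS_PER_COMMITTEE is 16 in the phase0 spec):

const TARGET_AGGREGATORS_PER_COMMITTEE = 16'u64

func isAggregatorSketch(committeeLen, sigHashPrefix: uint64): bool =
  ## A validator aggregates when the hash prefix of its slot signature is
  ## divisible by committee_len div TARGET_AGGREGATORS_PER_COMMITTEE.
  let modulo = max(1'u64, committeeLen div TARGET_AGGREGATORS_PER_COMMITTEE)
  sigHashPrefix mod modulo == 0

doAssert isAggregatorSketch(128, 24)      # 24 mod (128 div 16) == 0
doAssert not isAggregatorSketch(128, 25)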
12 changes: 6 additions & 6 deletions beacon_chain/beacon_node.nim
@@ -25,7 +25,8 @@ import
   spec/state_transition,
   conf, time, beacon_chain_db, validator_pool, extras,
   attestation_pool, block_pool, eth2_network, eth2_discovery,
-  beacon_node_common, beacon_node_types, block_pools/block_pools_types,
+  beacon_node_common, beacon_node_types,
+  block_pools/[block_pools_types, candidate_chains],
   nimbus_binary_common, network_metadata,
   mainchain_monitor, version, ssz/[merkleization], sszdump, merkle_minimal,
   sync_protocol, request_manager, keystore_management, interop, statusbar,
@@ -247,7 +248,7 @@ proc init*(T: type BeaconNode,
     enrForkId = enrForkIdFromState(blockPool.headState.data.data)
     topicBeaconBlocks = getBeaconBlocksTopic(enrForkId.forkDigest)
     topicAggregateAndProofs = getAggregateAndProofsTopic(enrForkId.forkDigest)
-    network = await createEth2Node(rng, conf, enrForkId)
+    network = createEth2Node(rng, conf, enrForkId)

   var res = BeaconNode(
     nickname: nickname,
@@ -273,7 +274,7 @@
       onBeaconBlock(res, signedBlock)
     )

-  await res.addLocalValidators()
+  res.addLocalValidators()
Contributor:

It seems like this might create race conditions with other parts of the code, perhaps, if the local validators aren't yet there?

Member Author:

Well, the code is more synchronous now without the await, because addLocalValidators is no longer asynchronous (notably, because it no longer has to schedule anything on the async loop).
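For context, the addLocalValidators diff itself is not part of this page; the shape of the change under discussion is roughly the following sketch, with a hypothetical BeaconNode stand-in and trivial bodies:

import std/asyncdispatch

type BeaconNode = ref object
  validatorCount: int

# Before (sketch): an {.async.} proc returns a Future that callers must await;
# code scheduled on the loop could observe the node before validators exist.
proc addLocalValidatorsAsync(node: BeaconNode) {.async.} =
  node.validatorCount = 1

# After (sketch): a plain proc completes before the caller's next statement,
# which is what removes the ordering concern raised above.
proc addLocalValidators(node: BeaconNode) =
  node.validatorCount = 1

let node = BeaconNode()
node.addLocalValidators()
doAssert node.validatorCount == 1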


   # This merely configures the BeaconSync
   # The traffic will be started when we join the network.
@@ -928,9 +929,8 @@ when hasPrompt:
   # p.useHistoryFile()

   proc dataResolver(expr: string): string =
-    template justified: untyped = node.blockPool.head.atSlot(
-      node.blockPool.headState.data.data.current_justified_checkpoint.epoch.
-        compute_start_slot_at_epoch)
+    template justified: untyped = node.blockPool.head.atEpochStart(
+      node.blockPool.headState.data.data.current_justified_checkpoint.epoch)
     # TODO:
     # We should introduce a general API for resolving dot expressions
     # such as `db.latest_block.slot` or `metrics.connected_peers`.
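The TODO above suggests a table-driven alternative; a minimal sketch of that idea, where the registry and its keys are illustrative assumptions rather than the node's actual API:

import std/tables

var registry = initTable[string, proc(): string]()

proc register(expr: string, getter: proc(): string) =
  registry[expr] = getter

proc resolve(expr: string): string =
  # Look the dot expression up instead of hard-coding a branch per name
  if expr in registry:
    registry[expr]()
  else:
    "<unknown: " & expr & ">"

register("db.latest_block.slot", proc(): string = "12345")
doAssert resolve("db.latest_block.slot") == "12345"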
3 changes: 2 additions & 1 deletion beacon_chain/block_pools/block_pools_types.nim
@@ -153,7 +153,8 @@ type

     slot*: Slot # TODO could calculate this by walking to root, but..

-    epochsInfo*: seq[EpochRef]
+    epochsInfo*: seq[EpochRef] ##\
+      ## Cached information about the epochs starting at this block.
       ## Could be multiple, since blocks could skip slots, but usually, not many
       ## Even if competing forks happen later during this epoch, potential empty
       ## slots beforehand must all be from this fork. getEpochInfo() is the only
65 changes: 47 additions & 18 deletions beacon_chain/block_pools/candidate_chains.nim
@@ -162,6 +162,14 @@ func atSlot*(blck: BlockRef, slot: Slot): BlockSlot =
   ## block proposal)
   BlockSlot(blck: blck.getAncestorAt(slot), slot: slot)

+func atEpochStart*(blck: BlockRef, epoch: Epoch): BlockSlot =
+  ## Return the BlockSlot corresponding to the first slot in the given epoch
+  atSlot(blck, epoch.compute_start_slot_at_epoch)
+
+func atEpochEnd*(blck: BlockRef, epoch: Epoch): BlockSlot =
+  ## Return the BlockSlot corresponding to the last slot in the given epoch
+  atSlot(blck, (epoch + 1).compute_start_slot_at_epoch - 1)
+
 func getEpochInfo*(blck: BlockRef, state: BeaconState): EpochRef =
   # This is the only intended mechanism by which to get an EpochRef
   let
@@ -186,7 +194,20 @@ func getEpochInfo*(blck: BlockRef, state: BeaconState): EpochRef =

 func getEpochCache*(blck: BlockRef, state: BeaconState): StateCache =
   let epochInfo = getEpochInfo(blck, state)
   result = StateCache()
+  if epochInfo.epoch > 0:
+    # When doing state transitioning, both the current and previous epochs are
+    # useful from a cache perspective since attestations may come from either -
+    # we'll use the last slot from the epoch because it is more likely to
+    # be filled in already, compared to the first slot where the block might
+    # be from the epoch before.
+    let
+      prevEpochBlck = blck.atEpochEnd(epochInfo.epoch - 1).blck
+
+    for ei in prevEpochBlck.epochsInfo:
+      if ei.epoch == epochInfo.epoch - 1:
+        result.shuffled_active_validator_indices[ei.epoch] =
+          ei.shuffled_active_validator_indices
+
   result.shuffled_active_validator_indices[state.get_current_epoch()] =
     epochInfo.shuffled_active_validator_indices
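A worked example of the boundary arithmetic behind atEpochStart/atEpochEnd and the cache priming above, assuming mainnet's SLOTS_PER_EPOCH of 32:

const SLOTS_PER_EPOCH = 32'u64

func startSlot(epoch: uint64): uint64 = epoch * SLOTS_PER_EPOCH
func endSlot(epoch: uint64): uint64 = (epoch + 1) * SLOTS_PER_EPOCH - 1

# Epoch 2 spans slots 64..95, so priming the cache for epoch 1 resolves
# against slot 63 - the last slot of epoch 1, which is the slot most likely
# to already carry epoch 1's EpochRef (a block at slot 64 may itself already
# belong to epoch 2).
doAssert startSlot(2) == 64
doAssert endSlot(2) == 95
doAssert endSlot(1) == 63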

@@ -279,9 +300,8 @@ proc init*(T: type CandidateChains,
   # from the same epoch as the head, thus the finalized and justified slots are
   # the same - these only change on epoch boundaries.
   let
-    finalizedSlot =
-      tmpState.data.data.finalized_checkpoint.epoch.compute_start_slot_at_epoch()
-    finalizedHead = headRef.atSlot(finalizedSlot)
+    finalizedHead = headRef.atEpochStart(
+      tmpState.data.data.finalized_checkpoint.epoch)

   let res = CandidateChains(
     blocks: blocks,
@@ -316,12 +336,18 @@ proc init*(T: type CandidateChains,
   res

 proc getEpochRef*(pool: CandidateChains, blck: BlockRef, epoch: Epoch): EpochRef =
-  let bs = blck.atSlot(epoch.compute_start_slot_at_epoch)
-  for e in bs.blck.epochsInfo:
-    if e.epoch == epoch:
-      return e
+  var bs = blck.atEpochEnd(epoch)
+
+  while true:
+    # Any block from within the same epoch will carry the same epochinfo, so
+    # we start at the most recent one
+    for e in bs.blck.epochsInfo:
+      if e.epoch == epoch:
+        return e
+    if bs.slot == epoch.compute_start_slot_at_epoch:
Contributor:

Couldn't blocks skip slots here, including the first slot of an epoch?

Member Author:

That's why BlockSlot.parent moves slot-by-slot instead of block by block :)

+      break
+    bs = bs.parent

   # TODO use any state from epoch
   pool.withState(pool.tmpState, bs):
     getEpochInfo(blck, state)
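The exchange above turns on how BlockSlot.parent walks the chain: the slot always steps back by exactly one, and the block reference only changes once the walk moves past the block's own slot, so skipped slots - including an empty first slot of an epoch - are still visited. A self-contained sketch of that behaviour with simplified stand-in types (the real BlockRef and BlockSlot carry more fields):

type
  BlockRefSketch = ref object
    slot: uint64
    parent: BlockRefSketch

  BlockSlotSketch = object
    blck: BlockRefSketch
    slot: uint64

func parent(bs: BlockSlotSketch): BlockSlotSketch =
  ## Step back one slot; hop to the parent block only when the current
  ## block was proposed at the slot being left.
  BlockSlotSketch(
    blck: (if bs.slot > bs.blck.slot: bs.blck else: bs.blck.parent),
    slot: bs.slot - 1)

# A block at slot 3 building on a block at slot 0: the walk visits the
# empty slots 2 and 1 one at a time instead of jumping block to block.
let genesis = BlockRefSketch(slot: 0)
let blck = BlockRefSketch(slot: 3, parent: genesis)
var bs = BlockSlotSketch(blck: blck, slot: 3)
bs = bs.parent
doAssert bs.slot == 2 and bs.blck == genesis
bs = bs.parent
doAssert bs.slot == 1 and bs.blck == genesis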

@@ -723,13 +749,19 @@ proc updateHead*(dag: CandidateChains, newHead: BlockRef) =
     lastHead = dag.head
   dag.db.putHeadBlock(newHead.root)

-  # Start off by making sure we have the right state
-  updateStateData(
-    dag, dag.headState, BlockSlot(blck: newHead, slot: newHead.slot))
+  # Start off by making sure we have the right state - as a special case, we'll
+  # check the last block that was cleared by clearance - it might be just the
+  # thing we're looking for
+
+  if dag.clearanceState.blck == newHead and
+      dag.clearanceState.data.data.slot == newHead.slot:
+    assign(dag.headState, dag.clearanceState)
+  else:
+    updateStateData(
+      dag, dag.headState, newHead.atSlot(newHead.slot))

   dag.head = newHead

-  # TODO isAncestorOf may be expensive - too expensive?
   if not lastHead.isAncestorOf(newHead):
     info "Updated head block with reorg",
       lastHead = shortLog(lastHead),
@@ -750,10 +782,8 @@
       justified = shortLog(dag.headState.data.data.current_justified_checkpoint),
       finalized = shortLog(dag.headState.data.data.finalized_checkpoint)
   let
-    finalizedEpochStartSlot =
-      dag.headState.data.data.finalized_checkpoint.epoch.
-        compute_start_slot_at_epoch()
-    finalizedHead = newHead.atSlot(finalizedEpochStartSlot)
+    finalizedHead = newHead.atEpochStart(
+      dag.headState.data.data.finalized_checkpoint.epoch)

   doAssert (not finalizedHead.blck.isNil),
     "Block graph should always lead to a finalized block"
@@ -773,7 +803,6 @@
   #     cur = cur.parent
   #   dag.delState(cur)

-
   block: # Clean up block refs, walking block by block
     # Finalization means that we choose a single chain as the canonical one -
     # it also means we're no longer interested in any branches from that chain
41 changes: 19 additions & 22 deletions beacon_chain/block_pools/clearance.nim
@@ -8,7 +8,7 @@
 {.push raises: [Defect].}

 import
-  std/tables,
+  std/[sequtils, tables],
   chronicles,
   metrics, stew/results,
   ../extras,
@@ -46,20 +46,21 @@ proc addResolvedBlock(
     parent: BlockRef, cache: StateCache,
     onBlockAdded: OnBlockAdded
   ): BlockRef =
-  # TODO: `addResolvedBlock` is accumulating significant cruft
-  # and is in dire need of refactoring
-  # - the ugly `quarantine.inAdd` field
-  # - the callback
-  # - callback may be problematic as it's called in async validator duties
+  # TODO move quarantine processing out of here
   logScope: pcs = "block_resolution"
   doAssert state.data.slot == signedBlock.message.slot, "state must match block"

   let
     blockRoot = signedBlock.root
     blockRef = BlockRef.init(blockRoot, signedBlock.message)
-  if parent.slot.compute_epoch_at_slot() == blockRef.slot.compute_epoch_at_slot:
-    blockRef.epochsInfo = @[parent.epochsInfo[0]]
+    blockEpoch = blockRef.slot.compute_epoch_at_slot()
+  if parent.slot.compute_epoch_at_slot() == blockEpoch:
+    # If the parent and child blocks are from the same epoch, we can reuse
+    # the epoch cache - but we'll only use the current epoch because the new
+    # block might have affected what the next epoch looks like
+    blockRef.epochsInfo = filterIt(parent.epochsInfo, it.epoch == blockEpoch)
   else:
+    # Ensure we collect the epoch info if it's missing
     discard getEpochInfo(blockRef, state.data)

   link(parent, blockRef)
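The epoch-info reuse above is why std/sequtils joined the import list: filterIt keeps the elements for which an expression over the implicit `it` is true. A quick illustration:

import std/sequtils

let infoEpochs = @[3'u64, 4'u64, 4'u64]
doAssert filterIt(infoEpochs, it == 4'u64) == @[4'u64, 4'u64]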
@@ -88,7 +89,8 @@ proc addResolvedBlock(
     blockRoot = shortLog(blockRoot),
     heads = dag.heads.len()

-  # This MUST be added before the quarantine
+  # Notify others of the new block before processing the quarantine, such that
+  # notifications for parents happen before those of the children
   if onBlockAdded != nil:
     onBlockAdded(blockRef, signedBlock, state)

@@ -119,13 +121,8 @@ proc addRawBlock*(
     signedBlock: SignedBeaconBlock,
     onBlockAdded: OnBlockAdded
   ): Result[BlockRef, BlockError] =
-  ## return the block, if resolved...
-
-  # TODO: `addRawBlock` is accumulating significant cruft
-  # and is in dire need of refactoring
-  # - the ugly `quarantine.inAdd` field
-  # - the callback
-  # - callback may be problematic as it's called in async validator duties
+  ## Try adding a block to the chain, verifying first that it passes the state
+  ## transition function.

   logScope:
     blck = shortLog(signedBlock.message)
@@ -134,14 +131,12 @@
   template blck(): untyped = signedBlock.message # shortcuts without copy
   template blockRoot(): untyped = signedBlock.root

-  # Already seen this block??
   if blockRoot in dag.blocks:
     debug "Block already exists"

-    # There can be a scenario where we receive a block we already received.
-    # However this block was before the last finalized epoch and so its parent
-    # was pruned from the ForkChoice. Trying to add it again, even if the fork choice
-    # supports duplicate will lead to a crash.
+    # We should not call the block added callback for blocks that already
+    # existed in the pool, as that may confuse consumers such as the fork
+    # choice.
     return err Duplicate

   quarantine.missing.del(blockRoot)
@@ -168,7 +163,9 @@

     return err Invalid

-  if parent.slot < dag.finalizedHead.slot:
+  if (parent.slot < dag.finalizedHead.slot) or
+      (parent.slot == dag.finalizedHead.slot and
+      parent != dag.finalizedHead.blck):
     # We finalized a block that's newer than the parent of this block - this
     # block, although recent, is thus building on a history we're no longer
     # interested in pursuing. This can happen if a client produces a block
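The strengthened condition reads naturally as a standalone predicate; a hedged restatement with illustrative names, not the pool's actual API:

func buildsOnPrunedHistory(parentSlot, finalizedSlot: uint64,
                           parentIsFinalizedBlock: bool): bool =
  ## A parent strictly older than the finalized slot, or at the finalized
  ## slot but not the finalized block itself, lies on a competing history
  ## that finalization has already pruned.
  parentSlot < finalizedSlot or
    (parentSlot == finalizedSlot and not parentIsFinalizedBlock)

doAssert buildsOnPrunedHistory(9, 10, false)      # older than finalized
doAssert buildsOnPrunedHistory(10, 10, false)     # competing fork at that slot
doAssert not buildsOnPrunedHistory(10, 10, true)  # building on the finalized head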
2 changes: 1 addition & 1 deletion beacon_chain/eth2_network.nim
@@ -1142,7 +1142,7 @@ func gossipId(data: openArray[byte]): string =
 func msgIdProvider(m: messages.Message): string =
   gossipId(m.data)

-proc createEth2Node*(rng: ref BrHmacDrbgContext, conf: BeaconNodeConf, enrForkId: ENRForkID): Future[Eth2Node] {.async, gcsafe.} =
+proc createEth2Node*(rng: ref BrHmacDrbgContext, conf: BeaconNodeConf, enrForkId: ENRForkID): Eth2Node {.gcsafe.} =
   var
     (extIp, extTcpPort, extUdpPort) = setupNat(conf)
     hostAddress = tcpEndPoint(conf.libp2pAddress, conf.tcpPort)