Skip to content

Commit

Permalink
verify blocks before storing in block pool / database
Browse files Browse the repository at this point in the history
* fix state db lookup typo
* fix randao reveal slot when proposing blocks
* only store blocks that can be applied to a state
* store state at every epoch boundary (yes, needs pruning!)
* split out state advancement function when there's no block
* default state sim to 0.9 attestation ratio
  • Loading branch information
arnetheduck committed Mar 7, 2019
1 parent 72749f4 commit 31a478e
Show file tree
Hide file tree
Showing 10 changed files with 227 additions and 147 deletions.
2 changes: 1 addition & 1 deletion beacon_chain/beacon_chain_db.nim
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ proc containsBlock*(

proc containsState*(
    db: BeaconChainDB, key: Eth2Digest): bool =
  ## Returns true when the database backend holds an entry stored under the
  ## `BeaconState` subkey derived from `key`, false otherwise.
  # The lookup has to use the BeaconState subkey: querying the BeaconBlock
  # subkey here would instead report whether a *block* is stored under `key`.
  db.backend.contains(subkey(BeaconState, key))

iterator getAncestors*(db: BeaconChainDB, root: Eth2Digest):
tuple[root: Eth2Digest, blck: BeaconBlock] =
Expand Down
78 changes: 41 additions & 37 deletions beacon_chain/beacon_node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -330,13 +330,13 @@ proc proposeBlock(node: BeaconNode,
var newBlock = BeaconBlock(
slot: slot,
parent_root: node.state.blck.root,
randao_reveal: validator.genRandaoReveal(state, state.slot),
randao_reveal: validator.genRandaoReveal(state, slot),
eth1_data: node.mainchainMonitor.getBeaconBlockRef(),
signature: ValidatorSig(), # we need the rest of the block first!
body: blockBody)

let ok =
updateState(state, node.state.blck.root, some(newBlock), {skipValidation})
updateState(state, node.state.blck.root, newBlock, {skipValidation})
doAssert ok # TODO: err, could this fail somehow?

newBlock.state_root = Eth2Digest(data: hash_tree_root(state))
Expand Down Expand Up @@ -428,43 +428,40 @@ proc scheduleEpochActions(node: BeaconNode, epoch: Epoch) =
epoch = humaneEpochNum(epoch),
stateEpoch = humaneEpochNum(node.state.data.slot.slot_to_epoch())

# In case some late blocks dropped in
node.updateHead()

# Sanity check - verify that the current head block is not too far behind
if node.state.data.slot.slot_to_epoch() + 1 < epoch:
# Normally, we update the head state lazily, just before making an
# attestation. However, if we skip scheduling attestations, we'll never
# run the head update - thus we make an attempt now:
node.updateHead()

if node.state.data.slot.slot_to_epoch() + 1 < epoch:
# We're still behind!
#
# There are a few ways this can happen:
#
# * we receive no attestations or blocks for an extended period of time
# * all the attestations we receive are bogus - maybe we're connected to
# the wrong network?
# * we just started and still haven't synced
#
# TODO make an effort to find other nodes and sync? A worst case scenario
# here is that the network stalls because nobody is sending out
# attestations because nobody is scheduling them, in a vicious
# circle
# TODO diagnose the various scenarios and do something smart...

let
expectedSlot = node.state.data.getSlotFromTime()
nextSlot = expectedSlot + 1
at = node.slotStart(nextSlot)

notice "Delaying epoch scheduling, head too old - scheduling new attempt",
stateSlot = humaneSlotNum(node.state.data.slot),
expectedEpoch = humaneEpochNum(epoch),
expectedSlot = humaneSlotNum(expectedSlot),
fromNow = (at - fastEpochTime()) div 1000
# We're hopelessly behind!
#
# There are a few ways this can happen:
#
# * we receive no attestations or blocks for an extended period of time
# * all the attestations we receive are bogus - maybe we're connected to
# the wrong network?
# * we just started and still haven't synced
#
# TODO make an effort to find other nodes and sync? A worst case scenario
# here is that the network stalls because nobody is sending out
# attestations because nobody is scheduling them, in a vicious
# circle
# TODO diagnose the various scenarios and do something smart...

addTimer(at) do (p: pointer):
node.scheduleEpochActions(nextSlot.slot_to_epoch())
return
let
expectedSlot = node.state.data.getSlotFromTime()
nextSlot = expectedSlot + 1
at = node.slotStart(nextSlot)

notice "Delaying epoch scheduling, head too old - scheduling new attempt",
stateSlot = humaneSlotNum(node.state.data.slot),
expectedEpoch = humaneEpochNum(epoch),
expectedSlot = humaneSlotNum(expectedSlot),
fromNow = (at - fastEpochTime()) div 1000

addTimer(at) do (p: pointer):
node.scheduleEpochActions(nextSlot.slot_to_epoch())
return

# TODO: is this necessary with the new shuffling?
# see get_beacon_proposer_index
Expand Down Expand Up @@ -580,7 +577,14 @@ proc onBeaconBlock(node: BeaconNode, blck: BeaconBlock) =
voluntary_exits = blck.body.voluntary_exits.len,
transfers = blck.body.transfers.len

if not node.blockPool.add(blockRoot, blck):
var
# TODO We could avoid this copy by having node.state as a general cache
# that just holds a random recent state - that would however require
# rethinking scheduling etc, which relies on there being a fairly
# accurate representation of the state available. Notably, when there's
# a reorg, the scheduling might change!
stateTmp = node.state
if not node.blockPool.add(stateTmp, blockRoot, blck):
# TODO the fact that add returns a bool that causes the parent block to be
# pre-emptively fetched is quite ugly - fix.
node.fetchBlocks(@[blck.parent_root])
Expand Down
110 changes: 80 additions & 30 deletions beacon_chain/block_pool.nim
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import
bitops, chronicles, options, sequtils, sets, tables,
ssz, beacon_chain_db, state_transition, extras,
spec/[crypto, datatypes, digest]
spec/[crypto, datatypes, digest, helpers]

type
BlockPool* = ref object
Expand Down Expand Up @@ -140,10 +140,18 @@ proc init*(T: type BlockPool, db: BeaconChainDB): BlockPool =
db: db
)

proc add*(pool: var BlockPool, blockRoot: Eth2Digest, blck: BeaconBlock): bool =
proc updateState*(
pool: BlockPool, state: var StateData, blck: BlockRef) {.gcsafe.}

proc add*(
pool: var BlockPool, state: var StateData, blockRoot: Eth2Digest,
blck: BeaconBlock): bool {.gcsafe.} =
## return false indicates that the block parent was missing and should be
## fetched
## TODO reevaluate this API - it's pretty ugly with the bool return
## the state parameter may be updated to include the given block, if
## everything checks out
# TODO reevaluate passing the state in like this
# TODO reevaluate this API - it's pretty ugly with the bool return
doAssert blockRoot == hash_tree_root_final(blck)

# Already seen this block??
Expand All @@ -169,23 +177,40 @@ proc add*(pool: var BlockPool, blockRoot: Eth2Digest, blck: BeaconBlock): bool =

return true

# TODO we should now validate the block to ensure that it's sane - but the
# only way to do that is to apply it to the state... for now, we assume
# all blocks are good!
let parent = pool.blocks.getOrDefault(blck.parent_root)

if parent != nil:
# The block is resolved, nothing more to do!
# The block might have been in either of these - we don't want any more
# work done on its behalf
pool.unresolved.del(blockRoot)
pool.pending.del(blockRoot)

# The block is resolved, now it's time to validate it to ensure that the
# blocks we add to the database are clean for the given state
updateState(pool, state, parent)
skipSlots(state.data, parent.root, blck.slot - 1)

if not updateState(state.data, parent.root, blck, {}):
# TODO find a better way to log all this block data
notice "Invalid block",
blockRoot = shortLog(blockRoot),
slot = humaneSlotNum(blck.slot),
stateRoot = shortLog(blck.state_root),
parentRoot = shortLog(blck.parent_root),
signature = shortLog(blck.signature),
proposer_slashings = blck.body.proposer_slashings.len,
attester_slashings = blck.body.attester_slashings.len,
attestations = blck.body.attestations.len,
deposits = blck.body.deposits.len,
voluntary_exits = blck.body.voluntary_exits.len,
transfers = blck.body.transfers.len

let blockRef = BlockRef(
root: blockRoot
)
link(parent, blockRef)

pool.blocks[blockRoot] = blockRef
# The block might have been in either of these - we don't want any more
# work done on its behalf
pool.unresolved.del(blockRoot)
pool.pending.del(blockRoot)

# Resolved blocks should be stored in database
pool.db.putBlock(blockRoot, blck)
Expand All @@ -209,7 +234,7 @@ proc add*(pool: var BlockPool, blockRoot: Eth2Digest, blck: BeaconBlock): bool =
# running out of stack etc
let retries = pool.pending
for k, v in retries:
discard pool.add(k, v)
discard pool.add(state, k, v)

return true

Expand Down Expand Up @@ -271,6 +296,8 @@ proc checkUnresolved*(pool: var BlockPool): seq[Eth2Digest] =
inc v.tries

for k in done:
# TODO Need to potentially remove from pool.pending - this is currently a
# memory leak here!
pool.unresolved.del(k)

# simple (simplistic?) exponential backoff for retries..
Expand All @@ -279,24 +306,43 @@ proc checkUnresolved*(pool: var BlockPool): seq[Eth2Digest] =
result.add(k)

proc skipAndUpdateState(
state: var BeaconState, blck: BeaconBlock, flags: UpdateFlags): bool =
skipSlots(state, blck.parent_root, blck.slot - 1)
updateState(state, blck.parent_root, some(blck), flags)
state: var BeaconState, blck: BeaconBlock, flags: UpdateFlags,
afterUpdate: proc (state: BeaconState)): bool =
skipSlots(state, blck.parent_root, blck.slot - 1, afterUpdate)
let ok = updateState(state, blck.parent_root, blck, flags)

proc updateState*(
pool: BlockPool, state: var StateData, blck: BlockRef) =
if state.blck.root == blck.root:
return # State already at the right spot
afterUpdate(state)

ok

# TODO this blockref should never be created, since we trace every blockref
# back to the tail block
doAssert (not blck.parent.isNil), "trying to apply genesis block!"
proc maybePutState(pool: BlockPool, state: BeaconState) =
  ## Persists `state` through `pool.db` when its slot is a multiple of
  ## SLOTS_PER_EPOCH; for any other slot nothing is logged or written.
  # TODO we save state at every epoch start but never remove them - we also
  #      potentially save multiple states per slot if reorgs happen, meaning
  #      we could easily see a state explosion
  if state.slot mod SLOTS_PER_EPOCH != 0:
    # Not on an epoch boundary - nothing to store
    return

  info "Storing state",
    stateSlot = humaneSlotNum(state.slot),
    stateRoot = hash_tree_root_final(state) # TODO cache?
  pool.db.putState(state)

proc updateState*(
pool: BlockPool, state: var StateData, blck: BlockRef) =
# Rewind or advance state such that it matches the given block - this may
# include replaying from an earlier snapshot if blck is on a different branch
# or if the state has advanced to a higher slot number than blck
var ancestors = @[pool.get(blck)]

# We need to check the slot because the state might have moved forwards
# without blocks
if state.blck.root == blck.root and state.data.slot == ancestors[0].data.slot:
return # State already at the right spot

# Common case: blck points to a block that is one step ahead of state
if state.blck.root == blck.parent.root:
let ok = skipAndUpdateState(state.data, ancestors[0].data, {skipValidation})
if state.blck.root == ancestors[0].data.parent_root and
state.data.slot + 1 == ancestors[0].data.slot:
let ok = skipAndUpdateState(
state.data, ancestors[0].data, {skipValidation}) do (state: BeaconState):
pool.maybePutState(state)
doAssert ok, "Blocks in database should never fail to apply.."
state.blck = blck
state.root = ancestors[0].data.state_root
Expand Down Expand Up @@ -329,6 +375,7 @@ proc updateState*(

notice "Replaying state transitions",
stateSlot = humaneSlotNum(state.data.slot),
stateRoot = shortLog(ancestor.data.state_root),
prevStateSlot = humaneSlotNum(ancestorState.get().slot),
ancestors = ancestors.len

Expand All @@ -345,18 +392,21 @@ proc updateState*(
for i in countdown(ancestors.len - 2, 0):
let last = ancestors[i]

skipSlots(state.data, last.data.parent_root, last.data.slot - 1)
skipSlots(
state.data, last.data.parent_root,
last.data.slot - 1) do(state: BeaconState):
pool.maybePutState(state)

# TODO technically, we should be adding states to the database here because
# we're going down a different fork..
let ok = updateState(
state.data, last.data.parent_root, some(last.data), {skipValidation})

doAssert(ok)
state.data, last.data.parent_root, last.data, {skipValidation})
doAssert ok,
"We only keep validated blocks in the database, should never fail"

state.blck = blck
state.root = ancestors[0].data.state_root

pool.maybePutState(state.data)

proc loadTailState*(pool: BlockPool): StateData =
## Load the state associated with the current tail in the pool
StateData(
Expand Down
Loading

0 comments on commit 31a478e

Please sign in to comment.