Skip to content

Commit

Permalink
feat(log): Show the current network upgrade in progress logs (#4694)
Browse files Browse the repository at this point in the history
* Improve time logging using humantime

* Only log full seconds, ignore the fractional part

* Move humantime_seconds to tracing::fmt

* Move the progress task to its own module

* Add missing humantime dependency

* Log the network upgrade in progress logs

* Log when Zebra verifies the final checkpoint
  • Loading branch information
teor2345 authored Jun 28, 2022
1 parent 3b839b7 commit d4b9353
Show file tree
Hide file tree
Showing 8 changed files with 288 additions and 243 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions zebra-consensus/src/checkpoint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,11 @@ where
if verified_height == self.checkpoint_list.max_height() {
metrics::gauge!("checkpoint.verified.height", verified_height.0 as f64);
self.verifier_progress = FinalCheckpoint;

tracing::info!(
final_checkpoint_height = ?verified_height,
"verified final checkpoint: starting full validation",
);
} else if self.checkpoint_list.contains(verified_height) {
metrics::gauge!("checkpoint.verified.height", verified_height.0 as f64);
self.verifier_progress = PreviousCheckpoint(verified_height);
Expand Down
1 change: 1 addition & 0 deletions zebrad/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ zebra-state = { path = "../zebra-state" }
abscissa_core = "0.5"
gumdrop = "0.7"
chrono = "0.4.19"
humantime = "2.1.0"
humantime-serde = "1.1.1"
indexmap = "1.9.1"
lazy_static = "1.4.0"
Expand Down
246 changes: 3 additions & 243 deletions zebrad/src/commands/start.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,32 +67,23 @@
//!
//! Some of the diagnostic features are optional, and need to be enabled at compile-time.
use std::{cmp::max, ops::Add, time::Duration};
use std::cmp::max;

use abscissa_core::{config, Command, FrameworkError, Options, Runnable};
use chrono::Utc;
use color_eyre::eyre::{eyre, Report};
use futures::FutureExt;
use num_integer::div_ceil;
use tokio::{pin, select, sync::oneshot};
use tower::{builder::ServiceBuilder, util::BoxService};
use tracing_futures::Instrument;

use zebra_chain::{
block::Height,
chain_tip::ChainTip,
parameters::{Network, NetworkUpgrade, POST_BLOSSOM_POW_TARGET_SPACING},
};
use zebra_consensus::CheckpointList;

use zebra_rpc::server::RpcServer;

use crate::{
application::app_version,
components::{
inbound::{self, InboundSetupData},
mempool::{self, Mempool},
sync::{self, SyncStatus},
sync::{self, show_block_chain_progress},
tokio::{RuntimeRun, TokioComponent},
ChainSync, Inbound,
},
Expand Down Expand Up @@ -216,7 +207,7 @@ impl StartCmd {
);

let progress_task_handle = tokio::spawn(
Self::update_progress(config.network.network, latest_chain_tip, sync_status)
show_block_chain_progress(config.network.network, latest_chain_tip, sync_status)
.in_current_span(),
);

Expand Down Expand Up @@ -361,237 +352,6 @@ impl StartCmd {
),
)
}

/// Logs Zebra's estimated progress towards the chain tip.
async fn update_progress(
network: Network,
latest_chain_tip: impl ChainTip,
sync_status: SyncStatus,
) {
// The amount of time between progress logs.
const LOG_INTERVAL: Duration = Duration::from_secs(60);

// The number of blocks we consider to be close to the tip.
//
// Most chain forks are 1-7 blocks long.
const MAX_CLOSE_TO_TIP_BLOCKS: i32 = 1;

// Skip slow sync warnings when we are this close to the tip.
//
// In testing, we've seen warnings around 30 blocks.
//
// TODO: replace with `MAX_CLOSE_TO_TIP_BLOCKS` after fixing slow syncing near tip (#3375)
const MIN_SYNC_WARNING_BLOCKS: i32 = 60;

// The number of fractional digits in sync percentages.
const SYNC_PERCENT_FRAC_DIGITS: usize = 3;

// The minimum number of extra blocks mined between updating a checkpoint list,
// and running an automated test that depends on that list.
//
// Makes sure that the block finalization code always runs in sync tests,
// even if the miner or test node clock is wrong by a few minutes.
//
// This is an estimate based on the time it takes to:
// - get the tip height from `zcashd`,
// - run `zebra-checkpoints` to update the checkpoint list,
// - submit a pull request, and
// - run a CI test that logs progress based on the new checkpoint height.
//
// We might add tests that sync from a cached tip state,
// so we only allow a few extra blocks here.
const MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE: i32 = 10;

// The minimum number of extra blocks after the highest checkpoint, based on:
// - the non-finalized state limit, and
// - the minimum number of extra blocks mined between a checkpoint update,
// and the automated tests for that update.
let min_after_checkpoint_blocks = i32::try_from(zebra_state::MAX_BLOCK_REORG_HEIGHT)
.expect("constant fits in i32")
+ MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE;

// The minimum height of the valid best chain, based on:
// - the hard-coded checkpoint height,
// - the minimum number of blocks after the highest checkpoint.
let after_checkpoint_height = CheckpointList::new(network)
.max_height()
.add(min_after_checkpoint_blocks)
.expect("hard-coded checkpoint height is far below Height::MAX");

let target_block_spacing = NetworkUpgrade::target_spacing_for_height(network, Height::MAX);
let max_block_spacing =
NetworkUpgrade::minimum_difficulty_spacing_for_height(network, Height::MAX);

// We expect the state height to increase at least once in this interval.
//
// Most chain forks are 1-7 blocks long.
//
// TODO: remove the target_block_spacing multiplier,
// after fixing slow syncing near tip (#3375)
let min_state_block_interval = max_block_spacing.unwrap_or(target_block_spacing * 4) * 2;

// Formatted string for logging.
let max_block_spacing = max_block_spacing
.map(|duration| duration.to_string())
.unwrap_or_else(|| "None".to_string());

// The last time we downloaded and verified at least one block.
//
// Initialized to the start time to simplify the code.
let mut last_state_change_time = Utc::now();

// The state tip height, when we last downloaded and verified at least one block.
//
// Initialized to the genesis height to simplify the code.
let mut last_state_change_height = Height(0);

loop {
let now = Utc::now();
let is_syncer_stopped = sync_status.is_close_to_tip();

if let Some(estimated_height) =
latest_chain_tip.estimate_network_chain_tip_height(network, now)
{
// The estimate/actual race doesn't matter here,
// because we're only using it for metrics and logging.
let current_height = latest_chain_tip
.best_tip_height()
.expect("unexpected empty state: estimate requires a block height");

// Work out the sync progress towards the estimated tip.
let sync_progress = f64::from(current_height.0) / f64::from(estimated_height.0);
let sync_percent = format!(
"{:.frac$} %",
sync_progress * 100.0,
frac = SYNC_PERCENT_FRAC_DIGITS,
);

let remaining_sync_blocks = estimated_height - current_height;

// Work out how long it has been since the state height has increased.
//
// Non-finalized forks can decrease the height, we only want to track increases.
if current_height > last_state_change_height {
last_state_change_height = current_height;
last_state_change_time = now;
}

let time_since_last_state_block = last_state_change_time.signed_duration_since(now);

// TODO:
// - log progress, remaining blocks, and remaining time to next network upgrade
// - add some of this info to the metrics

if time_since_last_state_block > min_state_block_interval {
// The state tip height hasn't increased for a long time.
//
// Block verification can fail if the local node's clock is wrong.
warn!(
%sync_percent,
?current_height,
%time_since_last_state_block,
%target_block_spacing,
%max_block_spacing,
?is_syncer_stopped,
"chain updates have stalled, \
state height has not increased for {} minutes. \
Hint: check your network connection, \
and your computer clock and time zone",
time_since_last_state_block.num_minutes(),
);
} else if is_syncer_stopped && remaining_sync_blocks > MIN_SYNC_WARNING_BLOCKS {
// We've stopped syncing blocks, but we estimate we're a long way from the tip.
//
// TODO: warn after fixing slow syncing near tip (#3375)
info!(
%sync_percent,
?current_height,
?remaining_sync_blocks,
?after_checkpoint_height,
%time_since_last_state_block,
"initial sync is very slow, or estimated tip is wrong. \
Hint: check your network connection, \
and your computer clock and time zone",
);
} else if is_syncer_stopped && current_height <= after_checkpoint_height {
// We've stopped syncing blocks,
// but we're below the minimum height estimated from our checkpoints.
let min_minutes_after_checkpoint_update: i64 = div_ceil(
i64::from(MIN_BLOCKS_MINED_AFTER_CHECKPOINT_UPDATE)
* POST_BLOSSOM_POW_TARGET_SPACING,
60,
);

warn!(
%sync_percent,
?current_height,
?remaining_sync_blocks,
?after_checkpoint_height,
%time_since_last_state_block,
"initial sync is very slow, and state is below the highest checkpoint. \
Hint: check your network connection, \
and your computer clock and time zone. \
Dev Hint: were the checkpoints updated in the last {} minutes?",
min_minutes_after_checkpoint_update,
);
} else if is_syncer_stopped {
// We've stayed near the tip for a while, and we've stopped syncing lots of blocks.
// So we're mostly using gossiped blocks now.
info!(
%sync_percent,
?current_height,
?remaining_sync_blocks,
%time_since_last_state_block,
"finished initial sync to chain tip, using gossiped blocks",
);
} else if remaining_sync_blocks <= MAX_CLOSE_TO_TIP_BLOCKS {
// We estimate we're near the tip, but we have been syncing lots of blocks recently.
// We might also be using some gossiped blocks.
info!(
%sync_percent,
?current_height,
?remaining_sync_blocks,
%time_since_last_state_block,
"close to finishing initial sync, \
confirming using syncer and gossiped blocks",
);
} else {
// We estimate we're far from the tip, and we've been syncing lots of blocks.
info!(
%sync_percent,
?current_height,
?remaining_sync_blocks,
%time_since_last_state_block,
"estimated progress to chain tip",
);
}
} else {
let sync_percent = format!("{:.frac$} %", 0.0f64, frac = SYNC_PERCENT_FRAC_DIGITS,);

if is_syncer_stopped {
// We've stopped syncing blocks,
// but we haven't downloaded and verified the genesis block.
warn!(
%sync_percent,
current_height = %"None",
"initial sync can't download and verify the genesis block. \
Hint: check your network connection, \
and your computer clock and time zone",
);
} else {
// We're waiting for the genesis block to be committed to the state,
// before we can estimate the best chain tip.
info!(
%sync_percent,
current_height = %"None",
"initial sync is waiting to download the genesis block",
);
}
}

tokio::time::sleep(LOG_INTERVAL).await;
}
}
}

impl Runnable for StartCmd {
Expand Down
2 changes: 2 additions & 0 deletions zebrad/src/components/sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use crate::{

mod downloads;
mod gossip;
mod progress;
mod recent_sync_lengths;
mod status;

Expand All @@ -39,6 +40,7 @@ mod tests;
use downloads::{AlwaysHedge, Downloads};

pub use gossip::{gossip_best_tip_block_hashes, BlockGossipError};
pub use progress::show_block_chain_progress;
pub use recent_sync_lengths::RecentSyncLengths;
pub use status::SyncStatus;

Expand Down
Loading

0 comments on commit d4b9353

Please sign in to comment.