Skip to content

Commit

Permalink
Move history tree and value balance to typed column families (#8115)
Browse files Browse the repository at this point in the history
* impl TryFrom<zcash_primitives::BlockHeight> for Height

* Add type-safe read and write database methods

* Only allow typed access to the scanner DB

* Update docs

* Implement a common method as a trait

* Fix imports

* Tidy state imports

* Activate tracing logging macros in the whole scanner crate

* Fix dead code warnings

* Use a more sensible export order

* Remove a 1.72 lint exception now 1.74 is stable

* Switch history trees over to TypedColumnFamily, and remove redundant code

* Add typed batch creation methods, and switch history trees to them

* Convert ValueBalance to typed column families

* Make the APIs compatible after a merge

* Use `ZebraDb` instead of `DiskDb` where needed

---------

Co-authored-by: Marek <[email protected]>
  • Loading branch information
teor2345 and upbqdn authored Dec 20, 2023
1 parent 3c8b93d commit ad015e0
Show file tree
Hide file tree
Showing 12 changed files with 221 additions and 126 deletions.
6 changes: 6 additions & 0 deletions zebra-chain/src/history_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,12 @@ impl From<NonEmptyHistoryTree> for HistoryTree {
}
}

impl From<Option<NonEmptyHistoryTree>> for HistoryTree {
fn from(tree: Option<NonEmptyHistoryTree>) -> Self {
HistoryTree(tree)
}
}

impl Deref for HistoryTree {
type Target = Option<NonEmptyHistoryTree>;
fn deref(&self) -> &Self::Target {
Expand Down
10 changes: 0 additions & 10 deletions zebra-chain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,6 @@
#![doc(html_root_url = "https://docs.rs/zebra_chain")]
// Required by bitvec! macro
#![recursion_limit = "256"]
//
// Rust 1.72 has a false positive when nested generics are used inside Arc.
// This makes the `arc_with_non_send_sync` lint trigger on a lot of proptest code.
//
// TODO: remove this allow when Rust 1.73 is stable, because this lint bug is fixed in that release:
// <https://github.com/rust-lang/rust-clippy/issues/11076>
#![cfg_attr(
any(test, feature = "proptest-impl"),
allow(clippy::arc_with_non_send_sync)
)]

#[macro_use]
extern crate bitflags;
Expand Down
10 changes: 0 additions & 10 deletions zebra-consensus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,6 @@
#![doc(html_favicon_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-favicon-128.png")]
#![doc(html_logo_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-icon.png")]
#![doc(html_root_url = "https://docs.rs/zebra_consensus")]
//
// Rust 1.72 has a false positive when nested generics are used inside Arc.
// This makes the `arc_with_non_send_sync` lint trigger on a lot of proptest code.
//
// TODO: remove this allow when Rust 1.73 is stable, because this lint bug is fixed in that release:
// <https://github.com/rust-lang/rust-clippy/issues/11076>
#![cfg_attr(
any(test, feature = "proptest-impl"),
allow(clippy::arc_with_non_send_sync)
)]

mod block;
mod checkpoint;
Expand Down
10 changes: 0 additions & 10 deletions zebra-network/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,6 @@
#![doc(html_favicon_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-favicon-128.png")]
#![doc(html_logo_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-icon.png")]
#![doc(html_root_url = "https://docs.rs/zebra_network")]
//
// Rust 1.72 has a false positive when nested generics are used inside Arc.
// This makes the `arc_with_non_send_sync` lint trigger on a lot of proptest code.
//
// TODO: remove this allow when Rust 1.73 is stable, because this lint bug is fixed in that release:
// <https://github.com/rust-lang/rust-clippy/issues/11076>
#![cfg_attr(
any(test, feature = "proptest-impl"),
allow(clippy::arc_with_non_send_sync)
)]

#[macro_use]
extern crate pin_project;
Expand Down
12 changes: 6 additions & 6 deletions zebra-scan/src/storage/db/sapling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ use itertools::Itertools;

use zebra_chain::block::Height;
use zebra_state::{
SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
DiskWriteBatch, SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
SaplingScanningKey, TransactionIndex, TransactionLocation, TypedColumnFamily, WriteTypedBatch,
};

Expand All @@ -53,7 +53,7 @@ pub type SaplingTxIdsCf<'cf> =
/// This constant should be used so the compiler can detect incorrectly typed accesses to the
/// column family.
pub type WriteSaplingTxIdsBatch<'cf> =
WriteTypedBatch<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>>;
WriteTypedBatch<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>, DiskWriteBatch>;

impl Storage {
// Reading Sapling database entries
Expand Down Expand Up @@ -167,7 +167,7 @@ impl Storage {
) {
// We skip key heights that have one or more results, so the results for each key height
// must be in a single batch.
let mut batch = self.sapling_tx_ids_cf().for_writing();
let mut batch = self.sapling_tx_ids_cf().new_batch_for_writing();

// Every `INSERT_CONTROL_INTERVAL` we add a new entry to the scanner database for each key
// so we can track progress made in the last interval even if no transaction was yet found.
Expand All @@ -192,7 +192,7 @@ impl Storage {
value: Some(sapling_result),
};

batch = batch.zs_insert(entry.index, entry.value);
batch = batch.zs_insert(&entry.index, &entry.value);
}

batch
Expand Down Expand Up @@ -228,7 +228,7 @@ impl Storage {
// TODO: ignore incorrect changes to birthday heights,
// and redundant birthday heights
self.sapling_tx_ids_cf()
.for_writing()
.new_batch_for_writing()
.insert_sapling_height(sapling_key, skip_up_to_height)
.write_batch()
.expect("unexpected database write failure");
Expand All @@ -249,6 +249,6 @@ impl<'cf> InsertSaplingHeight for WriteSaplingTxIdsBatch<'cf> {
let index = SaplingScannedDatabaseIndex::min_for_key_and_height(sapling_key, height);

// TODO: assert that we don't overwrite any entries here.
self.zs_insert(index, None)
self.zs_insert(&index, &None)
}
}
27 changes: 12 additions & 15 deletions zebra-state/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,16 @@
#![doc(html_favicon_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-favicon-128.png")]
#![doc(html_logo_url = "https://zfnd.org/wp-content/uploads/2022/03/zebra-icon.png")]
#![doc(html_root_url = "https://docs.rs/zebra_state")]
//
// Rust 1.72 has a false positive when nested generics are used inside Arc.
// This makes the `arc_with_non_send_sync` lint trigger on a lot of proptest code.
//
// TODO: remove this allow when Rust 1.73 is stable, because this lint bug is fixed in that release:
// <https://github.com/rust-lang/rust-clippy/issues/11076>
#![cfg_attr(
any(test, feature = "proptest-impl"),
allow(clippy::arc_with_non_send_sync)
)]

#[macro_use]
extern crate tracing;

// TODO: only export the Config struct and a few other important methods
pub mod config;
// Most constants are exported by default
pub mod constants;

// Allow use in external tests
#[cfg(any(test, feature = "proptest-impl"))]
pub mod arbitrary;

Expand Down Expand Up @@ -59,12 +52,14 @@ pub use service::{
OutputIndex, OutputLocation, TransactionIndex, TransactionLocation,
};

// Allow use in the scanner
#[cfg(feature = "shielded-scan")]
pub use service::finalized_state::{
SaplingScannedDatabaseEntry, SaplingScannedDatabaseIndex, SaplingScannedResult,
SaplingScanningKey,
};

// Allow use in the scanner and external tests
#[cfg(any(test, feature = "proptest-impl", feature = "shielded-scan"))]
pub use service::{
finalized_state::{
Expand All @@ -77,6 +72,7 @@ pub use service::{
#[cfg(feature = "getblocktemplate-rpcs")]
pub use response::GetBlockTemplateChainInfo;

// Allow use in external tests
#[cfg(any(test, feature = "proptest-impl"))]
pub use service::{
arbitrary::{populated_state, CHAIN_TIP_UPDATE_WAIT_LIMIT},
Expand All @@ -88,14 +84,15 @@ pub use service::{
#[cfg(any(test, feature = "proptest-impl"))]
pub use constants::latest_version_for_adding_subtrees;

#[cfg(not(any(test, feature = "proptest-impl")))]
#[allow(unused_imports)]
pub(crate) use config::hidden::{
#[cfg(any(test, feature = "proptest-impl"))]
pub use config::hidden::{
write_database_format_version_to_disk, write_state_database_format_version_to_disk,
};

#[cfg(any(test, feature = "proptest-impl"))]
pub use config::hidden::{
// Allow use only inside the crate in production
#[cfg(not(any(test, feature = "proptest-impl")))]
#[allow(unused_imports)]
pub(crate) use config::hidden::{
write_database_format_version_to_disk, write_state_database_format_version_to_disk,
};

Expand Down
66 changes: 50 additions & 16 deletions zebra-state/src/service/finalized_state/column_family.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,15 @@ where
/// This type is also drop-safe: unwritten batches have to be specifically ignored.
#[must_use = "batches must be written to the database"]
#[derive(Debug, Eq, PartialEq)]
pub struct WriteTypedBatch<'cf, Key, Value>
pub struct WriteTypedBatch<'cf, Key, Value, Batch>
where
Key: IntoDisk + FromDisk + Debug,
Value: IntoDisk + FromDisk,
Batch: WriteDisk,
{
inner: TypedColumnFamily<'cf, Key, Value>,

batch: DiskWriteBatch,
batch: Batch,
}

impl<'cf, Key, Value> Debug for TypedColumnFamily<'cf, Key, Value>
Expand Down Expand Up @@ -115,17 +116,41 @@ where
})
}

/// Returns a new writeable typed column family for this column family.
// Writing

/// Returns a typed writer for this column family for a new batch.
///
/// This is the only way to get a writeable column family, which ensures
/// These methods are the only way to get a `WriteTypedBatch`, which ensures
/// that the read and write types are consistent.
pub fn for_writing(self) -> WriteTypedBatch<'cf, Key, Value> {
pub fn new_batch_for_writing(self) -> WriteTypedBatch<'cf, Key, Value, DiskWriteBatch> {
WriteTypedBatch {
inner: self,
batch: DiskWriteBatch::new(),
}
}

/// Wraps an existing write batch, and returns a typed writer for this column family.
///
/// These methods are the only way to get a `WriteTypedBatch`, which ensures
/// that the read and write types are consistent.
pub fn take_batch_for_writing(
self,
batch: DiskWriteBatch,
) -> WriteTypedBatch<'cf, Key, Value, DiskWriteBatch> {
WriteTypedBatch { inner: self, batch }
}

/// Wraps an existing write batch reference, and returns a typed writer for this column family.
///
/// These methods are the only way to get a `WriteTypedBatch`, which ensures
/// that the read and write types are consistent.
pub fn with_batch_for_writing(
self,
batch: &mut DiskWriteBatch,
) -> WriteTypedBatch<'cf, Key, Value, &mut DiskWriteBatch> {
WriteTypedBatch { inner: self, batch }
}

// Reading

/// Returns true if this rocksdb column family does not contain any entries.
Expand Down Expand Up @@ -250,30 +275,24 @@ where
}
}

impl<'cf, Key, Value> WriteTypedBatch<'cf, Key, Value>
impl<'cf, Key, Value, Batch> WriteTypedBatch<'cf, Key, Value, Batch>
where
Key: IntoDisk + FromDisk + Debug,
Value: IntoDisk + FromDisk,
Batch: WriteDisk,
{
// Writing batches

/// Writes this batch to this column family in the database.
pub fn write_batch(self) -> Result<(), rocksdb::Error> {
self.inner.db.write(self.batch)
}

// Batching before writing

/// Serialize and insert the given key and value into this column family,
/// overwriting any existing `value` for `key`.
pub fn zs_insert(mut self, key: Key, value: Value) -> Self {
pub fn zs_insert(mut self, key: &Key, value: &Value) -> Self {
self.batch.zs_insert(&self.inner.cf, key, value);

self
}

/// Remove the given key from this column family, if it exists.
pub fn zs_delete(mut self, key: Key) -> Self {
pub fn zs_delete(mut self, key: &Key) -> Self {
self.batch.zs_delete(&self.inner.cf, key);

self
Expand All @@ -284,10 +303,25 @@ where
//.
// TODO: convert zs_delete_range() to take std::ops::RangeBounds
// see zs_range_iter() for an example of the edge cases
pub fn zs_delete_range(mut self, from: Key, until_strictly_before: Key) -> Self {
pub fn zs_delete_range(mut self, from: &Key, until_strictly_before: &Key) -> Self {
self.batch
.zs_delete_range(&self.inner.cf, from, until_strictly_before);

self
}
}

// Writing a batch to the database requires an owned batch.
impl<'cf, Key, Value> WriteTypedBatch<'cf, Key, Value, DiskWriteBatch>
where
    Key: IntoDisk + FromDisk + Debug,
    Value: IntoDisk + FromDisk,
{
    // Writing batches

    /// Commits this batch to the database, consuming the batch.
    ///
    /// Only available when the wrapped batch is owned, because writing
    /// requires passing the `DiskWriteBatch` to the database by value.
    pub fn write_batch(self) -> Result<(), rocksdb::Error> {
        let Self { inner, batch } = self;
        inner.db.write(batch)
    }
}
31 changes: 31 additions & 0 deletions zebra-state/src/service/finalized_state/disk_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,37 @@ impl WriteDisk for DiskWriteBatch {
}
}

// Allow &mut DiskWriteBatch as well as owned DiskWriteBatch
impl<T> WriteDisk for &mut T
where
    T: WriteDisk,
{
    fn zs_insert<C, K, V>(&mut self, cf: &C, key: K, value: V)
    where
        C: rocksdb::AsColumnFamilyRef,
        K: IntoDisk + Debug,
        V: IntoDisk,
    {
        // Reborrow through one level of `&mut` so the call dispatches to
        // `T`'s implementation rather than recursing into this one.
        T::zs_insert(&mut **self, cf, key, value)
    }

    fn zs_delete<C, K>(&mut self, cf: &C, key: K)
    where
        C: rocksdb::AsColumnFamilyRef,
        K: IntoDisk + Debug,
    {
        T::zs_delete(&mut **self, cf, key)
    }

    fn zs_delete_range<C, K>(&mut self, cf: &C, from: K, until_strictly_before: K)
    where
        C: rocksdb::AsColumnFamilyRef,
        K: IntoDisk + Debug,
    {
        T::zs_delete_range(&mut **self, cf, from, until_strictly_before)
    }
}

/// Helper trait for retrieving and deserializing values from rocksdb column families.
///
/// # Deprecation
Expand Down
15 changes: 2 additions & 13 deletions zebra-state/src/service/finalized_state/disk_format/chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,8 @@ use std::collections::BTreeMap;
use bincode::Options;

use zebra_chain::{
amount::NonNegative,
block::Height,
history_tree::{HistoryTree, NonEmptyHistoryTree},
parameters::Network,
primitives::zcash_history,
value_balance::ValueBalance,
amount::NonNegative, block::Height, history_tree::NonEmptyHistoryTree, parameters::Network,
primitives::zcash_history, value_balance::ValueBalance,
};

use crate::service::finalized_state::disk_format::{FromDisk, IntoDisk};
Expand Down Expand Up @@ -82,10 +78,3 @@ impl FromDisk for NonEmptyHistoryTree {
.expect("deserialization format should match the serialization format used by IntoDisk")
}
}

// We don't write empty history trees to disk, so we know this one is non-empty.
impl FromDisk for HistoryTree {
fn from_bytes(bytes: impl AsRef<[u8]>) -> Self {
NonEmptyHistoryTree::from_bytes(bytes).into()
}
}
3 changes: 2 additions & 1 deletion zebra-state/src/service/finalized_state/zebra_db/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ impl DiskWriteBatch {
prev_note_commitment_trees: Option<NoteCommitmentTrees>,
) -> Result<(), BoxError> {
let db = &zebra_db.db;

// Commit block, transaction, and note commitment tree data.
self.prepare_block_header_and_transaction_data_batch(db, finalized)?;

Expand Down Expand Up @@ -486,7 +487,7 @@ impl DiskWriteBatch {

// Commit UTXOs and value pools
self.prepare_chain_value_pools_batch(
db,
zebra_db,
finalized,
spent_utxos_by_outpoint,
value_pool,
Expand Down
Loading

0 comments on commit ad015e0

Please sign in to comment.