From b05bb70e0033380f9525dcbb127e1c6493485b1b Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 08:51:23 +1000 Subject: [PATCH 1/3] Document how to add a column family --- book/src/dev/state-db-upgrades.md | 107 ++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/book/src/dev/state-db-upgrades.md b/book/src/dev/state-db-upgrades.md index 1c162c8ae29..1e793a5eb9d 100644 --- a/book/src/dev/state-db-upgrades.md +++ b/book/src/dev/state-db-upgrades.md @@ -1,5 +1,112 @@ # Zebra Cached State Database Implementation +## Adding a Column Family + +Most Zebra column families are implemented using low-level methods that allow accesses using any +type. But this is error-prone, because we can accidentally use different types to read and write +them. (Or read using different types in different methods.) + +If we type the column family name out every time, a typo can lead to a panic, because the column +family doesn't exist. + +Instead, define the name and type of each column family at the top of the implementation module, +and a method on the database that returns that type: + +For example: +```rust +/// The name of the sapling transaction IDs result column family. +pub const SAPLING_TX_IDS: &str = "sapling_tx_ids"; + +/// The type for reading sapling transaction IDs results from the database. +pub type SaplingTxIdsCf<'cf> = + TypedColumnFamily<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>>; + +impl Storage { + /// Returns a typed handle to the `sapling_tx_ids` column family. + pub(crate) fn sapling_tx_ids_cf(&self) -> SaplingTxIdsCf { + SaplingTxIdsCf::new(&self.db, SAPLING_TX_IDS) + .expect("column family was created when database was created") + } +} +``` + +Then, every read of the column family uses that method, which enforces the correct types: +(These methods have the same name as the low-level methods, but are easier to call.) 
+```rust +impl Storage { + /// Returns the result for a specific database index (key, block height, transaction index). + pub fn sapling_result_for_index( + &self, + index: &SaplingScannedDatabaseIndex, + ) -> Option<SaplingScannedResult> { + self.sapling_tx_ids_cf().zs_get(index).flatten() + } + + /// Returns the Sapling indexes and results in the supplied range. + fn sapling_results_in_range( + &self, + range: impl RangeBounds<SaplingScannedDatabaseIndex>, + ) -> BTreeMap<SaplingScannedDatabaseIndex, Option<SaplingScannedResult>> { + self.sapling_tx_ids_cf().zs_items_in_range_ordered(range) + } +} +``` + +This simplifies the implementation compared with the raw `ReadDisk` methods. + +To write to the database, use the `new_batch_for_writing()` method on the column family type. +This returns a batch that enforces the correct types. Use `write_batch()` to write it to the +database: +```rust +impl Storage { + /// Insert a sapling scanning `key`, and mark all heights before `birthday_height` so they + /// won't be scanned. + pub(crate) fn insert_sapling_key( + &mut self, + storage: &Storage, + sapling_key: &SaplingScanningKey, + birthday_height: Option<Height>, + ) { + ... + self.sapling_tx_ids_cf() + .new_batch_for_writing() + .zs_insert(&index, &None) + .write_batch() + .expect("unexpected database write failure"); + } +} +``` + +To write to an existing batch in legacy code, use `with_batch_for_writing()` instead. +This relies on the caller to write the batch to the database: +``` +impl DiskWriteBatch { + /// Updates the history tree for the tip, if it is not empty. + /// + /// The batch must be written to the database by the caller. + pub fn update_history_tree(&mut self, db: &ZebraDb, tree: &HistoryTree) { + let history_tree_cf = db.history_tree_cf().with_batch_for_writing(self); + + if let Some(tree) = tree.as_ref().as_ref() { + // The batch is modified by this method and written by the caller. + let _ = history_tree_cf.zs_insert(&(), tree); + } + } +} +``` + +To write to a legacy batch, then write it to the database, you can use +`take_batch_for_writing(batch).write_batch()`. 
+ +During database upgrades, you might need to access the same column family using different types. +[Define a type](https://github.com/ZcashFoundation/zebra/pull/8115/files#diff-ba689ca6516946a903da62153652d91dc1bb3d0100bcf08698cb3f38ead57734R36-R53) +and [convenience method](https://github.com/ZcashFoundation/zebra/pull/8115/files#diff-ba689ca6516946a903da62153652d91dc1bb3d0100bcf08698cb3f38ead57734R69-R87) +for each legacy type, and use them during the upgrade. + +Some full examples of legacy code conversions, and the typed column family implementation itself +are in [PR #8112](https://github.com/ZcashFoundation/zebra/pull/8112/files) and +[PR #8115](https://github.com/ZcashFoundation/zebra/pull/8115/files). + ## Current Implementation ### Verification Modes From 7b34dbb226c7a970fd0cdaf44f6aa32e162d33de Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 10:05:32 +1000 Subject: [PATCH 2/3] Add the column family to the list in the DB --- book/src/dev/state-db-upgrades.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/book/src/dev/state-db-upgrades.md b/book/src/dev/state-db-upgrades.md index 1e793a5eb9d..f2ebb043c19 100644 --- a/book/src/dev/state-db-upgrades.md +++ b/book/src/dev/state-db-upgrades.md @@ -9,14 +9,21 @@ them. (Or read using different types in different methods.) If we type the column family name out every time, a typo can lead to a panic, because the column family doesn't exist. -Instead, define the name and type of each column family at the top of the implementation module, -and a method on the database that returns that type: +Instead: +- define the name and type of each column family at the top of the implementation module, +- add a method on the database that returns that type, and +- add the column family name to the list of column families in the database: For example: ```rust /// The name of the sapling transaction IDs result column family. 
pub const SAPLING_TX_IDS: &str = "sapling_tx_ids"; +/// The column families supported by the running `zebra-scan` database code. +pub const SCANNER_COLUMN_FAMILIES_IN_CODE: &[&str] = &[ + sapling::SAPLING_TX_IDS, +]; + /// The type for reading sapling transaction IDs results from the database. pub type SaplingTxIdsCf<'cf> = TypedColumnFamily<'cf, SaplingScannedDatabaseIndex, Option<SaplingScannedResult>>; From 74b805e7a3187fba2ae1b0be59dd001f3eb5b352 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 10:12:57 +1000 Subject: [PATCH 3/3] Fix rust formatting --- book/src/dev/state-db-upgrades.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/book/src/dev/state-db-upgrades.md b/book/src/dev/state-db-upgrades.md index f2ebb043c19..15f962e88b4 100644 --- a/book/src/dev/state-db-upgrades.md +++ b/book/src/dev/state-db-upgrades.md @@ -86,7 +86,7 @@ impl Storage { To write to an existing batch in legacy code, use `with_batch_for_writing()` instead. This relies on the caller to write the batch to the database: -``` +```rust impl DiskWriteBatch { /// Updates the history tree for the tip, if it is not empty. ///