From 5e1c7e2cee46f09b0cf640c890acd2fc6b8470b5 Mon Sep 17 00:00:00 2001 From: Gus Gutoski Date: Mon, 10 Jun 2024 17:29:22 -0400 Subject: [PATCH] chore: rewrite block payload (#1499) * new struct Payload2 * WIP new fns usize_to_bytes, max_from_byte_len with tests * implement NamespaceBuider * WIP begin implementing from_transactions * dead end: const generics not stable in Rust https://stackoverflow.com/a/72467535 * finish impl for from_transactions, use macro_rules to generalize usize_to_bytes * WIP friendly deserializers * generalized friendly deserializer * usize_from_bytes const generic param, add xxx_from_bytes functions * impl namespace_with_proof and some helpers * WIP test infra for namespace proofs * tweak test * Payload2:namespace_with_proof pass tests (yay) * don't double count dupliate namespace ids * tidy * restore block.rs from main, new file block2.rs * move mod tx_table to separate file payload_bytes.rs * rename block2::Payload2 -> Payload * set Payload::ns_iter() Item to NamespaceId * move namespace iterator to a separate file * rename payload2 -> ns_payload_builder * visibility tweaks for ns_iter * new fn verify_namespace_proof, temporary re-use of old parse_ns_payload, enforce maximum ns payload index in ns_iter, rename a few things * move namespace_with_proof and test to ns_proof.rs, use new verify_namespace_proof in test * move Payload::ns_iter, etc to ns_iter.rs * rename ns_payload_builder -> ns_payload * new mod tx_iter, a proper impl for parse_ns_payload * WIP combined iterator for QueryablePayload * move the combined iterator to iter.rs, delete the extra namespace iterator * stub impl of QueryablePayload for Payload * more stubs, tidy, new file tx_proof.rs * fix bug in TxIter, fix test * impl Payload::transaction with test * move tests to new file test.rs * tidy and comments * NsProof do not store VID common data * tidying and stub * new fn tx_table_range with doc * impl transaction_with_proof, still pretty messy tho * WIP tx proof only for 
num_txs * fix bug in iter, fix test * test: verify transaction proofs * major rework of ns_iter: new struct NsTable, NsIter::Item is now just usize * newtype NsIndex * TxIndex is now a serialized index, newtype NsPayload with awesome helper methods * fix name _max_from_byte_len2 -> _max_from_byte_len * xxx_from_bytes allow small byte lengths * NsIndex in serialized form like TxIndex * move tx_iter mod into ns_payload to enable private data in TxIndex * rename module ns_iter -> ns_table * tweak todo comments * move ns_payload, ns_proof, tx_proof modules inside ns_table * tidy * put TxIndex in a new mod tx_iter, move NsPayload::read_tx_offset into tx_iter, new method NsPayload::read_tx_offset_pref * add tx table range proof to TxProof * NsPayload now a DST, add newtype NsPayloadOwned so that it's to NsPayload as Vec is to [T] * untested: TxProof::verify check tx table proof * dumbest bug ever * TxProof::verify now check tx payload proof (yay) * tidy: new module ns_iter like tx_iter, new method NsTable::read_ns_offset_prev like NsPayload::read_tx_offset_prev * new struct NsPayloadRange with helpers * WIP tweak tx_payload_range[_relative] * make NsPayloadRange a Range * WIP prep for experiments with NsPayload * add range field to NsPayload, it can no longer be a DST (boo) * revert back to DST for NsPayload * move tx_payload_range method from NsPayload to NsPayloadRange, add args to make it work * move modules ns_proof, tx_proof from ns_table up to block * move module ns_payload_range to its own file * Index fields private * move module ns_iter to its own file * move module tx_iter to its own file * newtype NumTxs * manual serde impl for NumTxs * NsPayloadRange::tx_payload_range arg type change usize -> NumTxs * new struct TxTableEntries * manual serde impl for TxTableEntries * tidy ns_table * move module num_txs into its own file * move module tx_table_entries to its own file * remove pub(crate) from NsPayloadRange newtype field * add TODOs, ugh Rust is killing me * 
TxIndex newtype from array to usize * NsIndex newtype from array to usize * move module num_txs up to block, experiment with access key design pattern * move module ns_iter up to block * move module tx_table_entries up to block * move module tx_iter up to block * move module ns_payload up to block * move module ns_payload_range up to block * move some NsTable impls into ns_table module * NsTable member private * move some impl Payload to block module * move NsProof construction from Payload to NsProof * move TxProof construction from Payload to TxProof * move struct Payload to a new module payload * visibility restrictions to payload module * oops * delete num_txs_as_bytes from payload_bytes * delete num_txs_from_bytes from payload_bytes * delete tx_offset_as_bytes from payload_bytes * delete tx_offset_from_bytes from payload_bytes * delete num_nss_as_bytes from payload_bytes * delete num_nss_from_bytes from payload_bytes * delete ns_offset_as_bytes from payload_bytes * delete ns_offset_from_bytes from payload_bytes * delete ns_offset_[as|from]_bytes from payload_bytes * tweak: Payload::ns_payload re-use ns_payload_range * move byte len constants to block module * rename module payload_bytes -> uint_bytes * tidy, minor refactor new function usize_fits * replace NsTable::num_nss_with_duplicates -> in_bounds, reflect the change in NsIter, use it in TxProof; also remove NsPayload::find_ns_id and reorder arg list in TxProof::new * check tx index in TxProof::new, new method NsPayload::in_bounds * WIP new model for NsPayload[Range] * WIP read_tx_offset * new traits AsBytes, BytesReader, new test for TxProof2 * PoC TxTableEntries in the new model * tidy, rename * remove const generic param from AsPayloadBytes trait * new structs NumTxs2, TxTableEntries2 using traits AsPayloadBytes * add tx payload range to TxProof2 * error checking in TxProof::new * TxProof::verify: add ns_table arg, remove ns_payload_range from proof, add error checking * derive serde for types in 
TxProof2 * delete old type TxProof in favor of TxProof2 * NsProofExistence use NsPayloadOwned2 instead of NsPayloadOwned * Iter use TxIter::new2 instead of new (progress toward switching from NsPayload to NsPayload2) * move NamespacePayloadBuilder to module newtypes * delete module ns_payload_range * delete old modules * newtype NsPayloadByteLen * newtype NumTxsChecked * move tx_table_entries_range_relative into TxTableEntriesRange::new * move module tx_iter into newtypes * impl AsPayloadBytes for TxIndex * WIP test fails: AsPayloadBytes new param T * fix test, but AsPayloadBytes trait is now unusable (boo) * fix TxTableEntries deserialization * delete unneeded stuff * rename a bunch of types in module newtypes * make AsPayloadBytes readable and rename it to FromPayloadBytes * tidy and rename * rename ns_payload[_range]2.rx -> without the 2 * tidy and renaming * newtype PayloadByteLen * tidy and docs * tidy ns_table * tidy payload * fix macro bytes_serde_impl * delete ns_iter.rs, move contents to ns_table.rs * restrict visibility of magic constants to a single file (yay) * tidy ns_payload * replace NsPayloadRange::offset with block_payload_range, simplify NsPayloadBytesRange * tidy tx_proof, rename some things * tidy * new method export_tx, in prep for reduced visibility * fix use statements * new module full_payload * WIP new module namespace_payload * move tx_proof, iter to namespace_payload; add helpers to avoid excessive visibility * new helper Payload::ns_payload * doc for bytes_serde_impl macro * move ns_payload_traits module into newtypes * rename module newtypes -> types * fix build after merge main * WIP swap out block for block2 * WIP fix test_namespace_query for block2 * WIP fix nasty_client except for NsIndex serialization issue * fix nasty-client for new block2, appease clippy * fix reference test for new ns table format * fix test demo, tidy * accounting for block byte length limit * temporary hack to pass tests * set forge-std git submodule to 
correct commit * fix test_message_compat * failing test for large namespace ids * single-character bug fix (damn that feels good) * fix doctest * update reference tests (again) * add test enforce_max_block_size * delete old block module * tidy TODOs, some general tidying * NsTable::read_ns_id check index in bounds * NsTable::ns_range check index in bounds * use read_ns_id_unchecked in NsIter * revert NsTable::ns_range to unchecked and restrict visibility * revert NsTable::read_ns_id to unchecked * re-arrange methods * NsTable::as_bytes_slice restrict visibility * delete NsTable::as_bytes_slice in favor of Encode trait * delete Payload::as_byte_slice in favor of Encode trait * tidy PayloadByteLen * restrict visibility for NsIter * restrict visibility of PayloadByteLen * restrict visibility of NsPayload * restrict visibility of NsPayloadRange * restrict visibility of NsPayloadBuilder * restrict visibility of TxIndex, TxIter * rename module block2 -> block * rename ns_payload_unchecked -> ns_payload * remove obsolete todo * revert https://github.com/EspressoSystems/espresso-sequencer/pull/1504 ; this PR supports arbitrary 8-byte namespace IDs * detailed specification rustdoc for Payload, NsTable * detailed specification in rustdoc for namespace payload * rename Payload::payload -> ns_payloads * NsProof do not prove non-existence * Payload::is_consistent return Result instead of bool, eliminate panic * NsProof::new take NsIndex arg instead of NamespaceId * NamespaceProofQueryData::proof is now optional * fix: NsProof::ns_proof for empty payload should be None * address https://github.com/EspressoSystems/espresso-sequencer/pull/1499#discussion_r1619283985 * NsTable field for backwards compatibility * set NS_ID_BYTE_LEN to 4 for backwards compatibility * Payload::builder_commitment hack for backwards compatibility * TODOs for NamespaceId, fix tests in block/test.rs * restore data/ files from main branch * remove obsolete comment * fix doc for NsProof::new as per 
https://github.com/EspressoSystems/espresso-sequencer/pull/1499#discussion_r1631262648 * new method NsTable::read_ns_id_unchecked as per https://github.com/EspressoSystems/espresso-sequencer/pull/1499#discussion_r1631276747 * NamespaceId manual Deserialize impl enforce u32::MAX * NamespaceId impl From as per https://github.com/EspressoSystems/espresso-sequencer/pull/1499#discussion_r1631800857 * doc: links to github issues in code comments --- Cargo.lock | 47 + builder/src/lib.rs | 2 +- sequencer/Cargo.toml | 2 + sequencer/src/api.rs | 37 +- sequencer/src/api/endpoints.rs | 63 +- sequencer/src/bin/nasty-client.rs | 26 +- sequencer/src/bin/submit-transactions.rs | 6 +- sequencer/src/block.rs | 108 +- sequencer/src/block/entry.rs | 85 - sequencer/src/block/full_payload.rs | 10 + sequencer/src/block/full_payload/ns_proof.rs | 164 ++ sequencer/src/block/full_payload/ns_table.rs | 337 ++++ sequencer/src/block/full_payload/payload.rs | 280 ++++ sequencer/src/block/namespace_payload.rs | 12 + sequencer/src/block/namespace_payload/iter.rs | 81 + .../src/block/namespace_payload/ns_payload.rs | 137 ++ .../namespace_payload/ns_payload_range.rs | 34 + .../src/block/namespace_payload/tx_proof.rs | 255 +++ .../src/block/namespace_payload/types.rs | 435 ++++++ sequencer/src/block/payload.rs | 1369 ----------------- sequencer/src/block/queryable.rs | 334 ---- sequencer/src/block/tables.rs | 305 ---- sequencer/src/block/test.rs | 193 +++ sequencer/src/block/tx_iterator.rs | 66 - sequencer/src/block/uint_bytes.rs | 231 +++ sequencer/src/header.rs | 32 +- sequencer/src/lib.rs | 5 +- sequencer/src/reference_tests.rs | 18 +- sequencer/src/transaction.rs | 78 +- 29 files changed, 2381 insertions(+), 2371 deletions(-) delete mode 100644 sequencer/src/block/entry.rs create mode 100644 sequencer/src/block/full_payload.rs create mode 100644 sequencer/src/block/full_payload/ns_proof.rs create mode 100644 sequencer/src/block/full_payload/ns_table.rs create mode 100644 
sequencer/src/block/full_payload/payload.rs create mode 100644 sequencer/src/block/namespace_payload.rs create mode 100644 sequencer/src/block/namespace_payload/iter.rs create mode 100644 sequencer/src/block/namespace_payload/ns_payload.rs create mode 100644 sequencer/src/block/namespace_payload/ns_payload_range.rs create mode 100644 sequencer/src/block/namespace_payload/tx_proof.rs create mode 100644 sequencer/src/block/namespace_payload/types.rs delete mode 100644 sequencer/src/block/payload.rs delete mode 100644 sequencer/src/block/queryable.rs delete mode 100644 sequencer/src/block/tables.rs create mode 100644 sequencer/src/block/test.rs delete mode 100644 sequencer/src/block/tx_iterator.rs create mode 100644 sequencer/src/block/uint_bytes.rs diff --git a/Cargo.lock b/Cargo.lock index 95d4235aa..f4168a878 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3347,6 +3347,16 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fluent-asserter" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62cd2a1243f15c8c9d37acc8ab4ba837e50823561cb124af8406a6f676d04341" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "flume" version = "0.9.2" @@ -6561,6 +6571,20 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + [[package]] name = "num-bigint" version = "0.4.5" @@ -6588,6 +6612,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -6614,6 +6647,18 @@ 
dependencies = [ "num-traits", ] +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.19" @@ -8581,6 +8626,7 @@ dependencies = [ "espresso-macros", "ethers", "ethers-contract-derive", + "fluent-asserter", "futures", "hotshot", "hotshot-contract-adapter", @@ -8602,6 +8648,7 @@ dependencies = [ "jf-vid", "libp2p", "num-traits", + "paste", "portpicker", "pretty_assertions", "rand 0.8.5", diff --git a/builder/src/lib.rs b/builder/src/lib.rs index 2f8702bcb..5e846587f 100644 --- a/builder/src/lib.rs +++ b/builder/src/lib.rs @@ -669,7 +669,7 @@ mod test { vid_commitment, BlockHeader, BlockPayload, EncodeBytes, GENESIS_VID_NUM_STORAGE_NODES, }; use hotshot_types::utils::BuilderCommitment; - use sequencer::block::payload::Payload; + use sequencer::block::Payload; use sequencer::persistence::no_storage::{self, NoStorage}; use sequencer::persistence::sql; use sequencer::{empty_builder_commitment, Header}; diff --git a/sequencer/Cargo.toml b/sequencer/Cargo.toml index 0d0e47e40..8c8c5fe58 100644 --- a/sequencer/Cargo.toml +++ b/sequencer/Cargo.toml @@ -16,6 +16,7 @@ required-features = ["testing"] [dev-dependencies] escargot = "0.5.10" espresso-macros = { git = "https://github.com/EspressoSystems/espresso-macros.git", tag = "0.1.0" } +fluent-asserter = "0.1.9" hotshot-query-service = { workspace = true, features = ["testing"] } hotshot-testing = { workspace = true } pretty_assertions = { workspace = true } @@ -60,6 +61,7 @@ es-version = { workspace = true } ethers = { workspace = true } ethers-contract-derive = "2.0.10" futures = { workspace = true } +paste = "1.0" hotshot = { workspace = true } hotshot-contract-adapter = { workspace = true } diff --git a/sequencer/src/api.rs b/sequencer/src/api.rs 
index 39d38cb6c..5ac192578 100644 --- a/sequencer/src/api.rs +++ b/sequencer/src/api.rs @@ -687,7 +687,7 @@ mod api_tests { use crate::{ persistence::no_storage, testing::{wait_for_decide_on_handle, TestConfig}, - Header, + Header, NamespaceId, }; use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; use committable::Committable; @@ -696,8 +696,7 @@ mod api_tests { use es_version::SequencerVersion; use ethers::utils::Anvil; use futures::stream::StreamExt; - use hotshot_query_service::availability::LeafQueryData; - use hotshot_types::vid::vid_scheme; + use hotshot_query_service::availability::{LeafQueryData, VidCommonQueryData}; use portpicker::pick_unused_port; use surf_disco::Client; use test_helpers::{ @@ -729,8 +728,9 @@ mod api_tests { setup_logging(); setup_backtrace(); - let vid = vid_scheme(5); - let txn = Transaction::new(Default::default(), vec![1, 2, 3, 4]); + // Arbitrary transaction, arbitrary namespace ID + let ns_id = NamespaceId::from(42); + let txn = Transaction::new(ns_id, vec![1, 2, 3, 4]); // Start query service. 
let port = pick_unused_port().expect("No ports free"); @@ -783,14 +783,31 @@ mod api_tests { .await .unwrap(); let ns_query_res: NamespaceProofQueryData = client - .get(&format!("availability/block/{block_num}/namespace/0")) + .get(&format!("availability/block/{block_num}/namespace/{ns_id}")) .send() .await .unwrap(); - ns_query_res - .proof - .verify(&vid, &header.payload_commitment, &header.ns_table) - .unwrap(); + + // Verify namespace proof if present + if let Some(ns_proof) = ns_query_res.proof { + let vid_common: VidCommonQueryData = client + .get(&format!("availability/vid/common/{block_num}")) + .send() + .await + .unwrap(); + + ns_proof + .verify( + &header.ns_table, + &header.payload_commitment, + vid_common.common(), + ) + .unwrap(); + } else { + // Namespace proof should be present if ns_id exists in ns_table + assert!(header.ns_table.find_ns_id(&ns_id).is_none()); + assert!(ns_query_res.transactions.is_empty()); + } found_empty_block = found_empty_block || ns_query_res.transactions.is_empty(); diff --git a/sequencer/src/api/endpoints.rs b/sequencer/src/api/endpoints.rs index ccb073c3e..f91e5153c 100644 --- a/sequencer/src/api/endpoints.rs +++ b/sequencer/src/api/endpoints.rs @@ -14,10 +14,7 @@ use super::{ StorageState, }; use crate::{ - block::payload::{parse_ns_payload, NamespaceProof}, - network, - persistence::SequencerPersistence, - NamespaceId, SeqTypes, Transaction, + block::NsProof, network, persistence::SequencerPersistence, NamespaceId, SeqTypes, Transaction, }; use anyhow::Result; use async_std::sync::{Arc, RwLock}; @@ -45,7 +42,7 @@ use vbs::version::StaticVersionType; #[derive(Clone, Debug, Serialize, Deserialize)] pub struct NamespaceProofQueryData { - pub proof: NamespaceProof, + pub proof: Option, pub transactions: Vec, } @@ -74,8 +71,7 @@ where api.get("getnamespaceproof", move |req, state| { async move { let height: usize = req.integer_param("height")?; - let ns_id: u64 = req.integer_param("namespace")?; - let ns_id = 
NamespaceId::from(ns_id); + let ns_id = NamespaceId::from(req.integer_param::<_, u32>("namespace")?); let (block, common) = try_join!( async move { state @@ -99,32 +95,25 @@ where } )?; - let proof = block - .payload() - .namespace_with_proof( - block.payload().get_ns_table(), - ns_id, - common.common().clone(), - ) - .context(CustomSnafu { - message: format!("failed to make proof for namespace {ns_id}"), - status: StatusCode::NOT_FOUND, - })?; - - let transactions = if let NamespaceProof::Existence { - ref ns_payload_flat, - .. - } = proof - { - parse_ns_payload(ns_payload_flat, ns_id) + if let Some(ns_index) = block.payload().ns_table().find_ns_id(&ns_id) { + let proof = NsProof::new(block.payload(), &ns_index, common.common()).context( + CustomSnafu { + message: format!("failed to make proof for namespace {ns_id}"), + status: StatusCode::NOT_FOUND, + }, + )?; + + Ok(NamespaceProofQueryData { + transactions: proof.export_all_txs(&ns_id), + proof: Some(proof), + }) } else { - Vec::new() - }; - - Ok(NamespaceProofQueryData { - transactions, - proof, - }) + // ns_id not found in ns_table + Ok(NamespaceProofQueryData { + proof: None, + transactions: Vec::new(), + }) + } } .boxed() })?; @@ -178,16 +167,6 @@ where .body_auto::(Ver::instance()) .map_err(Error::from_request_error)?; - // Transactions with namespaces that do not fit in the u32 - // cannot be included in the block. - // TODO: This issue will be addressed in the next release. 
- if tx.namespace() > NamespaceId::from(u32::MAX as u64) { - return Err(Error::Custom { - message: "Transaction namespace > u32::MAX".to_string(), - status: StatusCode::BAD_REQUEST, - }); - } - let hash = tx.commit(); state .read(|state| state.submit(tx).boxed()) diff --git a/sequencer/src/bin/nasty-client.rs b/sequencer/src/bin/nasty-client.rs index 048b2009d..f752ba18d 100644 --- a/sequencer/src/bin/nasty-client.rs +++ b/sequencer/src/bin/nasty-client.rs @@ -31,14 +31,10 @@ use hotshot_query_service::{ metrics::PrometheusMetrics, node::TimeWindowQueryData, }; -use hotshot_types::{ - traits::metrics::{Counter, Gauge, Histogram, Metrics as _}, - vid::{vid_scheme, VidSchemeType}, -}; +use hotshot_types::traits::metrics::{Counter, Gauge, Histogram, Metrics as _}; use jf_merkle_tree::{ ForgetableMerkleTreeScheme, MerkleCommitment, MerkleTreeScheme, UniversalMerkleTreeScheme, }; -use jf_vid::VidScheme; use rand::{seq::SliceRandom, RngCore}; use sequencer::{ api::endpoints::NamespaceProofQueryData, @@ -1015,11 +1011,13 @@ impl ResourceManager> { self.get(format!("availability/header/{block}")).await }) .await?; - if header.ns_table.is_empty() { + let num_namespaces = header.ns_table.iter().count(); + if num_namespaces == 0 { tracing::info!("not fetching namespace because block {block} is empty"); return Ok(()); } - let ns = header.ns_table.get_table_entry(index).0; + let ns_index = header.ns_table.iter().nth(index % num_namespaces).unwrap(); + let ns = header.ns_table.read_ns_id(&ns_index).unwrap(); let ns_proof: NamespaceProofQueryData = self .retry(info_span!("fetch namespace", %ns), || async { @@ -1034,13 +1032,21 @@ impl ResourceManager> { self.get(format!("availability/vid/common/{block}")).await }) .await?; - let vid = vid_scheme(VidSchemeType::get_num_storage_nodes(vid_common.common()) as usize); + ensure!( + ns_proof.proof.is_some(), + format!("missing namespace proof for {block}:{ns}") + ); ensure!( ns_proof .proof - .verify(&vid, &header.payload_commitment, 
&header.ns_table) + .unwrap() + .verify( + &header.ns_table, + &header.payload_commitment, + vid_common.common() + ) .is_some(), - format!("namespace proof for {block}:{ns} is invalid") + format!("failure to verify namespace proof for {block}:{ns}") ); self.metrics.query_namespace_actions.add(1); diff --git a/sequencer/src/bin/submit-transactions.rs b/sequencer/src/bin/submit-transactions.rs index ed520ece9..ba2941d04 100644 --- a/sequencer/src/bin/submit-transactions.rs +++ b/sequencer/src/bin/submit-transactions.rs @@ -60,7 +60,7 @@ struct Options { default_value = "10000", env = "ESPRESSO_SUBMIT_TRANSACTIONS_MIN_NAMESPACE" )] - min_namespace: u64, + min_namespace: u32, /// Maximum namespace ID to submit to. #[clap( @@ -68,7 +68,7 @@ struct Options { default_value = "10010", env = "ESPRESSO_SUBMIT_TRANSACTIONS_MAX_NAMESPACE" )] - max_namespace: u64, + max_namespace: u32, /// Mean delay between submitting transactions. /// @@ -327,6 +327,8 @@ async fn server(port: u16, bind_version: Ver) } fn random_transaction(opt: &Options, rng: &mut ChaChaRng) -> Transaction { + // TODO instead use NamespaceId::random, but that does not allow us to + // enforce `gen_range(opt.min_namespace..=opt.max_namespace)` let namespace = rng.gen_range(opt.min_namespace..=opt.max_namespace); let len = rng.gen_range(opt.min_size..=opt.max_size); diff --git a/sequencer/src/block.rs b/sequencer/src/block.rs index 12cae22e1..4d2a2e809 100644 --- a/sequencer/src/block.rs +++ b/sequencer/src/block.rs @@ -1,104 +1,8 @@ -use crate::{BlockBuildingSnafu, ChainConfig, NodeState, SeqTypes, Transaction, ValidatedState}; -use async_trait::async_trait; -use committable::{Commitment, Committable}; -use hotshot_query_service::availability::QueryablePayload; -use hotshot_types::traits::{BlockPayload, EncodeBytes}; -use hotshot_types::utils::BuilderCommitment; -use serde::{Deserialize, Serialize}; -use sha2::Digest; -use snafu::OptionExt; -use std::sync::Arc; +mod full_payload; +mod namespace_payload; +mod 
uint_bytes; -pub mod entry; -pub mod payload; -pub mod queryable; -pub mod tables; -pub mod tx_iterator; +pub use full_payload::{NsProof, NsTable, Payload}; -use entry::TxTableEntryWord; -use payload::Payload; -use tables::NameSpaceTable; - -pub type NsTable = NameSpaceTable; -impl EncodeBytes for Payload { - fn encode(&self) -> Arc<[u8]> { - Arc::from(self.raw_payload.clone()) - } -} -#[async_trait] -impl BlockPayload for Payload { - type Error = crate::Error; - type Transaction = Transaction; - type Instance = NodeState; - type Metadata = NsTable; - type ValidatedState = ValidatedState; - - /// Returns (Self, metadata). - /// - /// `metadata` is a bytes representation of the namespace table. - /// Why bytes? To make it easy to move metadata into payload in the future. - /// - /// Namespace table defined as follows for j>0: - /// word[0]: [number of entries in namespace table] - /// word[2j-1]: [id for the jth namespace] - /// word[2j]: [end byte index of the jth namespace in the payload] - /// - /// Thus, for j>2 the jth namespace payload bytes range is word[2(j-1)]..word[2j]. - /// Edge case: for j=1 the jth namespace start index is implicitly 0. - /// - /// Word type is `TxTableEntry`. - /// TODO(746) don't use `TxTableEntry`; make a different type for type safety. - /// - /// TODO final entry should be implicit: - /// https://github.com/EspressoSystems/espresso-sequencer/issues/757 - /// - /// TODO(746) refactor and make pretty "table" code for tx, namespace tables? - /// - /// This function also performs catchup from peers - /// when the ValidatedState's chain config commitment differs from the NodeState's chain config, and the ValidatedState does not have full chain config. 
- async fn from_transactions( - txs: impl IntoIterator + Send, - _validated_state: &Self::ValidatedState, - instance_state: &Self::Instance, - ) -> Result<(Self, Self::Metadata), Self::Error> { - let payload = Payload::from_txs(txs, &instance_state.chain_config)?; - let ns_table = payload.get_ns_table().clone(); // TODO don't clone ns_table - Some((payload, ns_table)).context(BlockBuildingSnafu) - } - - fn from_bytes(encoded_transactions: &[u8], metadata: &Self::Metadata) -> Self { - Self { - raw_payload: encoded_transactions.to_vec(), - ns_table: metadata.clone(), // TODO don't clone ns_table - } - } - - fn transaction_commitments(&self, meta: &Self::Metadata) -> Vec> { - self.enumerate(meta).map(|(_, tx)| tx.commit()).collect() - } - - /// Generate commitment that builders use to sign block options. - fn builder_commitment(&self, metadata: &Self::Metadata) -> BuilderCommitment { - let mut digest = sha2::Sha256::new(); - digest.update((self.raw_payload.len() as u64).to_le_bytes()); - digest.update((self.ns_table.bytes.len() as u64).to_le_bytes()); - digest.update((metadata.bytes.len() as u64).to_le_bytes()); - digest.update(&self.raw_payload); - digest.update(&self.ns_table.bytes); - digest.update(&metadata.bytes); - BuilderCommitment::from_raw_digest(digest.finalize()) - } - - fn transactions<'a>( - &'a self, - metadata: &'a Self::Metadata, - ) -> impl 'a + Iterator { - self.enumerate(metadata).map(|(_, t)| t) - } - - fn empty() -> (Self, Self::Metadata) { - let payload = Payload::from_txs(vec![], &ChainConfig::default()).unwrap(); - let ns_table = payload.get_ns_table().clone(); - (payload, ns_table) - } -} +#[cfg(test)] +mod test; diff --git a/sequencer/src/block/entry.rs b/sequencer/src/block/entry.rs deleted file mode 100644 index 0ba801d49..000000000 --- a/sequencer/src/block/entry.rs +++ /dev/null @@ -1,85 +0,0 @@ -use super::{Deserialize, Serialize}; -use crate::NamespaceId; -use core::fmt; -use std::mem::size_of; - -// Use newtype pattern so that tx table 
entries cannot be confused with other types. -#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize, Default)] -pub struct TxTableEntry(TxTableEntryWord); -// TODO Get rid of TxTableEntryWord. We might use const generics in order to parametrize the set of functions below with u32,u64 etc... -// See https://github.com/EspressoSystems/espresso-sequencer/issues/1076 -pub type TxTableEntryWord = u32; - -impl TxTableEntry { - pub const MAX: TxTableEntry = Self(TxTableEntryWord::MAX); - - /// Adds `rhs` to `self` in place. Returns `None` on overflow. - pub fn checked_add_mut(&mut self, rhs: Self) -> Option<()> { - self.0 = self.0.checked_add(rhs.0)?; - Some(()) - } - pub const fn zero() -> Self { - Self(0) - } - pub const fn one() -> Self { - Self(1) - } - pub const fn to_bytes(&self) -> [u8; size_of::()] { - self.0.to_le_bytes() - } - pub fn from_bytes(bytes: &[u8]) -> Option { - Some(Self(TxTableEntryWord::from_le_bytes( - bytes.try_into().ok()?, - ))) - } - /// Infallible constructor. 
- pub fn from_bytes_array(bytes: [u8; TxTableEntry::byte_len()]) -> Self { - Self(TxTableEntryWord::from_le_bytes(bytes)) - } - pub const fn byte_len() -> usize { - size_of::() - } - - pub fn from_usize(val: usize) -> Self { - Self( - val.try_into() - .expect("usize -> TxTableEntry should succeed"), - ) - } -} - -impl fmt::Display for TxTableEntry { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.0) - } -} - -impl TryFrom for TxTableEntry { - type Error = >::Error; - - fn try_from(value: usize) -> Result { - TxTableEntryWord::try_from(value).map(Self) - } -} -impl TryFrom for usize { - type Error = >::Error; - - fn try_from(value: TxTableEntry) -> Result { - usize::try_from(value.0) - } -} - -impl TryFrom for TxTableEntry { - type Error = >::Error; - - fn try_from(value: NamespaceId) -> Result { - TxTableEntryWord::try_from(u64::from(value)).map(Self) - } -} -impl TryFrom for NamespaceId { - type Error = >::Error; - - fn try_from(value: TxTableEntry) -> Result { - Ok((value.0 as u64).into()) - } -} diff --git a/sequencer/src/block/full_payload.rs b/sequencer/src/block/full_payload.rs new file mode 100644 index 000000000..8e4f32dc7 --- /dev/null +++ b/sequencer/src/block/full_payload.rs @@ -0,0 +1,10 @@ +mod ns_proof; +mod ns_table; +mod payload; + +pub use ns_proof::NsProof; +pub use ns_table::{NsIndex, NsTable}; +pub use payload::Payload; + +pub(in crate::block) use ns_table::NsIter; +pub(in crate::block) use payload::PayloadByteLen; diff --git a/sequencer/src/block/full_payload/ns_proof.rs b/sequencer/src/block/full_payload/ns_proof.rs new file mode 100644 index 000000000..9382ba08c --- /dev/null +++ b/sequencer/src/block/full_payload/ns_proof.rs @@ -0,0 +1,164 @@ +use crate::{ + block::{ + full_payload::{NsIndex, NsTable, Payload, PayloadByteLen}, + namespace_payload::NsPayloadOwned, + }, + NamespaceId, Transaction, +}; +use hotshot_types::{ + traits::EncodeBytes, + vid::{vid_scheme, LargeRangeProofType, VidCommitment, 
VidCommon, VidSchemeType}, +}; +use jf_vid::{ + payload_prover::{PayloadProver, Statement}, + VidScheme, +}; +use serde::{Deserialize, Serialize}; + +/// Proof of correctness for namespace payload bytes in a block. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct NsProof { + ns_index: NsIndex, + ns_payload: NsPayloadOwned, + ns_proof: Option, // `None` if ns_payload is empty +} + +impl NsProof { + /// Returns the payload bytes for the `index`th namespace, along with a + /// proof of correctness for those bytes. Returns `None` on error. + /// + /// The namespace payload [`NsPayloadOwned`] is included as a hidden field + /// in the returned [`NsProof`]. A conventional API would instead return + /// `(NsPayload, NsProof)` and [`NsProof`] would not contain the namespace + /// payload. + /// ([`TxProof::new`](crate::block::namespace_payload::TxProof::new) + /// conforms to this convention.) In the future we should change this API to + /// conform to convention. But that would require a change to our RPC + /// endpoint API at [`endpoints`](crate::api::endpoints), which is a hassle. 
+ pub fn new(payload: &Payload, index: &NsIndex, common: &VidCommon) -> Option { + let payload_byte_len = payload.byte_len(); + payload_byte_len.is_consistent(common).ok()?; + if !payload.ns_table().in_bounds(index) { + return None; // error: index out of bounds + } + let ns_payload_range = payload.ns_table().ns_range(index, &payload_byte_len); + + // TODO vid_scheme() arg should be u32 to match get_num_storage_nodes + // https://github.com/EspressoSystems/HotShot/issues/3298 + let vid = vid_scheme( + VidSchemeType::get_num_storage_nodes(common) + .try_into() + .ok()?, // error: failure to convert u32 to usize + ); + + let ns_proof = if ns_payload_range.as_block_range().is_empty() { + None + } else { + Some( + vid.payload_proof(payload.encode(), ns_payload_range.as_block_range()) + .ok()?, // error: internal to payload_proof() + ) + }; + + Some(NsProof { + ns_index: index.clone(), + ns_payload: payload.read_ns_payload(&ns_payload_range).to_owned(), + ns_proof, + }) + } + + /// Verify a [`NsProof`] against a payload commitment. Returns `None` on + /// error or if verification fails. + /// + /// There is no [`NsPayload`](crate::block::namespace_payload::NsPayload) + /// arg because this data is already included in the [`NsProof`]. See + /// [`NsProof::new`] for discussion. + /// + /// If verification is successful then return `(Vec, + /// NamespaceId)` obtained by post-processing the underlying + /// [`NsPayload`](crate::block::namespace_payload::NsPayload). Why? This + /// method might be run by a client in a WASM environment who might be + /// running non-Rust code, in which case the client is unable to perform + /// this post-processing himself. 
+ pub fn verify( + &self, + ns_table: &NsTable, + commit: &VidCommitment, + common: &VidCommon, + ) -> Option<(Vec, NamespaceId)> { + VidSchemeType::is_consistent(commit, common).ok()?; + if !ns_table.in_bounds(&self.ns_index) { + return None; // error: index out of bounds + } + + let range = ns_table + .ns_range(&self.ns_index, &PayloadByteLen::from_vid_common(common)) + .as_block_range(); + + match (&self.ns_proof, range.is_empty()) { + (Some(proof), false) => { + // TODO vid_scheme() arg should be u32 to match get_num_storage_nodes + // https://github.com/EspressoSystems/HotShot/issues/3298 + let vid = vid_scheme( + VidSchemeType::get_num_storage_nodes(common) + .try_into() + .ok()?, // error: failure to convert u32 to usize + ); + + vid.payload_verify( + Statement { + payload_subslice: self.ns_payload.as_bytes_slice(), + range, + commit, + common, + }, + proof, + ) + .ok()? // error: internal to payload_verify() + .ok()?; // verification failure + } + (None, true) => {} // 0-length namespace, nothing to verify + (None, false) => { + tracing::error!( + "ns verify: missing proof for nonempty ns payload range {:?}", + range + ); + return None; + } + (Some(_), true) => { + tracing::error!("ns verify: unexpected proof for empty ns payload range"); + return None; + } + } + + // verification succeeded, return some data + let ns_id = ns_table.read_ns_id_unchecked(&self.ns_index); + Some((self.ns_payload.export_all_txs(&ns_id), ns_id)) + } + + /// Return all transactions in the namespace whose payload is proven by + /// `self`. The namespace ID for each returned [`Transaction`] is set to + /// `ns_id`. + /// + /// # Design warning + /// + /// This method relies on a promise that a [`NsProof`] stores the entire + /// namespace payload. If in the future we wish to remove the payload from a + /// [`NsProof`] then this method can no longer be supported. 
+ /// + /// In that case, use the following workaround: + /// - Given a [`NamespaceId`], get a [`NsIndex`] `i` via + /// [`NsTable::find_ns_id`]. + /// - Use `i` to get a + /// [`NsPayload`](crate::block::namespace_payload::NsPayload) `p` via + /// [`Payload::ns_payload`]. + /// - Use `p` to get the desired [`Vec`] via + /// [`NsPayload::export_all_txs`](crate::block::namespace_payload::NsPayload::export_all_txs). + /// + /// This workaround duplicates the work done in [`NsProof::new`]. If you + /// don't like that then you could instead hack [`NsProof::new`] to return a + /// pair `(NsProof, Vec)`. + pub fn export_all_txs(&self, ns_id: &NamespaceId) -> Vec { + self.ns_payload.export_all_txs(ns_id) + } +} diff --git a/sequencer/src/block/full_payload/ns_table.rs b/sequencer/src/block/full_payload/ns_table.rs new file mode 100644 index 000000000..959f91952 --- /dev/null +++ b/sequencer/src/block/full_payload/ns_table.rs @@ -0,0 +1,337 @@ +//! Types related to a namespace table. +//! +//! All code that needs to know the binary format of a namespace table is +//! restricted to this file. +//! +//! See [`NsTable`] for a full specification of the binary format of a namespace +//! table. +use crate::{ + block::{ + full_payload::payload::PayloadByteLen, + namespace_payload::NsPayloadRange, + uint_bytes::{ + bytes_serde_impl, u32_from_bytes, u32_to_bytes, usize_from_bytes, usize_to_bytes, + }, + }, + NamespaceId, +}; +use committable::{Commitment, Committable, RawCommitmentBuilder}; +use hotshot_types::traits::EncodeBytes; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::{collections::HashSet, sync::Arc}; + +/// Byte lengths for the different items that could appear in a namespace table. +const NUM_NSS_BYTE_LEN: usize = 4; +const NS_OFFSET_BYTE_LEN: usize = 4; + +// TODO prefer [`NS_ID_BYTE_LEN`] set to `8` because [`NamespaceId`] is a `u64` +// but we need to maintain serialization compatibility.
+// https://github.com/EspressoSystems/espresso-sequencer/issues/1574 +const NS_ID_BYTE_LEN: usize = 4; + +/// Raw binary data for a namespace table. +/// +/// Any sequence of bytes is a valid [`NsTable`]. +/// +/// # Binary format of a namespace table +/// +/// Byte lengths for the different items that could appear in a namespace table +/// are specified in local private constants [`NUM_NSS_BYTE_LEN`], +/// [`NS_OFFSET_BYTE_LEN`], [`NS_ID_BYTE_LEN`]. +/// +/// ## Number of entries in the namespace table +/// +/// The first [`NUM_NSS_BYTE_LEN`] bytes of the namespace table indicate the +/// number `n` of entries in the table as a little-endian unsigned integer. If +/// the entire table length is smaller than [`NUM_NSS_BYTE_LEN`] then the +/// missing bytes are zero-padded. +/// +/// The bytes in the namespace table beyond the first [`NUM_NSS_BYTE_LEN`] bytes +/// encode table entries. Each entry consumes exactly [`NS_ID_BYTE_LEN`] `+` +/// [`NS_OFFSET_BYTE_LEN`] bytes. +/// +/// The number `n` could be anything, including a number much larger than the +/// number of entries that could fit in the namespace table. As such, the actual +/// number of entries in the table is defined as the minimum of `n` and the +/// maximum number of whole entries that could fit in the table. +/// +/// See [`Self::in_bounds`] for clarification. +/// +/// ## Namespace table entry +/// +/// ### Namespace ID +/// +/// The first [`NS_ID_BYTE_LEN`] bytes of each table entry indicate the +/// [`NamespaceId`] for this namespace. Any table entry whose [`NamespaceId`] is +/// a duplicate of a previous entry is ignored. A correct count of the number of +/// *unique* (non-ignored) entries is given by `NsTable::iter().count()`. +/// +/// ### Namespace offset +/// +/// The next [`NS_OFFSET_BYTE_LEN`] bytes of each table entry indicate the +/// end-index of a namespace in the block payload bytes +/// [`Payload`](super::payload::Payload). This end-index is a little-endian +/// unsigned integer. 
+ /// + /// # How to deduce a namespace's byte range + /// + /// In order to extract the payload bytes of a single namespace `N` from the + /// block payload one needs both the start- and end-indices for `N`. + /// + /// See [`Self::ns_range`] for clarification. What follows is a description of + /// what's implemented in [`Self::ns_range`]. + /// + /// If `N` occupies the `i`th entry in the namespace table for `i>0` then the + /// start-index for `N` is defined as the end-index of the `(i-1)`th entry in + /// the table. + /// + /// Even if the `(i-1)`th entry would otherwise be ignored (due to a duplicate + /// [`NamespaceId`] or any other reason), that entry's end-index still defines + /// the start-index of `N`. This rule guarantees that both start- and + /// end-indices for any namespace `N` can be read from a constant-size byte + /// range in the namespace table, and it eliminates the need to traverse an + /// unbounded number of previous entries of the namespace table looking for a + /// previous non-ignored entry. + /// + /// The start-index of the 0th entry in the table is implicitly defined to be + /// `0`. + /// + /// The start- and end-indices `(declared_start, declared_end)` declared in the + /// namespace table could be anything. As such, the actual start- and + /// end-indices `(start, end)` are defined so as to ensure that the byte range + /// is well-defined and in-bounds for the block payload: + /// ```ignore + /// end = min(declared_end, block_payload_byte_length) + /// start = min(declared_start, end) + /// ``` + /// + /// In an "honestly-prepared" namespace table the end-index of the final + /// namespace equals the byte length of the block payload. (Otherwise the block + /// payload might have bytes that are not included in any namespace.) + /// + /// It is possible that a namespace table could indicate two distinct namespaces + /// whose byte ranges overlap, though no "honestly-prepared" namespace table + /// would do this.
+/// +/// TODO prefer [`NsTable`] to be a newtype like this +/// ```ignore +/// #[repr(transparent)] +/// #[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] +/// #[serde(transparent)] +/// pub struct NsTable(#[serde(with = "base64_bytes")] Vec); +/// ``` +/// but we need to maintain serialization compatibility. +/// +#[derive(Clone, Debug, Default, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub struct NsTable { + #[serde(with = "base64_bytes")] + bytes: Vec, +} + +impl NsTable { + /// Search the namespace table for the ns_index belonging to `ns_id`. + pub fn find_ns_id(&self, ns_id: &NamespaceId) -> Option { + self.iter() + .find(|index| self.read_ns_id_unchecked(index) == *ns_id) + } + + /// Iterator over all unique namespaces in the namespace table. + pub fn iter(&self) -> impl Iterator + '_ { + NsIter::new(self) + } + + /// Read the namespace id from the `index`th entry from the namespace table. + /// Returns `None` if `index` is out of bounds. + /// + /// TODO I want to restrict visibility to `pub(crate)` or lower but this + /// method is currently used in `nasty-client`. + pub fn read_ns_id(&self, index: &NsIndex) -> Option { + if !self.in_bounds(index) { + None + } else { + Some(self.read_ns_id_unchecked(index)) + } + } + + /// Like [`Self::read_ns_id`] except `index` is not checked. Use [`Self::in_bounds`] as needed. + pub fn read_ns_id_unchecked(&self, index: &NsIndex) -> NamespaceId { + let start = index.0 * (NS_ID_BYTE_LEN + NS_OFFSET_BYTE_LEN) + NUM_NSS_BYTE_LEN; + + // TODO hack to deserialize `NamespaceId` from `NS_ID_BYTE_LEN` bytes + // https://github.com/EspressoSystems/espresso-sequencer/issues/1574 + NamespaceId::from(u32_from_bytes::( + &self.bytes[start..start + NS_ID_BYTE_LEN], + )) + } + + /// Does the `index`th entry exist in the namespace table? + pub fn in_bounds(&self, index: &NsIndex) -> bool { + // The number of entries in the namespace table, including all duplicate + // namespace IDs. 
+ let num_nss_with_duplicates = std::cmp::min( + // Number of namespaces declared in the ns table + self.read_num_nss(), + // Max number of entries that could fit in the namespace table + self.bytes.len().saturating_sub(NUM_NSS_BYTE_LEN) + / NS_ID_BYTE_LEN.saturating_add(NS_OFFSET_BYTE_LEN), + ); + + index.0 < num_nss_with_duplicates + } + + // CRATE-VISIBLE HELPERS START HERE + + /// Read subslice range for the `index`th namespace from the namespace + /// table. + pub(in crate::block) fn ns_range( + &self, + index: &NsIndex, + payload_byte_len: &PayloadByteLen, + ) -> NsPayloadRange { + let end = self.read_ns_offset(index).min(payload_byte_len.as_usize()); + let start = if index.0 == 0 { + 0 + } else { + self.read_ns_offset(&NsIndex(index.0 - 1)) + } + .min(end); + NsPayloadRange::new(start, end) + } + + // PRIVATE HELPERS START HERE + + /// Read the number of namespaces declared in the namespace table. This + /// quantity might exceed the number of entries that could fit in the + /// namespace table. + /// + /// For a correct count of the number of unique namespaces in this + /// namespace table use `iter().count()`. + fn read_num_nss(&self) -> usize { + let num_nss_byte_len = NUM_NSS_BYTE_LEN.min(self.bytes.len()); + usize_from_bytes::(&self.bytes[..num_nss_byte_len]) + } + + /// Read the namespace offset from the `index`th entry from the namespace table. 
+ fn read_ns_offset(&self, index: &NsIndex) -> usize { + let start = + index.0 * (NS_ID_BYTE_LEN + NS_OFFSET_BYTE_LEN) + NUM_NSS_BYTE_LEN + NS_ID_BYTE_LEN; + usize_from_bytes::(&self.bytes[start..start + NS_OFFSET_BYTE_LEN]) + } +} + +impl EncodeBytes for NsTable { + fn encode(&self) -> Arc<[u8]> { + Arc::from(self.bytes.as_ref()) + } +} + +impl Committable for NsTable { + fn commit(&self) -> Commitment { + RawCommitmentBuilder::new(&Self::tag()) + .var_size_bytes(&self.bytes) + .finalize() + } + + fn tag() -> String { + "NSTABLE".into() + } +} + +pub struct NsTableBuilder { + bytes: Vec, + num_entries: usize, +} + +impl NsTableBuilder { + pub fn new() -> Self { + // pre-allocate space for the ns table header + Self { + bytes: Vec::from([0; NUM_NSS_BYTE_LEN]), + num_entries: 0, + } + } + + /// Add an entry to the namespace table. + pub fn append_entry(&mut self, ns_id: NamespaceId, offset: usize) { + // hack to serialize `NamespaceId` to `NS_ID_BYTE_LEN` bytes + self.bytes + .extend(u32_to_bytes::(u32::from(ns_id))); + self.bytes + .extend(usize_to_bytes::(offset)); + self.num_entries += 1; + } + + /// Serialize to bytes and consume self. + pub fn into_ns_table(self) -> NsTable { + let mut bytes = self.bytes; + // write the number of entries to the ns table header + bytes[..NUM_NSS_BYTE_LEN] + .copy_from_slice(&usize_to_bytes::(self.num_entries)); + NsTable { bytes } + } + + /// Byte length of a namespace table with zero entries. + /// + /// Currently this quantity equals the byte length of the ns table header. + pub const fn fixed_overhead_byte_len() -> usize { + NUM_NSS_BYTE_LEN + } + + /// Byte length added to a namespace table by a new entry. + /// + /// Currently this quantity equals the byte length of a single ns table + /// entry. + pub const fn ns_overhead_byte_len() -> usize { + NS_ID_BYTE_LEN + NS_OFFSET_BYTE_LEN + } +} + +/// Index for an entry in a ns table. 
+#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub struct NsIndex(usize); +bytes_serde_impl!(NsIndex, to_bytes, [u8; NUM_NSS_BYTE_LEN], from_bytes); + +impl NsIndex { + pub fn to_bytes(&self) -> [u8; NUM_NSS_BYTE_LEN] { + usize_to_bytes::(self.0) + } + fn from_bytes(bytes: &[u8]) -> Self { + Self(usize_from_bytes::(bytes)) + } +} + +/// Return type for [`Payload::ns_iter`]. +pub(in crate::block) struct NsIter<'a> { + cur_index: usize, + repeat_nss: HashSet, + ns_table: &'a NsTable, +} + +impl<'a> NsIter<'a> { + pub fn new(ns_table: &'a NsTable) -> Self { + Self { + cur_index: 0, + repeat_nss: HashSet::new(), + ns_table, + } + } +} + +impl<'a> Iterator for NsIter<'a> { + type Item = NsIndex; + + fn next(&mut self) -> Option { + loop { + let candidate_result = NsIndex(self.cur_index); + let ns_id = self.ns_table.read_ns_id(&candidate_result)?; + self.cur_index += 1; + + // skip duplicate namespace IDs + if !self.repeat_nss.insert(ns_id) { + continue; + } + + break Some(candidate_result); + } + } +} diff --git a/sequencer/src/block/full_payload/payload.rs b/sequencer/src/block/full_payload/payload.rs new file mode 100644 index 000000000..762fe327e --- /dev/null +++ b/sequencer/src/block/full_payload/payload.rs @@ -0,0 +1,280 @@ +use crate::{ + block::{ + full_payload::ns_table::{NsIndex, NsTable, NsTableBuilder}, + namespace_payload::{Index, Iter, NsPayload, NsPayloadBuilder, NsPayloadRange, TxProof}, + }, + NamespaceId, NodeState, SeqTypes, Transaction, ValidatedState, +}; +use async_trait::async_trait; +use hotshot_query_service::availability::QueryablePayload; +use hotshot_types::{ + traits::{BlockPayload, EncodeBytes}, + utils::BuilderCommitment, + vid::{VidCommon, VidSchemeType}, +}; +use jf_vid::VidScheme; +use serde::{Deserialize, Serialize}; +use sha2::Digest; +use std::{collections::HashMap, fmt::Display, sync::Arc}; + +/// Raw payload data for an entire block. 
+/// +/// A block consists of two sequences of arbitrary bytes: +/// - `ns_table`: namespace table +/// - `ns_payloads`: namespace payloads +/// +/// Any sequence of bytes is a valid `ns_table`. Any sequence of bytes is a +/// valid `ns_payloads`. The contents of `ns_table` determine how to interpret +/// `ns_payload`. +/// +/// # Namespace table +/// +/// See [`NsTable`] for the format of a namespace table. +/// +/// # Namespace payloads +/// +/// A concatenation of payload bytes for multiple individual namespaces. +/// Namespace boundaries are dictated by `ns_table`. See [`NsPayload`] for the +/// format of a namespace payload. +#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] +pub struct Payload { + // Concatenated payload bytes for each namespace + #[serde(with = "base64_bytes")] + ns_payloads: Vec, + + ns_table: NsTable, +} + +impl Payload { + pub fn ns_table(&self) -> &NsTable { + &self.ns_table + } + + /// Like [`QueryablePayload::transaction_with_proof`] except without the + /// proof. + pub fn transaction(&self, index: &Index) -> Option { + let ns_id = self.ns_table.read_ns_id(index.ns())?; + let ns_payload = self.ns_payload(index.ns()); + ns_payload.export_tx(&ns_id, index.tx()) + } + + // CRATE-VISIBLE HELPERS START HERE + + pub(in crate::block) fn read_ns_payload(&self, range: &NsPayloadRange) -> &NsPayload { + NsPayload::from_bytes_slice(&self.ns_payloads[range.as_block_range()]) + } + + /// Convenience wrapper for [`Self::read_ns_payload`]. + /// + /// `index` is not checked. Use `self.ns_table().in_bounds()` as needed. 
+ pub(in crate::block) fn ns_payload(&self, index: &NsIndex) -> &NsPayload { + let ns_payload_range = self.ns_table().ns_range(index, &self.byte_len()); + self.read_ns_payload(&ns_payload_range) + } + + pub(in crate::block) fn byte_len(&self) -> PayloadByteLen { + PayloadByteLen(self.ns_payloads.len()) + } + + // PRIVATE HELPERS START HERE + + /// Need a sync version of [`BlockPayload::from_transactions`] in order to impl [`BlockPayload::empty`]. + fn from_transactions_sync( + transactions: impl IntoIterator>::Transaction> + Send, + _validated_state: &>::ValidatedState, + instance_state: &>::Instance, + ) -> Result< + (Self, >::Metadata), + >::Error, + > { + // accounting for block byte length limit + let max_block_byte_len: usize = u64::from(instance_state.chain_config.max_block_size) + .try_into() + .map_err(|_| >::Error::BlockBuilding)?; + let mut block_byte_len = NsTableBuilder::fixed_overhead_byte_len(); + + // add each tx to its namespace + let mut ns_builders = HashMap::::new(); + for tx in transactions.into_iter() { + // accounting for block byte length limit + block_byte_len += tx.payload().len() + NsPayloadBuilder::tx_overhead_byte_len(); + if !ns_builders.contains_key(&tx.namespace()) { + // each new namespace adds overhead + block_byte_len += NsTableBuilder::ns_overhead_byte_len() + + NsPayloadBuilder::fixed_overhead_byte_len(); + } + if block_byte_len > max_block_byte_len { + tracing::warn!("transactions truncated to fit in maximum block byte length {max_block_byte_len}"); + break; + } + + let ns_builder = ns_builders.entry(tx.namespace()).or_default(); + ns_builder.append_tx(tx); + } + + // build block payload and namespace table + let mut payload = Vec::new(); + let mut ns_table_builder = NsTableBuilder::new(); + for (ns_id, ns_builder) in ns_builders { + payload.extend(ns_builder.into_bytes()); + ns_table_builder.append_entry(ns_id, payload.len()); + } + let ns_table = ns_table_builder.into_ns_table(); + let metadata = ns_table.clone(); + Ok(( + 
Self { + ns_payloads: payload, + ns_table, + }, + metadata, + )) + } +} + +#[async_trait] +impl BlockPayload for Payload { + // TODO BlockPayload trait eliminate unneeded args, return vals of type + // `Self::Metadata` https://github.com/EspressoSystems/HotShot/issues/3300 + type Error = crate::Error; + type Transaction = Transaction; + type Instance = NodeState; + type Metadata = NsTable; + type ValidatedState = ValidatedState; + + async fn from_transactions( + transactions: impl IntoIterator + Send, + validated_state: &Self::ValidatedState, + instance_state: &Self::Instance, + ) -> Result<(Self, Self::Metadata), Self::Error> { + Self::from_transactions_sync(transactions, validated_state, instance_state) + } + + // TODO avoid cloning the entire payload here? + fn from_bytes(block_payload_bytes: &[u8], ns_table: &Self::Metadata) -> Self { + Self { + ns_payloads: block_payload_bytes.to_vec(), + ns_table: ns_table.clone(), + } + } + + fn empty() -> (Self, Self::Metadata) { + let payload = + Self::from_transactions_sync(vec![], &Default::default(), &Default::default()) + .unwrap() + .0; + let ns_table = payload.ns_table().clone(); + (payload, ns_table) + } + + fn builder_commitment(&self, metadata: &Self::Metadata) -> BuilderCommitment { + let ns_table_bytes = self.ns_table.encode(); + + // TODO `metadata_bytes` equals `ns_table_bytes`, so we are + // double-hashing the ns_table. Why? To maintain serialization + // compatibility. 
+ // https://github.com/EspressoSystems/espresso-sequencer/issues/1576 + let metadata_bytes = metadata.encode(); + + let mut digest = sha2::Sha256::new(); + digest.update((self.ns_payloads.len() as u64).to_le_bytes()); + digest.update((ns_table_bytes.len() as u64).to_le_bytes()); + digest.update((metadata_bytes.len() as u64).to_le_bytes()); // https://github.com/EspressoSystems/espresso-sequencer/issues/1576 + digest.update(&self.ns_payloads); + digest.update(ns_table_bytes); + digest.update(metadata_bytes); // https://github.com/EspressoSystems/espresso-sequencer/issues/1576 + BuilderCommitment::from_raw_digest(digest.finalize()) + } + + fn transactions<'a>( + &'a self, + metadata: &'a Self::Metadata, + ) -> impl 'a + Iterator { + self.enumerate(metadata).map(|(_, t)| t) + } +} + +impl QueryablePayload for Payload { + // TODO changes to QueryablePayload trait: + // https://github.com/EspressoSystems/hotshot-query-service/issues/639 + type TransactionIndex = Index; + type Iter<'a> = Iter<'a>; + type InclusionProof = TxProof; + + fn len(&self, _meta: &Self::Metadata) -> usize { + // Counting txs is nontrivial. The easiest solution is to consume an + // iterator. If performance is a concern then we could cache this count + // on construction of `Payload`. + self.iter(_meta).count() + } + + fn iter<'a>(&'a self, _meta: &'a Self::Metadata) -> Self::Iter<'a> { + Iter::new(self) + } + + fn transaction_with_proof( + &self, + _meta: &Self::Metadata, + index: &Self::TransactionIndex, + ) -> Option<(Self::Transaction, Self::InclusionProof)> { + // TODO HACK! THE RETURNED PROOF MIGHT FAIL VERIFICATION. + // https://github.com/EspressoSystems/hotshot-query-service/issues/639 + // + // Need a `VidCommon` to proceed. Need to modify `QueryablePayload` + // trait to add a `VidCommon` arg. In the meantime tests fail if I leave + // it `todo!()`, so this hack allows tests to pass. 
+ let common = hotshot_types::vid::vid_scheme(10) + .disperse(&self.ns_payloads) + .unwrap() + .common; + + TxProof::new(index, self, &common) + } +} + +impl Display for Payload { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{self:#?}") + } +} + +impl EncodeBytes for Payload { + fn encode(&self) -> Arc<[u8]> { + Arc::from(self.ns_payloads.as_ref()) + } +} + +/// Byte length of a block payload, which includes all namespaces but *not* the +/// namespace table. +pub(in crate::block) struct PayloadByteLen(usize); + +impl PayloadByteLen { + /// Extract payload byte length from a [`VidCommon`] and construct a new [`Self`] from it. + pub fn from_vid_common(common: &VidCommon) -> Self { + Self(usize::try_from(VidSchemeType::get_payload_byte_len(common)).unwrap()) + } + + /// Is the payload byte length declared in a [`VidCommon`] equal [`Self`]? + pub fn is_consistent(&self, common: &VidCommon) -> Result<(), ()> { + // failure to convert to usize implies that `common` cannot be + // consistent with `self`. 
+ let expected = + usize::try_from(VidSchemeType::get_payload_byte_len(common)).map_err(|_| ())?; + + (self.0 == expected).then_some(()).ok_or(()) + } + + pub(in crate::block::full_payload) fn as_usize(&self) -> usize { + self.0 + } +} + +#[cfg(any(test, feature = "testing"))] +impl hotshot_types::traits::block_contents::TestableBlock for Payload { + fn genesis() -> Self { + BlockPayload::empty().0 + } + + fn txn_count(&self) -> u64 { + self.len(&self.ns_table) as u64 + } +} diff --git a/sequencer/src/block/namespace_payload.rs b/sequencer/src/block/namespace_payload.rs new file mode 100644 index 000000000..ecd894f86 --- /dev/null +++ b/sequencer/src/block/namespace_payload.rs @@ -0,0 +1,12 @@ +mod iter; +mod ns_payload; +mod ns_payload_range; +mod tx_proof; +mod types; + +pub use iter::{Index, Iter}; +pub use tx_proof::TxProof; + +pub(in crate::block) use ns_payload::{NsPayload, NsPayloadOwned}; +pub(in crate::block) use ns_payload_range::NsPayloadRange; +pub(in crate::block) use types::NsPayloadBuilder; diff --git a/sequencer/src/block/namespace_payload/iter.rs b/sequencer/src/block/namespace_payload/iter.rs new file mode 100644 index 000000000..e5aa9e87e --- /dev/null +++ b/sequencer/src/block/namespace_payload/iter.rs @@ -0,0 +1,81 @@ +use crate::block::{ + full_payload::{NsIndex, NsIter, Payload}, + namespace_payload::types::{TxIndex, TxIter}, +}; +use serde::{Deserialize, Serialize}; +use std::iter::Peekable; + +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct Index { + ns_index: NsIndex, + tx_index: TxIndex, +} + +impl Index { + pub fn ns(&self) -> &NsIndex { + &self.ns_index + } + pub(in crate::block) fn tx(&self) -> &TxIndex { + &self.tx_index + } +} + +// TODO don't impl `PartialOrd` +// It's needed only for `QueryablePayload` trait: +// https://github.com/EspressoSystems/hotshot-query-service/issues/639 +impl PartialOrd for Index { + fn partial_cmp(&self, _other: &Self) -> Option { + Some(self.cmp(_other)) + } +} +// TODO don't 
impl `Ord` +// It's needed only for `QueryablePayload` trait: +// https://github.com/EspressoSystems/hotshot-query-service/issues/639 +impl Ord for Index { + fn cmp(&self, _other: &Self) -> std::cmp::Ordering { + unimplemented!() + } +} + +/// Cartesian product of [`NsIter`], [`TxIter`]. +pub struct Iter<'a> { + ns_iter: Peekable>, + tx_iter: Option, + block: &'a Payload, +} + +impl<'a> Iter<'a> { + pub fn new(block: &'a Payload) -> Self { + Self { + ns_iter: NsIter::new(block.ns_table()).peekable(), + tx_iter: None, + block, + } + } +} + +impl<'a> Iterator for Iter<'a> { + type Item = Index; + + fn next(&mut self) -> Option { + loop { + let Some(ns_index) = self.ns_iter.peek() else { + break None; // ns_iter consumed + }; + + if let Some(tx_index) = self + .tx_iter + .get_or_insert_with(|| self.block.ns_payload(ns_index).iter()) + .next() + { + break Some(Index { + ns_index: ns_index.clone(), + tx_index, + }); + } + + self.tx_iter = None; // unset `tx_iter`; it's consumed for this namespace + self.ns_iter.next(); + } + } +} diff --git a/sequencer/src/block/namespace_payload/ns_payload.rs b/sequencer/src/block/namespace_payload/ns_payload.rs new file mode 100644 index 000000000..f2997839d --- /dev/null +++ b/sequencer/src/block/namespace_payload/ns_payload.rs @@ -0,0 +1,137 @@ +use crate::{ + block::namespace_payload::types::{ + FromNsPayloadBytes, NsPayloadByteLen, NsPayloadBytesRange, NumTxs, NumTxsRange, + NumTxsUnchecked, TxIndex, TxIter, TxPayloadRange, TxTableEntriesRange, + }, + NamespaceId, Transaction, +}; +use serde::{Deserialize, Serialize}; + +/// Raw binary data for a single namespace's payload. +/// +/// Any sequence of bytes is a valid [`NsPayload`]. +/// +/// See module-level documentation [`types`](super::types) for a full +/// specification of the binary format of a namespace. 
+pub(in crate::block) struct NsPayload([u8]); + +impl NsPayload { + pub fn from_bytes_slice(bytes: &[u8]) -> &NsPayload { + NsPayload::new_private(bytes) + } + pub fn as_bytes_slice(&self) -> &[u8] { + &self.0 + } + pub fn byte_len(&self) -> NsPayloadByteLen { + NsPayloadByteLen::from_usize(self.0.len()) + } + + /// Read and parse bytes from the ns payload. + /// + /// Arg `range: &R` is convertible into a `Range` via + /// [`NsPayloadBytesRange`]. The payload bytes are parsed into a `R::Output` + /// via [`FromNsPayloadBytes`]. + pub fn read<'a, R>(&'a self, range: &R) -> R::Output + where + R: NsPayloadBytesRange<'a>, + { + >::from_payload_bytes(&self.0[range.ns_payload_range()]) + } + + /// Iterator over all transactions in this namespace. + pub fn iter(&self) -> TxIter { + self.iter_from_num_txs(&self.read_num_txs()) + } + + /// Return all transactions in this namespace. The namespace ID for each + /// returned [`Transaction`] is set to `ns_id`. + pub fn export_all_txs(&self, ns_id: &NamespaceId) -> Vec { + let num_txs = self.read_num_txs(); + self.iter_from_num_txs(&num_txs) + .map(|i| self.tx_from_num_txs(ns_id, &i, &num_txs)) + .collect() + } + + /// Return a transaction from this namespace. Set its namespace ID to + /// `ns_id`. + /// + /// Return `None` if `index` is out of bounds. + pub fn export_tx(&self, ns_id: &NamespaceId, index: &TxIndex) -> Option { + let num_txs_unchecked = self.read_num_txs(); + let num_txs = NumTxs::new(&num_txs_unchecked, &self.byte_len()); + if !num_txs.in_bounds(index) { + return None; // error: tx index out of bounds + } + Some(self.tx_from_num_txs(ns_id, index, &num_txs_unchecked)) + } + + /// Private helper. (Could be pub if desired.) 
+ fn read_num_txs(&self) -> NumTxsUnchecked { + self.read(&NumTxsRange::new(&self.byte_len())) + } + + /// Private helper + fn iter_from_num_txs(&self, num_txs: &NumTxsUnchecked) -> TxIter { + let num_txs = NumTxs::new(num_txs, &self.byte_len()); + TxIter::new(&num_txs) + } + + /// Private helper + fn tx_from_num_txs( + &self, + ns_id: &NamespaceId, + index: &TxIndex, + num_txs_unchecked: &NumTxsUnchecked, + ) -> Transaction { + let tx_table_entries = self.read(&TxTableEntriesRange::new(index)); + let tx_range = TxPayloadRange::new(num_txs_unchecked, &tx_table_entries, &self.byte_len()); + + // TODO don't copy the tx bytes into the return value + // https://github.com/EspressoSystems/hotshot-query-service/issues/267 + let tx_payload = self.read(&tx_range).to_payload_bytes().to_vec(); + Transaction::new(*ns_id, tx_payload) + } +} + +#[repr(transparent)] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[serde(transparent)] +pub(in crate::block) struct NsPayloadOwned(#[serde(with = "base64_bytes")] Vec); + +/// Crazy boilerplate code to make it so that [`NsPayloadOwned`] is to +/// [`NsPayload`] as [`Vec`] is to `[T]`. See [How can I create newtypes for +/// an unsized type and its owned counterpart (like `str` and `String`) in safe +/// Rust? - Stack Overflow](https://stackoverflow.com/q/64977525) +mod ns_payload_owned { + use super::{NsPayload, NsPayloadOwned}; + use std::borrow::Borrow; + use std::ops::Deref; + + impl NsPayload { + // pub(super) because I want it visible everywhere in this file but I + // also want this boilerplate code quarantined in `ns_payload_owned`.
+ pub(super) fn new_private(p: &[u8]) -> &NsPayload { + unsafe { &*(p as *const [u8] as *const NsPayload) } + } + } + + impl Deref for NsPayloadOwned { + type Target = NsPayload; + fn deref(&self) -> &NsPayload { + NsPayload::new_private(&self.0) + } + } + + impl Borrow for NsPayloadOwned { + fn borrow(&self) -> &NsPayload { + self.deref() + } + } + + impl ToOwned for NsPayload { + type Owned = NsPayloadOwned; + fn to_owned(&self) -> NsPayloadOwned { + NsPayloadOwned(self.0.to_owned()) + } + } +} diff --git a/sequencer/src/block/namespace_payload/ns_payload_range.rs b/sequencer/src/block/namespace_payload/ns_payload_range.rs new file mode 100644 index 000000000..f2812f6fd --- /dev/null +++ b/sequencer/src/block/namespace_payload/ns_payload_range.rs @@ -0,0 +1,34 @@ +use super::types::{NsPayloadByteLen, NsPayloadBytesRange}; +use std::ops::Range; + +/// Index range for a namespace payload inside a block payload. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub(in crate::block) struct NsPayloadRange(Range); + +impl NsPayloadRange { + /// TODO restrict visibility? + pub fn new(start: usize, end: usize) -> Self { + Self(start..end) + } + + /// Access the underlying index range for this namespace inside a block + /// payload. + pub fn as_block_range(&self) -> Range { + self.0.clone() + } + + /// Return the byte length of this namespace. + pub fn byte_len(&self) -> NsPayloadByteLen { + NsPayloadByteLen::from_usize(self.0.len()) + } + + /// Convert a [`NsPayloadBytesRange`] into a range that's relative to the + /// entire block payload. 
+ pub fn block_range<'a, R>(&self, range: &R) -> Range + where + R: NsPayloadBytesRange<'a>, + { + let range = range.ns_payload_range(); + range.start + self.0.start..range.end + self.0.start + } +} diff --git a/sequencer/src/block/namespace_payload/tx_proof.rs b/sequencer/src/block/namespace_payload/tx_proof.rs new file mode 100644 index 000000000..f3418eaec --- /dev/null +++ b/sequencer/src/block/namespace_payload/tx_proof.rs @@ -0,0 +1,255 @@ +use crate::{ + block::{ + full_payload::{ + NsTable, {Payload, PayloadByteLen}, + }, + namespace_payload::{ + iter::Index, + types::{ + NumTxs, NumTxsRange, NumTxsUnchecked, TxIndex, TxPayloadRange, TxTableEntries, + TxTableEntriesRange, + }, + }, + }, + Transaction, +}; +use hotshot_query_service::{VidCommitment, VidCommon}; +use hotshot_types::{ + traits::EncodeBytes, + vid::{vid_scheme, SmallRangeProofType, VidSchemeType}, +}; +use jf_vid::{ + payload_prover::{PayloadProver, Statement}, + VidScheme, +}; +use serde::{Deserialize, Serialize}; + +/// Proof of correctness for transaction bytes in a block. +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub struct TxProof { + // Naming conventions for this struct's fields: + // - `payload_x`: bytes from the payload + // - `payload_proof_x`: a proof of those bytes from the payload + tx_index: TxIndex, + + // Number of txs declared in the tx table + payload_num_txs: NumTxsUnchecked, + payload_proof_num_txs: SmallRangeProofType, + + // Tx table entries for this tx + payload_tx_table_entries: TxTableEntries, + payload_proof_tx_table_entries: SmallRangeProofType, + + // This tx's payload bytes. + // `None` if this tx has zero length. + payload_proof_tx: Option, +} + +impl TxProof { + /// Returns the [`Transaction`] indicated by `index`, along with a proof of + /// correctness for that transaction. Returns `None` on error. 
+ pub fn new(
+ index: &Index,
+ payload: &Payload,
+ common: &VidCommon,
+ ) -> Option<(Transaction, Self)> {
+ let payload_byte_len = payload.byte_len();
+ payload_byte_len.is_consistent(common).ok()?;
+ if !payload.ns_table().in_bounds(index.ns()) {
+ tracing::warn!("ns_index {:?} out of bounds", index.ns());
+ return None; // error: ns index out of bounds
+ }
+ // check tx index below
+
+ let payload_bytes_arc = payload.encode(); // pacify borrow checker
+ let payload_bytes = payload_bytes_arc.as_ref();
+ let ns_range = payload.ns_table().ns_range(index.ns(), &payload_byte_len);
+ let ns_byte_len = ns_range.byte_len();
+ let ns_payload = payload.read_ns_payload(&ns_range);
+ let vid = vid_scheme(
+ VidSchemeType::get_num_storage_nodes(common)
+ .try_into()
+ .unwrap(),
+ );
+
+ // Read the tx table len from this namespace's tx table and compute a
+ // proof of correctness.
+ let num_txs_range = NumTxsRange::new(&ns_byte_len);
+ let payload_num_txs = ns_payload.read(&num_txs_range);
+
+ // Check tx index.
+ //
+ // TODO the next line of code (and other code) could be easier to read
+ // if we make helpers that reuse computation we've already done.
+ if !NumTxs::new(&payload_num_txs, &ns_byte_len).in_bounds(index.tx()) {
+ return None; // error: tx index out of bounds
+ }
+
+ let payload_proof_num_txs = vid
+ .payload_proof(payload_bytes, ns_range.block_range(&num_txs_range))
+ .ok()?;
+
+ // Read the tx table entries for this tx and compute a proof of
+ // correctness.
+ let tx_table_entries_range = TxTableEntriesRange::new(index.tx());
+ let payload_tx_table_entries = ns_payload.read(&tx_table_entries_range);
+ let payload_proof_tx_table_entries = {
+ vid.payload_proof(payload_bytes, ns_range.block_range(&tx_table_entries_range))
+ .ok()?
+ };
+
+ // Read the tx payload and compute a proof of correctness.
+ let tx_payload_range = + TxPayloadRange::new(&payload_num_txs, &payload_tx_table_entries, &ns_byte_len); + let payload_proof_tx = { + let range = ns_range.block_range(&tx_payload_range); + + tracing::info!( + "prove: (ns,tx) ({:?},{:?}), tx_payload_range {:?}, content {:?}", + index.ns(), + index.tx(), + range, + &payload_bytes[range.clone()] + ); + + if range.is_empty() { + None + } else { + Some(vid.payload_proof(payload_bytes, range).ok()?) + } + }; + + let tx = { + let ns_id = payload.ns_table().read_ns_id_unchecked(index.ns()); + let tx_payload = ns_payload + .read(&tx_payload_range) + .to_payload_bytes() + .to_vec(); + Transaction::new(ns_id, tx_payload) + }; + + Some(( + tx, + TxProof { + tx_index: index.tx().clone(), + payload_num_txs, + payload_proof_num_txs, + payload_tx_table_entries, + payload_proof_tx_table_entries, + payload_proof_tx, + }, + )) + } + + /// Verify a [`TxProof`] for `tx` against a payload commitment. Returns + /// `None` on error. + pub fn verify( + &self, + ns_table: &NsTable, + tx: &Transaction, + commit: &VidCommitment, + common: &VidCommon, + ) -> Option { + VidSchemeType::is_consistent(commit, common).ok()?; + let Some(ns_index) = ns_table.find_ns_id(&tx.namespace()) else { + tracing::info!("ns id {} does not exist", tx.namespace()); + return None; // error: ns id does not exist + }; + let ns_range = ns_table.ns_range(&ns_index, &PayloadByteLen::from_vid_common(common)); + let ns_byte_len = ns_range.byte_len(); + + if !NumTxs::new(&self.payload_num_txs, &ns_byte_len).in_bounds(&self.tx_index) { + tracing::info!("tx index {:?} out of bounds", self.tx_index); + return None; // error: tx index out of bounds + } + + let vid = vid_scheme( + VidSchemeType::get_num_storage_nodes(common) + .try_into() + .unwrap(), + ); + + // Verify proof for tx table len + { + let range = ns_range.block_range(&NumTxsRange::new(&ns_byte_len)); + if vid + .payload_verify( + Statement { + payload_subslice: &self.payload_num_txs.to_payload_bytes(), + range, 
+ commit, + common, + }, + &self.payload_proof_num_txs, + ) + .ok()? + .is_err() + { + return Some(false); + } + } + + // Verify proof for tx table entries + { + let range = ns_range.block_range(&TxTableEntriesRange::new(&self.tx_index)); + if vid + .payload_verify( + Statement { + payload_subslice: &self.payload_tx_table_entries.to_payload_bytes(), + range, + commit, + common, + }, + &self.payload_proof_tx_table_entries, + ) + .ok()? + .is_err() + { + return Some(false); + } + } + + // Verify proof for tx payload + { + let range = ns_range.block_range(&TxPayloadRange::new( + &self.payload_num_txs, + &self.payload_tx_table_entries, + &ns_byte_len, + )); + + match (&self.payload_proof_tx, range.is_empty()) { + (Some(proof), false) => { + if vid + .payload_verify( + Statement { + payload_subslice: tx.payload(), + range, + commit, + common, + }, + proof, + ) + .ok()? + .is_err() + { + return Some(false); + } + } + (None, true) => {} // 0-length tx, nothing to verify + (None, false) => { + tracing::error!( + "tx verify: missing proof for nonempty tx payload range {:?}", + range + ); + return None; + } + (Some(_), true) => { + tracing::error!("tx verify: unexpected proof for empty tx payload range"); + return None; + } + } + } + + Some(true) + } +} diff --git a/sequencer/src/block/namespace_payload/types.rs b/sequencer/src/block/namespace_payload/types.rs new file mode 100644 index 000000000..150932b03 --- /dev/null +++ b/sequencer/src/block/namespace_payload/types.rs @@ -0,0 +1,435 @@ +//! Types related to a namespace payload and its transaction table. +//! +//! All code that needs to know the binary format of a namespace payload and its +//! transaction table is restricted to this file. +//! +//! There are many newtypes in this file to facilitate transaction proofs. +//! +//! # Binary format of a namespace payload +//! +//! Any sequence of bytes is a valid [`NsPayload`]. +//! +//! A namespace payload consists of two concatenated byte sequences: +//! 
- `tx_table`: transaction table +//! - `tx_payloads`: transaction payloads +//! +//! # Transaction table +//! +//! Byte lengths for the different items that could appear in a `tx_table` are +//! specified in local private constants [`NUM_TXS_BYTE_LEN`], +//! [`TX_OFFSET_BYTE_LEN`]. +//! +//! ## Number of entries in the transaction table +//! +//! The first [`NUM_TXS_BYTE_LEN`] bytes of the `tx_table` indicate the number +//! `n` of entries in the table as a little-endian unsigned integer. If the +//! entire namespace payload byte length is smaller than [`NUM_TXS_BYTE_LEN`] +//! then the missing bytes are zero-padded. +//! +//! The bytes in the namespace payload beyond the first [`NUM_TXS_BYTE_LEN`] +//! bytes encode entries in the `tx_table`. Each entry consumes exactly +//! [`TX_OFFSET_BYTE_LEN`] bytes. +//! +//! The number `n` could be anything, including a number much larger than the +//! number of entries that could fit in the namespace payload. As such, the +//! actual number of entries in the `tx_table` is defined as the minimum of `n` +//! and the maximum number of whole `tx_table` entries that could fit in the +//! namespace payload. +//! +//! The `tx_payloads` consist of any bytes in the namespace payload beyond the +//! `tx_table`. +//! +//! ## Transaction table entry +//! +//! Each entry in the `tx_table` is exactly [`TX_OFFSET_BYTE_LEN`] bytes. These +//! bytes indicate the end-index of a transaction in the namespace payload +//! bytes. This end-index is a little-endian unsigned integer. +//! +//! This offset is relative to the end of the `tx_table` within the current +//! namespace. +//! +//! ### Example +//! +//! Suppose a block payload has 3000 bytes and 3 namespaces of 1000 bytes each. +//! Suppose the `tx_table` for final namespace in the block has byte length 100, +//! and suppose an entry in that `tx_table` indicates an end-index of `10`. The +//! actual end-index of that transaction relative to the current namespace is +//! 
`110`: `10` bytes for the offset plus `100` bytes for the `tx_table`. +//! Relative to the entire block payload, the end-index of that transaction is +//! `2110`: `10` bytes for the offset plus `100` bytes for the `tx_table` plus +//! `2000` bytes for this namespace. +//! +//! # How to deduce a transaction's byte range +//! +//! In order to extract the payload bytes of a single transaction `T` from the +//! namespace payload one needs both the start- and end-indices for `T`. +//! +//! See [`TxPayloadRange::new`] for clarification. What follows is a description +//! of what's implemented in [`TxPayloadRange::new`]. +//! +//! If `T` occupies the `i`th entry in the `tx_table` for `i>0` then the +//! start-index for `T` is defined as the end-index of the `(i-1)`th entry in +//! the table. +//! +//! Thus, both start- and end-indices for any transaction `T` can be read from a +//! contiguous, constant-size byte range in the `tx_table`. This property +//! facilitates transaction proofs. +//! +//! The start-index of the 0th entry in the table is implicitly defined to be +//! `0`. +//! +//! The start- and end-indices `(declared_start, declared_end)` declared in the +//! `tx_table` could be anything. As such, the actual start- and end-indices +//! `(start, end)` are defined so as to ensure that the byte range is +//! well-defined and in-bounds for the namespace payload: +//! ```ignore +//! end = min(declared_end, namespace_payload_byte_length) +//! start = min(declared_start, end) +//! ``` +//! +//! To get the byte range for `T` relative to the current namespace, the above +//! range is translated by the byte length of the `tx_table` *as declared in the +//! `tx_table` itself*, suitably truncated to fit within the current namespace. +//! +//! In particular, if the `tx_table` declares a huge number `n` of entries that +//! cannot fit into the namespace payload then all transactions in this +//! namespace have a zero-length byte range whose start- and end-indices are +//! 
both `namespace_payload_byte_length`.
+//!
+//! In an "honestly-prepared" `tx_table` the end-index of the final transaction
+//! equals the byte length of the namespace payload minus the byte length of the
+//! `tx_table`. (Otherwise the namespace payload might have bytes that are not
+//! included in any transaction.)
+//!
+//! It is possible that a `tx_table` could indicate two distinct
+//! transactions whose byte ranges overlap, though no "honestly-prepared"
+//! `tx_table` would do this.
+use crate::block::uint_bytes::{bytes_serde_impl, usize_from_bytes, usize_to_bytes};
+use crate::Transaction;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use std::ops::Range;
+
+/// Byte lengths for the different items that could appear in a tx table.
+const NUM_TXS_BYTE_LEN: usize = 4;
+const TX_OFFSET_BYTE_LEN: usize = 4;
+
+/// Data that can be deserialized from a subslice of namespace payload bytes.
+///
+/// Companion trait for [`NsPayloadBytesRange`], which specifies the subslice of
+/// namespace payload bytes to read.
+pub trait FromNsPayloadBytes<'a> {
+ /// Deserialize `Self` from namespace payload bytes.
+ fn from_payload_bytes(bytes: &'a [u8]) -> Self;
+}
+
+/// Specifies a subslice of namespace payload bytes to read.
+///
+/// Companion trait for [`FromNsPayloadBytes`], which holds data that can be
+/// deserialized from that subslice of bytes.
+pub trait NsPayloadBytesRange<'a> {
+ type Output: FromNsPayloadBytes<'a>;
+
+ /// Range relative to this ns payload
+ fn ns_payload_range(&self) -> Range;
+}
+
+/// Number of txs in a namespace.
+///
+/// Like [`NumTxsUnchecked`] but checked against a [`NsPayloadByteLen`].
+pub struct NumTxs(usize);
+
+impl NumTxs {
+ /// Returns the minimum of:
+ /// - `num_txs`
+ /// - The maximum number of tx table entries that could fit in a namespace
+ /// whose byte length is `byte_len`.
+ pub fn new(num_txs: &NumTxsUnchecked, byte_len: &NsPayloadByteLen) -> Self { + Self(std::cmp::min( + // Number of txs declared in the tx table + num_txs.0, + // Max number of tx table entries that could fit in the namespace payload + byte_len.0.saturating_sub(NUM_TXS_BYTE_LEN) / TX_OFFSET_BYTE_LEN, + )) + } + + pub fn in_bounds(&self, index: &TxIndex) -> bool { + index.0 < self.0 + } +} + +/// Byte length of a namespace payload. +pub struct NsPayloadByteLen(usize); + +impl NsPayloadByteLen { + // TODO restrict visibility? + pub fn from_usize(n: usize) -> Self { + Self(n) + } +} + +/// The part of a tx table that declares the number of txs in the payload. +/// +/// "Unchecked" because this quantity might exceed the number of tx table +/// entries that could fit into the namespace that contains it. +/// +/// Use [`NumTxs`] for the actual number of txs in this namespace. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct NumTxsUnchecked(usize); +bytes_serde_impl!( + NumTxsUnchecked, + to_payload_bytes, + [u8; NUM_TXS_BYTE_LEN], + from_payload_bytes +); + +impl NumTxsUnchecked { + pub fn to_payload_bytes(&self) -> [u8; NUM_TXS_BYTE_LEN] { + usize_to_bytes::(self.0) + } +} + +impl FromNsPayloadBytes<'_> for NumTxsUnchecked { + fn from_payload_bytes(bytes: &[u8]) -> Self { + Self(usize_from_bytes::(bytes)) + } +} + +/// Byte range for the part of a tx table that declares the number of txs in the +/// payload. +pub struct NumTxsRange(Range); + +impl NumTxsRange { + pub fn new(byte_len: &NsPayloadByteLen) -> Self { + Self(0..NUM_TXS_BYTE_LEN.min(byte_len.0)) + } +} + +impl NsPayloadBytesRange<'_> for NumTxsRange { + type Output = NumTxsUnchecked; + + fn ns_payload_range(&self) -> Range { + self.0.clone() + } +} + +/// Entries from a tx table in a namespace for use in a transaction proof. +/// +/// Contains either one or two entries according to whether it was derived from +/// the first transaction in the namespace. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub struct TxTableEntries { + cur: usize, + prev: Option, // `None` if derived from the first transaction +} + +// This serde impl uses Vec. We could save space by using an array of +// length `TWO_ENTRIES_BYTE_LEN`, but then we need a way to distinguish +// `prev=Some(0)` from `prev=None`. +bytes_serde_impl!( + TxTableEntries, + to_payload_bytes, + Vec, + from_payload_bytes +); + +impl TxTableEntries { + const TWO_ENTRIES_BYTE_LEN: usize = 2 * TX_OFFSET_BYTE_LEN; + + pub fn to_payload_bytes(&self) -> Vec { + let mut bytes = Vec::with_capacity(Self::TWO_ENTRIES_BYTE_LEN); + if let Some(prev) = self.prev { + bytes.extend(usize_to_bytes::(prev)); + } + bytes.extend(usize_to_bytes::(self.cur)); + bytes + } +} + +impl FromNsPayloadBytes<'_> for TxTableEntries { + fn from_payload_bytes(bytes: &[u8]) -> Self { + match bytes.len() { + TX_OFFSET_BYTE_LEN => Self { + cur: usize_from_bytes::(bytes), + prev: None, + }, + Self::TWO_ENTRIES_BYTE_LEN => Self { + cur: usize_from_bytes::(&bytes[TX_OFFSET_BYTE_LEN..]), + prev: Some(usize_from_bytes::( + &bytes[..TX_OFFSET_BYTE_LEN], + )), + }, + len => panic!( + "unexpected bytes len {} should be either {} or {}", + len, + TX_OFFSET_BYTE_LEN, + Self::TWO_ENTRIES_BYTE_LEN + ), + } + } +} + +/// Byte range for entries from a tx table for use in a transaction proof. +/// +/// This range covers either one or two entries from a tx table according to +/// whether it was derived from the first transaction in the namespace. +pub struct TxTableEntriesRange(Range); + +impl TxTableEntriesRange { + pub fn new(index: &TxIndex) -> Self { + let start = if index.0 == 0 { + // Special case: the desired range includes only one entry from + // the tx table: the first entry. This entry starts immediately + // following the bytes that encode the tx table length. + NUM_TXS_BYTE_LEN + } else { + // The desired range starts at the beginning of the previous tx + // table entry. 
+ (index.0 - 1) + .saturating_mul(TX_OFFSET_BYTE_LEN) + .saturating_add(NUM_TXS_BYTE_LEN) + }; + // The desired range ends at the end of this transaction's tx table entry + let end = index + .0 + .saturating_add(1) + .saturating_mul(TX_OFFSET_BYTE_LEN) + .saturating_add(NUM_TXS_BYTE_LEN); + Self(start..end) + } +} + +impl NsPayloadBytesRange<'_> for TxTableEntriesRange { + type Output = TxTableEntries; + + fn ns_payload_range(&self) -> Range { + self.0.clone() + } +} + +/// A transaction's payload data. +pub struct TxPayload<'a>(&'a [u8]); + +impl<'a> TxPayload<'a> { + pub fn to_payload_bytes(&self) -> &'a [u8] { + self.0 + } +} + +impl<'a> FromNsPayloadBytes<'a> for TxPayload<'a> { + fn from_payload_bytes(bytes: &'a [u8]) -> Self { + Self(bytes) + } +} + +/// Byte range for a transaction's payload data. +pub struct TxPayloadRange(Range); + +impl TxPayloadRange { + pub fn new( + num_txs: &NumTxsUnchecked, + tx_table_entries: &TxTableEntries, + byte_len: &NsPayloadByteLen, + ) -> Self { + let tx_table_byte_len = num_txs + .0 + .saturating_mul(TX_OFFSET_BYTE_LEN) + .saturating_add(NUM_TXS_BYTE_LEN); + let end = tx_table_entries + .cur + .saturating_add(tx_table_byte_len) + .min(byte_len.0); + let start = tx_table_entries + .prev + .unwrap_or(0) + .saturating_add(tx_table_byte_len) + .min(end); + Self(start..end) + } +} + +impl<'a> NsPayloadBytesRange<'a> for TxPayloadRange { + type Output = TxPayload<'a>; + + fn ns_payload_range(&self) -> Range { + self.0.clone() + } +} + +/// Index for an entry in a tx table. 
+#[derive(Clone, Debug, Eq, Hash, PartialEq)] +pub(in crate::block) struct TxIndex(usize); +bytes_serde_impl!(TxIndex, to_bytes, [u8; NUM_TXS_BYTE_LEN], from_bytes); + +impl TxIndex { + pub fn to_bytes(&self) -> [u8; NUM_TXS_BYTE_LEN] { + usize_to_bytes::(self.0) + } + fn from_bytes(bytes: &[u8]) -> Self { + Self(usize_from_bytes::(bytes)) + } +} + +pub(in crate::block) struct TxIter(Range); + +impl TxIter { + pub fn new(num_txs: &NumTxs) -> Self { + Self(0..num_txs.0) + } +} + +// Simple `impl Iterator` delegates to `Range`. +impl Iterator for TxIter { + type Item = TxIndex; + + fn next(&mut self) -> Option { + self.0.next().map(TxIndex) + } +} + +/// Build an individual namespace payload one transaction at a time. +/// +/// Use [`Self::append_tx`] to add each transaction. Use [`Self::into_bytes`] +/// when you're done. The returned bytes include a well-formed tx table and all +/// tx payloads. +#[derive(Default)] +pub(in crate::block) struct NsPayloadBuilder { + tx_table_entries: Vec, + tx_bodies: Vec, +} + +impl NsPayloadBuilder { + /// Add a transaction's payload to this namespace + pub fn append_tx(&mut self, tx: Transaction) { + self.tx_bodies.extend(tx.into_payload()); + self.tx_table_entries + .extend(usize_to_bytes::(self.tx_bodies.len())); + } + + /// Serialize to bytes and consume self. + pub fn into_bytes(self) -> Vec { + let mut result = Vec::with_capacity( + NUM_TXS_BYTE_LEN + self.tx_table_entries.len() + self.tx_bodies.len(), + ); + let num_txs = NumTxsUnchecked(self.tx_table_entries.len() / TX_OFFSET_BYTE_LEN); + result.extend(num_txs.to_payload_bytes()); + result.extend(self.tx_table_entries); + result.extend(self.tx_bodies); + result + } + + /// Byte length of a namespace with zero transactions. + /// + /// Currently this quantity equals the byte length of the tx table header. 
+ pub const fn fixed_overhead_byte_len() -> usize { + NUM_TXS_BYTE_LEN + } + + /// Byte length added to a namespace by a new transaction beyond that + /// transaction's payload byte length. + /// + /// Currently this quantity equals the byte length of a single tx table + /// entry. + pub const fn tx_overhead_byte_len() -> usize { + TX_OFFSET_BYTE_LEN + } +} diff --git a/sequencer/src/block/payload.rs b/sequencer/src/block/payload.rs deleted file mode 100644 index 664cde38d..000000000 --- a/sequencer/src/block/payload.rs +++ /dev/null @@ -1,1369 +0,0 @@ -use crate::block::entry::{TxTableEntry, TxTableEntryWord}; -use crate::block::payload; -use crate::block::tables::NameSpaceTable; -use crate::block::tables::TxTable; -use crate::{BlockBuildingSnafu, ChainConfig, Error, NamespaceId, SeqTypes, Transaction}; -use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; -use derivative::Derivative; -use hotshot::traits::BlockPayload; -use hotshot_types::vid::{ - vid_scheme, LargeRangeProofType, VidCommitment, VidCommon, VidSchemeType, -}; -use jf_vid::{ - payload_prover::{PayloadProver, Statement}, - VidScheme, -}; -use num_traits::PrimInt; -use serde::{Deserialize, Serialize}; -use snafu::OptionExt; -use std::default::Default; -use std::mem::size_of; -use std::{collections::HashMap, fmt::Display}; -use trait_set::trait_set; - -trait_set! { - - pub trait TableWordTraits = CanonicalSerialize - + CanonicalDeserialize - + TryFrom - + TryInto - + Default - + PrimInt - + std::marker::Sync; - - // Note: this trait is not used yet as for now the Payload structs are only parametrized with the TableWord parameter. - pub trait OffsetTraits = CanonicalSerialize - + CanonicalDeserialize - + TryFrom - + TryInto - + Default - + std::marker::Sync; - - // Note: this trait is not used yet as for now the Payload structs are only parametrized with the TableWord parameter. 
- pub trait NsIdTraits =CanonicalSerialize + CanonicalDeserialize + Default + std::marker::Sync; -} -pub(super) struct NamespaceInfo { - // `tx_table` is a bytes representation of the following table: - // word[0]: [number n of entries in tx table] - // word[j>0]: [end byte index of the (j-1)th tx in the payload] - // - // Thus, the ith tx payload bytes range is word[i-1]..word[i]. - // Edge case: tx_table[-1] is implicitly 0. - // - // Word type is `TxTableEntry`. - // - // TODO final entry should be implicit: - // https://github.com/EspressoSystems/espresso-sequencer/issues/757 - pub(crate) tx_table: Vec, - pub(crate) tx_bodies: Vec, // concatenation of all tx payloads - pub(crate) tx_bytes_end: TxTableEntry, // TODO make this field a usize instead - pub(crate) tx_table_len: TxTableEntry, // TODO make this field a usize instead -} - -#[allow(dead_code)] // TODO temporary -#[derive(Clone, Debug, Derivative, Deserialize, Eq, Serialize)] -#[derivative(Hash, PartialEq)] -// TODO remove the generic type param, use local constants instead -pub struct Payload { - // Sequence of bytes representing the concatenated payloads for each namespace - #[serde(with = "base64_bytes")] - pub(super) raw_payload: Vec, - - // Sequence of bytes representing the namespace table - pub(super) ns_table: NameSpaceTable, - // TODO(X) Revisit caching of frequently used items - // - // TODO type should be `OnceLock` instead of `OnceLock>`. - // We can correct this after `once_cell_try` is stabilized . 
- // #[derivative(Hash = "ignore")] - // #[derivative(PartialEq = "ignore")] - // #[serde(skip)] - // pub tx_table_len_proof: OnceLock>, -} - -impl Payload { - // TODO dead code even with `pub` because this module is private in lib.rs - #[allow(dead_code)] - pub fn num_namespaces(&self) -> usize { - self.ns_table.len() - } - - // TODO dead code even with `pub` because this module is private in lib.rs - #[allow(dead_code)] - pub fn namespace_iter(&self) -> impl Iterator { - 0..self.ns_table.len() - } - - /// Returns the list of txs for namespace `ns_id`. - pub fn namespace(&self, ns_id: NamespaceId) -> Option> { - let ns_index = self.ns_table.lookup(ns_id)?; - let ns_payload_range = self - .ns_table - .get_payload_range(ns_index, self.raw_payload.len()) - .1; - Some(parse_ns_payload( - self.raw_payload.get(ns_payload_range)?, - ns_id, - )) - } - - // TODO dead code even with `pub` because this module is private in lib.rs - #[allow(dead_code)] - /// Returns the flat bytes for namespace `ns_id`, along with a proof of correctness for those bytes. - /// - /// RPC-friendly proof contains: - /// - the namespace bytes - /// - `vid_common` needed to verify the proof. This data is not accessible to the verifier because it's not part of the block header. 
- pub fn namespace_with_proof( - &self, - // TODO don't need ns_table any more, it's part of self - ns_table: &NameSpaceTable, - ns_id: NamespaceId, - vid_common: VidCommon, - ) -> Option { - if self.raw_payload.len() != VidSchemeType::get_payload_byte_len(&vid_common) as usize { - return None; // error: vid_common inconsistent with self - } - - let ns_index = if let Some(ns_index) = ns_table.lookup(ns_id) { - ns_index - } else { - return Some(NamespaceProof::NonExistence { ns_id }); - }; - - let ns_payload_range = ns_table - .get_payload_range(ns_index, self.raw_payload.len()) - .1; - - // TODO log output for each `?` - // fix this when we settle on an error handling pattern - Some(NamespaceProof::Existence { - ns_id, - ns_payload_flat: self.raw_payload.get(ns_payload_range.clone())?.into(), - ns_proof: vid_scheme(VidSchemeType::get_num_storage_nodes(&vid_common) as usize) - .payload_proof(&self.raw_payload, ns_payload_range) - .ok()?, - vid_common, - }) - } - - pub fn get_ns_table(&self) -> &NameSpaceTable { - &self.ns_table - } - - pub fn from_txs( - txs: impl IntoIterator< - Item = as BlockPayload>::Transaction, - >, - chain_config: &ChainConfig, - ) -> Result { - let mut namespaces: HashMap = Default::default(); - let mut structured_payload = Self { - raw_payload: vec![], - ns_table: NameSpaceTable::default(), - }; - - let mut block_size = 0u64; - for tx in txs.into_iter() { - block_size += (tx.payload().len() + size_of::()) as u64; - - // block_size is updated when we encounter a new namespace - if !namespaces.contains_key(&tx.namespace()) { - block_size += size_of::() as u64; - } - - if block_size > *chain_config.max_block_size { - break; - } - - Payload::::update_namespace_with_tx(&mut namespaces, tx); - } - - structured_payload.generate_raw_payload(namespaces)?; - Ok(structured_payload) - } - - fn update_namespace_with_tx( - namespaces: &mut HashMap, - tx: as BlockPayload>::Transaction, - ) { - let tx_bytes_len: TxTableEntry = 
tx.payload().len().try_into().unwrap(); // TODO (Philippe) error handling - - let namespace = namespaces.entry(tx.namespace()).or_insert(NamespaceInfo { - tx_table: Vec::new(), - tx_bodies: Vec::new(), - tx_bytes_end: TxTableEntry::zero(), - tx_table_len: TxTableEntry::zero(), - }); - - namespace - .tx_bytes_end - .checked_add_mut(tx_bytes_len) - .unwrap(); // TODO (Philippe) error handling - namespace.tx_table.extend(namespace.tx_bytes_end.to_bytes()); - namespace.tx_bodies.extend(tx.payload()); - - namespace - .tx_table_len - .checked_add_mut(TxTableEntry::one()) - .unwrap(); // TODO (Philippe) error handling - } - - fn generate_raw_payload( - &mut self, - namespaces: HashMap, - ) -> Result<(), Error> { - // fill payload and namespace table - let mut payload = vec![]; - - self.ns_table = NameSpaceTable::from_bytes(Vec::from( - TxTableEntry::try_from(namespaces.len()) - .ok() - .context(BlockBuildingSnafu)? - .to_bytes(), - )); - - let mut namespaces_offsets = vec![]; - for (id, namespace) in namespaces { - payload.extend(namespace.tx_table_len.to_bytes()); - payload.extend(namespace.tx_table); - payload.extend(namespace.tx_bodies); - namespaces_offsets.push((id, payload.len())); - } - self.ns_table = NameSpaceTable::from_namespace_offsets(namespaces_offsets).unwrap(); - - self.raw_payload = payload; - Ok(()) - } -} - -impl Display for Payload { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{self:#?}") - } -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -#[serde(bound = "")] // for V -pub enum NamespaceProof { - Existence { - #[serde(with = "base64_bytes")] - ns_payload_flat: Vec, - ns_id: NamespaceId, - ns_proof: LargeRangeProofType, - vid_common: VidCommon, - }, - NonExistence { - ns_id: NamespaceId, - }, -} - -impl NamespaceProof { - /// Verify a [`NamespaceProof`]. - /// - /// All args must be available to the verifier in the block header. 
- #[allow(dead_code)] // TODO temporary - pub fn verify( - &self, - vid: &VidSchemeType, - commit: &VidCommitment, - ns_table: &NameSpaceTable, - ) -> Option<(Vec, NamespaceId)> { - match self { - NamespaceProof::Existence { - ns_payload_flat, - ns_id, - ns_proof, - vid_common, - } => { - let ns_index = ns_table.lookup(*ns_id)?; - - let (ns_id, ns_payload_range) = ns_table.get_payload_range( - ns_index, - VidSchemeType::get_payload_byte_len(vid_common) as usize, - ); - - // verify self against args - vid.payload_verify( - Statement { - payload_subslice: ns_payload_flat, - range: ns_payload_range, - commit, - common: vid_common, - }, - ns_proof, - ) - .ok()? - .ok()?; - - // verification succeeded, return some data - // we know ns_id is correct because the corresponding ns_payload_range passed verification - Some((parse_ns_payload(ns_payload_flat, ns_id), ns_id)) - } - NamespaceProof::NonExistence { ns_id } => { - if ns_table.lookup(*ns_id).is_some() { - return None; // error: expect not to find ns_id in ns_table - } - Some((Vec::new(), *ns_id)) - } - } - } -} - -pub fn parse_ns_payload(ns_bytes: &[u8], ns_id: NamespaceId) -> Vec { - let num_txs = TxTable::get_tx_table_len(ns_bytes); - (0..TxTable::get_tx_table_len(ns_bytes)) - .map(|tx_idx| TxTable::get_payload_range(ns_bytes, tx_idx, num_txs)) - .map(|tx_range| Transaction::new(ns_id, ns_bytes[tx_range].to_vec())) - .collect() -} - -#[cfg(any(test, feature = "testing"))] -impl hotshot_types::traits::block_contents::TestableBlock - for Payload -{ - fn genesis() -> Self { - BlockPayload::empty().0 - } - - fn txn_count(&self) -> u64 { - use hotshot_query_service::availability::QueryablePayload; - self.len(&self.ns_table) as u64 - } -} - -#[cfg(test)] -mod test { - use super::NamespaceProof; - use crate::{ - block::{ - entry::{TxTableEntry, TxTableEntryWord}, - payload::{parse_ns_payload, Payload, TableWordTraits}, - queryable, - tables::{test::TxTableTest, NameSpaceTable, Table, TxTable}, - tx_iterator::TxIndex, - }, 
- transaction::NamespaceId, - ChainConfig, NodeState, Transaction, ValidatedState, - }; - use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; - use helpers::*; - use hotshot_query_service::availability::QueryablePayload; - use hotshot_types::{ - traits::{block_contents::TestableBlock, BlockPayload}, - vid::vid_scheme, - }; - use jf_vid::{payload_prover::PayloadProver, VidScheme}; - use rand::RngCore; - use std::{collections::HashMap, marker::PhantomData, mem::size_of, ops::Range}; - - const NUM_STORAGE_NODES: usize = 10; - - #[test] - fn enforce_max_block_size() { - // sum of all payloads + table entry of each - let target_payload_total = 1000usize; - // include name space entry in max_block_size - let max_block_size = (target_payload_total + size_of::()) as u64; - let payload_size = 6; - // `tx_size` is payload + table entry size - let tx_size = (payload_size + size_of::()) as u64; - // check our sanity - assert_eq!(tx_size, 10); - - let n_txs = target_payload_total as u64 / tx_size; - let chain_config = ChainConfig { - max_block_size: max_block_size.into(), - ..Default::default() - }; - - let mut txs = (0..n_txs) - .map(|_| Transaction::of_size(payload_size)) - .collect::>(); - - assert_eq!(txs.len(), 100); - - txs.push(Transaction::of_size(payload_size)); - - // The final txn will be omitted - let payload = Payload::::from_txs(txs.clone(), &chain_config).unwrap(); - assert_eq!(payload.txn_count(), txs.len() as u64 - 1u64); - - txs.pop(); - // All txns will be included. 
- let payload = Payload::::from_txs(txs.clone(), &chain_config).unwrap(); - - assert_eq!(payload.txn_count(), txs.len() as u64); - } - - #[async_std::test] - async fn basic_correctness() { - check_basic_correctness::().await - } - - async fn check_basic_correctness() { - // play with this - let test_cases = [ - // 1 namespace only - vec![vec![5, 8, 8]], // 3 non-empty txs - vec![vec![0, 8, 8]], // 1 empty tx at the beginning - vec![vec![5, 0, 8]], // 1 empty tx in the middle - vec![vec![5, 8, 0]], // 1 empty tx at the end - vec![vec![5]], // 1 nonempty tx - vec![vec![0]], // 1 empty tx - // vec![], // zero txs - vec![vec![1000, 1000, 1000]], // large payload - //multiple namespaces - vec![vec![5, 8, 8], vec![7, 9, 11], vec![10, 5, 8]], // 3 non-empty namespaces - ]; - // TODO(746) future test cases - // vec![vec![], vec![7, 9, 11], vec![10, 5, 8]], // 1 empty namespace at the beginning - // vec![vec![5, 8, 8], vec![], vec![10, 5, 8]], // 1 empty namespace in the middle - // vec![vec![5, 8, 8], vec![7, 9, 11], vec![]], // 1 empty namespace at the end - // vec![vec![0], vec![0, 0]], // 2 non-empty namespaces with all-empty txs - // vec![vec![], vec![]], // 2 empty namespaces - // vec![vec![1000, 1000, 1000], vec![2000, 2000, 2000]], // large payload - - // vec![(0,5), (0,8), (0,8), (1,7), (1,9), (1,11), (2,10), (2,5), (2,8)], // 3 non-empty namespaces, in order - // vec![(14,5), (3,8), (7,8), (7,7), (14,9), (7,11), (3,10), (3,5), (14,8)], // 3 non-empty namespaces, out of order - // vec![(0,0), (1,7), (1,9), (1,11), (2,10), (2,5), (2,8)], // a namespace with 1 empty tx at the beginning - // vec![(0,5), (0,8), (0,8), (1,0), (2,10), (2,5), (2,8)], // a namespace with 1 empty tx in the middle - // vec![(0,0), (1,0)], // 2 namespaces, each with 1 empty tx - - setup_logging(); - setup_backtrace(); - let mut rng = jf_utils::test_rng(); - struct NamespaceInfo { - payload_flat: Vec, - tx_table: Vec, // TODO Philippe => change - #[allow(dead_code)] // TODO temporary - txs: 
Vec, - } - - let mut vid = vid_scheme(NUM_STORAGE_NODES); - let num_test_cases = test_cases.len(); - for (t, test_case) in test_cases.iter().enumerate() { - // DERIVE A BUNCH OF STUFF FOR THIS TEST CASE - let mut derived_nss = HashMap::new(); - let mut total_num_txs = 0; - for (n, tx_lengths) in test_case.iter().enumerate() { - tracing::info!( - "test block {} of {}, namespace {} of {}, with {} txs", - t + 1, - num_test_cases, - n + 1, - test_case.len(), - tx_lengths.len(), - ); - total_num_txs += tx_lengths.len(); - - // generate this namespace's tx payloads - let entries = entries_from_lengths(tx_lengths); - let tx_payloads_flat = random_bytes(tx_bodies_byte_len(&entries), &mut rng); - let tx_payloads = extract_tx_payloads(&entries, &tx_payloads_flat); - - // enforce well-formed test case - assert_eq!( - tx_payloads_flat, - tx_payloads.iter().flatten().cloned().collect::>(), - "test block {} namespace {} is malformed", - t + 1, - n + 1 - ); - - // derive this namespace's tx table - let tx_table_derived: Vec = tx_payloads - .iter() - .scan(TxTableEntry::zero(), |end, tx| { - end.checked_add_mut(TxTableEntry::try_from(tx.len()).unwrap()) - .unwrap(); - Some(end.clone()) - }) - .collect(); - - // derive this namespace's payload - let ns_payload_flat = { - let mut ns_payload = Vec::new(); - - // write tx table bytes - ns_payload.extend(TxTableEntry::from_usize(tx_table_derived.len()).to_bytes()); - for entry in tx_table_derived.iter() { - ns_payload.extend(entry.to_bytes()); - } - - ns_payload.extend(tx_payloads_flat); - ns_payload - }; - - let new_ns_id = (n as u64).into(); - let already_exists = derived_nss.insert( - new_ns_id, - NamespaceInfo { - payload_flat: ns_payload_flat, - tx_table: tx_table_derived, - txs: tx_payloads - .into_iter() - .map(|p| Transaction::new(new_ns_id, p)) - .collect::>(), - }, - ); - assert!(already_exists.is_none()); - } - assert_eq!(derived_nss.len(), test_case.len()); - - // COMPUTE ACTUAL STUFF AGAINST WHICH TO TEST DERIVED STUFF - 
let all_txs_iter = derived_nss - .iter() - .flat_map(|(_ns_id, ns)| ns.txs.iter().cloned()); - let (block, actual_ns_table) = Payload::from_transactions( - all_txs_iter, - &ValidatedState::default(), - &NodeState::mock(), - ) - .await - .unwrap(); - let disperse_data = vid.disperse(&block.raw_payload).unwrap(); - - // TEST ACTUAL STUFF AGAINST DERIVED STUFF - // test total ns length - assert_eq!(block.num_namespaces(), derived_nss.len()); - - // test total tx length - tracing::info!("actual_ns_table {:?}", actual_ns_table); - assert_eq!(block.len(&actual_ns_table), total_num_txs); - // TODO assert the final ns table entry offset == self.payload.len() - - // test namespace table length - let actual_ns_table_len = - TxTableEntry::from_bytes(&actual_ns_table.get_bytes()[..TxTableEntry::byte_len()]) - .unwrap(); - assert_eq!( - actual_ns_table_len, - TxTableEntry::try_from(test_case.len()).unwrap(), - "namespace table length expect {} got {}", - test_case.len(), - actual_ns_table_len - ); - - // test each namespace - // let mut tx_index_offset = 0; - let mut ns_iter = block.namespace_iter(); - let mut block_iter = block.iter(&actual_ns_table); // test iterator correctness - let mut prev_entry = TxTableEntry::zero(); - let mut derived_block_payload = Vec::new(); - for (ns_idx, (ns_id, entry)) in - ns_table_iter::(actual_ns_table.get_bytes()).enumerate() - { - // warning! 
ns_id may not equal NamespaceId(ns_idx) due to HashMap nondeterminism - - let derived_ns = derived_nss.remove(&ns_id).unwrap(); - - // test ns iterator - let ns_iter_idx = ns_iter.next().unwrap(); - assert_eq!(ns_iter_idx, ns_idx); - - // test ns payload - let actual_ns_payload_range = Range { - start: usize::try_from(prev_entry.clone()).unwrap(), - end: usize::try_from(entry.clone()).unwrap(), - }; - let actual_ns_payload_flat = block - .raw_payload - .get(actual_ns_payload_range.clone()) - .unwrap(); - assert_eq!( - actual_ns_payload_flat, derived_ns.payload_flat, - "namespace {ns_id} incorrect payload bytes", - ); - - // test ns without proof - let ns_txs = block.namespace(ns_id).unwrap(); - assert_eq!( - ns_txs, derived_ns.txs, - "namespace {ns_id} incorrect payload bytes returned from `namespace`", - ); - - // test ns proof - let ns_proof = block - .namespace_with_proof(&actual_ns_table, ns_id, disperse_data.common.clone()) - .unwrap(); - - if let NamespaceProof::Existence { - ref ns_payload_flat, - .. 
- } = ns_proof - { - assert_eq!( - ns_payload_flat, &derived_ns.payload_flat, - "namespace {ns_id} incorrect payload bytes returned from namespace_with_proof", - ); - } else { - // TODO test for non-existence - panic!("expect NamespaceProof::Existence variant"); - }; - - let (ns_proof_txs, ns_proof_ns_id) = ns_proof - .verify(&vid, &disperse_data.commit, &actual_ns_table) - .unwrap_or_else(|| panic!("namespace {ns_id} proof verification failure")); - assert_eq!(ns_proof_ns_id, ns_id); - assert_eq!(ns_proof_txs, derived_ns.txs); - - // test tx table length - let actual_tx_table_len_bytes = &actual_ns_payload_flat[..TxTableEntry::byte_len()]; - let actual_tx_table_len = - usize::try_from(TxTableEntry::from_bytes(actual_tx_table_len_bytes).unwrap()) - .unwrap(); - assert_eq!( - actual_tx_table_len, - derived_ns.tx_table.len(), - "namespace {ns_id} tx table length expect {} got {}", - derived_ns.tx_table.len(), - actual_tx_table_len - ); - - // test tx table contents - let actual_tx_table_body_bytes = &actual_ns_payload_flat[TxTableEntry::byte_len() - ..(actual_tx_table_len + 1) * TxTableEntry::byte_len()]; - // tracing::info!(ns t"x table bytes {:?}", actual_tx_table_body_bytes); - let actual_tx_table: Vec = actual_tx_table_body_bytes - .chunks(TxTableEntry::byte_len()) - .map(|bytes| TxTableEntry::from_bytes(bytes).unwrap()) - .collect(); - assert_eq!( - actual_tx_table, derived_ns.tx_table, - "namespace {ns_id} incorrect tx table for", - ); - - // testing tx iterator - for tx_idx in 0..derived_ns.tx_table.len() { - let next_tx = block_iter.next().unwrap(); - assert_eq!(ns_idx, next_tx.ns_idx); - assert_eq!(tx_idx, next_tx.tx_idx); - - let idx = TxIndex { ns_idx, tx_idx }; - - // test `transaction()` - let tx = block.transaction(&actual_ns_table, &idx).unwrap(); - assert_eq!(tx, derived_ns.txs[tx_idx]); - - // test `transaction_with_proof()` - let (tx_with_proof, proof) = block - .transaction_with_proof(&actual_ns_table, &idx) - .unwrap(); - assert_eq!(tx, 
tx_with_proof); - proof - .verify( - &tx_with_proof, - idx, - &vid, - &disperse_data.commit, - &disperse_data.common, - ) - .unwrap() - .unwrap(); - } - - prev_entry = entry; - derived_block_payload.extend(derived_ns.payload_flat.clone()); - } - assert!( - ns_iter.next().is_none(), - "expected ns iterator to be exhausted" - ); - assert!( - block_iter.next().is_none(), - "expected tx iterator to be exhausted" - ); - assert!( - derived_nss.is_empty(), - "some derived namespaces missing from namespace table" - ); - - // test full block payload - // assert_eq!(tx_index_offset, block.len()); - assert_eq!(block.raw_payload, derived_block_payload); - } - } - - #[test] - fn malformed_payloads() { - check_malformed_payloads::(); - //check_malformed_payloads::(); // TODO Philippe this test is failing - } - fn check_malformed_payloads() { - // play with this - let mut rng = jf_utils::test_rng(); - let test_cases = vec![ - // negative-length txs - TestCase::::from_entries(&[30, 10, 20], &mut rng), // 1 negative-length tx - TestCase::from_entries(&[30, 20, 10], &mut rng), // 2 negative-length txs - // truncated payload - TestCase::with_total_len(&[10, 20, 30], 20, &mut rng), // truncated tx payload - TestCase::with_trimmed_body(&[10, 20, 30], 0, &mut rng), // 0-length tx payload - TestCase::with_total_len(&[10, 20, u32::MAX as usize], 1000, &mut rng), // large tx truncated - // negative-length txs AND truncated payload - TestCase::with_total_len(&[30, 20, 10], 20, &mut rng), // negative-len txs, truncated tx payload - TestCase::with_trimmed_body(&[30, 20, 10], 0, &mut rng), // negative-len txs, 0-len tx payload - TestCase::with_total_len(&[10, u32::MAX as usize, 30], 1000, &mut rng), // negative-len tx, large tx truncated - // tx table fits inside payload - TestCase::from_tx_table_len(5, 100, &mut rng), - TestCase::from_tx_table_len(25, 1000, &mut rng), - // tx table too large for payload - TestCase::from_tx_table_len_unchecked(100, 40, &mut rng), - 
TestCase::from_tx_table_len_unchecked( - 10000, // TODO (Philippe) was TxTableEntry::MAX.try_into().unwrap(), - 100, &mut rng, - ), // huge tx table length - // extra payload bytes - TestCase::with_total_len(&[10, 20, 30], 1000, &mut rng), - TestCase::with_total_len(&[], 1000, &mut rng), // 0 txs - // extremely small payload - TestCase::from_tx_table_len_unchecked(1, 3, &mut rng), // 3-byte payload too small to store tx table len - TestCase::from_tx_table_len_unchecked(1000, 3, &mut rng), // 3-byte payload, large number of txs - TestCase::from_tx_table_len_unchecked(0, 3, &mut rng), // 3-byte payload, 0 txs - TestCase::from_tx_table_len_unchecked(6, 0, &mut rng), // 0-byte payload - ]; - - // TODO(817) more test cases: - // - this will break for extremely large payloads - // - should we hard-code an upper limit so arithmetic never overflows? - - setup_logging(); - setup_backtrace(); - - let mut vid = vid_scheme(NUM_STORAGE_NODES); - let num_test_cases = test_cases.len(); - for (t, test_case) in test_cases.into_iter().enumerate() { - let payload_byte_len = test_case.payload.len(); - tracing::info!( - "test payload {} of {} with {} txs and byte length {}", - t + 1, - num_test_cases, - test_case.num_txs, - payload_byte_len - ); - - // TODO don't initialize Payload with empty namespace table - let block = Payload::from_bytes(&test_case.payload, &NameSpaceTable::default()); - // assert_eq!(block.len(), test_case.num_txs); - assert_eq!(block.raw_payload.len(), payload_byte_len); - - let _disperse_data = vid.disperse(&block.raw_payload).unwrap(); - - // let mut tx_count: ::TransactionIndex = 0; // test iterator correctness - // for index in block.iter() { - // // tracing::info!("tx index {}", index,); - // let (tx, proof) = block.transaction_with_proof(&index).unwrap(); - // proof - // .verify( - // &tx, - // index, - // &vid, - // &disperse_data.commit, - // &disperse_data.common, - // ) - // .unwrap() - // .unwrap(); - // tx_count += 1; - // } - // 
assert_eq!(test_case.num_txs, usize::try_from(tx_count).unwrap()); - - // test: cannot make a proof for txs outside the tx table - // assert!(block.transaction_with_proof(&tx_count).is_none()); - } - } - - #[test] - fn malicious_tx_inclusion_proof() { - check_malicious_tx_inclusion_proof::(); - check_malicious_tx_inclusion_proof::(); - } - - fn check_malicious_tx_inclusion_proof() { - setup_logging(); - setup_backtrace(); - - let mut rng = jf_utils::test_rng(); - let test_case = TestCase::::from_tx_table_len_unchecked(1, 3, &mut rng); // 3-byte payload too small to store tx table len - - // TODO don't initialize Payload with empty namespace table - let block = Payload::from_bytes(&test_case.payload, &NameSpaceTable::default()); - assert_eq!(block.raw_payload.len(), test_case.payload.len()); - // assert_eq!(block.len(), test_case.num_txs); - - // test: cannot make a proof for such a small block - // assert!(block.transaction_with_proof(&0).is_none()); - - let mut vid = vid_scheme(NUM_STORAGE_NODES); - let disperse_data = vid.disperse(&block.raw_payload).unwrap(); - - // make a fake proof for a nonexistent tx in the small block - let tx = Transaction::new(Default::default(), Vec::new()); - let proof = queryable::gen_tx_proof_for_testing( - 0..block.raw_payload.len(), - TxTableEntry::from_usize(TxTable::get_tx_table_len(&block.raw_payload)), - vid.payload_proof( - &block.raw_payload, - 0..std::cmp::min(TxTableEntry::byte_len(), block.raw_payload.len()), - ) - .unwrap(), - vid.payload_proof(&block.raw_payload, 0..3).unwrap(), - ); - - // test: fake proof should get rejected - // TODO should return Some(Err()) instead of None - assert!(proof - .verify( - &tx, - TxIndex { - ns_idx: 0, - tx_idx: 0 - }, - &vid, - &disperse_data.commit, - &disperse_data.common - ) - .is_none()); - } - - #[test] - fn arbitrary_payloads() { - check_arbitrary_ns_table::(); - check_arbitrary_tx_table::(); - } - - fn check_arbitrary_ns_table() { - setup_logging(); - setup_backtrace(); - let mut 
rng = jf_utils::test_rng(); - let entry_len = TxTableEntry::byte_len(); - let mut vid = vid_scheme(NUM_STORAGE_NODES); - - // test 1 - let mut ns1 = vec![0; 100]; - rng.fill_bytes(&mut ns1); - write_usize(&mut ns1, 0, 13); - - // test 2 - let mut ns2 = vec![0; 100]; - rng.fill_bytes(&mut ns2); - write_usize(&mut ns2, 0, 12); - - // test 3 - let mut ns3 = vec![0; 100]; - rng.fill_bytes(&mut ns3); - write_usize(&mut ns3, 0, 12); - write_usize(&mut ns3, 2 * entry_len, 26); - - // test 4 - let namespace_offsets = vec![ - (NamespaceId::from(0), 100), - (NamespaceId::from(1), 200), - (NamespaceId::from(2), 300), - (NamespaceId::from(3), 50), - (NamespaceId::from(4), 150), - ]; - let ns4 = NameSpaceTable::::from_namespace_offsets(namespace_offsets) - .unwrap() - .get_bytes() - .to_vec(); - - let test_cases = vec![ - // test 0: arbitrary random bytes - vec![random_bytes(100, &mut rng), random_bytes(2000, &mut rng)], - vec![vec![], random_bytes(100, &mut rng)], - vec![random_bytes(100, &mut rng), vec![]], - vec![vec![0u8, 0u8, 3u8], random_bytes(100, &mut rng)], - // test 1: ns-table suggests 13 entries but ns-table length is only 100 bytes (max 12 namespaces) - vec![ns1, random_bytes(130, &mut rng)], - // test 2: ns-table suggests 12 entries but payload is 47 bytes => 11 empty namespaces - // vec![ns2, random_bytes(47, &mut rng)], - - // test 3: first entry in ns-table points to offset (26 * entry_len) but payload is only 100 bytes (max 25 namespaces) - vec![ns3, random_bytes(100, &mut rng)], - // test 4: overlapping namespaces is allowed but results in a zero-length namespace - vec![ns4, random_bytes(300, &mut rng)], - // test 5: more than one namespace with the same namespace id - ]; - - for test_case in test_cases.into_iter() { - let actual_ns_table_bytes = &test_case[0]; - let actual_payload_bytes = &test_case[1]; - - let block = Payload::from_bytes( - actual_payload_bytes, - &NameSpaceTable::from_bytes(actual_ns_table_bytes.to_vec()), - ); - let disperse_data = 
vid.disperse(&block.raw_payload).unwrap(); - - let ns_table = block.get_ns_table(); - let ns_table_len = ns_table.len(); - - let actual_ns_table_len = { - let left = read_usize(actual_ns_table_bytes, 0); - let right = actual_ns_table_bytes - .len() - .saturating_sub(TxTableEntry::byte_len()) - / (2 * TxTableEntry::byte_len()); - std::cmp::min(left, right) - }; - - assert_eq!( - ns_table_len, actual_ns_table_len, - "deduced ns table len is {} but actual ns table len is {}", - ns_table_len, actual_ns_table_len - ); - - let mut last_offset = 0; - for ns_idx in 0..ns_table_len { - let (ns_id, ns_range) = ns_table.get_payload_range(ns_idx, block.raw_payload.len()); - // test ns range - let start = ns_range.start; - let end = ns_range.end; - assert!(start <= end, "ensure valid range for namespace",); - assert!( - end <= block.raw_payload.len(), - "deduced range of ns_idx: {} is ending at: {} but payload length is only: {}", - ns_idx, - end, - actual_ns_table_bytes.len(), - ); - - // test ns proof - let ns_proof_option = block.namespace_with_proof( - block.get_ns_table(), - ns_id, - disperse_data.common.clone(), - ); - if let Some(ns_proof) = ns_proof_option { - if let NamespaceProof::Existence { - ref ns_payload_flat, - .. 
- } = ns_proof - { - assert_eq!( - ns_payload_flat, &block.raw_payload[ns_range.clone()], - "namespace {} incorrect payload bytes returned from namespace_with_proof", - ns_id, - ); - } else { - panic!("expect NamespaceProof::Existence variant"); - }; - } else { - assert!(ns_range.is_empty()); - } - - // test overlapping namespaces - if ns_range.end < last_offset { - assert!(ns_range.is_empty(), "identified overlapping namespaces but the resulting namespace range is not empty"); - } - last_offset = ns_range.end; - } - } - } - - fn check_arbitrary_tx_table() { - setup_logging(); - setup_backtrace(); - let mut rng = jf_utils::test_rng(); - let entry_len = TxTableEntry::byte_len(); - - // test 1 - let namespace_offsets = vec![ - (NamespaceId::from(0), 100), - (NamespaceId::from(1), 200), - (NamespaceId::from(2), 300), - ]; - let ns1 = NameSpaceTable::::from_namespace_offsets(namespace_offsets) - .unwrap() - .get_bytes() - .to_vec(); - let mut payload1 = vec![0; 300]; - rng.fill_bytes(&mut payload1); - write_usize(&mut payload1, 0, 25); - - // test 2 - let ns2 = ns1.clone(); - let mut payload2 = vec![0; 300]; - rng.fill_bytes(&mut payload2); - write_usize(&mut payload2, 0, 5); - write_usize(&mut payload2, entry_len, 101); - - // test 3 - let ns3 = ns1.clone(); - let mut payload3 = vec![0; 300]; - rng.fill_bytes(&mut payload3); - write_usize(&mut payload3, 200, 5); - write_usize(&mut payload3, 200 + entry_len, 6); - write_usize(&mut payload3, 200 + (2 * entry_len), 6); - write_usize(&mut payload3, 200 + (3 * entry_len), 101); - - // test 4 - let namespace_offsets = vec![ - (NamespaceId::from(0), 1000), - (NamespaceId::from(1), 1300), - (NamespaceId::from(2), 2300), - ]; - let ns4 = NameSpaceTable::::from_namespace_offsets(namespace_offsets) - .unwrap() - .get_bytes() - .to_vec(); - let mut payload4 = vec![0; 2300]; - rng.fill_bytes(&mut payload4); - write_usize(&mut payload4, 1000, 5); - write_usize(&mut payload4, 1000 + entry_len, 100); - write_usize(&mut payload4, 1000 
+ (2 * entry_len), 200); - write_usize(&mut payload4, 1000 + (3 * entry_len), 300); - write_usize(&mut payload4, 1000 + (4 * entry_len), 50); - write_usize(&mut payload4, 1000 + (5 * entry_len), 150); - - let test_cases = vec![ - // test 1: tx-table suggests 25 entries but ns length is only 100 bytes (max 24 txs) - vec![ns1, payload1], - // test 2: first entry in tx-table points to offset 101 but ns is only 100 bytes - vec![ns2, payload2], - // test 3: first two namespaces are random bytes. - // the third namespace has 5 txs - vec![ns3, payload3], - // test 4: 3 namespaces where first and last are random bytes. - // the middle namespace has overlapping transaction payloads - vec![ns4, payload4], - ]; - - for test_case in test_cases.into_iter() { - let actual_ns_table_bytes = &test_case[0]; - let actual_payload_bytes = &test_case[1]; - - let block = Payload::from_bytes( - actual_payload_bytes, - &NameSpaceTable::from_bytes(actual_ns_table_bytes.to_vec()), - ); - let ns_table = block.get_ns_table(); - let mut total_tx_num = 0; - let mut tx_iter = block.iter(ns_table); - for ns_idx in 0..ns_table.len() { - let (ns_id, ns_range) = ns_table.get_payload_range(ns_idx, block.raw_payload.len()); - let ns_bytes = &block.raw_payload[ns_range.clone()]; - - // ns cannot hold more than max num of txs - let tx_table_len = TxTable::get_tx_table_len(ns_bytes); - let max_tx_table_len = ns_bytes.len().saturating_sub(TxTableEntry::byte_len()) - / TxTableEntry::byte_len(); - assert!( - tx_table_len <= max_tx_table_len, - "derived tx table len is {} but actual ns has room only for {} txs", - tx_table_len, - max_tx_table_len - ); - - let txs = parse_ns_payload(ns_bytes, ns_id); - total_tx_num += txs.len(); - - let actual_tx_table_len = read_usize(ns_bytes, 0); - if max_tx_table_len < actual_tx_table_len { - assert!(txs.iter().all(|tx| tx.payload().is_empty()), - "advertised tx-table length cannot possibly fit in namespace; all txs should be empty"); - } - - let tx_payloads_offset = 
(tx_table_len + 1) * TxTableEntry::byte_len(); - let mut last_offset = tx_payloads_offset; - let mut tx_offset_bytes = vec![0u8; TxTableEntry::byte_len()]; - - for (tx_idx, tx) in txs.iter().enumerate() { - assert!(tx_iter.next().is_some()); - - let tx_range = TxTable::get_payload_range(ns_bytes, tx_idx, tx_table_len); - // read tx end offset directly from raw payload bytes - tx_offset_bytes[..TxTableEntry::byte_len()].copy_from_slice( - &actual_payload_bytes[ns_range.start - + (tx_idx + 1) * TxTableEntry::byte_len() - ..(ns_range.start + (tx_idx + 2) * TxTableEntry::byte_len())], - ); - let tx_offset = usize::try_from( - TxTableEntry::from_bytes(&tx_offset_bytes).unwrap_or(TxTableEntry::zero()), - ) - .unwrap_or(0); - - let mut malformed = false; - let actual_tx_offset = tx_payloads_offset + tx_offset; - - // check derived tx byte range - if actual_tx_offset > ns_bytes.len() { - assert_eq!( - tx_range.end, - ns_bytes.len(), - "tx offset should be clamped at the end of namespace" - ); - if last_offset > ns_bytes.len() { - assert_eq!( - tx.payload().len(), - 0, - "tx payload should be empty if start and end are both clamped" - ); - } - malformed = true; - } - - // check overlapping tx payloads - if actual_tx_offset < last_offset { - assert_eq!( - tx.payload().len(), - 0, - "identified overlapping tx payloads; negative length tx is empty" - ); - malformed = true; - } - - // derive tx-length if tx is not malformed - if !malformed { - assert_eq!( - tx.payload().len(), - actual_tx_offset - last_offset, - "tx payload is derived to be {} but should be {}", - tx.payload().len(), - actual_tx_offset - last_offset - ); - } - last_offset = actual_tx_offset; - } - } - assert_eq!( - block.len(&block.ns_table), - total_tx_num, - "block has {} txs but number of total tx from all namespaces is {}", - block.len(&block.ns_table), - total_tx_num - ) - } - } - - struct TestCase { - payload: Vec, - num_txs: usize, - phantomdata: PhantomData, - } - impl TestCase { - /// Return a 
well-formed random block whose tx table is derived from `lengths`. - #[allow(dead_code)] - fn from_lengths(lengths: &[usize], rng: &mut R) -> Self { - Self::from_entries(&entries_from_lengths(lengths), rng) - } - - /// Return a random block whose tx table is derived from `entries`. - /// - /// If `entries` is well-formed then the result is well-formed. - fn from_entries(entries: &[usize], rng: &mut R) -> Self { - let tx_table = TxTableTest::::from_entries(entries); - Self { - payload: [ - tx_table.get_payload(), - random_bytes(tx_bodies_byte_len(entries), rng), - ] - .concat(), - num_txs: entries.len(), - phantomdata: Default::default(), - } - } - - /// Like `from_entries` except the tx bodies byte length is `body_len`. - /// - /// Panics if `body_len` would not actually decrease the block size. - fn with_trimmed_body(entries: &[usize], body_len: usize, rng: &mut R) -> Self { - assert!( - body_len < tx_bodies_byte_len(entries), - "body_len too large to trim the body" - ); - let tx_table = TxTableTest::::from_entries(entries); - Self { - payload: [tx_table.get_payload(), random_bytes(body_len, rng)].concat(), - num_txs: entries.len(), - phantomdata: Default::default(), - } - } - - /// Like `from_entries` except the byte length of the block is `block_byte_len`. - /// - /// Panics if `block_byte_len` would truncate the tx table. - /// If you want to truncate the tx table then use `with_total_len_unchecked`. - /// - /// If `block_byte_len` would increase block size then new space is filled with random bytes. - fn with_total_len( - entries: &[usize], - block_byte_len: usize, - rng: &mut R, - ) -> Self { - assert!( - tx_table_byte_len::(entries) <= block_byte_len, - "tx table size {} for entries {:?} exceeds block_byte_len {}", - tx_table_byte_len::(entries), - entries, - block_byte_len - ); - Self::with_total_len_unchecked(entries, block_byte_len, rng) - } - - /// Like `with_total_len` except `block_byte_len` may truncate the tx table. 
- fn with_total_len_unchecked( - entries: &[usize], - block_byte_len: usize, - rng: &mut R, - ) -> Self { - let tx_table = TxTableTest::::from_entries(entries); - let mut payload = tx_table.get_payload(); - let num_txs = if block_byte_len > payload.len() { - payload.extend(random_bytes(block_byte_len - payload.len(), rng)); - entries.len() - } else { - payload.truncate(block_byte_len); - (block_byte_len / TxTableTest::::byte_len()).saturating_sub(1) - }; - Self { - payload, - num_txs, - phantomdata: Default::default(), - } - } - - /// Return a random block whose tx table indicates `tx_table_len` txs and whose total byte length is `block_byte_len`. - /// - /// Every byte of the block is random except the tx table header. - /// - /// Panics if `txs_byte_len` would truncate the tx table. - /// If you want to truncate the tx table then use `with_total_len_unchecked`. - fn from_tx_table_len( - tx_table_len: usize, - block_byte_len: usize, - rng: &mut R, - ) -> Self { - let tx_table_byte_len = (tx_table_len + 1) * TxTableTest::::byte_len(); - assert!( - tx_table_byte_len <= block_byte_len, - "tx table size {} exceeds block size {}", - tx_table_byte_len, - block_byte_len - ); - Self::from_tx_table_len_unchecked(tx_table_len, block_byte_len, rng) - } - - /// Like `from_tx_table_len` except `block_byte_len` may truncate the tx table. 
- fn from_tx_table_len_unchecked( - tx_table_len: usize, - block_byte_len: usize, - rng: &mut R, - ) -> Self { - // accommodate extremely small block payload - let header_byte_len = - std::cmp::min(TxTableTest::::byte_len(), block_byte_len); - let mut payload = vec![0; block_byte_len]; - rng.fill_bytes(&mut payload); - payload[..header_byte_len].copy_from_slice( - &TxTableEntry::from_usize(tx_table_len).to_bytes()[..header_byte_len], // TODO (Philippe) remove - ); - Self { - payload, - num_txs: std::cmp::min( - tx_table_len, - (block_byte_len / TxTableTest::::byte_len()).saturating_sub(1), - ), - phantomdata: Default::default(), - } - } - } - - mod helpers { - use crate::block::entry::TxTableEntry; - use crate::block::payload::TableWordTraits; - use crate::block::tables::{test::TxTableTest, NameSpaceTable, Table}; - use crate::NamespaceId; - use rand::RngCore; - - pub fn tx_table_byte_len(entries: &[usize]) -> usize { - (entries.len() + 1) * TxTableTest::::byte_len() - } - - pub fn entries_from_lengths(lengths: &[usize]) -> Vec { - lengths - .iter() - .scan(0, |sum, &len| { - *sum += len; - Some(*sum) - }) - .collect() - } - - #[test] - fn tx_table_helpers() { - assert_eq!(vec![10, 20, 30], entries_from_lengths(&[10, 10, 10])); - } - - pub fn tx_bodies_byte_len(entries: &[usize]) -> usize { - // largest entry in the tx table dictates size of tx payloads - *entries.iter().max().unwrap_or(&0) - } - - pub fn write_usize(bytes: &mut [u8], pos: usize, val: usize) { - let end = std::cmp::min(pos + TxTableEntry::byte_len(), bytes.len()); - let start = std::cmp::min(pos, end); - let range = start..end; - bytes[range.clone()] - .copy_from_slice(&TxTableEntry::from_usize(val).to_bytes()[..range.len()]); - } - - pub fn read_usize(bytes: &[u8], pos: usize) -> usize { - let end = std::cmp::min(pos + TxTableEntry::byte_len(), bytes.len()); - let start = std::cmp::min(pos, end); - let range = start..end; - let mut entry_bytes = [0u8; TxTableEntry::byte_len()]; - 
entry_bytes[..range.len()].copy_from_slice(&bytes[start..end]); - TxTableEntry::from_bytes_array(entry_bytes) - .try_into() - .unwrap() - } - - pub fn random_bytes(len: usize, rng: &mut R) -> Vec { - let mut result = vec![0; len]; - rng.fill_bytes(&mut result); - result - } - - pub fn extract_tx_payloads(entries: &[usize], tx_payloads_flat: &[u8]) -> Vec> { - let mut result = Vec::with_capacity(entries.len()); - let mut start = 0; - for end in entries { - let end = std::cmp::min(*end, tx_payloads_flat.len()); - let tx_payload = if start >= end { - Vec::new() - } else { - tx_payloads_flat[start..end].to_vec() - }; - start = end; - result.push(tx_payload); - } - assert_eq!( - result.len(), - entries.len(), - "bug in test code: expect to extract {} txs but got {}", - entries.len(), - result.len() - ); - result - } - - pub fn ns_table_iter( - ns_table_bytes: &[u8], - ) -> impl Iterator + '_ { - ns_table_bytes[NameSpaceTable::::byte_len()..] // first few bytes is the table length, skip that - .chunks(2 * TxTableEntry::byte_len()) - .map(|bytes| { - // read (namespace id, entry) from the namespace table - let ns_id = NamespaceId::try_from( - TxTableEntry::from_bytes(&bytes[..TxTableEntry::byte_len()]).unwrap(), - ) - .unwrap(); - let entry = - TxTableEntry::from_bytes(&bytes[TxTableEntry::byte_len()..]).unwrap(); - (ns_id, entry) - }) - } - } -} diff --git a/sequencer/src/block/queryable.rs b/sequencer/src/block/queryable.rs deleted file mode 100644 index 114f0a826..000000000 --- a/sequencer/src/block/queryable.rs +++ /dev/null @@ -1,334 +0,0 @@ -use crate::block::payload::Payload; -use crate::block::tables::TxTable; -use crate::{block::entry::TxTableEntryWord, SeqTypes}; -use hotshot_query_service::availability::QueryablePayload; -use hotshot_types::vid::{vid_scheme, SmallRangeProofType}; -use jf_vid::payload_prover::{PayloadProver, Statement}; -use serde::{Deserialize, Serialize}; -use std::ops::Range; - -use crate::Transaction; - -use super::{ - entry::TxTableEntry, - 
tx_iterator::{TxIndex, TxIterator}, -}; - -// TODO don't hard-code TxTableEntryWord generic param -impl QueryablePayload for Payload { - type TransactionIndex = TxIndex; - type Iter<'a> = TxIterator<'a, TxTableEntryWord>; - type InclusionProof = TxInclusionProof; - - fn len(&self, ns_table: &Self::Metadata) -> usize { - (0..ns_table.len()) - .map(|ns_idx| ns_table.get_payload_range(ns_idx, self.raw_payload.len()).1) - .map(|ns_range| TxTable::get_tx_table_len(&self.raw_payload[ns_range])) - .sum() - } - - fn iter<'a>(&'a self, ns_table: &'a Self::Metadata) -> Self::Iter<'a> { - TxIterator::new(ns_table, self) - } - - fn transaction( - &self, - meta: &Self::Metadata, - index: &Self::TransactionIndex, - ) -> Option { - let (ns_idx, tx_idx) = (index.ns_idx, index.tx_idx); - if ns_idx >= meta.len() { - return None; // error: index out of bounds - } - let (ns_id, ns_range) = meta.get_payload_range(ns_idx, self.raw_payload.len()); - - let tx_table_len = TxTable::get_tx_table_len(&self.raw_payload[ns_range.clone()]); - if tx_idx >= tx_table_len { - return None; // error: index out of bounds - } - let ns_payload = &self.raw_payload[ns_range.clone()]; - - let tx_within_ns = TxTable::get_payload_range(ns_payload, tx_idx, tx_table_len); - let (start, end) = (tx_within_ns.start, tx_within_ns.end); - let ns_start = ns_range.start; - let tx_payload_range = start.saturating_add(ns_start)..end.saturating_add(ns_start); - - let tx_payload = self.raw_payload.get(tx_payload_range)?.to_vec(); - - Some(Transaction::new(ns_id, tx_payload)) - } - - fn transaction_with_proof( - &self, - meta: &Self::Metadata, - index: &Self::TransactionIndex, - ) -> Option<(Self::Transaction, Self::InclusionProof)> { - let (ns_idx, tx_idx) = (index.ns_idx, index.tx_idx); - if ns_idx >= meta.len() { - return None; // error: index out of bounds - } - let (ns_id, ns_range) = meta.get_payload_range(ns_idx, self.raw_payload.len()); - let ns_start_offset = ns_range.start; - - let tx_table_len = 
TxTable::get_tx_table_len(&self.raw_payload[ns_range.clone()]); - if tx_idx >= tx_table_len { - return None; // error: index out of bounds - } - - let tx_payloads_offset = tx_table_len - .checked_add(1)? - .checked_mul(TxTableEntry::byte_len())? - .checked_add(ns_start_offset)?; - - // TODO temporary VID construction. We need to get the number of storage nodes from the VID - // common data. May need the query service to pass common into this function along with - // metadata. - let vid = vid_scheme(10); - - // Read the tx payload range from the tx table into `tx_table_range_[start|end]` and compute a proof that this range is correct. - // - // This correctness proof requires a range of its own, which we read into `tx_table_range_proof_[start|end]`. - // - // Edge case--the first transaction: tx payload range `start` is implicitly 0 and we do not include this item in the correctness proof. - // - // TODO why isn't cargo fmt wrapping these comments? - - // start - let (tx_table_range_proof_start, tx_table_range_start) = if tx_idx == 0 { - (TxTableEntry::byte_len().checked_add(ns_start_offset)?, None) - } else { - let range_proof_start = tx_idx - .checked_mul(TxTableEntry::byte_len())? - .checked_add(ns_start_offset)?; - ( - range_proof_start, - Some(TxTableEntry::from_bytes(self.raw_payload.get( - range_proof_start..range_proof_start.checked_add(TxTableEntry::byte_len())?, - )?)?), - ) - }; - - // end - let tx_table_range_proof_end = tx_idx - .checked_add(2)? - .checked_mul(TxTableEntry::byte_len())? - .checked_add(ns_start_offset)?; - - let tx_table_range_end = TxTableEntry::from_bytes(self.raw_payload.get( - tx_table_range_proof_end.checked_sub(TxTableEntry::byte_len())? - ..tx_table_range_proof_end, - )?)?; - - let tx_payload_range = { - let start = - usize::try_from(tx_table_range_start.clone().unwrap_or(TxTableEntry::zero())) - .ok()? - .checked_add(tx_payloads_offset)?; - let end = usize::try_from(tx_table_range_end.clone()) - .ok()? 
- .checked_add(tx_payloads_offset)?; - let end = std::cmp::min(end, ns_range.end); - let start = std::cmp::min(start, end); - start..end - }; - - // correctness proof for the tx payload range - let tx_table_range_proof = vid - .payload_proof( - &self.raw_payload, - tx_table_range_proof_start..tx_table_range_proof_end, - ) - .ok()?; - let tx_table_len_range = ns_range.start - ..std::cmp::min( - ns_range.end, - ns_range.start.checked_add(TxTableEntry::byte_len())?, - ); - Some(( - // TODO don't copy the tx bytes into the return value - // https://github.com/EspressoSystems/hotshot-query-service/issues/267 - Transaction::new( - ns_id, - self.raw_payload.get(tx_payload_range.clone())?.to_vec(), - ), - TxInclusionProof { - ns_range: ns_range.clone(), - tx_table_len: TxTableEntry::from_usize(tx_table_len), - tx_table_len_proof: vid - .payload_proof(&self.raw_payload, tx_table_len_range) - .unwrap(), - tx_table_range_start, - tx_table_range_end, - tx_table_range_proof, - tx_payload_proof: if tx_payload_range.is_empty() { - None - } else { - vid.payload_proof(&self.raw_payload, tx_payload_range).ok() - }, - }, - )) - } -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct TxInclusionProof { - ns_range: Range, - tx_table_len: TxTableEntry, - tx_table_len_proof: SmallRangeProofType, - - tx_table_range_start: Option, // `None` for the 0th tx - tx_table_range_end: TxTableEntry, - tx_table_range_proof: SmallRangeProofType, - - tx_payload_proof: Option, // `None` if the tx has zero length -} - -impl TxInclusionProof { - // TODO currently broken, fix in https://github.com/EspressoSystems/espresso-sequencer/issues/1010 - // - // - We need to decide where to store VID params. - // - Returns `None` if an error occurred. - // - Use of `Result<(),()>` pattern to enable use of `?` for concise abort-on-failure. 
- #[allow(dead_code)] // TODO temporary - #[allow(clippy::too_many_arguments)] - pub fn verify( - &self, - tx: &Transaction, - tx_index: TxIndex, - vid: &V, - vid_commit: &V::Commit, - vid_common: &V::Common, - ) -> Option> - where - V: PayloadProver, - { - V::is_consistent(vid_commit, vid_common).ok()?; - - // Verify proof for tx payload. - // Proof is `None` if and only if tx has zero length. - let tx_payloads_offset = usize::try_from(self.tx_table_len.clone()) - .ok()? - .checked_add(1)? - .checked_mul(TxTableEntry::byte_len())? - .checked_add(self.ns_range.start)?; - let tx_payload_range = { - let start = usize::try_from( - self.tx_table_range_start - .clone() - .unwrap_or(TxTableEntry::zero()), - ) - .ok()? - .checked_add(tx_payloads_offset)?; - let end = usize::try_from(self.tx_table_range_end.clone()) - .ok()? - .checked_add(tx_payloads_offset)?; - let end = std::cmp::min(end, self.ns_range.end); - let start = std::cmp::min(start, end); - start..end - }; - match &self.tx_payload_proof { - Some(tx_payload_proof) => { - if vid - .payload_verify( - Statement { - payload_subslice: tx.payload(), - range: tx_payload_range, - commit: vid_commit, - common: vid_common, - }, - tx_payload_proof, - ) - .ok()? - .is_err() - { - return Some(Err(())); // TODO it would be nice to use ? here... - } - } - None => { - if !tx.payload().is_empty() || !tx_payload_range.is_empty() { - return None; // error: nonempty payload but no proof - } - } - }; - - // Verify proof for tx table len. - if vid - .payload_verify( - Statement { - payload_subslice: &self.tx_table_len.to_bytes(), - range: self.ns_range.start - ..self.ns_range.start.checked_add(TxTableEntry::byte_len())?, - commit: vid_commit, - common: vid_common, - }, - &self.tx_table_len_proof, - ) - .ok()? - .is_err() - { - return Some(Err(())); - } - - // Verify proof for tx table entries. 
- // Start index missing for the 0th tx - let index: usize = tx_index.tx_idx; - let mut tx_table_range_bytes = - Vec::with_capacity(2usize.checked_mul(TxTableEntry::byte_len())?); - let start = if let Some(tx_table_range_start) = &self.tx_table_range_start { - if index == 0 { - return None; // error: first tx should have empty start index - } - tx_table_range_bytes.extend(tx_table_range_start.to_bytes()); - index - .checked_mul(TxTableEntry::byte_len())? - .checked_add(self.ns_range.start)? - } else { - if index != 0 { - return None; // error: only the first tx should have empty start index - } - TxTableEntry::byte_len().checked_add(self.ns_range.start)? - }; - tx_table_range_bytes.extend(self.tx_table_range_end.to_bytes()); - let range = start - ..index - .checked_add(2)? - .checked_mul(TxTableEntry::byte_len())? - .checked_add(self.ns_range.start)?; - - if vid - .payload_verify( - Statement { - payload_subslice: &tx_table_range_bytes, - range, - commit: vid_commit, - common: vid_common, - }, - &self.tx_table_range_proof, - ) - .ok()? 
- .is_err() - { - return Some(Err(())); - } - - Some(Ok(())) - } -} - -#[cfg(test)] -pub(crate) fn gen_tx_proof_for_testing( - ns_range: Range, - tx_table_len: TxTableEntry, - tx_table_len_proof: SmallRangeProofType, - payload_proof: SmallRangeProofType, -) -> TxInclusionProof { - TxInclusionProof { - ns_range, - tx_table_len, - tx_table_len_proof, - tx_table_range_start: None, - tx_table_range_end: TxTableEntry::from_usize(1), - tx_table_range_proof: payload_proof, - tx_payload_proof: None, - } -} diff --git a/sequencer/src/block/tables.rs b/sequencer/src/block/tables.rs deleted file mode 100644 index bb00150e7..000000000 --- a/sequencer/src/block/tables.rs +++ /dev/null @@ -1,305 +0,0 @@ -use crate::block::entry::TxTableEntry; -use crate::block::payload::TableWordTraits; -use crate::{BlockBuildingSnafu, Error, NamespaceId}; -use derivative::Derivative; -use hotshot_types::traits::EncodeBytes; -use serde::{Deserialize, Serialize}; -use snafu::OptionExt; -use std::marker::PhantomData; -use std::mem::size_of; -use std::ops::Range; -use std::sync::Arc; - -pub trait Table { - // Read TxTableEntry::byte_len() bytes from `table_bytes` starting at `offset`. - // if `table_bytes` has too few bytes at this `offset` then pad with zero. - // Parse these bytes into a `TxTableEntry` and return. - // Returns raw bytes, no checking for large values - fn get_table_len(&self, offset: usize) -> TxTableEntry; - - fn byte_len() -> usize { - size_of::() - } -} - -impl Table for NameSpaceTable { - // TODO (Philippe) avoid code duplication with similar function in TxTable? 
- fn get_table_len(&self, offset: usize) -> TxTableEntry { - let end = std::cmp::min( - offset.saturating_add(TxTableEntry::byte_len()), - self.bytes.len(), - ); - let start = std::cmp::min(offset, end); - let tx_table_len_range = start..end; - let mut entry_bytes = [0u8; TxTableEntry::byte_len()]; - entry_bytes[..tx_table_len_range.len()].copy_from_slice(&self.bytes[tx_table_len_range]); - TxTableEntry::from_bytes_array(entry_bytes) - } -} - -#[derive(Clone, Debug, Derivative, Deserialize, Eq, Serialize, Default)] -#[derivative(Hash, PartialEq)] -pub struct NameSpaceTable { - #[serde(with = "base64_bytes")] - pub(super) bytes: Vec, - #[serde(skip)] - pub(super) phantom: PhantomData, -} - -impl EncodeBytes for NameSpaceTable { - fn encode(&self) -> std::sync::Arc<[u8]> { - Arc::from(self.bytes.clone()) - } -} - -impl NameSpaceTable { - pub fn from_bytes(bytes: impl Into>) -> Self { - Self { - bytes: bytes.into(), - phantom: Default::default(), - } - } - - pub fn from_namespace_offsets( - namespace_offsets: Vec<(NamespaceId, usize)>, - ) -> Result { - let mut ns_table = NameSpaceTable::from_bytes( - TxTableEntry::try_from(namespace_offsets.len()) - .ok() - .context(BlockBuildingSnafu)? - .to_bytes(), - ); - for (id, offset) in namespace_offsets { - ns_table.add_new_entry_ns_id(id)?; - ns_table.add_new_entry_payload_len(offset)?; - } - Ok(ns_table) - } - - pub fn get_bytes(&self) -> &[u8] { - &self.bytes - } - - /// Find `ns_id` and return its index into this namespace table. - /// - /// TODO return Result or Option? Want to avoid catch-all Error type :( - pub fn lookup(&self, ns_id: NamespaceId) -> Option { - (0..self.len()).find(|&ns_index| ns_id == self.get_table_entry(ns_index).0) - } - - fn add_new_entry_ns_id(&mut self, id: NamespaceId) -> Result<(), Error> { - self.bytes.extend( - TxTableEntry::try_from(id) - .ok() - .context(BlockBuildingSnafu)? 
- .to_bytes(), - ); - Ok(()) - } - - fn add_new_entry_payload_len(&mut self, l: usize) -> Result<(), Error> { - self.bytes.extend( - TxTableEntry::try_from(l) - .ok() - .context(BlockBuildingSnafu)? - .to_bytes(), - ); - Ok(()) - } - - // Parse the table length from the beginning of the namespace table. - // Returned value is guaranteed to be no larger than the number of ns table entries that could possibly fit into `ns_table_bytes`. - pub fn len(&self) -> usize { - let left = self.get_table_len(0).try_into().unwrap_or(0); - let right = self.bytes.len().saturating_sub(TxTableEntry::byte_len()) - / (2 * TxTableEntry::byte_len()); - std::cmp::min(left, right) - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - // returns (ns_id, ns_offset) - // ns_offset is not checked, could be anything - pub fn get_table_entry(&self, ns_index: usize) -> (NamespaceId, usize) { - // get the range for ns_id bytes in ns table - // ensure `range` is within range for ns_table_bytes - let start = std::cmp::min( - ns_index - .saturating_mul(2) - .saturating_add(1) - .saturating_mul(TxTableEntry::byte_len()), - self.bytes.len(), - ); - let end = std::cmp::min( - start.saturating_add(TxTableEntry::byte_len()), - self.bytes.len(), - ); - let ns_id_range = start..end; - - // parse ns_id bytes from ns table - // any failure -> NamespaceId::default() - let mut ns_id_bytes = [0u8; TxTableEntry::byte_len()]; - ns_id_bytes[..ns_id_range.len()].copy_from_slice(&self.bytes[ns_id_range]); - let ns_id = NamespaceId::try_from( - TxTableEntry::from_bytes(&ns_id_bytes).unwrap_or(TxTableEntry::zero()), - ) - .unwrap_or_default(); - - // get the range for ns_offset bytes in ns table - // ensure `range` is within range for ns_table_bytes - // TODO refactor range checking code - let start = end; - let end = std::cmp::min( - start.saturating_add(TxTableEntry::byte_len()), - self.bytes.len(), - ); - let ns_offset_range = start..end; - - // parse ns_offset bytes from ns table - // any failure -> 0 
offset (?) - // TODO refactor parsing code? - let mut ns_offset_bytes = [0u8; TxTableEntry::byte_len()]; - ns_offset_bytes[..ns_offset_range.len()].copy_from_slice(&self.bytes[ns_offset_range]); - let ns_offset = usize::try_from( - TxTableEntry::from_bytes(&ns_offset_bytes).unwrap_or(TxTableEntry::zero()), - ) - .unwrap_or(0); - - (ns_id, ns_offset) - } - - /// Like `tx_payload_range` except for namespaces. - /// Returns the ns id and the ns byte range in the block payload bytes. - /// - /// Ensures that the returned range is valid: `start <= end <= block_payload_byte_len`. - pub fn get_payload_range( - &self, - ns_index: usize, - block_payload_byte_len: usize, - ) -> (NamespaceId, Range) { - let (ns_id, offset) = self.get_table_entry(ns_index); - let end = std::cmp::min(offset, block_payload_byte_len); - let start = if ns_index == 0 { - 0 - } else { - std::cmp::min(self.get_table_entry(ns_index - 1).1, end) - }; - (ns_id, start..end) - } -} - -pub struct TxTable {} -impl TxTable { - // Parse `TxTableEntry::byte_len()`` bytes from `raw_payload`` starting at `offset` into a `TxTableEntry` - fn get_len(raw_payload: &[u8], offset: usize) -> TxTableEntry { - let end = std::cmp::min( - offset.saturating_add(TxTableEntry::byte_len()), - raw_payload.len(), - ); - let start = std::cmp::min(offset, end); - let tx_table_len_range = start..end; - let mut entry_bytes = [0u8; TxTableEntry::byte_len()]; - entry_bytes[..tx_table_len_range.len()].copy_from_slice(&raw_payload[tx_table_len_range]); - TxTableEntry::from_bytes_array(entry_bytes) - } - - // Parse the table length from the beginning of the tx table inside `ns_bytes`. - // - // Returned value is guaranteed to be no larger than the number of tx table entries that could possibly fit into `ns_bytes`. 
- // TODO tidy this is a sloppy wrapper for get_len - pub(crate) fn get_tx_table_len(ns_bytes: &[u8]) -> usize { - std::cmp::min( - Self::get_len(ns_bytes, 0).try_into().unwrap_or(0), - (ns_bytes.len().saturating_sub(TxTableEntry::byte_len())) / TxTableEntry::byte_len(), - ) - } - - // returns tx_offset - // if tx_index would reach beyond ns_bytes then return 0. - // tx_offset is not checked, could be anything - fn get_table_entry(ns_bytes: &[u8], tx_index: usize) -> usize { - // get the range for tx_offset bytes in tx table - let tx_offset_range = { - let start = std::cmp::min( - tx_index - .saturating_add(1) - .saturating_mul(TxTableEntry::byte_len()), - ns_bytes.len(), - ); - let end = std::cmp::min( - start.saturating_add(TxTableEntry::byte_len()), - ns_bytes.len(), - ); - start..end - }; - - // parse tx_offset bytes from tx table - let mut tx_offset_bytes = [0u8; TxTableEntry::byte_len()]; - tx_offset_bytes[..tx_offset_range.len()].copy_from_slice(&ns_bytes[tx_offset_range]); - usize::try_from(TxTableEntry::from_bytes(&tx_offset_bytes).unwrap_or(TxTableEntry::zero())) - .unwrap_or(0) - } - - /// Ensures that the returned range is valid: `start <= end <= ns_bytes`. 
- pub fn get_payload_range(ns_bytes: &[u8], tx_idx: usize, tx_len: usize) -> Range { - let tx_payloads_offset = tx_len - .saturating_add(1) - .saturating_mul(TxTableEntry::byte_len()); - - let end = std::cmp::min( - TxTable::get_table_entry(ns_bytes, tx_idx).saturating_add(tx_payloads_offset), - ns_bytes.len(), - ); - - let start = if tx_idx == 0 { - tx_payloads_offset - } else { - std::cmp::min( - TxTable::get_table_entry(ns_bytes, tx_idx - 1).saturating_add(tx_payloads_offset), - end, - ) - }; - - start..end - } -} -#[cfg(test)] -pub(super) mod test { - use crate::block::entry::TxTableEntry; - use crate::block::payload::TableWordTraits; - use crate::block::tables::{Table, TxTable}; - use std::marker::PhantomData; - - pub struct TxTableTest { - raw_payload: Vec, - phantom: PhantomData, - } - - impl Table for TxTableTest { - fn get_table_len(&self, offset: usize) -> TxTableEntry { - TxTable::get_len(&self.raw_payload, offset) - } - } - impl TxTableTest { - #[cfg(test)] - pub fn from_entries(entries: &[usize]) -> Self { - let tx_table_byte_len = entries.len() + 1; - let mut tx_table = Vec::with_capacity(tx_table_byte_len); - tx_table.extend(TxTableEntry::from_usize(entries.len()).to_bytes()); - for entry in entries { - tx_table.extend(TxTableEntry::from_usize(*entry).to_bytes()); - } - - Self { - raw_payload: tx_table, - phantom: Default::default(), - } - } - - pub fn get_payload(&self) -> Vec { - self.raw_payload.clone() - } - } -} diff --git a/sequencer/src/block/test.rs b/sequencer/src/block/test.rs new file mode 100644 index 000000000..02adf867d --- /dev/null +++ b/sequencer/src/block/test.rs @@ -0,0 +1,193 @@ +use crate::{ + block::{ + full_payload::{NsProof, Payload}, + namespace_payload::TxProof, + }, + chain_config::BlockSize, + ChainConfig, NamespaceId, NodeState, Transaction, +}; +use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; +use hotshot::traits::BlockPayload; +use hotshot_query_service::availability::QueryablePayload; +use 
hotshot_types::{traits::EncodeBytes, vid::vid_scheme}; +use jf_vid::VidScheme; +use rand::RngCore; +use std::collections::HashMap; + +#[async_std::test] +async fn basic_correctness() { + // play with this + let test_cases = vec![ + vec![vec![5, 8, 8], vec![7, 9, 11], vec![10, 5, 8]], // 3 non-empty namespaces + ]; + + setup_logging(); + setup_backtrace(); + let mut rng = jf_utils::test_rng(); + let valid_tests = ValidTest::many_from_tx_lengths(test_cases, &mut rng); + + let mut vid = vid_scheme(10); + + for mut test in valid_tests { + let mut all_txs = test.all_txs(); + tracing::info!("test case {} nss {} txs", test.nss.len(), all_txs.len()); + + let block = + Payload::from_transactions(test.all_txs(), &Default::default(), &Default::default()) + .await + .unwrap() + .0; + tracing::info!( + "ns_table {:?}, payload {:?}", + block.ns_table().encode(), + block.encode() + ); + + // test correct number of nss, txs + assert_eq!(block.ns_table().iter().count(), test.nss.len()); + assert_eq!(block.len(block.ns_table()), all_txs.len()); + assert_eq!(block.iter(block.ns_table()).count(), all_txs.len()); + + tracing::info!("all_txs {:?}", all_txs); + + let (vid_commit, vid_common) = { + let disperse_data = vid.disperse(block.encode()).unwrap(); + (disperse_data.commit, disperse_data.common) + }; + + // test iterate over all txs + for tx_index in block.iter(block.ns_table()) { + let tx = block.transaction(&tx_index).unwrap(); + tracing::info!("tx {:?}, {:?}", tx_index, tx); + + // warning: linear search for a tx + let test_tx = all_txs.remove(all_txs.iter().position(|t| t == &tx).unwrap()); + assert_eq!(tx, test_tx); + + let tx_proof2 = { + let (tx2, tx_proof) = TxProof::new(&tx_index, &block, &vid_common).unwrap(); + assert_eq!(tx, tx2); + tx_proof + }; + assert!(tx_proof2 + .verify(block.ns_table(), &tx, &vid_commit, &vid_common) + .unwrap()); + } + assert!( + all_txs.is_empty(), + "not all test txs consumed by block.iter" + ); + + // test iterate over all namespaces + for 
ns_index in block.ns_table().iter() { + let ns_id = block.ns_table().read_ns_id(&ns_index).unwrap(); + tracing::info!("test ns_id {ns_id}"); + + let txs = test + .nss + .remove(&ns_id) + .expect("block ns_id missing from test"); + + let ns_proof = NsProof::new(&block, &ns_index, &vid_common) + .expect("namespace_with_proof should succeed"); + + let (ns_proof_txs, ns_proof_ns_id) = ns_proof + .verify(block.ns_table(), &vid_commit, &vid_common) + .unwrap_or_else(|| panic!("namespace {} proof verification failure", ns_id)); + + assert_eq!(ns_proof_ns_id, ns_id); + assert_eq!(ns_proof_txs, txs); + } + assert!( + test.nss.is_empty(), + "not all test namespaces consumed by ns_iter" + ); + } +} + +#[async_std::test] +async fn enforce_max_block_size() { + setup_logging(); + setup_backtrace(); + let test_case = vec![vec![5, 8, 8], vec![7, 9, 11], vec![10, 5, 8]]; + let payload_byte_len_expected: usize = 119; + let ns_table_byte_len_expected: usize = 28; + + let mut rng = jf_utils::test_rng(); + let test = ValidTest::from_tx_lengths(test_case, &mut rng); + let tx_count_expected = test.all_txs().len(); + + // test: actual block size equals max block size + let instance_state = NodeState::default().with_chain_config(ChainConfig { + max_block_size: BlockSize::from( + (payload_byte_len_expected + ns_table_byte_len_expected) as u64, + ), + ..Default::default() + }); + + let block = Payload::from_transactions(test.all_txs(), &Default::default(), &instance_state) + .await + .unwrap() + .0; + assert_eq!(block.encode().len(), payload_byte_len_expected); + assert_eq!(block.ns_table().encode().len(), ns_table_byte_len_expected); + assert_eq!(block.len(block.ns_table()), tx_count_expected); + + // test: actual block size exceeds max block size, so 1 tx is dropped + // WARN log should be emitted + let instance_state = NodeState::default().with_chain_config(ChainConfig { + max_block_size: BlockSize::from( + (payload_byte_len_expected + ns_table_byte_len_expected - 1) as u64, + ), + 
..Default::default() + }); + let block = Payload::from_transactions(test.all_txs(), &Default::default(), &instance_state) + .await + .unwrap() + .0; + assert!(block.encode().len() < payload_byte_len_expected); + assert_eq!(block.ns_table().encode().len(), ns_table_byte_len_expected); + assert_eq!(block.len(block.ns_table()), tx_count_expected - 1); +} + +// TODO lots of infra here that could be reused in other tests. +struct ValidTest { + nss: HashMap>, +} + +impl ValidTest { + fn from_tx_lengths(tx_lengths: Vec>, rng: &mut R) -> Self + where + R: RngCore, + { + let mut nss = HashMap::new(); + for tx_lens in tx_lengths.into_iter() { + let ns_id = NamespaceId::random(rng); + for len in tx_lens { + let ns: &mut Vec<_> = nss.entry(ns_id).or_default(); + ns.push(Transaction::new(ns_id, random_bytes(len, rng))); + } + } + Self { nss } + } + + fn many_from_tx_lengths(test_cases: Vec>>, rng: &mut R) -> Vec + where + R: RngCore, + { + test_cases + .into_iter() + .map(|t| Self::from_tx_lengths(t, rng)) + .collect() + } + + fn all_txs(&self) -> Vec { + self.nss.iter().flat_map(|(_, txs)| txs.clone()).collect() + } +} + +fn random_bytes(len: usize, rng: &mut R) -> Vec { + let mut result = vec![0; len]; + rng.fill_bytes(&mut result); + result +} diff --git a/sequencer/src/block/tx_iterator.rs b/sequencer/src/block/tx_iterator.rs deleted file mode 100644 index 5d8d9d82a..000000000 --- a/sequencer/src/block/tx_iterator.rs +++ /dev/null @@ -1,66 +0,0 @@ -use std::ops::Range; - -use crate::block::payload::{Payload, TableWordTraits}; -use crate::block::tables::{NameSpaceTable, TxTable}; -use serde::{Deserialize, Serialize}; - -/// TODO do we really need `PartialOrd`, `Ord` here? 
-/// Could the `Ord` bound be removed from `QueryablePayload::TransactionIndex`?` -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct TxIndex { - pub ns_idx: usize, - pub tx_idx: usize, -} - -/// TODO Decompose this iterator into -/// - a tx iterator `T` over only 1 namespace -/// - a namespace-tx iterator that reuses `T` over all namespaces -pub struct TxIterator<'a, TableWord: TableWordTraits> { - ns_idx: usize, // simpler than using `Peekable` - ns_iter: Range, - tx_iter: Range, - block_payload: &'a Payload, - ns_table: &'a NameSpaceTable, -} - -impl<'a, TableWord: TableWordTraits> TxIterator<'a, TableWord> { - pub(super) fn new( - ns_table: &'a NameSpaceTable, - block_payload: &'a Payload, - ) -> Self { - Self { - ns_idx: 0, // arbitrary value, changed in first call to next() - ns_iter: 0..ns_table.len(), - tx_iter: 0..0, // empty range - block_payload, - ns_table, - } - } -} - -impl<'a, TableWord: TableWordTraits> Iterator for TxIterator<'a, TableWord> { - type Item = TxIndex; - - fn next(&mut self) -> Option { - if let Some(tx_idx) = self.tx_iter.next() { - // we still have txs left to consume in current ns - Some(TxIndex { - ns_idx: self.ns_idx, - tx_idx, - }) - } else { - // move to the next name space - let payload = &self.block_payload.raw_payload; - for ns_idx in self.ns_iter.by_ref() { - self.ns_idx = ns_idx; - let ns_range = self.ns_table.get_payload_range(ns_idx, payload.len()).1; - let tx_table_len = TxTable::get_tx_table_len(&payload[ns_range]); - self.tx_iter = 0..tx_table_len; - if let Some(tx_idx) = self.tx_iter.next() { - return Some(TxIndex { ns_idx, tx_idx }); - } - } - None // all namespaces consumed - } - } -} diff --git a/sequencer/src/block/uint_bytes.rs b/sequencer/src/block/uint_bytes.rs new file mode 100644 index 000000000..2296a8182 --- /dev/null +++ b/sequencer/src/block/uint_bytes.rs @@ -0,0 +1,231 @@ +//! Serialization (and deserialization) of primitive unsigned integer types to +//! 
(and from) an arbitrary fixed-length byte array. +//! +use paste::paste; +use std::mem::size_of; + +// Use an ugly macro because it's difficult or impossible to be generic over +// primitive types such as `usize`, `u64`. +macro_rules! uint_bytes_impl { + ($T:ty) => { + paste! { + /// Serialize `n` into `BYTE_LEN` bytes in little-endian form, padding with + /// 0 as needed. + /// + /// # Panics + /// If `n` cannot fit into `BYTE_LEN` bytes. + pub fn [<$T _to_bytes>](n: $T) -> [u8; BYTE_LEN] { + if size_of::<$T>() > BYTE_LEN { + assert!( + [<$T _fits>](n, BYTE_LEN), + "n {n} cannot fit into {BYTE_LEN} bytes" + ); + n.to_le_bytes()[..BYTE_LEN].try_into().unwrap() // panic is impossible + } else { + // convert `n` to bytes and pad with 0 + let mut result = [0; BYTE_LEN]; + result[..size_of::<$T>()].copy_from_slice(&n.to_le_bytes()[..]); + result + } + } + + /// Deserialize `bytes` in little-endian form into a `$T`, padding with 0 + /// as needed. + /// + /// # Panics + /// If `bytes.len()` is too large to fit into a `$T`. + pub fn [<$T _from_bytes>](bytes: &[u8]) -> $T { + assert!(bytes.len() <= BYTE_LEN, "bytes len {} exceeds BYTE_LEN {BYTE_LEN}", bytes.len()); + assert!( + BYTE_LEN <= size_of::<$T>(), + "BYTE_LEN {BYTE_LEN} cannot fit into {}", + stringify!($T) + ); + let mut [<$T _bytes>] = [0; size_of::<$T>()]; + [<$T _bytes>][..bytes.len()].copy_from_slice(bytes); + $T::from_le_bytes([<$T _bytes>]) + } + + /// Return the largest `$T` value that can fit into `byte_len` bytes. + pub const fn [<$T _max_from_byte_len>](byte_len: usize) -> $T { + if byte_len >= size_of::<$T>() { + $T::MAX + } else { + // overflow cannot occur because `byte_len < size_of::<$T>()` + (1 << (byte_len * 8)) - 1 + } + } + + /// Can `n` fit into `byte_len` bytes? 
+ pub const fn [<$T _fits>](n: $T, byte_len: usize) -> bool { + n <= [<$T _max_from_byte_len>](byte_len) + } + } + }; + } + +uint_bytes_impl!(usize); +uint_bytes_impl!(u32); + +/// Impl [`serde`] for type `$T` with methods named `$to_bytes`, `$from_bytes` +/// of the form +/// ```ignore +/// $T::$to_bytes(&self) -> $B +/// $T::$from_bytes(bytes: &[u8]) -> Self +/// ``` +/// where `$B` is any type that impls [`serde::Deserialize`] and has a method +/// `as_ref` of the form +/// ```ignore +/// $B::as_ref(&self) -> &[u8] +/// ``` +/// Typical examples of `$B` include array `[u8; N]`, slice `&[u8]`, or +/// `Vec`. +macro_rules! bytes_serde_impl { + ($T:ty, $to_bytes:ident, $B:ty, $from_bytes:ident) => { + impl Serialize for $T { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + self.$to_bytes().serialize(serializer) + } + } + + impl<'de> Deserialize<'de> for $T { + fn deserialize(deserializer: D) -> Result<$T, D::Error> + where + D: Deserializer<'de>, + { + <$B as Deserialize>::deserialize(deserializer) + .map(|bytes| <$T>::$from_bytes(bytes.as_ref())) + } + } + }; +} + +pub(super) use bytes_serde_impl; + +#[cfg(test)] +mod test { + use fluent_asserter::prelude::*; + use paste::paste; + use std::mem::size_of; + + macro_rules! uint_bytes_test_impl { + ($T:ty) => { + paste! 
{ + use super::{[<$T _max_from_byte_len>], [<$T _to_bytes>], [<$T _from_bytes>]}; + + #[test] + fn [<$T _max_from_byte_len_correctness>]() { + // test byte lengths 0 to size_of::<$T>() + let mut bytes = [0; size_of::<$T>()]; + assert_eq!([<$T _max_from_byte_len>](0), 0); + for i in 0..bytes.len() { + bytes[i] = 0xff; + assert_eq!([<$T _max_from_byte_len>](i + 1).to_le_bytes(), bytes); + } + + // test byte lengths size_of::<$T>() to twice that length + for i in size_of::<$T>()..2 * size_of::<$T>() { + assert_eq!([<$T _max_from_byte_len>](i + 1), $T::MAX); + } + } + + #[test] + fn [<$T _to_bytes_correctness>]() { + // byte length 0 + assert_eq!([<$T _to_bytes>](0), [0; 0]); + assert_that_code!(|| [<$T _to_bytes>]::<0>(1)).panics(); + + // byte length 1 + assert_eq!([<$T _to_bytes>](0), [0; 1]); + assert_eq!([<$T _to_bytes>](255), [255; 1]); + assert_that_code!(|| [<$T _to_bytes>]::<1>(256)).panics(); + + // byte length 2 + assert_eq!([<$T _to_bytes>](0), [0; 2]); + assert_eq!([<$T _to_bytes>](65535), [255; 2]); + assert_that_code!(|| [<$T _to_bytes>]::<2>(65536)).panics(); + + // byte length size_of::<$T>() + assert_eq!([<$T _to_bytes>](0), [0; size_of::<$T>()]); + assert_eq!([<$T _to_bytes>]($T::MAX), [255; size_of::<$T>()]); + + // byte length size_of::<$T>() + 1 + assert_eq!([<$T _to_bytes>](0), [0; size_of::<$T>() + 1]); + let [<$T _max_bytes>] = { + let mut bytes = [255; size_of::<$T>() + 1]; + bytes[bytes.len() - 1] = 0; + bytes + }; + assert_eq!([<$T _to_bytes>]($T::MAX), [<$T _max_bytes>]); + } + + #[test] + fn [<$T _from_bytes_correctness>]() { + let bytes = [255; size_of::<$T>() + 1]; + + // It would be nice to iterate through + // `0..size_of::<$T>()` but this is not possible with + // const generics for `[<$T _from_bytes>]`. We could + // use `seq-macro` crate but it requires an integer + // literal whereas our range includes `size_of::<$T>()`. + // + // Instead we just hard code four constants: + // `0`, `1`, `size_of::<$T>() - 1`, `size_of::<$T>()`. 
+ assert_eq!( + [<$T _from_bytes>]::<0>(&bytes[..0]), + [<$T _max_from_byte_len>](0) + ); + assert_eq!( + [<$T _from_bytes>]::<1>(&bytes[..1]), + [<$T _max_from_byte_len>](1) + ); + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>() - 1}>(&bytes[..size_of::<$T>() - 1]), + [<$T _max_from_byte_len>](size_of::<$T>() - 1) + ); + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..size_of::<$T>()]), + [<$T _max_from_byte_len>](size_of::<$T>()) + ); + + assert_that_code!(|| [<$T _from_bytes>]::<{size_of::<$T>() + 1}>(&bytes[..])).panics(); + } + + #[test] + fn [<$T _from_bytes_allows_smaller_byte_lens>]() { + // This test same as `xxx_from_bytes_correctness` except + // we set the const param `BYTE_LEN` to + // `size_of::<$T>()` in all cases. Why? To ensure that + // `xxx_from_bytes` allows its arg to have length + // smaller than `BYTE_LEN`. + let bytes = [255; size_of::<$T>() + 1]; + + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..0]), + [<$T _max_from_byte_len>](0) + ); + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..1]), + [<$T _max_from_byte_len>](1) + ); + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..size_of::<$T>() - 1]), + [<$T _max_from_byte_len>](size_of::<$T>() - 1) + ); + assert_eq!( + [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..size_of::<$T>()]), + [<$T _max_from_byte_len>](size_of::<$T>()) + ); + + assert_that_code!(|| [<$T _from_bytes>]::<{size_of::<$T>()}>(&bytes[..])).panics(); + } + } + }; + } + + uint_bytes_test_impl!(usize); + uint_bytes_test_impl!(u32); +} diff --git a/sequencer/src/header.rs b/sequencer/src/header.rs index 9f6e68745..5a1e6a038 100644 --- a/sequencer/src/header.rs +++ b/sequencer/src/header.rs @@ -1,5 +1,5 @@ use crate::{ - block::{entry::TxTableEntryWord, tables::NameSpaceTable, NsTable}, + block::NsTable, chain_config::ResolvableChainConfig, eth_signature_key::BuilderSignature, l1_client::L1Snapshot, @@ -78,7 +78,7 @@ pub struct Header { pub payload_commitment: 
VidCommitment, pub builder_commitment: BuilderCommitment, - pub ns_table: NameSpaceTable, + pub ns_table: NsTable, /// Root Commitment of Block Merkle Tree pub block_merkle_tree_root: BlockMerkleCommitment, /// Root Commitment of `FeeMerkleTree` @@ -131,18 +131,6 @@ impl Committable for Header { } } -impl Committable for NameSpaceTable { - fn commit(&self) -> Commitment { - RawCommitmentBuilder::new(&Self::tag()) - .var_size_bytes(self.get_bytes()) - .finalize() - } - - fn tag() -> String { - "NSTABLE".into() - } -} - impl Header { #[allow(clippy::too_many_arguments)] fn from_info( @@ -465,14 +453,10 @@ impl ExplorerHeader for Header { } fn namespace_ids(&self) -> Vec { - let l = self.ns_table.len(); - let mut result: Vec = Vec::with_capacity(l); - for i in 0..l { - let (ns_id, _) = self.ns_table.get_table_entry(i); - result.push(ns_id); - } - - result + self.ns_table + .iter() + .map(|i| self.ns_table.read_ns_id_unchecked(&i)) + .collect() } } @@ -761,7 +745,7 @@ mod test_headers { pub validated_state: ValidatedState, pub leaf: Leaf, pub header: Header, - pub ns_table: NameSpaceTable, + pub ns_table: NsTable, } impl GenesisForTest { @@ -770,7 +754,7 @@ mod test_headers { let validated_state = ValidatedState::genesis(&instance_state).0; let leaf = Leaf::genesis(&validated_state, &instance_state).await; let header = leaf.block_header().clone(); - let ns_table = leaf.block_payload().unwrap().get_ns_table().clone(); + let ns_table = leaf.block_payload().unwrap().ns_table().clone(); Self { instance_state, validated_state, diff --git a/sequencer/src/lib.rs b/sequencer/src/lib.rs index 750bf8515..9906dc03e 100644 --- a/sequencer/src/lib.rs +++ b/sequencer/src/lib.rs @@ -16,7 +16,6 @@ mod reference_tests; use anyhow::Context; use async_std::sync::RwLock; use async_trait::async_trait; -use block::entry::TxTableEntryWord; use catchup::{StateCatchup, StatePeers}; use context::SequencerContext; use ethers::types::U256; @@ -83,7 +82,7 @@ use std::time::Duration; #[cfg(feature 
= "libp2p")] use hotshot::traits::implementations::{CombinedNetworks, Libp2pNetwork}; -pub use block::payload::Payload; +pub use block::Payload; pub use chain_config::ChainConfig; pub use genesis::Genesis; pub use header::Header; @@ -245,7 +244,7 @@ impl InstanceState for NodeState {} impl NodeType for SeqTypes { type Time = ViewNumber; type BlockHeader = Header; - type BlockPayload = Payload; + type BlockPayload = Payload; type SignatureKey = PubKey; type Transaction = Transaction; type InstanceState = NodeState; diff --git a/sequencer/src/reference_tests.rs b/sequencer/src/reference_tests.rs index c5647cabe..892011677 100644 --- a/sequencer/src/reference_tests.rs +++ b/sequencer/src/reference_tests.rs @@ -22,8 +22,8 @@ //! test. use crate::{ - block::tables::NameSpaceTable, state::FeeInfo, ChainConfig, FeeAccount, Header, L1BlockInfo, - Payload, Transaction, TxTableEntryWord, ValidatedState, + block::NsTable, state::FeeInfo, ChainConfig, FeeAccount, Header, L1BlockInfo, Payload, + Transaction, ValidatedState, }; use async_compatibility_layer::logging::{setup_backtrace, setup_logging}; use committable::Committable; @@ -41,7 +41,7 @@ use vbs::BinarySerializer; type Serializer = vbs::Serializer; -async fn reference_payload() -> Payload { +async fn reference_payload() -> Payload { Payload::from_transactions( vec![reference_transaction()], &Default::default(), @@ -52,8 +52,8 @@ async fn reference_payload() -> Payload { .0 } -async fn reference_ns_table() -> NameSpaceTable { - reference_payload().await.get_ns_table().clone() +async fn reference_ns_table() -> NsTable { + reference_payload().await.ns_table().clone() } const REFERENCE_NS_TABLE_COMMITMENT: &str = "NSTABLE~jqBfNUW1lSijWpKpPNc9yxQs28YckB80gFJWnHIwOQMC"; @@ -96,13 +96,13 @@ async fn reference_header() -> Header { let builder_key = FeeAccount::generated_from_seed_indexed(Default::default(), 0).1; let fee_info = reference_fee_info(); let payload = reference_payload().await; - let ns_table = 
payload.get_ns_table(); + let ns_table = payload.ns_table().clone(); let payload_commitment = vid_commitment(&payload.encode(), 1); - let builder_commitment = payload.builder_commitment(ns_table); + let builder_commitment = payload.builder_commitment(&ns_table); let builder_signature = FeeAccount::sign_fee( &builder_key, fee_info.amount().as_u64().unwrap(), - ns_table, + &ns_table, &payload_commitment, ) .unwrap(); @@ -116,7 +116,7 @@ async fn reference_header() -> Header { l1_finalized: Some(reference_l1_block()), payload_commitment, builder_commitment, - ns_table: ns_table.clone(), + ns_table, block_merkle_tree_root: state.block_merkle_tree.commitment(), fee_merkle_tree_root: state.fee_merkle_tree.commitment(), fee_info, diff --git a/sequencer/src/transaction.rs b/sequencer/src/transaction.rs index d1015a097..e4ac5f4ae 100644 --- a/sequencer/src/transaction.rs +++ b/sequencer/src/transaction.rs @@ -1,23 +1,40 @@ use ark_serialize::{CanonicalDeserialize, CanonicalSerialize}; use committable::{Commitment, Committable}; -use derive_more::{Display, From, Into}; +use derive_more::Display; use hotshot_query_service::explorer::ExplorerTransaction; use hotshot_types::traits::block_contents::Transaction as HotShotTransaction; use jf_merkle_tree::namespaced_merkle_tree::{Namespace, Namespaced}; -use serde::{Deserialize, Serialize}; +use serde::{de::Error, Deserialize, Deserializer, Serialize}; +/// TODO [`NamespaceId`] has historical debt to repay: +/// - +/// - It must fit into 4 bytes in order to maintain serialization compatibility +/// for [`crate::block::NsTable`], yet it currently occupies 8 bytes in order +/// to maintain [`serde`] serialization compatibility with [`Transaction`]. +/// - Thus, it's a newtype for `u64` that impls `From` and has a manual +/// impl for [`serde::Deserialize`] that deserializes a `u64` but then returns +/// an error if the value cannot fit into a `u32`. This is ugly. 
In the future +/// we need to break serialization compatibility so that `NsTable` and +/// `Transaction` can agree on the byte length for `NamespaceId` and all this +/// cruft should be removed. +/// - We should move [`NamespaceId`] to `crate::block::full_payload::ns_table` +/// module because that's where it's byte length is dictated, so that's where +/// it makes the most sense to put serialization. See +/// +/// - It impls [`Namespace`] from [`jf_merkle_tree`], but this seems unneeded +/// now that we're not using jellyfish's namespace merkle tree. +/// - We derive lots of things that perhaps we shouldn't: `Into`, `From`, +/// `Default`, `Ord`. Perhaps derivations for [`NamespaceId`] should match +/// that of [`Transaction`]. #[derive( Clone, Copy, Serialize, - Deserialize, Debug, Display, PartialEq, Eq, Hash, - Into, - From, Default, CanonicalDeserialize, CanonicalSerialize, @@ -27,13 +44,51 @@ use serde::{Deserialize, Serialize}; #[display(fmt = "{_0}")] pub struct NamespaceId(u64); +impl From for NamespaceId { + fn from(value: u32) -> Self { + Self(value as u64) + } +} + +impl From for u32 { + fn from(value: NamespaceId) -> Self { + value.0 as Self + } +} + +impl<'de> Deserialize<'de> for NamespaceId { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + use serde::de::Unexpected; + + let ns_id = ::deserialize(deserializer)?; + if ns_id > u32::MAX as u64 { + Err(D::Error::invalid_value( + Unexpected::Unsigned(ns_id), + &"exceeds u32::MAX", + )) + } else { + Ok(NamespaceId(ns_id)) + } + } +} + +impl NamespaceId { + #[cfg(any(test, feature = "testing"))] + pub fn random(rng: &mut dyn rand::RngCore) -> Self { + Self(rng.next_u32() as u64) + } +} + impl Namespace for NamespaceId { fn max() -> Self { - Self(u64::max_value()) + Self(u32::max_value() as u64) } fn min() -> Self { - Self(u64::min_value()) + Self(u32::min_value() as u64) } } @@ -67,12 +122,16 @@ impl Transaction { &self.payload } + pub fn into_payload(self) -> Vec { + 
self.payload + } + #[cfg(any(test, feature = "testing"))] pub fn random(rng: &mut dyn rand::RngCore) -> Self { use rand::Rng; let len = rng.gen_range(0..100); Self::new( - NamespaceId(rng.gen_range(0..10)), + NamespaceId::random(rng), (0..len).map(|_| rand::random::()).collect::>(), ) } @@ -88,6 +147,7 @@ impl Transaction { impl HotShotTransaction for Transaction {} +// TODO seems that `Namespaced` is unneeded. impl Namespaced for Transaction { type Namespace = NamespaceId; fn get_namespace(&self) -> Self::Namespace { @@ -98,7 +158,7 @@ impl Namespaced for Transaction { impl Committable for Transaction { fn commit(&self) -> Commitment { committable::RawCommitmentBuilder::new("Transaction") - .u64_field("namespace", self.namespace.into()) + .u64_field("namespace", self.namespace.0) .var_size_bytes(&self.payload) .finalize() }