From 2c4b61e777c20379d91396340aea4f0b8d9984c1 Mon Sep 17 00:00:00 2001 From: hana <81144685+2501babe@users.noreply.github.com> Date: Fri, 19 Jul 2024 08:11:48 -0700 Subject: [PATCH] solana-program: improve VoteState::deserialize_into() (#2146) * make VoteState::deserialize_into() much faster * relax some constraints to bring behavior in line with bincode::deserialize --- sdk/program/src/vote/state/mod.rs | 123 ++++++++++++------ .../src/vote/state/vote_state_0_23_5.rs | 45 +++++++ .../src/vote/state/vote_state_1_14_11.rs | 41 ++++++ .../src/vote/state/vote_state_deserialize.rs | 49 ++----- 4 files changed, 178 insertions(+), 80 deletions(-) diff --git a/sdk/program/src/vote/state/mod.rs b/sdk/program/src/vote/state/mod.rs index c4a98e8a9819e2..a40a7ba3476a26 100644 --- a/sdk/program/src/vote/state/mod.rs +++ b/sdk/program/src/vote/state/mod.rs @@ -18,7 +18,7 @@ use { sysvar::clock::Clock, vote::{authorized_voters::AuthorizedVoters, error::VoteError}, }, - bincode::{serialize_into, serialized_size, ErrorKind}, + bincode::{serialize_into, ErrorKind}, serde_derive::{Deserialize, Serialize}, std::{collections::VecDeque, fmt::Debug, io::Cursor}, }; @@ -323,6 +323,7 @@ const MAX_ITEMS: usize = 32; #[cfg_attr(feature = "frozen-abi", derive(AbiExample))] #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct CircBuf { buf: [I; MAX_ITEMS], /// next pointer @@ -368,23 +369,6 @@ impl CircBuf { } } -#[cfg(test)] -impl<'a, I: Default + Copy> Arbitrary<'a> for CircBuf -where - I: Arbitrary<'a>, -{ - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let mut circbuf = Self::default(); - - let len = u.arbitrary_len::()?; - for _ in 0..len { - circbuf.append(I::arbitrary(u)?); - } - - Ok(circbuf) - } -} - #[cfg_attr( feature = "frozen-abi", frozen_abi(digest = "EeenjJaSrm9hRM39gK6raRNtzG61hnk7GciUCJJRDUSQ"), @@ -475,8 +459,11 @@ impl VoteState { 3762 // see test_vote_state_size_of. } - // we retain bincode deserialize for not(target_os = "solana") - // because the hand-written parser does not support V0_23_5 + // NOTE we retain `bincode::deserialize` for `not(target_os = "solana")` pending testing on mainnet-beta + // once that testing is done, `VoteState::deserialize_into` may be used for all targets + // conversion of V0_23_5 to current must be handled specially, however + // because it inserts a null voter into `authorized_voters` + // which `VoteStateVersions::is_uninitialized` erroneously reports as initialized pub fn deserialize(input: &[u8]) -> Result { #[cfg(not(target_os = "solana"))] { @@ -492,26 +479,32 @@ impl VoteState { } } - /// Deserializes the input buffer into the provided `VoteState` + /// Deserializes the input `VoteStateVersions` buffer directly into a provided `VoteState` struct /// - /// This function exists to deserialize `VoteState` in a BPF context without going above - /// the compute limit, and must be kept up to date with `bincode::deserialize`. + /// In a BPF context, V0_23_5 is not supported, but in non-BPF, all versions are supported for + /// compatibility with `bincode::deserialize` pub fn deserialize_into( input: &[u8], vote_state: &mut VoteState, ) -> Result<(), InstructionError> { - let minimum_size = - serialized_size(vote_state).map_err(|_| InstructionError::InvalidAccountData)?; - if (input.len() as u64) < minimum_size { - return Err(InstructionError::InvalidAccountData); - } - let mut cursor = Cursor::new(input); let variant = read_u32(&mut cursor)?; match variant { - // V0_23_5. not supported; these should not exist on mainnet - 0 => Err(InstructionError::InvalidAccountData), + // V0_23_5. not supported for bpf targets; these should not exist on mainnet + // supported for non-bpf targets for backwards compatibility + 0 => { + #[cfg(not(target_os = "solana"))] + { + *vote_state = bincode::deserialize::(input) + .map(|versioned| versioned.convert_to_current()) + .map_err(|_| InstructionError::InvalidAccountData)?; + + Ok(()) + } + #[cfg(target_os = "solana")] + Err(InstructionError::InvalidAccountData) + } // V1_14_11. substantially different layout and data from V0_23_5 1 => deserialize_vote_state_into(&mut cursor, vote_state, false), // Current. the only difference from V1_14_11 is the addition of a slot-latency to each vote @@ -519,10 +512,6 @@ impl VoteState { _ => Err(InstructionError::InvalidAccountData), }?; - if cursor.position() > input.len() as u64 { - return Err(InstructionError::InvalidAccountData); - } - Ok(()) } @@ -1089,7 +1078,7 @@ pub mod serde_tower_sync { #[cfg(test)] mod tests { - use {super::*, itertools::Itertools, rand::Rng}; + use {super::*, bincode::serialized_size, itertools::Itertools, rand::Rng}; #[test] fn test_vote_serialize() { @@ -1147,16 +1136,70 @@ mod tests { assert_eq!(e, InstructionError::InvalidAccountData); // variant - let serialized_len_x4 = serialized_size(&test_vote_state).unwrap() * 4; + let serialized_len_x4 = serialized_size(&VoteState::default()).unwrap() * 4; let mut rng = rand::thread_rng(); for _ in 0..1000 { let raw_data_length = rng.gen_range(1..serialized_len_x4); - let raw_data: Vec = (0..raw_data_length).map(|_| rng.gen::()).collect(); + let mut raw_data: Vec = (0..raw_data_length).map(|_| rng.gen::()).collect(); + + // pure random data will ~never have a valid enum tag, so lets help it out + if raw_data_length >= 4 && rng.gen::() { + let tag = rng.gen::() % 3; + raw_data[0] = tag; + raw_data[1] = 0; + raw_data[2] = 0; + raw_data[3] = 0; + } // it is extremely improbable, though theoretically possible, for random bytes to be syntactically valid - // so we only check that the deserialize function does not panic + // so we only check that the parser does not panic and that it succeeds or fails exactly in line with bincode + let mut test_vote_state = VoteState::default(); + let test_res = VoteState::deserialize_into(&raw_data, &mut test_vote_state); + let bincode_res = bincode::deserialize::(&raw_data) + .map(|versioned| versioned.convert_to_current()); + + if test_res.is_err() { + assert!(bincode_res.is_err()); + } else { + assert_eq!(test_vote_state, bincode_res.unwrap()); + } + } + } + + #[test] + fn test_vote_deserialize_into_ill_sized() { + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..1000 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let original_vote_state_versions = + VoteStateVersions::arbitrary(&mut unstructured).unwrap(); + let original_buf = bincode::serialize(&original_vote_state_versions).unwrap(); + + let mut truncated_buf = original_buf.clone(); + let mut expanded_buf = original_buf.clone(); + + truncated_buf.resize(original_buf.len() - 8, 0); + expanded_buf.resize(original_buf.len() + 8, 0); + + // truncated fails let mut test_vote_state = VoteState::default(); - let _ = VoteState::deserialize_into(&raw_data, &mut test_vote_state); + let test_res = VoteState::deserialize_into(&truncated_buf, &mut test_vote_state); + let bincode_res = bincode::deserialize::(&truncated_buf) + .map(|versioned| versioned.convert_to_current()); + + assert!(test_res.is_err()); + assert!(bincode_res.is_err()); + + // expanded succeeds + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&expanded_buf, &mut test_vote_state).unwrap(); + let bincode_res = bincode::deserialize::(&expanded_buf) + .map(|versioned| versioned.convert_to_current()); + + assert_eq!(test_vote_state, bincode_res.unwrap()); } } diff --git a/sdk/program/src/vote/state/vote_state_0_23_5.rs b/sdk/program/src/vote/state/vote_state_0_23_5.rs index ae3b9207fe494e..eff88adca6dd75 100644 --- a/sdk/program/src/vote/state/vote_state_0_23_5.rs +++ b/sdk/program/src/vote/state/vote_state_0_23_5.rs @@ -1,9 +1,12 @@ #![allow(clippy::arithmetic_side_effects)] use super::*; +#[cfg(test)] +use arbitrary::{Arbitrary, Unstructured}; const MAX_ITEMS: usize = 32; #[derive(Debug, Default, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct VoteState0_23_5 { /// the node that votes in this account pub node_pubkey: Pubkey, @@ -35,6 +38,7 @@ pub struct VoteState0_23_5 { } #[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[cfg_attr(test, derive(Arbitrary))] pub struct CircBuf { pub buf: [I; MAX_ITEMS], /// next pointer @@ -59,3 +63,44 @@ impl CircBuf { self.buf[self.idx] = item; } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vote_deserialize_0_23_5() { + // base case + let target_vote_state = VoteState0_23_5::default(); + let target_vote_state_versions = VoteStateVersions::V0_23_5(Box::new(target_vote_state)); + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!( + target_vote_state_versions.convert_to_current(), + test_vote_state + ); + + // variant + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..100 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let arbitrary_vote_state = VoteState0_23_5::arbitrary(&mut unstructured).unwrap(); + let target_vote_state_versions = + VoteStateVersions::V0_23_5(Box::new(arbitrary_vote_state)); + + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + let target_vote_state = target_vote_state_versions.convert_to_current(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!(target_vote_state, test_vote_state); + } + } +} diff --git a/sdk/program/src/vote/state/vote_state_1_14_11.rs b/sdk/program/src/vote/state/vote_state_1_14_11.rs index 9a2365674171c2..645e73dc353d3e 100644 --- a/sdk/program/src/vote/state/vote_state_1_14_11.rs +++ b/sdk/program/src/vote/state/vote_state_1_14_11.rs @@ -82,3 +82,44 @@ impl From for VoteState1_14_11 { } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_vote_deserialize_1_14_11() { + // base case + let target_vote_state = VoteState1_14_11::default(); + let target_vote_state_versions = VoteStateVersions::V1_14_11(Box::new(target_vote_state)); + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!( + target_vote_state_versions.convert_to_current(), + test_vote_state + ); + + // variant + // provide 4x the minimum struct size in bytes to ensure we typically touch every field + let struct_bytes_x4 = std::mem::size_of::() * 4; + for _ in 0..1000 { + let raw_data: Vec = (0..struct_bytes_x4).map(|_| rand::random::()).collect(); + let mut unstructured = Unstructured::new(&raw_data); + + let arbitrary_vote_state = VoteState1_14_11::arbitrary(&mut unstructured).unwrap(); + let target_vote_state_versions = + VoteStateVersions::V1_14_11(Box::new(arbitrary_vote_state)); + + let vote_state_buf = bincode::serialize(&target_vote_state_versions).unwrap(); + let target_vote_state = target_vote_state_versions.convert_to_current(); + + let mut test_vote_state = VoteState::default(); + VoteState::deserialize_into(&vote_state_buf, &mut test_vote_state).unwrap(); + + assert_eq!(target_vote_state, test_vote_state); + } + } +} diff --git a/sdk/program/src/vote/state/vote_state_deserialize.rs b/sdk/program/src/vote/state/vote_state_deserialize.rs index b457395ccbd38a..69fdf0636d9b57 100644 --- a/sdk/program/src/vote/state/vote_state_deserialize.rs +++ b/sdk/program/src/vote/state/vote_state_deserialize.rs @@ -1,11 +1,9 @@ use { crate::{ instruction::InstructionError, - pubkey::Pubkey, serialize_utils::cursor::*, vote::state::{BlockTimestamp, LandedVote, Lockout, VoteState, MAX_ITEMS}, }, - bincode::serialized_size, std::io::Cursor, }; @@ -67,46 +65,17 @@ fn read_prior_voters_into>( cursor: &mut Cursor, vote_state: &mut VoteState, ) -> Result<(), InstructionError> { - // record our position at the start of the struct - let prior_voters_position = cursor.position(); - - // `serialized_size()` must be used over `mem::size_of()` because of alignment - let is_empty_position = serialized_size(&vote_state.prior_voters) - .ok() - .and_then(|v| v.checked_add(prior_voters_position)) - .and_then(|v| v.checked_sub(1)) - .ok_or(InstructionError::InvalidAccountData)?; - - // move to the end, to check if we need to parse the data - cursor.set_position(is_empty_position); - - // if empty, we already read past the end of this struct and need to do no further work - // otherwise we go back to the start and proceed to decode the data - let is_empty = read_bool(cursor)?; - if !is_empty { - cursor.set_position(prior_voters_position); - - let mut encountered_null_voter = false; - for i in 0..MAX_ITEMS { - let prior_voter = read_pubkey(cursor)?; - let from_epoch = read_u64(cursor)?; - let until_epoch = read_u64(cursor)?; - let item = (prior_voter, from_epoch, until_epoch); - - if item == (Pubkey::default(), 0, 0) { - encountered_null_voter = true; - } else if encountered_null_voter { - // `prior_voters` should never be sparse - return Err(InstructionError::InvalidAccountData); - } else { - vote_state.prior_voters.buf[i] = item; - } - } - - vote_state.prior_voters.idx = read_u64(cursor)? as usize; - vote_state.prior_voters.is_empty = read_bool(cursor)?; + for i in 0..MAX_ITEMS { + let prior_voter = read_pubkey(cursor)?; + let from_epoch = read_u64(cursor)?; + let until_epoch = read_u64(cursor)?; + + vote_state.prior_voters.buf[i] = (prior_voter, from_epoch, until_epoch); } + vote_state.prior_voters.idx = read_u64(cursor)? as usize; + vote_state.prior_voters.is_empty = read_bool(cursor)?; + Ok(()) }