Skip to content

Commit

Permalink
Merge branch 'master' into deprec-shard-v0
Browse files Browse the repository at this point in the history
  • Loading branch information
eagr authored Oct 30, 2024
2 parents 0bf8d43 + 8e30ccd commit 1c502af
Show file tree
Hide file tree
Showing 54 changed files with 1,885 additions and 688 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,23 @@
* **Archival nodes only:** Stop saving partial chunks to `PartialChunks` column in the Cold DB. Instead, archival nodes will reconstruct partial chunks from the `Chunks` column.
* Decentralized state sync: Before, nodes that needed to download state (either because they're several epochs behind the chain or because they're going to start producing chunks for a shard they don't currently track) would download it from a centralized GCS bucket. Now, nodes will attempt to download pieces of the state from peers in the network, and only fall back to downloading from GCS if that fails. Please note that in order to participate in providing state parts to peers, your node may generate snapshots of the state. These snapshots should not take too much space, since they're hard links to database files that get cleaned up on every epoch.

### 2.2.1

This release patches a bug found in the 2.2.0 release.

### Non-protocol changes
There was a bug in the integration between Ethereum implicit accounts and the compiled contract cache which sometimes caused the nodes to get stuck. This would most often happen during state sync, but could also happen by itself. Please update your nodes to avoid getting stuck.

A node that hits this bug will print an error about an `InvalidStateRoot` in the logs and then it'll be unable to sync.
It's possible to recover a stalled node by clearing the compiled contract cache and rolling back one block:
1. Stop the neard process
2. Download the new version of neard
3. Clear the compiled contract cache: `rm -rf ~/.near/data/contracts`
4. Undo the last block: `./neard undo-block`
5. Start neard

After that the node should be able to recover and sync with the rest of the network.

### 2.2.0

### Protocol Changes
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions chain/chain-primitives/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use near_primitives::block::BlockValidityError;
use near_primitives::challenge::{ChunkProofs, ChunkState};
use near_primitives::errors::{ChunkAccessError, EpochError, StorageError};
use near_primitives::shard_layout::ShardLayoutError;
use near_primitives::sharding::{ChunkHash, ShardChunkHeader};
use near_primitives::sharding::{BadHeaderForProtocolVersionError, ChunkHash, ShardChunkHeader};
use near_primitives::types::{BlockHeight, EpochId, ShardId};
use near_time::Utc;
use std::io;
Expand Down Expand Up @@ -241,6 +241,9 @@ pub enum Error {
/// EpochSyncProof validation error.
#[error("EpochSyncProof Validation Error: {0}")]
InvalidEpochSyncProof(String),
/// Invalid chunk header version for protocol version
#[error(transparent)]
BadHeaderForProtocolVersion(#[from] BadHeaderForProtocolVersionError),
/// Anything else
#[error("Other Error: {0}")]
Other(String),
Expand Down Expand Up @@ -326,7 +329,8 @@ impl Error {
| Error::InvalidProtocolVersion
| Error::NotAValidator(_)
| Error::NotAChunkValidator
| Error::InvalidChallengeRoot => true,
| Error::InvalidChallengeRoot
| Error::BadHeaderForProtocolVersion(_) => true,
}
}

Expand Down Expand Up @@ -407,6 +411,7 @@ impl Error {
Error::NotAChunkValidator => "not_a_chunk_validator",
Error::InvalidChallengeRoot => "invalid_challenge_root",
Error::ReshardingError(_) => "resharding_error",
Error::BadHeaderForProtocolVersion(_) => "bad_header_for_protocol_version",
}
}
}
Expand Down
1 change: 1 addition & 0 deletions chain/chain/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ near-schema-checker-lib.workspace = true

[dev-dependencies]
near-primitives = { workspace = true, features = ["clock"] }
near-test-contracts.workspace = true
serde_json.workspace = true
primitive-types.workspace = true
insta.workspace = true
Expand Down
6 changes: 2 additions & 4 deletions chain/chain/src/chain_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,7 @@ impl<'a> ChainUpdate<'a> {
shard_id,
apply_result.proof,
apply_result.applied_receipts_hash,
apply_result.contract_accesses,
apply_result.contract_deploys,
apply_result.contract_updates,
);
}
}
Expand Down Expand Up @@ -188,8 +187,7 @@ impl<'a> ChainUpdate<'a> {
shard_uid.shard_id(),
apply_result.proof,
apply_result.applied_receipts_hash,
apply_result.contract_accesses,
apply_result.contract_deploys,
apply_result.contract_updates,
);
}
}
Expand Down
5 changes: 1 addition & 4 deletions chain/chain/src/resharding/manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,7 @@ impl ReshardingManager {
new_shard_uid.shard_id(),
Some(partial_storage),
CryptoHash::default(),
// No contract code is accessed during resharding.
// TODO(#11099): Confirm if sending no contracts is ok here.
Default::default(),
// No contract code is deployed during resharding.
// No contract code is accessed or deployed during resharding.
// TODO(#11099): Confirm if sending no contracts is ok here.
Default::default(),
);
Expand Down
89 changes: 44 additions & 45 deletions chain/chain/src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,58 +468,14 @@ impl NightshadeRuntime {
processed_yield_timeouts: apply_result.processed_yield_timeouts,
applied_receipts_hash: hash(&borsh::to_vec(receipts).unwrap()),
congestion_info: apply_result.congestion_info,
contract_accesses: apply_result.contract_accesses,
bandwidth_requests: apply_result.bandwidth_requests,
bandwidth_scheduler_state_hash: apply_result.bandwidth_scheduler_state_hash,
contract_deploys: apply_result.contract_deploys,
contract_updates: apply_result.contract_updates,
};

Ok(result)
}

/// Precompiles the given contracts, handing each compilation a handle to this runtime's
/// compiled contract cache.
///
/// The wasm configuration used for compilation comes from the runtime config selected by
/// the protocol version of `epoch_id`.
///
/// Compilations are spawned as tasks inside a `rayon::scope` and are throttled so that no
/// more than `max(rayon::current_num_threads() / 2, 1)` of them are in flight at once.
///
/// # Errors
///
/// Returns an error if the protocol version for `epoch_id` cannot be obtained from the
/// epoch manager. The result of each individual `precompile_contract` call is discarded
/// (`.ok()`), so a failed compilation does not make this function fail.
fn precompile_contracts(
&self,
epoch_id: &EpochId,
contract_codes: Vec<ContractCode>,
) -> Result<(), Error> {
let _span = tracing::debug_span!(
target: "runtime",
"precompile_contracts",
num_contracts = contract_codes.len())
.entered();
let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?;
let runtime_config = self.runtime_config_store.get_config(protocol_version);
let compiled_contract_cache: Option<Box<dyn ContractRuntimeCache>> =
Some(Box::new(self.compiled_contract_cache.handle()));
// Execute precompile_contract in parallel but prevent it from using more than half of all
// threads so that node will still function normally.
rayon::scope(|scope| {
// The channel holds "slots": it is pre-filled with `max_threads` unit values, a slot is
// taken before each task is spawned and put back by the task once it has finished.
let (slot_sender, slot_receiver) = std::sync::mpsc::channel();
// Use up-to half of the threads for the compilation.
let max_threads = std::cmp::max(rayon::current_num_threads() / 2, 1);
for _ in 0..max_threads {
slot_sender.send(()).expect("both sender and receiver are owned here");
}
for code in contract_codes {
// Blocks until a slot is available, i.e. until fewer than `max_threads` tasks are running.
slot_receiver.recv().expect("could not receive a slot to compile contract");
let contract_cache = compiled_contract_cache.as_deref();
let slot_sender = slot_sender.clone();
scope.spawn(move |_| {
// Compilation is best-effort: the result (success or error) is intentionally dropped.
precompile_contract(
&code,
Arc::clone(&runtime_config.wasm_config),
contract_cache,
)
.ok();
// If this fails, it just means there won't be any more attempts to recv the
// slots
let _ = slot_sender.send(());
});
}
});
Ok(())
}

fn get_gc_stop_height_impl(&self, block_hash: &CryptoHash) -> Result<BlockHeight, Error> {
let epoch_manager = self.epoch_manager.read();
// an epoch must have a first block.
Expand Down Expand Up @@ -1350,6 +1306,49 @@ impl RuntimeAdapter for NightshadeRuntime {
/// Returns a reference to the compiled contract cache held by this runtime, as a
/// `ContractRuntimeCache` trait object.
fn compiled_contract_cache(&self) -> &dyn ContractRuntimeCache {
self.compiled_contract_cache.as_ref()
}

/// Precompiles the given contracts, handing each compilation a handle to this runtime's
/// compiled contract cache.
///
/// The wasm configuration used for compilation comes from the runtime config selected by
/// the protocol version of `epoch_id`.
///
/// Compilations are spawned as tasks inside a `rayon::scope` and are throttled so that no
/// more than `max(rayon::current_num_threads() / 2, 1)` of them are in flight at once.
///
/// # Errors
///
/// Returns an error if the protocol version for `epoch_id` cannot be obtained from the
/// epoch manager. The result of each individual `precompile_contract` call is discarded
/// (`.ok()`), so a failed compilation does not make this function fail.
fn precompile_contracts(
&self,
epoch_id: &EpochId,
contract_codes: Vec<ContractCode>,
) -> Result<(), Error> {
let _span = tracing::debug_span!(
target: "runtime",
"precompile_contracts",
num_contracts = contract_codes.len())
.entered();
let protocol_version = self.epoch_manager.get_epoch_protocol_version(epoch_id)?;
let runtime_config = self.runtime_config_store.get_config(protocol_version);
let compiled_contract_cache: Option<Box<dyn ContractRuntimeCache>> =
Some(Box::new(self.compiled_contract_cache.handle()));
// Execute precompile_contract in parallel but prevent it from using more than half of all
// threads so that node will still function normally.
rayon::scope(|scope| {
// The channel holds "slots": it is pre-filled with `max_threads` unit values, a slot is
// taken before each task is spawned and put back by the task once it has finished.
let (slot_sender, slot_receiver) = std::sync::mpsc::channel();
// Use up-to half of the threads for the compilation.
let max_threads = std::cmp::max(rayon::current_num_threads() / 2, 1);
for _ in 0..max_threads {
slot_sender.send(()).expect("both sender and receiver are owned here");
}
for code in contract_codes {
// Blocks until a slot is available, i.e. until fewer than `max_threads` tasks are running.
slot_receiver.recv().expect("could not receive a slot to compile contract");
let contract_cache = compiled_contract_cache.as_deref();
let slot_sender = slot_sender.clone();
scope.spawn(move |_| {
// Compilation is best-effort: the result (success or error) is intentionally dropped.
precompile_contract(
&code,
Arc::clone(&runtime_config.wasm_config),
contract_cache,
)
.ok();
// If this fails, it just means there won't be any more attempts to recv the
// slots
let _ = slot_sender.send(());
});
}
});
Ok(())
}
}

/// Get the limit on the number of new receipts imposed by the local congestion control.
Expand Down
73 changes: 73 additions & 0 deletions chain/chain/src/runtime/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::collections::BTreeSet;

use crate::types::{ChainConfig, RuntimeStorageConfig};
use crate::{Chain, ChainGenesis, ChainStoreAccess, DoomslugThresholdMode};
use assert_matches::assert_matches;
use near_chain_configs::test_utils::{TESTING_INIT_BALANCE, TESTING_INIT_STAKE};
use near_epoch_manager::shard_tracker::ShardTracker;
use near_epoch_manager::{EpochManager, RngSeed};
Expand All @@ -16,8 +17,10 @@ use near_primitives::epoch_block_info::BlockInfo;
use near_primitives::receipt::{ActionReceipt, ReceiptV1};
use near_primitives::test_utils::create_test_signer;
use near_primitives::types::validator_stake::{ValidatorStake, ValidatorStakeIter};
use near_primitives::version::PROTOCOL_VERSION;
use near_store::flat::{FlatStateChanges, FlatStateDelta, FlatStateDeltaMetadata};
use near_store::genesis::initialize_genesis_state;
use near_vm_runner::{get_contract_cache_key, CompiledContract, CompiledContractInfo};
use num_rational::Ratio;
use rand::{rngs::StdRng, seq::SliceRandom, SeedableRng};

Expand Down Expand Up @@ -1828,6 +1831,76 @@ fn test_storage_proof_garbage() {
assert_eq!(total_size / 1000_000, garbage_size_mb);
}

/// Checks that `precompile_contracts` stores compiled code in the persistent contract cache
/// while leaving the in-memory cache unpopulated.
#[test]
fn test_precompile_contracts_updates_cache() {
    // Marker type used to probe the in-memory cache (AnyCache).
    struct FakeTestCompiledContractType;

    let genesis = Genesis::test(vec!["test0".parse().unwrap()], 1);
    let store = near_store::test_utils::create_test_store();
    let tempdir = tempfile::tempdir().unwrap();
    initialize_genesis_state(store.clone(), &genesis, Some(tempdir.path()));
    let epoch_manager = EpochManager::new_arc_handle(store.clone(), &genesis.config, None);

    let contract_cache = FilesystemContractRuntimeCache::new(tempdir.path(), None::<&str>)
        .expect("filesystem contract cache");
    let runtime = NightshadeRuntime::test_with_runtime_config_store(
        tempdir.path(),
        store,
        contract_cache.handle(),
        &genesis.config,
        epoch_manager,
        RuntimeConfigStore::new(None),
        StateSnapshotType::EveryEpoch,
    );

    // Three test contracts, wrapped as `ContractCode` without a precomputed hash.
    let contracts: Vec<ContractCode> = [
        near_test_contracts::sized_contract(100).to_vec(),
        near_test_contracts::rs_contract().to_vec(),
        near_test_contracts::trivial_contract().to_vec(),
    ]
    .into_iter()
    .map(|wasm| ContractCode::new(wasm, None))
    .collect();
    let code_hashes: Vec<CryptoHash> = contracts.iter().map(|code| *code.hash()).collect();

    // Cache key of a contract under the wasm config of the current protocol version.
    let cache_key_of = |code_hash: CryptoHash| {
        get_contract_cache_key(
            code_hash,
            &runtime.get_runtime_config(PROTOCOL_VERSION).unwrap().wasm_config,
        )
    };

    // Before precompilation none of the contracts may be present in the cache.
    for code_hash in &code_hashes {
        let cached = contract_cache.get(&cache_key_of(*code_hash)).unwrap();
        assert!(cached.is_none());
    }

    runtime.precompile_contracts(&EpochId::default(), contracts).unwrap();

    // After precompilation the persistent cache must hold compiled code for every contract.
    // The in-memory cache must still be empty for these keys, which is observed by
    // `try_lookup` falling back to the generator closure and yielding the fake marker type.
    for code_hash in code_hashes {
        let cache_key = cache_key_of(code_hash);

        let persisted = contract_cache.get(&cache_key).unwrap();
        assert_matches!(
            persisted,
            Some(CompiledContractInfo { compiled: CompiledContract::Code(_), .. })
        );

        let looked_up = contract_cache
            .memory_cache()
            .try_lookup(
                cache_key,
                || Ok::<_, ()>(Box::new(FakeTestCompiledContractType)),
                |value| {
                    assert!(value.is::<FakeTestCompiledContractType>());
                    "compiled code"
                },
            )
            .unwrap();
        assert_eq!(looked_up, "compiled code");
    }
}

fn stake(
nonce: Nonce,
signer: &Signer,
Expand Down
5 changes: 5 additions & 0 deletions chain/chain/src/stateless_validation/chunk_validation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ pub fn pre_validate_chunk_state_witness(
) -> Result<PreValidationOutput, Error> {
let store = chain.chain_store();

// Ensure that the chunk header version is supported in this protocol version
let protocol_version =
epoch_manager.get_epoch_info(&state_witness.epoch_id)?.protocol_version();
state_witness.chunk_header.validate_version(protocol_version)?;

// First, go back through the blockchain history to locate the last new chunk
// and last last new chunk for the shard.
let StateWitnessBlockRange {
Expand Down
8 changes: 4 additions & 4 deletions chain/chain/src/store/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::hash_map::Entry;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::collections::{HashMap, HashSet};
use std::io;

use borsh::{BorshDeserialize, BorshSerialize};
Expand All @@ -23,7 +23,7 @@ use near_primitives::sharding::{
use near_primitives::state_sync::{
ReceiptProofResponse, ShardStateSyncResponseHeader, StateHeaderKey, StateSyncDumpProgress,
};
use near_primitives::stateless_validation::contract_distribution::CodeHash;
use near_primitives::stateless_validation::contract_distribution::ContractUpdates;
use near_primitives::stateless_validation::stored_chunk_state_transition_data::{
StoredChunkStateTransitionData, StoredChunkStateTransitionDataV2,
};
Expand Down Expand Up @@ -2010,10 +2010,10 @@ impl<'a> ChainStoreUpdate<'a> {
shard_id: ShardId,
partial_storage: Option<PartialStorage>,
applied_receipts_hash: CryptoHash,
contract_accesses: BTreeSet<CodeHash>,
contract_deploys: BTreeSet<CodeHash>,
contract_updates: ContractUpdates,
) {
if let Some(partial_storage) = partial_storage {
let ContractUpdates { contract_accesses, contract_deploys } = contract_updates;
self.state_transition_data.insert(
(block_hash, shard_id),
StoredChunkStateTransitionData::V2(StoredChunkStateTransitionDataV2 {
Expand Down
14 changes: 11 additions & 3 deletions chain/chain/src/test_utils/kv_runtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ use near_store::{
set_genesis_hash, set_genesis_state_roots, DBCol, ShardTries, Store, StoreUpdate, Trie,
TrieChanges, WrappedTrieChanges,
};
use near_vm_runner::{ContractRuntimeCache, NoContractRuntimeCache};
use near_vm_runner::{ContractCode, ContractRuntimeCache, NoContractRuntimeCache};
use num_rational::Ratio;
use rand::Rng;
use std::cmp::Ordering;
Expand Down Expand Up @@ -1388,8 +1388,7 @@ impl RuntimeAdapter for KeyValueRuntime {
congestion_info: Self::get_congestion_info(PROTOCOL_VERSION),
bandwidth_requests: BandwidthRequests::default_for_protocol_version(PROTOCOL_VERSION),
bandwidth_scheduler_state_hash: CryptoHash::default(),
contract_accesses: Default::default(),
contract_deploys: Default::default(),
contract_updates: Default::default(),
})
}

Expand Down Expand Up @@ -1581,4 +1580,13 @@ impl RuntimeAdapter for KeyValueRuntime {
/// Returns a reference to this runtime's `contract_cache` field as a `ContractRuntimeCache`
/// trait object.
fn compiled_contract_cache(&self) -> &dyn ContractRuntimeCache {
&self.contract_cache
}

/// No-op implementation of contract precompilation.
///
/// Both `_epoch_id` and `_contract_codes` are ignored and `Ok(())` is always returned.
fn precompile_contracts(
&self,
_epoch_id: &EpochId,
_contract_codes: Vec<ContractCode>,
) -> Result<(), Error> {
// Note that KeyValueRuntime does not use compiled contract cache, so this is no-op.
Ok(())
}
}
Loading

0 comments on commit 1c502af

Please sign in to comment.