fix: fix some issues in zero downtime upgrade #2846

Merged · 4 commits · Jun 15, 2020
Changes from 3 commits
7 changes: 2 additions & 5 deletions chain/client/src/client.rs
@@ -316,11 +316,8 @@ impl Client {
             &next_block_proposer,
         )?;
         if validator_stake.public_key != validator_signer.public_key() {
-            return Err(Error::BlockProducer(format!(
-                "Validator key doesn't match. Expected {} Actual {}",
-                validator_stake.public_key,
-                validator_signer.public_key()
-            )));
+            debug!(target: "client", "Local validator key {} does not match expected validator key {}, skipping block production", validator_signer.public_key(), validator_stake.public_key);
+            return Ok(None);
         }

         debug!(target: "client", "{:?} Producing block at height {}, parent {} @ {}", validator_signer.validator_id(), next_height, prev.height(), format_hash(head.last_block_hash));
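The effect of this hunk: a validator-key mismatch is no longer a hard error from produce_block; the node logs, skips its slot, and keeps running, which is what allows a key switch without downtime. A minimal sketch of the new control flow, using simplified stand-in types rather than the real nearcore signatures:

```rust
// Minimal sketch with assumed stand-in types; the real produce_block takes a
// block height and returns Result<Option<Block>, Error>.
#[derive(Debug)]
#[allow(dead_code)]
enum Error {
    BlockProducer(String),
}

fn produce_block(local_key: &str, expected_key: &str) -> Result<Option<String>, Error> {
    if local_key != expected_key {
        // Old behavior: Err(Error::BlockProducer(...)), treated as a failure.
        // New behavior: log and report "no block from this node".
        eprintln!("local key {local_key} does not match expected key {expected_key}, skipping");
        return Ok(None);
    }
    Ok(Some(format!("block signed with {local_key}")))
}

fn main() {
    assert!(matches!(produce_block("new-key", "old-key"), Ok(None)));
    assert!(matches!(produce_block("key", "key"), Ok(Some(_))));
}
```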
23 changes: 11 additions & 12 deletions chain/client/src/client_actor.rs
@@ -590,11 +590,10 @@ impl ClientActor {
         }

         // First check that we currently have an AccountId
-        if self.client.validator_signer.is_none() {
-            // There is no account id associated with this client
-            return;
-        }
-        let validator_signer = self.client.validator_signer.as_ref().unwrap();
+        let validator_signer = match self.client.validator_signer.as_ref() {
+            None => return,
+            Some(signer) => signer,
+        };

         let now = Instant::now();
         // Check that we haven't announced it too recently
@@ -615,14 +614,14 @@
             .get_next_epoch_id_from_prev_block(&prev_block_hash));

         // Check client is part of the future validators
-        if let Ok(validators) = self
-            .client
-            .runtime_adapter
-            .get_epoch_block_producers_ordered(&next_epoch_id, &prev_block_hash)
+        if let Ok((validator_stake, is_slashed)) =
+            self.client.runtime_adapter.get_validator_by_account_id(
+                &next_epoch_id,
+                &prev_block_hash,
+                validator_signer.validator_id(),
+            )
         {
-            if validators.iter().any(|(validator_stake, _)| {
-                &validator_stake.account_id == validator_signer.validator_id()
-            }) {
+            if !is_slashed && validator_stake.public_key == validator_signer.public_key() {
                 debug!(target: "client", "Sending announce account for {}", validator_signer.validator_id());
                 self.last_validator_announce_time = Some(now);
                 let signature = self.sign_announce_account(&next_epoch_id).unwrap();
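Taken together, the two hunks above change the announce-account check from "is my account id anywhere in the ordered block-producer list" to a direct per-account lookup that also verifies the on-chain public key and slashing status, so a node still holding the old key stays quiet after the switch. A hedged sketch of the resulting predicate, with hypothetical stand-ins for the runtime adapter:

```rust
use std::collections::HashMap;

// Hypothetical stand-ins; nearcore's ValidatorStake and runtime adapter
// carry much more state than this.
struct ValidatorStake {
    public_key: String,
}

struct RuntimeAdapter {
    // account id -> (stake info, is_slashed)
    validators: HashMap<String, (ValidatorStake, bool)>,
}

impl RuntimeAdapter {
    // Direct lookup, analogous to get_validator_by_account_id, instead of
    // iterating every ordered block producer in the epoch.
    fn get_validator_by_account_id(&self, account_id: &str) -> Option<&(ValidatorStake, bool)> {
        self.validators.get(account_id)
    }
}

// Announce only if this account validates next epoch, is not slashed, and the
// key registered on chain is the key this node actually signs with.
fn should_announce(adapter: &RuntimeAdapter, account_id: &str, local_key: &str) -> bool {
    match adapter.get_validator_by_account_id(account_id) {
        Some((stake, is_slashed)) => !is_slashed && stake.public_key == local_key,
        None => false,
    }
}

fn main() {
    let mut validators = HashMap::new();
    validators.insert("node1".to_string(), (ValidatorStake { public_key: "new-key".into() }, false));
    let adapter = RuntimeAdapter { validators };
    assert!(should_announce(&adapter, "node1", "new-key"));
    assert!(!should_announce(&adapter, "node1", "old-key")); // stale local key stays quiet
    assert!(!should_announce(&adapter, "node2", "any-key")); // not a validator at all
}
```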
5 changes: 1 addition & 4 deletions chain/client/tests/process_blocks.rs
@@ -1207,10 +1207,7 @@ fn test_incorrect_validator_key_produce_block() {
     )
     .unwrap();
     let res = client.produce_block(1);
-    match res {
-        Err(near_client::Error::BlockProducer(_)) => {}
-        _ => panic!("unexpected result: {:?}", res),
-    }
+    matches!(res, Ok(None));
}

fn test_block_merkle_proof_with_len(n: NumBlocks) {
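An aside on this test change: matches! on its own evaluates to a bool, which this line immediately discards, so the check as written can never fail. A sketch of the stricter form, assuming the intent is to fail the test on anything but Ok(None):

```rust
// Sketch only: wrapping the pattern check in assert! makes the test actually
// fail on a regression, with the unexpected value in the panic message.
fn check(res: Result<Option<u32>, String>) {
    assert!(matches!(res, Ok(None)), "unexpected result: {:?}", res);
}

fn main() {
    check(Ok(None));              // passes
    // check(Err("boom".into())); // would panic: unexpected result: Err("boom")
}
```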
12 changes: 10 additions & 2 deletions neard/src/config.rs
@@ -171,6 +171,10 @@ fn default_peer_recent_time_window() -> Duration {
 fn default_safe_set_size() -> u32 {
     20
 }
+/// Time to persist account ids in the router without removing them, in seconds.
+fn default_ttl_account_id_router() -> Duration {
+    Duration::from_secs(TTL_ACCOUNT_ID_ROUTER)
+}

 #[derive(Serialize, Deserialize, Clone, Debug)]
 pub struct Network {
@@ -212,6 +216,9 @@ pub struct Network {
     /// It can be IP:Port or IP (to blacklist all connections coming from this address).
     #[serde(default)]
     pub blacklist: Vec<String>,
+    /// Time to persist account ids in the router without removing them, in seconds.
+    #[serde(default = "default_ttl_account_id_router")]
+    pub ttl_account_id_router: Duration,
 }

 impl Default for Network {
@@ -231,6 +238,7 @@
             skip_sync_wait: false,
             ban_window: Duration::from_secs(3 * 60 * 60),
             blacklist: vec![],
+            ttl_account_id_router: default_ttl_account_id_router(),
         }
     }
 }
@@ -527,7 +535,7 @@ impl NearConfig {
             epoch_length: genesis.config.epoch_length,
             num_block_producer_seats: genesis.config.num_block_producer_seats,
             announce_account_horizon: genesis.config.epoch_length / 2,
-            ttl_account_id_router: Duration::from_secs(TTL_ACCOUNT_ID_ROUTER),
+            ttl_account_id_router: config.network.ttl_account_id_router,
             // TODO(1047): this should be adjusted depending on the speed of sync of state.
             block_fetch_horizon: config.consensus.block_fetch_horizon,
             state_fetch_horizon: config.consensus.state_fetch_horizon,
@@ -571,7 +579,7 @@
             max_send_peers: 512,
             peer_expiration_duration: Duration::from_secs(7 * 24 * 60 * 60),
             peer_stats_period: Duration::from_secs(5),
-            ttl_account_id_router: Duration::from_secs(TTL_ACCOUNT_ID_ROUTER),
+            ttl_account_id_router: config.network.ttl_account_id_router,
             routed_message_ttl: ROUTED_MESSAGE_TTL,
             max_routes_to_store: MAX_ROUTES_TO_STORE,
             highest_peer_horizon: HIGHEST_PEER_HORIZON,
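The neard/src/config.rs changes follow the file's existing serde pattern: a free function supplies the default, so configs written before this field existed still parse, while the two NearConfig construction sites now read the value from config.json instead of the hard-coded constant. A self-contained sketch of the pattern (the TTL constant's value here is an assumption; the real one lives elsewhere in nearcore):

```rust
use serde::{Deserialize, Serialize};
use std::time::Duration;

// Assumed value for illustration; nearcore defines TTL_ACCOUNT_ID_ROUTER elsewhere.
const TTL_ACCOUNT_ID_ROUTER: u64 = 60 * 60;

fn default_ttl_account_id_router() -> Duration {
    Duration::from_secs(TTL_ACCOUNT_ID_ROUTER)
}

#[derive(Serialize, Deserialize, Clone, Debug)]
struct Network {
    // A missing key falls back to the default, so pre-existing config.json
    // files keep working after the upgrade.
    #[serde(default = "default_ttl_account_id_router")]
    ttl_account_id_router: Duration,
}

fn main() {
    let old_config: Network = serde_json::from_str("{}").unwrap();
    assert_eq!(old_config.ttl_account_id_router, Duration::from_secs(TTL_ACCOUNT_ID_ROUTER));

    // serde encodes std::time::Duration as {"secs": ..., "nanos": ...},
    // which is exactly the shape the new python test writes.
    let json = r#"{"ttl_account_id_router": {"secs": 0, "nanos": 100000000}}"#;
    let tuned: Network = serde_json::from_str(json).unwrap();
    assert_eq!(tuned.ttl_account_id_router, Duration::from_millis(100));
}
```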
3 changes: 2 additions & 1 deletion nightly/nightly.txt
@@ -34,8 +34,9 @@ pytest --timeout=120 sanity/garbage_collection1.py
 pytest --timeout=300 sanity/gc_after_sync.py
 pytest --timeout=300 sanity/gc_sync_after_sync.py
 pytest --timeout=300 sanity/gc_sync_after_sync.py swap_nodes
-pytest --timeout=300 tests/sanity/large_messages.py
+pytest --timeout=300 sanity/large_messages.py
 pytest --timeout=300 sanity/upgradable.py
+pytest --timeout=240 sanity/validator_switch_key.py

# python tests for smart contract deployment and invocation
pytest contracts/deploy_call_smart_contract.py
17 changes: 16 additions & 1 deletion pytest/lib/cluster.py
@@ -71,6 +71,9 @@ def from_json_file(self, jf):
         with open(jf) as f:
             return Key.from_json(json.loads(f.read()))

+    def to_json(self):
+        return {'account_id': self.account_id, 'public_key': self.pk, 'secret_key': self.sk}
+

 class BaseNode(object):

@@ -330,6 +333,11 @@ def kill(self):
     def reset_data(self):
         shutil.rmtree(os.path.join(self.node_dir, "data"))

+    def reset_validator_key(self, new_key):
+        self.validator_key = new_key
+        with open(os.path.join(self.node_dir, "validator_key.json"), 'w+') as f:
+            json.dump(new_key.to_json(), f)
+
     def cleanup(self):
         if self.cleaned:
             return
@@ -467,6 +475,13 @@ def resume_network(self):
         rc.run(f'gcloud compute firewall-rules delete {self.machine.name}-stop',
                input='yes\n')

+    def reset_validator_key(self, new_key):
+        self.validator_key = new_key
+        with open(os.path.join(self.node_dir, "validator_key.json"), 'w+') as f:
+            json.dump(new_key.to_json(), f)
+        self.machine.upload(os.path.join(self.node_dir, 'validator_key.json'),
+                            f'/home/{self.machine.username}/.near/')
+

 def spin_up_node(config,
                  near_root,
@@ -598,7 +613,7 @@ def apply_config_changes(node_dir, client_config_change):
         assert k in config_json
         if isinstance(v, dict):
             for key, value in v.items():
-                assert key in config_json[k]
+                assert key in config_json[k], key
                 config_json[k][key] = value
         else:
             config_json[k] = v
49 changes: 49 additions & 0 deletions pytest/tests/sanity/validator_switch_key.py
@@ -0,0 +1,49 @@
# Starts two validating nodes and one non-validating node.
# Sets a new validator key on the non-validating node, with the same
# account id as one of the validating nodes, stakes that account with
# the new key, and makes sure that the network doesn't stall even after
# the non-validating node becomes a validator.

import sys, time, base58

sys.path.append('lib')

from cluster import start_cluster, Key
from transaction import sign_staking_tx

EPOCH_LENGTH = 10
TIMEOUT = 60

client_config = {"network": {"ttl_account_id_router": {"secs": 0, "nanos": 100000000}}}
nodes = start_cluster(2, 1, 1, None, [["epoch_length", EPOCH_LENGTH], ["block_producer_kickout_threshold", 10],
["chunk_producer_kickout_threshold", 10]], {1: client_config, 2: client_config})
time.sleep(2)

nodes[2].kill()

validator_key = Key(nodes[1].validator_key.account_id, nodes[2].signer_key.pk, nodes[2].signer_key.sk)
nodes[2].reset_validator_key(validator_key)
nodes[2].reset_data()
nodes[2].start(nodes[0].node_key.pk, nodes[0].addr())
time.sleep(3)

status = nodes[0].get_status()
block_hash = status['sync_info']['latest_block_hash']
block_height = status['sync_info']['latest_block_height']

tx = sign_staking_tx(nodes[1].signer_key, validator_key, 50000000000000000000000000000000, 1,
base58.b58decode(block_hash.encode('utf8')))
res = nodes[0].send_tx_and_wait(tx, timeout=15)
assert 'error' not in res

start_time = time.time()
while True:
if time.time() - start_time > TIMEOUT:
        assert False, "Validators are stuck"
status1 = nodes[1].get_status()
node1_height = status1['sync_info']['latest_block_height']
status2 = nodes[2].get_status()
node2_height = status2['sync_info']['latest_block_height']
if node1_height > block_height + 4 * EPOCH_LENGTH and node2_height > block_height + 4 * EPOCH_LENGTH:
break
time.sleep(2)
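Why the test shrinks ttl_account_id_router to 0.1s: the routing layer caches which peer answers for each account id, and after the key switch the cached entry for the old key stays stale until its TTL expires, so a tiny TTL lets the network re-learn the route within the test's timeout (the {"secs": 0, "nanos": 100000000} dict is serde's JSON encoding of a Rust Duration). A toy illustration of such a TTL-expiring entry; nearcore's routing table is, of course, more involved:

```rust
use std::thread;
use std::time::{Duration, Instant};

// Toy TTL entry, illustrative only.
struct RoutedAccount {
    peer_id: String,
    inserted: Instant,
}

impl RoutedAccount {
    fn is_expired(&self, ttl: Duration) -> bool {
        self.inserted.elapsed() >= ttl
    }
}

fn main() {
    let entry = RoutedAccount { peer_id: "node2-old-key".into(), inserted: Instant::now() };
    thread::sleep(Duration::from_millis(120));
    // With ttl_account_id_router = 100ms the stale route is already gone,
    // so the re-announced account (with the new key) can take its place.
    assert!(entry.is_expired(Duration::from_millis(100)));
    println!("route to {} expired", entry.peer_id);
}
```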