Skip to content

Commit

Permalink
Merge ddc0a60 into merged_master (Bitcoin PR #18617)
Browse files Browse the repository at this point in the history
For reasons I do not really grok, this PR changes the timing behavior of the
functional tests to reliably expose a deadlock in the claimpegin RPC that has
existed since the 0.17 rebase.

The mechanism is: in `claimpegin` in src/wallet/rpcwallet.cpp:5873, we call
`AcceptToMemoryPoolWorker`. This requires cs_main to be locked, which it is
not (contrast Core's `testmempoolaccept` RPC, which similarly calls
`AcceptToMemoryPoolWorker` from the RPC thread, and locks cs_main immediately
before).

We do *say* that it is locked, in the `LockAssertion` one the line above, but
this was added in ad3d496 during the 0.17
rebase (PR #620), apparently to shut up some linter on OSX, and as near as I
can tell it was never true.

Anyway, `AcceptToMemoryPoolWorker` calls through a couple layers which assume
cs_main is locked, to `AcceptSingleTransaction`, which locks m_pool.cs on line
src/validation.cpp:1101. It then calls `PreChecks`, which on line 784 calls
::ChainActive(), which _actually_ locks cs_main, though only briefly. This
line is the deadlock, because we locked m_pool.cs followed by cs_main...

...meanwhile, in src/net_processing.cpp, we lock cs_main at the top of the
`PeerLogicValidation::SendMessages` loop (src/net_processing.cpp:3628). In the
same loop, in the `feefilter` message processing, we call CTxMemPool::GetMinFee
(src/net_processing.cpp:4137), which locks m_pool.cs. Deadlock.

Anyway, that explains the change to locking behavior that I added to an
otherwise test-only PR.
  • Loading branch information
apoelstra committed Nov 26, 2020
2 parents 9ea76b5 + ddc0a60 commit c0fda0c
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 21 deletions.
2 changes: 1 addition & 1 deletion ci/test/00_setup_env_native_valgrind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export NO_DEPENDS=1
if [[ "${TRAVIS}" == "true" && "${TRAVIS_REPO_SLUG}" != "bitcoin/bitcoin" ]]; then
export TEST_RUNNER_EXTRA="wallet_disable" # Only run wallet_disable as a smoke test to not hit the 50 min travis time limit
else
export TEST_RUNNER_EXTRA="--exclude rpc_bind" # Excluded for now, see https://github.com/bitcoin/bitcoin/issues/17765#issuecomment-602068547
export TEST_RUNNER_EXTRA="--exclude rpc_bind --factor=2" # Excluded for now, see https://github.com/bitcoin/bitcoin/issues/17765#issuecomment-602068547
fi
export GOAL="install"
export BITCOIN_CONFIG="--enable-zmq --with-incompatible-bdb --with-gui=no CC=clang CXX=clang++" # TODO enable GUI
2 changes: 1 addition & 1 deletion src/wallet/rpcwallet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5868,7 +5868,7 @@ UniValue claimpegin(const JSONRPCRequest& request)

// To check if it's not double spending an existing pegin UTXO, we check mempool acceptance.
TxValidationState acceptState;
LockAssertion lock(::cs_main); //TODO(stevenroose) replace with locked_chain later
LOCK(::cs_main);
bool accepted = ::AcceptToMemoryPool(mempool, acceptState, MakeTransactionRef(mtx),
nullptr /* plTxnReplaced */, false /* bypass_limits */, pwallet->m_default_max_tx_fee, true /* test_accept */);
if (!accepted) {
Expand Down
30 changes: 17 additions & 13 deletions test/functional/test_framework/mininode.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,9 @@ def __init__(self):
def is_connected(self):
return self._transport is not None

def peer_connect(self, dstaddr, dstport, *, net):
def peer_connect(self, dstaddr, dstport, *, net, factor):
assert not self.is_connected
self.factor = factor
self.dstaddr = dstaddr
self.dstport = dstport
# The initial message to send after the connection was made:
Expand Down Expand Up @@ -368,9 +369,12 @@ def on_version(self, message):

# Connection helper methods

def wait_until(self, test_function, timeout):
wait_until(test_function, timeout=timeout, lock=mininode_lock, factor=self.factor)

def wait_for_disconnect(self, timeout=60):
test_function = lambda: not self.is_connected
wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

# Message receiving helper methods

Expand All @@ -381,14 +385,14 @@ def test_function():
return False
return self.last_message['tx'].tx.rehash() == txid

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_block(self, blockhash, timeout=60):
def test_function():
assert self.is_connected
return self.last_message.get("block") and self.last_message["block"].block.rehash() == blockhash

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_header(self, blockhash, timeout=60):
def test_function():
Expand All @@ -398,7 +402,7 @@ def test_function():
return False
return last_headers.headers[0].rehash() == int(blockhash, 16)

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_merkleblock(self, blockhash, timeout=60):
def test_function():
Expand All @@ -408,7 +412,7 @@ def test_function():
return False
return last_filtered_block.merkleblock.header.rehash() == int(blockhash, 16)

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_getdata(self, hash_list, timeout=60):
"""Waits for a getdata message.
Expand All @@ -422,7 +426,7 @@ def test_function():
return False
return [x.hash for x in last_data.inv] == hash_list

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_getheaders(self, timeout=60):
"""Waits for a getheaders message.
Expand All @@ -436,7 +440,7 @@ def test_function():
assert self.is_connected
return self.last_message.get("getheaders")

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_inv(self, expected_inv, timeout=60):
"""Waits for an INV message and checks that the first inv object in the message was as expected."""
Expand All @@ -449,13 +453,13 @@ def test_function():
self.last_message["inv"].inv[0].type == expected_inv[0].type and \
self.last_message["inv"].inv[0].hash == expected_inv[0].hash

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

def wait_for_verack(self, timeout=60):
def test_function():
return self.message_count["verack"]

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)

# Message sending helper functions

Expand All @@ -471,7 +475,7 @@ def test_function():
assert self.is_connected
return self.last_message.get("pong") and self.last_message["pong"].nonce == self.ping_counter

wait_until(test_function, timeout=timeout, lock=mininode_lock)
self.wait_until(test_function, timeout=timeout)
self.ping_counter += 1


Expand Down Expand Up @@ -587,15 +591,15 @@ def send_blocks_and_test(self, blocks, node, *, success=True, force_send=False,
self.send_message(msg_block(block=b))
else:
self.send_message(msg_headers([CBlockHeader(block) for block in blocks]))
wait_until(lambda: blocks[-1].sha256 in self.getdata_requests, timeout=timeout, lock=mininode_lock)
self.wait_until(lambda: blocks[-1].sha256 in self.getdata_requests, timeout=timeout)

if expect_disconnect:
self.wait_for_disconnect(timeout=timeout)
else:
self.sync_with_ping(timeout=timeout)

if success:
wait_until(lambda: node.getbestblockhash() == blocks[-1].hash, timeout=timeout)
self.wait_until(lambda: node.getbestblockhash() == blocks[-1].hash, timeout=timeout)
else:
assert node.getbestblockhash() != blocks[-1].hash

Expand Down
4 changes: 4 additions & 0 deletions test/functional/test_framework/test_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def __init__(self):
self.bind_to_localhost_only = True
self.set_test_params()
self.parse_args()
self.rpc_timeout = int(self.rpc_timeout * self.options.factor) # optionally, increase timeout by a factor

def main(self):
"""Main function. This should not be overridden by the subclass test scripts."""
Expand Down Expand Up @@ -168,6 +169,7 @@ def parse_args(self):
help="set a random seed for deterministically reproducing a previous test run")
parser.add_argument("--descriptors", default=False, action="store_true",
help="Run test using a descriptor wallet")
parser.add_argument('--factor', type=float, default=1.0, help='adjust test timeouts by a factor')
self.add_options(parser)
self.options = parser.parse_args()

Expand Down Expand Up @@ -439,6 +441,7 @@ def get_bin_from_version(version, bin_name, bin_default):
chain=chain[i],
rpchost=rpchost,
timewait=self.rpc_timeout,
factor=self.options.factor,
bitcoind=binary[i],
bitcoin_cli=binary_cli[i],
version=versions[i],
Expand Down Expand Up @@ -586,6 +589,7 @@ def _initialize_chain(self):
extra_args=['-disablewallet'],
rpchost=None,
timewait=self.rpc_timeout,
factor=self.options.factor,
bitcoind=self.options.bitcoind,
bitcoin_cli=self.options.bitcoincli,
coverage_dir=None,
Expand Down
9 changes: 5 additions & 4 deletions test/functional/test_framework/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class TestNode():
To make things easier for the test writer, any unrecognised messages will
be dispatched to the RPC connection."""

def __init__(self, i, datadir, *, chain, rpchost, timewait, bitcoind, bitcoin_cli, coverage_dir, cwd, extra_conf=None, extra_args=None, use_cli=False, start_perf=False, use_valgrind=False, version=None, descriptors=False, chain_in_args=True):
def __init__(self, i, datadir, *, chain, rpchost, timewait, factor, bitcoind, bitcoin_cli, coverage_dir, cwd, extra_conf=None, extra_args=None, use_cli=False, start_perf=False, use_valgrind=False, version=None, descriptors=False, chain_in_args=True):
"""
Kwargs:
start_perf (bool): If True, begin profiling the node with `perf` as soon as
Expand Down Expand Up @@ -131,6 +131,7 @@ def __init__(self, i, datadir, *, chain, rpchost, timewait, bitcoind, bitcoin_cl
self.perf_subprocesses = {}

self.p2ps = []
self.factor = factor

# ELEMENTS:
self.deterministic_priv_key = None
Expand Down Expand Up @@ -340,13 +341,13 @@ def is_node_stopped(self):
return True

def wait_until_stopped(self, timeout=BITCOIND_PROC_WAIT_TIMEOUT):
wait_until(self.is_node_stopped, timeout=timeout)
wait_until(self.is_node_stopped, timeout=timeout, factor=self.factor)

@contextlib.contextmanager
def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
if unexpected_msgs is None:
unexpected_msgs = []
time_end = time.time() + timeout
time_end = time.time() + timeout * self.factor
debug_log = os.path.join(self.datadir, self.chain, 'debug.log')
with open(debug_log, encoding='utf-8') as dl:
dl.seek(0, 2)
Expand Down Expand Up @@ -503,7 +504,7 @@ def add_p2p_connection(self, p2p_conn, *, wait_for_verack=True, **kwargs):
if 'dstaddr' not in kwargs:
kwargs['dstaddr'] = '127.0.0.1'

p2p_conn.peer_connect(**kwargs, net=self.chain)()
p2p_conn.peer_connect(**kwargs, net=self.chain, factor=self.factor)()
self.p2ps.append(p2p_conn)
if wait_for_verack:
# Wait for the node to send us the version and verack
Expand Down
5 changes: 3 additions & 2 deletions test/functional/test_framework/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,9 +220,10 @@ def str_to_b64str(string):
def satoshi_round(amount):
return Decimal(amount).quantize(Decimal('0.00000001'), rounding=ROUND_DOWN)

def wait_until(predicate, *, attempts=float('inf'), timeout=float('inf'), lock=None):
def wait_until(predicate, *, attempts=float('inf'), timeout=float('inf'), lock=None, factor=1.0):
if attempts == float('inf') and timeout == float('inf'):
timeout = 60
timeout = timeout * factor
attempt = 0
time_end = time.time() + timeout

Expand Down Expand Up @@ -277,7 +278,7 @@ def get_rpc_proxy(url, node_number, *, timeout=None, coveragedir=None):
"""
proxy_kwargs = {}
if timeout is not None:
proxy_kwargs['timeout'] = timeout
proxy_kwargs['timeout'] = int(timeout)

proxy = AuthServiceProxy(url, **proxy_kwargs)
proxy.url = url # store URL on proxy for info
Expand Down

0 comments on commit c0fda0c

Please sign in to comment.