Skip to content

Commit

Permalink
Merge #19697: Improvements on ADDR caching
Browse files Browse the repository at this point in the history
0d04784 Refactor the functional test (Gleb Naumenko)
83ad65f Address nits in ADDR caching (Gleb Naumenko)
81b00f8 Add indexing ADDR cache by local socket addr (Gleb Naumenko)
42ec558 Justify the choice of ADDR cache lifetime (Gleb Naumenko)

Pull request description:

  This is a follow-up on #18991 which does 3 things:
  - improves privacy of a node listening to multiple addresses via adding cache index by local socket addr (suggested [here](bitcoin/bitcoin#18991 (comment)))
  - documents on the choice of 24h cache lifetime
  - addresses nits from #18991

ACKs for top commit:
  jnewbery:
    utACK 0d04784
  vasild:
    ACK 0d04784
  jonatack:
    Code review ACK 0d04784

Tree-SHA512: bb65a34dd1ce2811186d3e4469bc33e8399cebaaa494ce13041c7cff23275870e4176a719f7a72f8d779c49f8b2344bf4fa1aeb3ea4e2626d5ae76514f00a750
  • Loading branch information
laanwj committed Sep 21, 2020
2 parents b99a163 + 0d04784 commit c0c409d
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 46 deletions.
45 changes: 39 additions & 6 deletions src/net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ const std::string NET_MESSAGE_COMMAND_OTHER = "*other*";

static const uint64_t RANDOMIZER_ID_NETGROUP = 0x6c0edd8036ef4036ULL; // SHA256("netgroup")[0:8]
static const uint64_t RANDOMIZER_ID_LOCALHOSTNONCE = 0xd93e69e2bbfa5735ULL; // SHA256("localhostnonce")[0:8]
static const uint64_t RANDOMIZER_ID_ADDRCACHE = 0x1cf2e4ddd306dda9ULL; // SHA256("addrcache")[0:8]
//
// Global state variables
//
Expand Down Expand Up @@ -2560,15 +2561,47 @@ std::vector<CAddress> CConnman::GetAddresses(size_t max_addresses, size_t max_pc
return addresses;
}

std::vector<CAddress> CConnman::GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct)
std::vector<CAddress> CConnman::GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct)
{
SOCKET socket;
WITH_LOCK(requestor.cs_hSocket, socket = requestor.hSocket);
auto local_socket_bytes = GetBindAddress(socket).GetAddrBytes();
uint64_t cache_id = GetDeterministicRandomizer(RANDOMIZER_ID_ADDRCACHE)
.Write(requestor.addr.GetNetwork())
.Write(local_socket_bytes.data(), local_socket_bytes.size())
.Finalize();
const auto current_time = GetTime<std::chrono::microseconds>();
if (m_addr_response_caches.find(requestor_network) == m_addr_response_caches.end() ||
m_addr_response_caches[requestor_network].m_update_addr_response < current_time) {
m_addr_response_caches[requestor_network].m_addrs_response_cache = GetAddresses(max_addresses, max_pct);
m_addr_response_caches[requestor_network].m_update_addr_response = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6));
auto r = m_addr_response_caches.emplace(cache_id, CachedAddrResponse{});
CachedAddrResponse& cache_entry = r.first->second;
if (cache_entry.m_cache_entry_expiration < current_time) { // If emplace() added new one it has expiration 0.
cache_entry.m_addrs_response_cache = GetAddresses(max_addresses, max_pct);
// Choosing a proper cache lifetime is a trade-off between the privacy leak minimization
// and the usefulness of ADDR responses to honest users.
//
// Longer cache lifetime makes it more difficult for an attacker to scrape
// enough AddrMan data to maliciously infer something useful.
// By the time an attacker scraped enough AddrMan records, most of
// the records should be old enough to not leak topology info by
// e.g. analyzing real-time changes in timestamps.
//
// It takes only several hundred requests to scrape everything from an AddrMan containing 100,000 nodes,
// so ~24 hours of cache lifetime indeed makes the data less inferable by the time
// most of it could be scraped (considering that timestamps are updated via
// ADDR self-announcements and when nodes communicate).
// We also should be robust to those attacks which may not require scraping *full* victim's AddrMan
// (because even several timestamps of the same handful of nodes may leak privacy).
//
// On the other hand, longer cache lifetime makes ADDR responses
// outdated and less useful for an honest requestor, e.g. if most nodes
// in the ADDR response are no longer active.
//
// However, the churn in the network is known to be rather low. Since we consider
// nodes to be "terrible" (see IsTerrible()) if the timestamps are older than 30 days,
// max. 24 hours of "penalty" due to cache shouldn't make any meaningful difference
// in terms of the freshness of the response.
cache_entry.m_cache_entry_expiration = current_time + std::chrono::hours(21) + GetRandMillis(std::chrono::hours(6));
}
return m_addr_response_caches[requestor_network].m_addrs_response_cache;
return cache_entry.m_addrs_response_cache;
}

bool CConnman::AddNode(const std::string& strNode)
Expand Down
16 changes: 10 additions & 6 deletions src/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ class CConnman
* A non-malicious call (from RPC or a peer with addr permission) should
* call the function without a parameter to avoid using the cache.
*/
std::vector<CAddress> GetAddresses(Network requestor_network, size_t max_addresses, size_t max_pct);
std::vector<CAddress> GetAddresses(CNode& requestor, size_t max_addresses, size_t max_pct);

// This allows temporarily exceeding m_max_outbound_full_relay, with the goal of finding
// a peer that is better than all our current peers.
Expand Down Expand Up @@ -484,20 +484,24 @@ class CConnman
*/
struct CachedAddrResponse {
std::vector<CAddress> m_addrs_response_cache;
std::chrono::microseconds m_update_addr_response{0};
std::chrono::microseconds m_cache_entry_expiration{0};
};

/**
* Addr responses stored in different caches
* per network prevent cross-network node identification.
* per (network, local socket) prevent cross-network node identification.
* If a node for example is multi-homed under Tor and IPv6,
* a single cache (or no cache at all) would let an attacker
* to easily detect that it is the same node by comparing responses.
* The used memory equals to 1000 CAddress records (or around 32 bytes) per
* Indexing by local socket prevents leakage when a node has multiple
* listening addresses on the same network.
*
* The used memory equals to 1000 CAddress records (or around 40 bytes) per
* distinct Network (up to 5) we have/had an inbound peer from,
* resulting in at most ~160 KB.
* resulting in at most ~196 KB. Every separate local socket may
* add up to ~196 KB extra.
*/
std::map<Network, CachedAddrResponse> m_addr_response_caches;
std::map<uint64_t, CachedAddrResponse> m_addr_response_caches;

/**
* Services this instance offers.
Expand Down
2 changes: 1 addition & 1 deletion src/net_processing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3545,7 +3545,7 @@ void PeerManager::ProcessMessage(CNode& pfrom, const std::string& msg_type, CDat
if (pfrom.HasPermission(PF_ADDR)) {
vAddr = m_connman.GetAddresses(MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
} else {
vAddr = m_connman.GetAddresses(pfrom.addr.GetNetwork(), MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
vAddr = m_connman.GetAddresses(pfrom, MAX_ADDR_TO_SEND, MAX_PCT_ADDR_TO_SEND);
}
FastRandomContext insecure_rand;
for (const CAddress &addr : vAddr) {
Expand Down
47 changes: 14 additions & 33 deletions test/functional/p2p_getaddr_caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,8 @@
"""Test addr response caching"""

import time
from test_framework.messages import (
CAddress,
NODE_NETWORK,
NODE_WITNESS,
msg_addr,
msg_getaddr,
)

from test_framework.messages import msg_getaddr
from test_framework.p2p import (
P2PInterface,
p2p_lock
Expand All @@ -21,21 +16,9 @@
assert_equal,
)

# As defined in net_processing.
MAX_ADDR_TO_SEND = 1000

def gen_addrs(n):
addrs = []
for i in range(n):
addr = CAddress()
addr.time = int(time.time())
addr.nServices = NODE_NETWORK | NODE_WITNESS
# Use first octets to occupy different AddrMan buckets
first_octet = i >> 8
second_octet = i % 256
addr.ip = "{}.{}.1.1".format(first_octet, second_octet)
addr.port = 8333
addrs.append(addr)
return addrs
MAX_PCT_ADDR_TO_SEND = 23

class AddrReceiver(P2PInterface):

Expand All @@ -62,18 +45,16 @@ def set_test_params(self):
self.num_nodes = 1

def run_test(self):
self.log.info('Create connection that sends and requests addr messages')
addr_source = self.nodes[0].add_p2p_connection(P2PInterface())

msg_send_addrs = msg_addr()
self.log.info('Fill peer AddrMan with a lot of records')
# Since these addrs are sent from the same source, not all of them will be stored,
# because we allocate a limited number of AddrMan buckets per addr source.
total_addrs = 10000
addrs = gen_addrs(total_addrs)
for i in range(int(total_addrs/MAX_ADDR_TO_SEND)):
msg_send_addrs.addrs = addrs[i * MAX_ADDR_TO_SEND:(i + 1) * MAX_ADDR_TO_SEND]
addr_source.send_and_ping(msg_send_addrs)
for i in range(10000):
first_octet = i >> 8
second_octet = i % 256
a = "{}.{}.1.1".format(first_octet, second_octet)
self.nodes[0].addpeeraddress(a, 8333)

# Need to make sure we hit MAX_ADDR_TO_SEND records in the addr response later because
# only a fraction of all known addresses can be cached and returned.
assert(len(self.nodes[0].getnodeaddresses(0)) > int(MAX_ADDR_TO_SEND / (MAX_PCT_ADDR_TO_SEND / 100)))

responses = []
self.log.info('Send many addr requests within short time to receive same response')
Expand All @@ -89,7 +70,7 @@ def run_test(self):
responses.append(addr_receiver.get_received_addrs())
for response in responses[1:]:
assert_equal(response, responses[0])
assert(len(response) < MAX_ADDR_TO_SEND)
assert(len(response) == MAX_ADDR_TO_SEND)

cur_mock_time += 3 * 24 * 60 * 60
self.nodes[0].setmocktime(cur_mock_time)
Expand Down

0 comments on commit c0c409d

Please sign in to comment.