Skip to content
This repository has been archived by the owner on Jan 22, 2025. It is now read-only.

Commit

Permalink
Quic client stats (#24195) (#24305)
Browse files Browse the repository at this point in the history
* Add metrics to connection-cache to measure cache hits and misses

* Add congestion stats

* Add more client stats

* Review comments

Co-authored-by: Ryan Leung <[email protected]>

Co-authored-by: sakridge <[email protected]>
Co-authored-by: Ryan Leung <[email protected]>
  • Loading branch information
3 people authored Apr 14, 2022
1 parent 304cd65 commit 58ef6b3
Show file tree
Hide file tree
Showing 8 changed files with 345 additions and 63 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
lru = "0.7.5"
quinn = "0.8.0"
quinn-proto = "0.8.0"
rand = "0.7.0"
rand_chacha = "0.2.2"
rayon = "1.5.1"
Expand Down
224 changes: 191 additions & 33 deletions client/src/connection_cache.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
use {
crate::{
quic_client::QuicTpuConnection, tpu_connection::TpuConnection, udp_client::UdpTpuConnection,
quic_client::QuicTpuConnection,
tpu_connection::{ClientStats, TpuConnection},
udp_client::UdpTpuConnection,
},
lazy_static::lazy_static,
lru::LruCache,
solana_net_utils::VALIDATOR_PORT_RANGE,
solana_sdk::{transaction::VersionedTransaction, transport::TransportError},
solana_sdk::{
timing::AtomicInterval, transaction::VersionedTransaction, transport::TransportError,
},
std::{
net::{IpAddr, Ipv4Addr, SocketAddr},
sync::{Arc, Mutex},
sync::{
atomic::{AtomicU64, Ordering},
Arc, Mutex,
},
},
};

Expand All @@ -21,15 +28,108 @@ enum Connection {
Quic(Arc<QuicTpuConnection>),
}

#[derive(Default)]
struct ConnectionCacheStats {
cache_hits: AtomicU64,
cache_misses: AtomicU64,
sent_packets: AtomicU64,
total_batches: AtomicU64,
batch_success: AtomicU64,
batch_failure: AtomicU64,

// Need to track these separately per-connection
// because we need to track the base stat value from quinn
total_client_stats: ClientStats,
}

const CONNECTION_STAT_SUBMISSION_INTERVAL: u64 = 2000;

impl ConnectionCacheStats {
fn add_client_stats(&self, client_stats: &ClientStats, num_packets: usize, is_success: bool) {
self.total_client_stats.total_connections.fetch_add(
client_stats.total_connections.load(Ordering::Relaxed),
Ordering::Relaxed,
);
self.total_client_stats.connection_reuse.fetch_add(
client_stats.connection_reuse.load(Ordering::Relaxed),
Ordering::Relaxed,
);
self.sent_packets
.fetch_add(num_packets as u64, Ordering::Relaxed);
self.total_batches.fetch_add(1, Ordering::Relaxed);
if is_success {
self.batch_success.fetch_add(1, Ordering::Relaxed);
} else {
self.batch_failure.fetch_add(1, Ordering::Relaxed);
}
}

fn report(&self) {
datapoint_info!(
"quic-client-connection-stats",
(
"cache_hits",
self.cache_hits.swap(0, Ordering::Relaxed),
i64
),
(
"cache_misses",
self.cache_misses.swap(0, Ordering::Relaxed),
i64
),
(
"total_connections",
self.total_client_stats
.total_connections
.swap(0, Ordering::Relaxed),
i64
),
(
"connection_reuse",
self.total_client_stats
.connection_reuse
.swap(0, Ordering::Relaxed),
i64
),
(
"congestion_events",
self.total_client_stats.congestion_events.load_and_reset(),
i64
),
(
"tx_streams_blocked_uni",
self.total_client_stats
.tx_streams_blocked_uni
.load_and_reset(),
i64
),
(
"tx_data_blocked",
self.total_client_stats.tx_data_blocked.load_and_reset(),
i64
),
(
"tx_acks",
self.total_client_stats.tx_acks.load_and_reset(),
i64
),
);
}
}

struct ConnMap {
map: LruCache<SocketAddr, Connection>,
stats: Arc<ConnectionCacheStats>,
last_stats: AtomicInterval,
use_quic: bool,
}

impl ConnMap {
pub fn new() -> Self {
Self {
map: LruCache::new(MAX_CONNECTIONS),
stats: Arc::new(ConnectionCacheStats::default()),
last_stats: AtomicInterval::default(),
use_quic: false,
}
}
Expand All @@ -50,11 +150,25 @@ pub fn set_use_quic(use_quic: bool) {

// TODO: see https://github.com/solana-labs/solana/issues/23661
// remove lazy_static and optimize and refactor this
fn get_connection(addr: &SocketAddr) -> Connection {
fn get_connection(addr: &SocketAddr) -> (Connection, Arc<ConnectionCacheStats>) {
let mut map = (*CONNECTION_MAP).lock().unwrap();

match map.map.get(addr) {
Some(connection) => connection.clone(),
if map
.last_stats
.should_update(CONNECTION_STAT_SUBMISSION_INTERVAL)
{
map.stats.report();
}

let (connection, hit, maybe_stats) = match map.map.get(addr) {
Some(connection) => {
let mut stats = None;
// update connection stats
if let Connection::Quic(conn) = connection {
stats = conn.stats().map(|s| (conn.base_stats(), s));
}
(connection.clone(), true, stats)
}
None => {
let (_, send_socket) = solana_net_utils::bind_in_range(
IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)),
Expand All @@ -68,9 +182,41 @@ fn get_connection(addr: &SocketAddr) -> Connection {
};

map.map.put(*addr, connection.clone());
connection
(connection, false, None)
}
};

if let Some((connection_stats, new_stats)) = maybe_stats {
map.stats.total_client_stats.congestion_events.update_stat(
&connection_stats.congestion_events,
new_stats.path.congestion_events,
);

map.stats
.total_client_stats
.tx_streams_blocked_uni
.update_stat(
&connection_stats.tx_streams_blocked_uni,
new_stats.frame_tx.streams_blocked_uni,
);

map.stats.total_client_stats.tx_data_blocked.update_stat(
&connection_stats.tx_data_blocked,
new_stats.frame_tx.data_blocked,
);

map.stats
.total_client_stats
.tx_acks
.update_stat(&connection_stats.tx_acks, new_stats.frame_tx.acks);
}

if hit {
map.stats.cache_hits.fetch_add(1, Ordering::Relaxed);
} else {
map.stats.cache_misses.fetch_add(1, Ordering::Relaxed);
}
(connection, map.stats.clone())
}

// TODO: see https://github.com/solana-labs/solana/issues/23851
Expand All @@ -86,55 +232,67 @@ pub fn send_wire_transaction_batch(
packets: &[&[u8]],
addr: &SocketAddr,
) -> Result<(), TransportError> {
let conn = get_connection(addr);
match conn {
Connection::Udp(conn) => conn.send_wire_transaction_batch(packets),
Connection::Quic(conn) => conn.send_wire_transaction_batch(packets),
}
let (conn, stats) = get_connection(addr);
let client_stats = ClientStats::default();
let r = match conn {
Connection::Udp(conn) => conn.send_wire_transaction_batch(packets, &client_stats),
Connection::Quic(conn) => conn.send_wire_transaction_batch(packets, &client_stats),
};
stats.add_client_stats(&client_stats, packets.len(), r.is_ok());
r
}

pub fn send_wire_transaction_async(
packets: Vec<u8>,
addr: &SocketAddr,
) -> Result<(), TransportError> {
let conn = get_connection(addr);
match conn {
Connection::Udp(conn) => conn.send_wire_transaction_async(packets),
Connection::Quic(conn) => conn.send_wire_transaction_async(packets),
}
let (conn, stats) = get_connection(addr);
let client_stats = Arc::new(ClientStats::default());
let r = match conn {
Connection::Udp(conn) => conn.send_wire_transaction_async(packets, client_stats.clone()),
Connection::Quic(conn) => conn.send_wire_transaction_async(packets, client_stats.clone()),
};
stats.add_client_stats(&client_stats, 1, r.is_ok());
r
}

pub fn send_wire_transaction(
wire_transaction: &[u8],
addr: &SocketAddr,
) -> Result<(), TransportError> {
let conn = get_connection(addr);
match conn {
Connection::Udp(conn) => conn.send_wire_transaction(wire_transaction),
Connection::Quic(conn) => conn.send_wire_transaction(wire_transaction),
}
send_wire_transaction_batch(&[wire_transaction], addr)
}

pub fn serialize_and_send_transaction(
transaction: &VersionedTransaction,
addr: &SocketAddr,
) -> Result<(), TransportError> {
let conn = get_connection(addr);
match conn {
Connection::Udp(conn) => conn.serialize_and_send_transaction(transaction),
Connection::Quic(conn) => conn.serialize_and_send_transaction(transaction),
}
let (conn, stats) = get_connection(addr);
let client_stats = ClientStats::default();
let r = match conn {
Connection::Udp(conn) => conn.serialize_and_send_transaction(transaction, &client_stats),
Connection::Quic(conn) => conn.serialize_and_send_transaction(transaction, &client_stats),
};
stats.add_client_stats(&client_stats, 1, r.is_ok());
r
}

pub fn par_serialize_and_send_transaction_batch(
transactions: &[VersionedTransaction],
addr: &SocketAddr,
) -> Result<(), TransportError> {
let conn = get_connection(addr);
match conn {
Connection::Udp(conn) => conn.par_serialize_and_send_transaction_batch(transactions),
Connection::Quic(conn) => conn.par_serialize_and_send_transaction_batch(transactions),
}
let (conn, stats) = get_connection(addr);
let client_stats = ClientStats::default();
let r = match conn {
Connection::Udp(conn) => {
conn.par_serialize_and_send_transaction_batch(transactions, &client_stats)
}
Connection::Quic(conn) => {
conn.par_serialize_and_send_transaction_batch(transactions, &client_stats)
}
};
stats.add_client_stats(&client_stats, transactions.len(), r.is_ok());
r
}

#[cfg(test)]
Expand Down Expand Up @@ -182,7 +340,7 @@ mod tests {
// be lazy and not connect until first use or handle connection errors somehow
// (without crashing, as would be required in a real practical validator)
let first_addr = get_addr(&mut rng);
assert!(ip(get_connection(&first_addr)) == first_addr.ip());
assert!(ip(get_connection(&first_addr).0) == first_addr.ip());
let addrs = (0..MAX_CONNECTIONS)
.into_iter()
.map(|_| {
Expand Down
Loading

0 comments on commit 58ef6b3

Please sign in to comment.