Skip to content

Commit

Permalink
Merge pull request #48 from dfinity/igornovg/shed
Browse files Browse the repository at this point in the history
feat(BOUN-1216): use system and sharded load shedders from ic-bn-lib
  • Loading branch information
blind-oracle authored Oct 28, 2024
2 parents 0712978 + ab0803d commit 853af4d
Show file tree
Hide file tree
Showing 8 changed files with 473 additions and 260 deletions.
317 changes: 163 additions & 154 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,10 @@ http-body-util = "0.1"
humantime = "2.1"
hyper-util = "0.1"
ic-agent = { version = "0.37.1", features = ["reqwest"] }
ic-bn-lib = { git = "https://github.com/dfinity/ic-bn-lib", rev = "550521b9b51cb0b59b18360a3e985f109d9d9664" }
ic-bn-lib = { git = "https://github.com/dfinity/ic-bn-lib", rev = "1aa781275cd958f6148f6ea6a5630f73ab7b2d57" }
ic-http-gateway = { git = "https://github.com/dfinity/http-gateway", tag = "0.1.0-b0" }
itertools = "0.13"
lazy_static = "1.5"
little-loadshedder = "0.2"
maxminddb = "0.24"
mockall = "0.12"
moka = { version = "0.12", features = ["sync", "future"] }
Expand Down Expand Up @@ -80,7 +79,7 @@ thiserror = "1.0"
tikv-jemallocator = "0.6"
tikv-jemalloc-ctl = { version = "0.6", features = ["stats"] }
time = { version = "0.3", features = ["macros", "serde"] }
tokio = { version = "1.40", features = ["full", "tracing"] }
tokio = { version = "1.41", features = ["full", "tracing"] }
tokio-util = { version = "0.7.11", features = ["full"] }
tower = "0.4"
tower_governor = "0.4"
Expand All @@ -102,7 +101,7 @@ zstd = "0.13.2"

[dev-dependencies]
hex-literal = "0.4"
hyper = "1.4"
hyper = "1.5"
criterion = { version = "0.5", features = ["async_tokio"] }
httptest = "0.16"
tempfile = "3.10"
Expand Down
122 changes: 85 additions & 37 deletions repro-env.lock

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions rust-toolchain.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[toolchain]
channel = "1.81.0"
channel = "1.82.0"
targets = ["x86_64-unknown-linux-musl"]
profile = "minimal"
profile = "default"
55 changes: 46 additions & 9 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,18 @@ use fqdn::FQDN;
use hickory_resolver::config::CLOUDFLARE_IPS;
use humantime::parse_duration;
use ic_bn_lib::{
http::{self, client::CloneableDnsResolver},
http::{
self,
client::CloneableDnsResolver,
shed::cli::{ShedSharded, ShedSystem},
},
tls::acme,
};
use reqwest::Url;

use crate::{
core::{AUTHOR_NAME, SERVICE_NAME},
routing::domain::CanisterAlias,
routing::{domain::CanisterAlias, RequestType},
tls,
};

Expand Down Expand Up @@ -74,6 +78,12 @@ pub struct Cli {

#[command(flatten, next_help_heading = "Cache")]
pub cache: CacheConfig,

#[command(flatten, next_help_heading = "Shedding System")]
pub shed_system: ShedSystem,

#[command(flatten, next_help_heading = "Shedding Latency")]
pub shed_latency: ShedSharded<RequestType>,
}

#[derive(Args)]
Expand Down Expand Up @@ -137,7 +147,7 @@ pub struct HttpServer {
#[clap(env, long, default_value = "127.0.0.1:8443")]
pub http_server_listen_tls: SocketAddr,

/// Backlog of incoming connections to set on the listening socket.
/// Backlog of incoming connections to set on the listening socket
#[clap(env, long, default_value = "2048")]
pub http_server_backlog: u32,

Expand All @@ -147,12 +157,35 @@ pub struct HttpServer {
#[clap(env, long, default_value = "1000")]
pub http_server_max_requests_per_conn: u64,

/// For how long to wait for the client to send headers
/// Currently applies only to HTTP1 connections.
/// Timeout for network read calls.
/// If the read call take longer than that - the connection is closed.
/// This effectively closes idle HTTP/1.1 connections.
#[clap(env, long, default_value = "30s", value_parser = parse_duration)]
pub http_server_read_timeout: Duration,

/// Timeout for network write calls.
/// If the write call take longer than that - the connection is closed.
#[clap(env, long, default_value = "30s", value_parser = parse_duration)]
pub http_server_write_timeout: Duration,

/// Idle timeout for connections.
/// If no requests are executed during this period - the connections is closed.
/// Mostly needed for HTTP/2 where the read timeout sometimes cannot kick in
/// due to PING frames and other non-request activity.
#[clap(env, long, default_value = "60s", value_parser = parse_duration)]
pub http_server_idle_timeout: Duration,

/// TLS handshake timeout
#[clap(env, long, default_value = "15s", value_parser = parse_duration)]
pub http_server_tls_handshake_timeout: Duration,

/// For how long to wait for the client to send headers.
/// Applies only to HTTP1 connections.
/// Should be set lower than the global `http_server_read_timeout`.
#[clap(env, long, default_value = "10s", value_parser = parse_duration)]
pub http_server_http1_header_read_timeout: Duration,

/// For how long to wait for the client to send request body.
/// For how long to wait for the client to send full request body.
#[clap(env, long, default_value = "60s", value_parser = parse_duration)]
pub http_server_body_read_timeout: Duration,

Expand Down Expand Up @@ -181,11 +214,11 @@ pub struct HttpServer {
#[clap(env, long, default_value = "18h", value_parser = parse_duration)]
pub http_server_tls_session_cache_tti: Duration,

/// Lifetime of a TLS1.3 ticket, due to key rotation the actual lifetime will be twice than this.
/// Lifetime of a TLS1.3 ticket, due to key rotation the actual lifetime will be twice than this
#[clap(env, long, default_value = "9h", value_parser = parse_duration)]
pub http_server_tls_ticket_lifetime: Duration,

/// Option to only serve HTTP instead for testing.
/// Option to only serve HTTP instead for testing
#[clap(env, long)]
pub http_server_insecure_serve_http_only: bool,
}
Expand Down Expand Up @@ -531,7 +564,7 @@ pub struct CacheConfig {
#[clap(env, long, default_value = "10MB", value_parser = parse_size_usize)]
pub cache_max_item_size: usize,

/// Time-to-live for the cache entries in seconds
/// Time-to-live for the cache entries
#[clap(env, long, default_value = "10s", value_parser = parse_duration)]
pub cache_ttl: Duration,

Expand Down Expand Up @@ -567,6 +600,10 @@ impl From<&HttpServer> for http::server::Options {
fn from(c: &HttpServer) -> Self {
Self {
backlog: c.http_server_backlog,
read_timeout: Some(c.http_server_read_timeout),
write_timeout: Some(c.http_server_write_timeout),
idle_timeout: c.http_server_idle_timeout,
tls_handshake_timeout: c.http_server_tls_handshake_timeout,
http1_header_read_timeout: c.http_server_http1_header_read_timeout,
http2_keepalive_interval: c.http_server_http2_keepalive_interval,
http2_keepalive_timeout: c.http_server_http2_keepalive_timeout,
Expand Down
27 changes: 6 additions & 21 deletions src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{

use axum::{
body::Body,
extract::{Extension, MatchedPath, Request, State},
extract::{Extension, Request, State},
middleware::Next,
response::{IntoResponse, Response},
routing::get,
Expand Down Expand Up @@ -40,7 +40,7 @@ use crate::{
error_cause::ErrorCause,
ic::{BNRequestMetadata, BNResponseMetadata, IcResponseStatus},
middleware::{geoip::CountryCode, request_id::RequestId},
CanisterId, RequestCtx, RequestType, RequestTypeApi,
CanisterId, RequestCtx,
},
};

Expand Down Expand Up @@ -164,20 +164,6 @@ impl HttpMetrics {
}
}

fn infer_request_type(path: &str) -> RequestType {
match path {
"/api/v2/canister/:principal/query" => RequestType::Api(RequestTypeApi::Query),
"/api/v2/canister/:principal/call" => RequestType::Api(RequestTypeApi::Call),
"/api/v3/canister/:principal/call" => RequestType::Api(RequestTypeApi::SyncCall),
"/api/v2/canister/:principal/read_state" => RequestType::Api(RequestTypeApi::ReadState),
"/api/v2/subnet/:principal/read_state" => RequestType::Api(RequestTypeApi::ReadStateSubnet),
"/api/v2/status" => RequestType::Api(RequestTypeApi::Status),
"/health" => RequestType::Health,
"/registrations" | "/registrations/:id" => RequestType::Registrations,
_ => RequestType::Unknown,
}
}

pub async fn middleware(
State(state): State<Arc<HttpMetrics>>,
Extension(conn_info): Extension<Arc<ConnInfo>>,
Expand Down Expand Up @@ -271,12 +257,11 @@ pub async fn middleware(
.unwrap_or_else(|| "none".into());

let cache_status_str: &'static str = cache_status.into();
let request_type = response
.extensions_mut()
.remove::<MatchedPath>()
.map_or(RequestType::Http, |x| infer_request_type(x.as_str()));
// Strum IntoStaticStr doesn't respect to_string macro option, so fall back to allocation for now
let request_type = request_type.to_string();
let request_type = ctx
.as_ref()
.map(|x| x.request_type.to_string())
.unwrap_or_else(|| "unknown".into());

// By this time the channel should already have the data
// since the response headers are already received -> request body was for sure read (or an error happened)
Expand Down
8 changes: 6 additions & 2 deletions src/routing/middleware/validate.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
use std::sync::Arc;

use axum::{
extract::{Request, State},
extract::{MatchedPath, Request, State},
middleware::Next,
response::IntoResponse,
};

use super::extract_authority;
use crate::routing::{domain::ResolvesDomain, CanisterId, ErrorCause, RequestCtx};
use crate::routing::{domain::ResolvesDomain, CanisterId, ErrorCause, RequestCtx, RequestType};

pub async fn middleware(
State(resolver): State<Arc<dyn ResolvesDomain>>,
Expand All @@ -29,13 +29,17 @@ pub async fn middleware(
request.extensions_mut().insert(CanisterId(v));
}

let request_type = RequestType::from(request.extensions().get::<MatchedPath>());

// Inject request context
// TODO remove Arc?
let ctx = Arc::new(RequestCtx {
authority,
domain: lookup.domain.clone(),
verify: lookup.verify,
request_type,
});

request.extensions_mut().insert(ctx.clone());

// Execute the request
Expand Down
Loading

0 comments on commit 853af4d

Please sign in to comment.