Skip to content

Commit

Permalink
chore: [NET-1386] Move http endpoint constants to config file
Browse files Browse the repository at this point in the history
  • Loading branch information
DSharifi committed Apr 19, 2023
1 parent 7bf4cff commit ffbeb80
Show file tree
Hide file tree
Showing 8 changed files with 84 additions and 82 deletions.
33 changes: 32 additions & 1 deletion rs/config/src/http_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const DEFAULT_PORT: u16 = 8080u16;
pub struct Config {
/// IP address and port to listen on
pub listen_addr: SocketAddr,

/// The path to write the listening port to
pub port_file_path: Option<PathBuf>,

Expand All @@ -28,8 +29,33 @@ pub struct Config {
/// they are conditioned on the received requests.
pub connection_read_timeout_seconds: u64,

/// Per request timeout in seconds before the server replies with 504 Gateway Timeout.
/// Per request timeout in seconds before the server replies with `504 Gateway Timeout`.
pub request_timeout_seconds: u64,

/// The `SETTINGS_MAX_CONCURRENT_STREAMS` option for HTTP2 connections.
pub http_max_concurrent_streams: u32,

/// The maximum time we should wait for a peeking the first bytes on a TCP
/// connection. Effectively, if we can't read the first bytes within the
/// timeout the connection is broken.
/// If you modify this constant please also adjust:
/// - `ic_canister_client::agent::MAX_POLL_INTERVAL`,
/// - `canister_test::canister::MAX_BACKOFF_INTERVAL`.
/// See VER-1060 for details.
pub max_tcp_peek_timeout_seconds: u64,

/// Request with body size bigger than `max_request_size_bytes` will be rejected
/// and [`413 Content Too Large`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/413) will be returned to the user.
pub max_request_size_bytes: u64,

/// Delegation certificate requests with body size bigger than `max_delegation_certificate_size_bytes`
/// will be rejected. For valid IC delegation certificates this is never the case since the size is always constant.
pub max_delegation_certificate_size_bytes: u64,

/// If the request body is not received/parsed within
/// `max_request_receive_seconds`, then the request will be rejected and
/// [`408 Request Timeout`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/408) will be returned to the user.
pub max_request_receive_seconds: u64,
}

impl Default for Config {
Expand All @@ -43,6 +69,11 @@ impl Default for Config {
max_outstanding_connections: 20_000,
connection_read_timeout_seconds: 1_200, // 20 min
request_timeout_seconds: 300, // 5 min
http_max_concurrent_streams: 256,
max_tcp_peek_timeout_seconds: 11,
max_request_size_bytes: 5 * 1024 * 1024, // 5MB
max_delegation_certificate_size_bytes: 1024 * 1024, // 1MB
max_request_receive_seconds: 300, // 5 min
}
}
}
18 changes: 5 additions & 13 deletions rs/http_endpoints/public/src/body.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use crate::{
common::{make_plaintext_response, poll_ready},
MAX_REQUEST_RECEIVE_DURATION, MAX_REQUEST_SIZE_BYTES,
};
use crate::common::{make_plaintext_response, poll_ready};
use byte_unit::Byte;
use http::Request;
use hyper::{Body, Response, StatusCode};
use ic_async_utils::{receive_body, BodyReceiveError};
use ic_config::http_handler::Config;
use std::convert::Infallible;
use std::future::Future;
use std::pin::Pin;
Expand All @@ -19,20 +17,14 @@ pub(crate) struct BodyReceiverLayer {
}

impl BodyReceiverLayer {
pub(crate) fn new(max_request_receive_duration: Duration, max_request_body_size: Byte) -> Self {
pub(crate) fn new(config: &Config) -> Self {
Self {
max_request_receive_duration,
max_request_body_size,
max_request_receive_duration: Duration::from_secs(config.max_request_receive_seconds),
max_request_body_size: Byte::from_bytes(config.max_request_size_bytes.into()),
}
}
}

impl Default for BodyReceiverLayer {
fn default() -> Self {
BodyReceiverLayer::new(MAX_REQUEST_RECEIVE_DURATION, MAX_REQUEST_SIZE_BYTES)
}
}

impl<S> Layer<S> for BodyReceiverLayer {
type Service = BodyReceiverService<S>;

Expand Down
9 changes: 6 additions & 3 deletions rs/http_endpoints/public/src/call.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ use crate::{
get_cors_headers, make_plaintext_response, make_response, map_box_error_to_response,
remove_effective_canister_id,
},
metrics::LABEL_UNKNOWN,
types::ApiReqType,
validator_executor::ValidatorExecutor,
EndpointService, HttpError, HttpHandlerMetrics, IngressFilterService, UNKNOWN_LABEL,
EndpointService, HttpError, HttpHandlerMetrics, IngressFilterService,
};
use http::Request;
use hyper::{Body, Response, StatusCode};
use ic_config::http_handler::Config;
use ic_interfaces_p2p::{IngressError, IngressIngestionService};
use ic_interfaces_registry::RegistryClient;
use ic_logger::{error, info_sample, warn, ReplicaLogger};
Expand Down Expand Up @@ -47,6 +49,7 @@ pub(crate) struct CallService {
impl CallService {
#[allow(clippy::too_many_arguments)]
pub(crate) fn new_service(
config: Config,
log: ReplicaLogger,
metrics: HttpHandlerMetrics,
subnet_id: SubnetId,
Expand All @@ -68,7 +71,7 @@ impl CallService {
}));
BoxCloneService::new(
ServiceBuilder::new()
.layer(BodyReceiverLayer::default())
.layer(BodyReceiverLayer::new(&config))
.service(base_service),
)
}
Expand Down Expand Up @@ -137,7 +140,7 @@ impl Service<Request<Vec<u8>>> for CallService {
// Actual parsing.
self.metrics
.request_body_size_bytes
.with_label_values(&[ApiReqType::Call.into(), UNKNOWN_LABEL])
.with_label_values(&[ApiReqType::Call.into(), LABEL_UNKNOWN])
.observe(request.body().len() as f64);

let (mut parts, body) = request.into_parts();
Expand Down
8 changes: 5 additions & 3 deletions rs/http_endpoints/public/src/catch_up_package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@

use crate::{
body::BodyReceiverLayer, common, types::ApiReqType, EndpointService, HttpHandlerMetrics,
UNKNOWN_LABEL,
LABEL_UNKNOWN,
};
use http::Request;
use hyper::{Body, Response, StatusCode};
use ic_config::http_handler::Config;
use ic_interfaces::consensus_pool::ConsensusPoolCache;
use ic_types::consensus::catchup::CatchUpPackageParam;
use prost::Message;
Expand All @@ -28,6 +29,7 @@ pub(crate) struct CatchUpPackageService {

impl CatchUpPackageService {
pub(crate) fn new_service(
config: Config,
metrics: HttpHandlerMetrics,
consensus_pool_cache: Arc<dyn ConsensusPoolCache>,
) -> EndpointService {
Expand All @@ -44,7 +46,7 @@ impl CatchUpPackageService {

BoxCloneService::new(
ServiceBuilder::new()
.layer(BodyReceiverLayer::default())
.layer(BodyReceiverLayer::new(&config))
.service(base_service),
)
}
Expand Down Expand Up @@ -80,7 +82,7 @@ impl Service<Request<Vec<u8>>> for CatchUpPackageService {
fn call(&mut self, request: Request<Vec<u8>>) -> Self::Future {
self.metrics
.request_body_size_bytes
.with_label_values(&[ApiReqType::CatchUpPackage.into(), UNKNOWN_LABEL])
.with_label_values(&[ApiReqType::CatchUpPackage.into(), LABEL_UNKNOWN])
.observe(request.body().len() as f64);

let body = request.into_body();
Expand Down
77 changes: 21 additions & 56 deletions rs/http_endpoints/public/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ use ic_types::{
time::expiry_time_from_now,
CanisterId, NodeId, SubnetId,
};
use metrics::HttpHandlerMetrics;
use metrics::{HttpHandlerMetrics, LABEL_UNKNOWN};
use rand::Rng;
use std::{
convert::{Infallible, TryFrom},
Expand All @@ -96,55 +96,9 @@ use tower::{
ServiceBuilder, ServiceExt,
};

// Constants defining the limits of the HttpHandler.

// The http handler should apply backpresure when we lack a particular resources
// which is purely HttpHandler related (e.g. connections, file descritors).
//
// Current mechanisms for constrained resources include:
//
// 1. File descriptors. The limit can be checked by 'process_max_fds'
// Prometheus metric. The number of file descriptors used by the crate is
// controlled by 'MAX_OUTSTANDING_CONNECTIONS'.
//
// 2. Lock contention. Currently we don't use lock-free data structures
// (e.g. StateManager, RegistryClient), hence we can observe lock contention.
// 'MAX_REQUESTS_PER_SECOND_PER_CONNECTION' is used to control the risk of
// running into contention. A resonable value can be derived by looking what are
// the latencies for operations that hold locks (e.g. methods on the
// RegistryClient and StateManager).

// Sets the SETTINGS_MAX_CONCURRENT_STREAMS option for HTTP2 connections.
const HTTP_MAX_CONCURRENT_STREAMS: u32 = 256;

// The maximum time we should wait for a peeking the first bytes on a TCP
// connection. Effectively, if we can't read the first bytes within the
// timeout the connection is broken.
// If you modify this constant please also adjust:
// - `ic_canister_client::agent::MAX_POLL_INTERVAL`,
// - `canister_test::canister::MAX_BACKOFF_INTERVAL`.
// See VER-1060 for details.
const MAX_TCP_PEEK_TIMEOUT_SECS: u64 = 11;

// Request with body size bigger than 'MAX_REQUEST_SIZE_BYTES' will be rejected
// and appropriate error code will be returned to the user.
pub(crate) const MAX_REQUEST_SIZE_BYTES: Byte = Byte::from_bytes(5 * 1024 * 1024); // 5MB

// Delegation certificate requests with body size bigger than 'MAX_DELEGATION_CERTIFICATE_SIZE' will be rejected.
// For valid IC delegation certificates this is never the case since the size is always constant.
pub(crate) const MAX_DELEGATION_CERTIFICATE_SIZE: Byte = Byte::from_bytes(1024 * 1024); // 1MB

// If the request body is not received/parsed within
// 'MAX_REQUEST_RECEIVE_DURATION', then the request will be rejected and
// appropriate error code will be returned to the user.
pub(crate) const MAX_REQUEST_RECEIVE_DURATION: Duration = Duration::from_secs(300); // 5 min

const HTTP_DASHBOARD_URL_PATH: &str = "/_/dashboard";
const CONTENT_TYPE_CBOR: &str = "application/cbor";

// Placeholder used when we can't determine the approriate prometheus label.
const UNKNOWN_LABEL: &str = "unknown";

#[derive(Debug, Clone, PartialEq)]
pub(crate) struct HttpError {
pub status: StatusCode,
Expand All @@ -169,6 +123,7 @@ struct HttpHandler {
// Crates a detached tokio blocking task that initializes the server (reading
// required state, etc).
fn start_server_initialization(
config: Config,
log: ReplicaLogger,
metrics: HttpHandlerMetrics,
subnet_id: SubnetId,
Expand Down Expand Up @@ -204,7 +159,7 @@ fn start_server_initialization(
// able to issue certificates.
health_status.store(ReplicaHealthStatus::WaitingForRootDelegation);
let loaded_delegation =
load_root_delegation(&log, subnet_id, nns_subnet_id, registry_client).await;
load_root_delegation(&config, &log, subnet_id, nns_subnet_id, registry_client).await;
*delegation_from_nns.write().unwrap() = loaded_delegation;
metrics
.health_status_transitions_total
Expand Down Expand Up @@ -302,6 +257,7 @@ pub fn start_server(
let state_reader_executor = StateReaderExecutor::new(state_reader);
let validator_executor = ValidatorExecutor::new(ingress_verifier, log.clone());
let call_service = CallService::new_service(
config.clone(),
log.clone(),
metrics.clone(),
subnet_id,
Expand All @@ -312,6 +268,7 @@ pub fn start_server(
malicious_flags.clone(),
);
let query_service = QueryService::new_service(
config.clone(),
log.clone(),
metrics.clone(),
Arc::clone(&health_status),
Expand All @@ -322,6 +279,7 @@ pub fn start_server(
malicious_flags.clone(),
);
let read_state_service = ReadStateService::new_service(
config.clone(),
log.clone(),
metrics.clone(),
Arc::clone(&health_status),
Expand All @@ -340,8 +298,11 @@ pub fn start_server(
);
let dashboard_service =
DashboardService::new_service(config.clone(), subnet_type, state_reader_executor.clone());
let catchup_service =
CatchUpPackageService::new_service(metrics.clone(), consensus_pool_cache.clone());
let catchup_service = CatchUpPackageService::new_service(
config.clone(),
metrics.clone(),
consensus_pool_cache.clone(),
);

let health_status_refresher = HealthStatusRefreshLayer::new(
log.clone(),
Expand All @@ -352,6 +313,7 @@ pub fn start_server(
);

start_server_initialization(
config.clone(),
log.clone(),
metrics.clone(),
subnet_id,
Expand Down Expand Up @@ -449,7 +411,7 @@ fn create_main_service(
let request_timer = HistogramVecTimer::start_timer(
metrics.requests.clone(),
&REQUESTS_LABEL_NAMES,
[UNKNOWN_LABEL, UNKNOWN_LABEL],
[LABEL_UNKNOWN, LABEL_UNKNOWN],
);
(request, request_timer)
})
Expand Down Expand Up @@ -478,11 +440,11 @@ async fn handshake_and_serve_connection(
) -> Result<(), Infallible> {
let connection_start_time = Instant::now();
let mut http = Http::new();
http.http2_max_concurrent_streams(HTTP_MAX_CONCURRENT_STREAMS);
http.http2_max_concurrent_streams(config.http_max_concurrent_streams);

let mut b = [0_u8; 1];
let app_layer = match timeout(
Duration::from_secs(MAX_TCP_PEEK_TIMEOUT_SECS),
Duration::from_secs(config.max_tcp_peek_timeout_seconds),
tcp_stream.peek(&mut b),
)
.await
Expand All @@ -504,7 +466,9 @@ async fn handshake_and_serve_connection(
Err(err) => {
warn!(
log,
"TCP peeking timeout after {}s, error = {}", MAX_TCP_PEEK_TIMEOUT_SECS, err
"TCP peeking timeout after {}s, error = {}",
config.max_tcp_peek_timeout_seconds,
err
);
metrics.observe_connection_error(ConnectionError::PeekTimeout, connection_start_time);
return Ok(());
Expand Down Expand Up @@ -752,6 +716,7 @@ async fn make_router(
// Fetches a delegation from the NNS subnet to allow this subnet to issue
// certificates on its behalf. On the NNS subnet this method is a no-op.
async fn load_root_delegation(
config: &Config,
log: &ReplicaLogger,
subnet_id: SubnetId,
nns_subnet_id: SubnetId,
Expand Down Expand Up @@ -891,8 +856,8 @@ async fn load_root_delegation(

let raw_response = match receive_body(
raw_response_res.into_body(),
MAX_REQUEST_RECEIVE_DURATION,
MAX_DELEGATION_CERTIFICATE_SIZE,
Duration::from_secs(config.max_request_receive_seconds),
Byte::from_bytes(config.max_delegation_certificate_size_bytes.into()),
)
.await
{
Expand Down
3 changes: 3 additions & 0 deletions rs/http_endpoints/public/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ pub const LABEL_STATUS: &str = "status";
pub const LABEL_HEALTH_STATUS_BEFORE: &str = "before";
pub const LABEL_HEALTH_STATUS_AFTER: &str = "after";

/// Placeholder used when we can't determine the approriate prometheus label.
pub const LABEL_UNKNOWN: &str = "unknown";

const STATUS_SUCCESS: &str = "success";
const STATUS_ERROR: &str = "error";

Expand Down
Loading

0 comments on commit ffbeb80

Please sign in to comment.