diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a92b9123..3f4ec3b35 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## Added - Parse nearly the complete field list of smaps/smaps_rollup in the Linux observer. +- Metrics storage is now generational, expiring unwritten metrics after the interval set by the configuration parameter `expiration`. ## Removed - Removed the unused target-rss-byte-limit from the command line, internal stub of. ## Changed - logrotate_fs is now behind a feature flag and not enabled in the default build. It remains enabled in the release artifact. - The build now includes http1 and http2 support. Actual usage and availability may vary. -- Metrics storage is now generational, expiring unwritten metrics every 3 seconds. - CPU data now sourced from cgroup v2 on Linux, memory data expanded significantly. - CPU data now also includes kubernetes style 'millicore' calculations. diff --git a/lading/src/bin/lading.rs b/lading/src/bin/lading.rs index 7b4816666..e86c7cfa5 100644 --- a/lading/src/bin/lading.rs +++ b/lading/src/bin/lading.rs @@ -181,6 +181,9 @@ struct Opts { /// prometheus-addr #[clap(long)] capture_path: Option, + /// time in seconds after which capture metrics expire if they are not seen again, only useful when capture-path is set + #[clap(long)] + capture_expiriation_seconds: Option, /// address to bind prometheus exporter to, exclusive of prometheus-path and /// promtheus-addr #[clap(long)] @@ -301,6 +304,7 @@ fn get_config(ops: &Opts, config: Option) -> Result { config.telemetry = Telemetry::Log { path: capture_path.parse().map_err(|_| Error::CapturePath)?, global_labels: options_global_labels.inner, + expiration: Duration::from_secs(ops.capture_expiriation_seconds.unwrap_or(u64::MAX)), }; } else { match config.telemetry { @@ -380,12 +384,14 @@ async fn inner_main( Telemetry::Log { path, global_labels, + expiration, } => { let mut capture_manager = 
CaptureManager::new( path, shutdown_watcher.register()?, experiment_started_watcher.clone(), target_running_watcher.clone(), + expiration, ) .await?; for (k, v) in global_labels { diff --git a/lading/src/captures.rs b/lading/src/captures.rs index 816520465..57a1f6a09 100644 --- a/lading/src/captures.rs +++ b/lading/src/captures.rs @@ -84,6 +84,7 @@ impl CaptureManager { shutdown: lading_signal::Watcher, experiment_started: lading_signal::Watcher, target_running: lading_signal::Watcher, + expiration: Duration, ) -> Result { let fp = tokio::fs::File::create(&capture_path).await?; let fp = fp.into_std().await; @@ -93,7 +94,7 @@ impl CaptureManager { recency: Recency::new( quanta::Clock::new(), MetricKindMask::GAUGE | MetricKindMask::COUNTER, - Some(Duration::from_secs(3)), + Some(expiration), ), }; diff --git a/lading/src/config.rs b/lading/src/config.rs index 5f680bf3a..43081c763 100644 --- a/lading/src/config.rs +++ b/lading/src/config.rs @@ -1,7 +1,7 @@ //! This module controls configuration parsing from the end user, providing a //! convenience mechanism for the rest of the program. Crashes are most likely //! to originate from this code, intentionally. -use std::{net::SocketAddr, path::PathBuf}; +use std::{net::SocketAddr, path::PathBuf, time::Duration}; use rustc_hash::FxHashMap; use serde::Deserialize; @@ -51,6 +51,12 @@ pub struct Config { pub inspector: Option, } +/// Default value for [`Telemetry::Log::expiration`] +#[must_use] +pub fn default_expiration() -> Duration { + Duration::MAX +} + #[derive(Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] #[serde(deny_unknown_fields)] @@ -80,6 +86,9 @@ pub enum Telemetry { path: PathBuf, /// Additional labels to include in every metric global_labels: FxHashMap, + /// How long a metric may go without being written to before it expires. + #[serde(default = "default_expiration")] + expiration: Duration, }, }