From fc18f28ff98aa42bb252bd1fb363a47d4398a64a Mon Sep 17 00:00:00 2001 From: "Brian L. Troutwine" Date: Fri, 13 Dec 2024 16:07:51 -0800 Subject: [PATCH] Allow users to configure capture metrics expiration This commit adjusts the capture metrics expiration so that it is by default Duration::MAX and is a configurable setting. In the future we will bring this value down, but this preserves behavior between lading releases while still allowing for new capability. Signed-off-by: Brian L. Troutwine --- CHANGELOG.md | 2 +- lading/src/bin/lading.rs | 5 ++++- lading/src/captures.rs | 3 ++- lading/src/config.rs | 11 ++++++++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a92b91239..3f4ec3b352 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,13 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ## Added - Parse nearly the complete field list of smaps/smaps_rollup in the Linux observer. +- Metrics storage is now generational, expiring unwritten metrics by configuration parameter `expiration`. ## Removed - Removed the unused target-rss-byte-limit from the command line, internal stub of. ## Changed - logrotate_fs is now behind a feature flag and not enabled in the default build. It remains enabled in the release artifact. - The build now includes http1 and http2 support. Actual usage and availability may vary. -- Metrics storage is now generational, expiring unwritten metrics every 3 seconds. - CPU data now sourced from cgroup v2 on Linux, memory data expanded significantly. - CPU data now also includes kubernetes style 'millicore' calculations. 
diff --git a/lading/src/bin/lading.rs b/lading/src/bin/lading.rs index 7b48166669..8cb0af662b 100644 --- a/lading/src/bin/lading.rs +++ b/lading/src/bin/lading.rs @@ -11,7 +11,7 @@ use clap::{ArgGroup, Parser, Subcommand}; use lading::{ blackhole, captures::CaptureManager, - config::{Config, Telemetry}, + config::{default_expiration, Config, Telemetry}, generator::{self, process_tree}, inspector, observer, target::{self, Behavior, Output}, @@ -301,6 +301,7 @@ fn get_config(ops: &Opts, config: Option) -> Result { config.telemetry = Telemetry::Log { path: capture_path.parse().map_err(|_| Error::CapturePath)?, global_labels: options_global_labels.inner, + expiration: default_expiration(), }; } else { match config.telemetry { @@ -380,12 +381,14 @@ async fn inner_main( Telemetry::Log { path, global_labels, + expiration, } => { let mut capture_manager = CaptureManager::new( path, shutdown_watcher.register()?, experiment_started_watcher.clone(), target_running_watcher.clone(), + expiration, ) .await?; for (k, v) in global_labels { diff --git a/lading/src/captures.rs b/lading/src/captures.rs index 816520465f..57a1f6a09b 100644 --- a/lading/src/captures.rs +++ b/lading/src/captures.rs @@ -84,6 +84,7 @@ impl CaptureManager { shutdown: lading_signal::Watcher, experiment_started: lading_signal::Watcher, target_running: lading_signal::Watcher, + expiration: Duration, ) -> Result { let fp = tokio::fs::File::create(&capture_path).await?; let fp = fp.into_std().await; @@ -93,7 +94,7 @@ impl CaptureManager { recency: Recency::new( quanta::Clock::new(), MetricKindMask::GAUGE | MetricKindMask::COUNTER, - Some(Duration::from_secs(3)), + Some(expiration), ), }; diff --git a/lading/src/config.rs b/lading/src/config.rs index 5f680bf3a3..43081c763c 100644 --- a/lading/src/config.rs +++ b/lading/src/config.rs @@ -1,7 +1,7 @@ //! This module controls configuration parsing from the end user, providing a //! convenience mechanism for the rest of the program. Crashes are most likely //! 
to originate from this code, intentionally. -use std::{net::SocketAddr, path::PathBuf}; +use std::{net::SocketAddr, path::PathBuf, time::Duration}; use rustc_hash::FxHashMap; use serde::Deserialize; @@ -51,6 +51,12 @@ pub struct Config { pub inspector: Option, } +/// Default value for [`Telemetry::Log::expiration`] +#[must_use] +pub fn default_expiration() -> Duration { + Duration::MAX +} + #[derive(Debug, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] #[serde(deny_unknown_fields)] @@ -80,6 +86,9 @@ pub enum Telemetry { path: PathBuf, /// Additional labels to include in every metric global_labels: FxHashMap, + /// How long a metric may go without being written to before it expires. + #[serde(default = "default_expiration")] + expiration: Duration, }, }