Skip to content

Commit

Permalink
Allow users to configure capture metrics expiration (#1150)
Browse files Browse the repository at this point in the history
### What does this PR do?

This commit makes the capture metrics expiration a configurable setting
that defaults to `Duration::MAX`. In the future we will bring this value
down, but for now this preserves behavior between lading releases while
still allowing for the new capability.
  • Loading branch information
blt authored Dec 14, 2024
1 parent 4191cbb commit 7dc01de
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 3 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## Added
- Parse nearly the complete field list of smaps/smaps_rollup in the Linux observer.
- Metrics storage is now generational, expiring unwritten metrics according to the `expiration` configuration parameter.
## Removed
- Removed the unused target-rss-byte-limit from the command line, along with its internal stub.
## Changed
- logrotate_fs is now behind a feature flag and not enabled in the default
build. It remains enabled in the release artifact.
- The build now includes http1 and http2 support. Actual usage and availability may vary.
- Metrics storage is now generational, expiring unwritten metrics every 3 seconds.
- CPU data now sourced from cgroup v2 on Linux, memory data expanded significantly.
- CPU data now also includes kubernetes style 'millicore' calculations.

Expand Down
6 changes: 6 additions & 0 deletions lading/src/bin/lading.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ struct Opts {
/// prometheus-addr
#[clap(long)]
capture_path: Option<String>,
/// time that capture metrics will expire by if they are not seen again, only useful when capture-path is set
#[clap(long)]
capture_expiriation_seconds: Option<u64>,
/// address to bind prometheus exporter to, exclusive of prometheus-path and
/// promtheus-addr
#[clap(long)]
Expand Down Expand Up @@ -301,6 +304,7 @@ fn get_config(ops: &Opts, config: Option<String>) -> Result<Config, Error> {
config.telemetry = Telemetry::Log {
path: capture_path.parse().map_err(|_| Error::CapturePath)?,
global_labels: options_global_labels.inner,
expiration: Duration::from_secs(ops.capture_expiriation_seconds.unwrap_or(u64::MAX)),
};
} else {
match config.telemetry {
Expand Down Expand Up @@ -380,12 +384,14 @@ async fn inner_main(
Telemetry::Log {
path,
global_labels,
expiration,
} => {
let mut capture_manager = CaptureManager::new(
path,
shutdown_watcher.register()?,
experiment_started_watcher.clone(),
target_running_watcher.clone(),
expiration,
)
.await?;
for (k, v) in global_labels {
Expand Down
3 changes: 2 additions & 1 deletion lading/src/captures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ impl CaptureManager {
shutdown: lading_signal::Watcher,
experiment_started: lading_signal::Watcher,
target_running: lading_signal::Watcher,
expiration: Duration,
) -> Result<Self, io::Error> {
let fp = tokio::fs::File::create(&capture_path).await?;
let fp = fp.into_std().await;
Expand All @@ -93,7 +94,7 @@ impl CaptureManager {
recency: Recency::new(
quanta::Clock::new(),
MetricKindMask::GAUGE | MetricKindMask::COUNTER,
Some(Duration::from_secs(3)),
Some(expiration),
),
};

Expand Down
11 changes: 10 additions & 1 deletion lading/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! This module controls configuration parsing from the end user, providing a
//! convenience mechanism for the rest of the program. Crashes are most likely
//! to originate from this code, intentionally.
use std::{net::SocketAddr, path::PathBuf};
use std::{net::SocketAddr, path::PathBuf, time::Duration};

use rustc_hash::FxHashMap;
use serde::Deserialize;
Expand Down Expand Up @@ -51,6 +51,12 @@ pub struct Config {
pub inspector: Option<inspector::Config>,
}

/// Fallback for [`Telemetry::Log::expiration`] when the configuration does
/// not supply one.
///
/// Returns [`Duration::MAX`], the largest duration representable.
#[must_use]
pub fn default_expiration() -> Duration {
    // Named so the intent of the sentinel is visible at the return site.
    const LONGEST_EXPIRATION: Duration = Duration::MAX;
    LONGEST_EXPIRATION
}

#[derive(Debug, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
#[serde(deny_unknown_fields)]
Expand Down Expand Up @@ -80,6 +86,9 @@ pub enum Telemetry {
path: PathBuf,
/// Additional labels to include in every metric
global_labels: FxHashMap<String, String>,
/// The time metrics that have not been written to will take to expire.
#[serde(default = "default_expiration")]
expiration: Duration,
},
}

Expand Down

0 comments on commit 7dc01de

Please sign in to comment.