From 4c73754d4fbb2a03219a6eec8be586f574201311 Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Thu, 6 Aug 2020 13:34:45 +0200 Subject: [PATCH] Track the database size on initialization --- docs/user/collected-metrics/metrics.md | 1 + glean-core/metrics.yaml | 14 +++++++++ glean-core/src/database/mod.rs | 40 +++++++++++++++++++++++--- glean-core/src/internal_metrics.rs | 23 +++++++++++++++ glean-core/src/lib.rs | 19 +++++++++++- glean-core/src/lib_unit_tests.rs | 28 ++++++++++++++++++ 6 files changed, 120 insertions(+), 5 deletions(-) diff --git a/docs/user/collected-metrics/metrics.md b/docs/user/collected-metrics/metrics.md index 8f47c707c8..92427e648d 100644 --- a/docs/user/collected-metrics/metrics.md +++ b/docs/user/collected-metrics/metrics.md @@ -128,6 +128,7 @@ The following metrics are added to the ping: | Name | Type | Description | Data reviews | Extras | Expiration | [Data Sensitivity](https://wiki.mozilla.org/Firefox/Data_Collection) | | --- | --- | --- | --- | --- | --- | --- | +| glean.database.size |[memory_distribution](https://mozilla.github.io/glean/book/user/metrics/memory_distribution.html) |The size of the database file at startup. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1656589#c7)||2020-09-30 | | | glean.error.preinit_tasks_overflow |[counter](https://mozilla.github.io/glean/book/user/metrics/counter.html) |The number of tasks queued in the pre-initialization buffer. Only sent if the buffer overflows. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1609482#c3)||never |1 | | glean.upload.deleted_pings_after_quota_hit |[counter](https://mozilla.github.io/glean/book/user/metrics/counter.html) |The number of pings deleted after the quota for the size of the pending pings directory is hit. Since quota is only calculated for the pending pings directory, and deletion request ping live in a different directory, deletion request pings are never deleted. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1601550#c3)||never |1 | | glean.upload.discarded_exceeding_pings_size |[memory_distribution](https://mozilla.github.io/glean/book/user/metrics/memory_distribution.html) |The size of pings that exceeded the maximum ping size allowed for upload. |[1](https://bugzilla.mozilla.org/show_bug.cgi?id=1597761#c10)||never |1 | diff --git a/glean-core/metrics.yaml b/glean-core/metrics.yaml index ed8724f43d..62ad6f8f4e 100644 --- a/glean-core/metrics.yaml +++ b/glean-core/metrics.yaml @@ -429,3 +429,17 @@ glean.upload: expires: never no_lint: - COMMON_PREFIX + +glean.database: + size: + type: memory_distribution + description: > + The size of the database file at startup. + memory_unit: byte + bugs: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1656589 + data_reviews: + - https://bugzilla.mozilla.org/show_bug.cgi?id=1656589#c7 + notification_emails: + - glean-team@mozilla.com + expires: "2020-09-30" diff --git a/glean-core/src/database/mod.rs b/glean-core/src/database/mod.rs index 33a9bc0023..fec0f821e2 100644 --- a/glean-core/src/database/mod.rs +++ b/glean-core/src/database/mod.rs @@ -5,6 +5,8 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::fs; +use std::num::NonZeroU64; +use std::path::Path; use std::str; use std::sync::RwLock; @@ -32,6 +34,9 @@ pub struct Database { /// we will save metrics with 'ping' lifetime data in a map temporarily /// so as to persist them to disk using rkv in bulk on demand. ping_lifetime_data: Option>>, + + // Initial file size when opening the database. + file_size: Option, } impl std::fmt::Debug for Database { @@ -46,6 +51,24 @@ impl std::fmt::Debug for Database { } } +/// Get the file size of a file in the given path and file. +/// +/// # Arguments +/// +/// - `path` - The path +/// +/// # Returns +/// +/// Returns the non-zero file size in bytes, +/// or `None` on error or if the size is `0`. +fn file_size(path: &Path) -> Option { + log::trace!("Getting file size for path: {}", path.display()); + fs::metadata(path) + .ok() + .map(|stat| stat.len()) + .and_then(NonZeroU64::new) +} + impl Database { /// Initialize the data store. /// @@ -55,7 +78,12 @@ impl Database { /// It also loads any Lifetime::Ping data that might be /// persisted, in case `delay_ping_lifetime_io` is set. pub fn new(data_path: &str, delay_ping_lifetime_io: bool) -> Result { - let rkv = Self::open_rkv(data_path)?; + let path = Path::new(data_path).join("db"); + log::debug!("Database path: {:?}", path.display()); + + let file_size = file_size(&path.join("data.mdb")); + + let rkv = Self::open_rkv(&path)?; let user_store = rkv.open_single(Lifetime::User.as_str(), StoreOptions::create())?; let ping_store = rkv.open_single(Lifetime::Ping.as_str(), StoreOptions::create())?; let application_store = @@ -72,6 +100,7 @@ impl Database { ping_store, application_store, ping_lifetime_data, + file_size, }; db.load_ping_lifetime_data(); @@ -79,6 +108,11 @@ impl Database { Ok(db) } + /// Get the initial database file size. + pub fn file_size(&self) -> Option { + self.file_size + } + fn get_store(&self, lifetime: Lifetime) -> &SingleStore { match lifetime { Lifetime::User => &self.user_store, @@ -88,9 +122,7 @@ impl Database { } /// Creates the storage directories and inits rkv. - fn open_rkv(path: &str) -> Result { - let path = std::path::Path::new(path).join("db"); - log::debug!("Database path: {:?}", path.display()); + fn open_rkv(path: &Path) -> Result { fs::create_dir_all(&path)?; let rkv = Rkv::new(&path)?; diff --git a/glean-core/src/internal_metrics.rs b/glean-core/src/internal_metrics.rs index 4472f8e794..33279d9bff 100644 --- a/glean-core/src/internal_metrics.rs +++ b/glean-core/src/internal_metrics.rs @@ -111,3 +111,26 @@ impl UploadMetrics { } } } + +#[derive(Debug)] +pub struct DatabaseMetrics { + pub size: MemoryDistributionMetric, +} + +impl DatabaseMetrics { + pub fn new() -> DatabaseMetrics { + DatabaseMetrics { + size: MemoryDistributionMetric::new( + CommonMetricData { + name: "size".into(), + category: "glean.database".into(), + send_in_pings: vec!["metrics".into()], + lifetime: Lifetime::Application, + disabled: false, + dynamic_label: None, + }, + MemoryUnit::Byte, + ), + } + } +} diff --git a/glean-core/src/lib.rs b/glean-core/src/lib.rs index 132e011395..a70e27a6a9 100644 --- a/glean-core/src/lib.rs +++ b/glean-core/src/lib.rs @@ -47,7 +47,7 @@ use crate::debug::DebugOptions; pub use crate::error::{Error, ErrorKind, Result}; pub use crate::error_recording::{test_get_num_recorded_errors, ErrorType}; use crate::event_database::EventDatabase; -use crate::internal_metrics::CoreMetrics; +use crate::internal_metrics::{CoreMetrics, DatabaseMetrics}; use crate::internal_pings::InternalPings; use crate::metrics::{Metric, MetricType, PingType}; use crate::ping::PingMaker; @@ -170,6 +170,7 @@ pub struct Glean { data_store: Option, event_data_store: EventDatabase, core_metrics: CoreMetrics, + database_metrics: DatabaseMetrics, internal_pings: InternalPings, data_path: PathBuf, application_id: String, @@ -210,6 +211,7 @@ impl Glean { data_store, event_data_store, core_metrics: CoreMetrics::new(), + database_metrics: DatabaseMetrics::new(), internal_pings: InternalPings::new(), upload_manager, data_path: PathBuf::from(&cfg.data_path), @@ -318,6 +320,20 @@ impl Glean { self.set_application_lifetime_core_metrics(); } + /// Initialize the database metrics managed by Glean's Rust core. + fn initialize_database_metrics(&mut self) { + log::trace!("Initializing database metrics"); + + if let Some(size) = self + .data_store + .as_ref() + .and_then(|database| database.file_size()) + { + log::trace!("Database file size: {}", size.get()); + self.database_metrics.size.accumulate(self, size.get()) + } + } + /// Called when Glean is initialized to the point where it can correctly /// assemble pings. Usually called from the language specific layer after all /// of the core metrics have been set and the ping types have been @@ -380,6 +396,7 @@ impl Glean { fn on_upload_enabled(&mut self) { self.upload_enabled = true; self.initialize_core_metrics(); + self.initialize_database_metrics(); } /// Handles the changing of state from upload enabled to disabled. diff --git a/glean-core/src/lib_unit_tests.rs b/glean-core/src/lib_unit_tests.rs index e59ad01b61..ce35fb1ae7 100644 --- a/glean-core/src/lib_unit_tests.rs +++ b/glean-core/src/lib_unit_tests.rs @@ -805,3 +805,31 @@ fn test_empty_application_id() { // Check that this is indeed the first run. assert!(glean.is_first_run()); } + +#[test] +fn records_database_file_size() { + let _ = env_logger::builder().is_test(true).try_init(); + + // Note: We don't use `new_glean` because we need to re-use the database directory. + + let dir = tempfile::tempdir().unwrap(); + let tmpname = dir.path().display().to_string(); + + // Initialize Glean once to ensure we create the database. + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true).unwrap(); + let database_size = &glean.database_metrics.size; + let data = database_size.test_get_value(&glean, "metrics"); + assert!(data.is_none()); + drop(glean); + + // Initialize Glean again to record file size. + let glean = Glean::with_options(&tmpname, GLOBAL_APPLICATION_ID, true).unwrap(); + + let database_size = &glean.database_metrics.size; + let data = database_size.test_get_value(&glean, "metrics"); + assert!(data.is_some()); + let data = data.unwrap(); + + // We should see the database containing some data. + assert!(data.sum > 0); +}