diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6abc8c4e..0ae21140 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -119,7 +119,7 @@ jobs: uses: dtolnay/rust-toolchain@stable with: # cargo check-external-types is only available on this specific nightly - toolchain: nightly-2024-02-07 + toolchain: nightly-2024-05-01 - uses: Swatinem/rust-cache@v2 - name: check-external-types run: | diff --git a/Cargo.lock b/Cargo.lock index fdfda1bc..d6fe01c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "addr2line" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" dependencies = [ "gimli", ] @@ -143,6 +143,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "async-trait" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -157,9 +168,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "backtrace" -version = "0.3.71" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" +checksum = "17c6a35df3749d2e8bb1b7b21a976d82b15548788d2735b9d82f329268f71a11" dependencies = [ "addr2line", "cc", @@ -708,12 +719,34 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -734,6 +767,7 @@ checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ "futures-core", "futures-io", + "futures-macro", "futures-sink", "futures-task", "memchr", @@ -767,9 +801,9 @@ dependencies = [ [[package]] name = "gimli" -version = "0.28.1" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" +checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" [[package]] name = "gix" @@ -1038,9 +1072,9 @@ dependencies = [ [[package]] name = "gix-fs" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f78f7d6dcda7a5809efd73a33b145e3dce7421c460df21f32126f9732736b0c" +checksum = "c3338ff92a2164f5209f185ec0cd316f571a72676bb01d27e22f2867ba69f77a" dependencies = [ "fastrand", "gix-features", @@ -1049,9 +1083,9 @@ dependencies = [ [[package]] name = "gix-glob" -version = "0.16.2" +version = "0.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "682bdc43cb3c00dbedfcc366de2a849b582efd8d886215dbad2ea662ec156bb5" +checksum = "c2a29ad0990cf02c48a7aac76ed0dbddeb5a0d070034b83675cc3bbf937eace4" dependencies = [ "bitflags 2.5.0", "bstr", @@ -1315,9 +1349,9 @@ dependencies = [ [[package]] name = "gix-ref" -version = "0.44.0" +version = "0.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b36752b448647acd59c9668fdd830b16d07db1e6d9c3b3af105c1605a6e23d9" +checksum = "3394a2997e5bc6b22ebc1e1a87b41eeefbcfcff3dbfa7c4bd73cb0ac8f1f3e2e" dependencies = [ "gix-actor", "gix-date", @@ -1711,9 +1745,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +checksum = "7b875924a60b96e5d7b9ae7b066540b1dd1cbd90d1828f54c92e02a283351c56" dependencies = [ "bytes", "futures-channel", @@ -1762,6 +1796,25 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "include_dir" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18762faeff7122e89e0857b02f7ce6fcc0d101d5e9ad2ad7846cc01d61b7f19e" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b139284b5cf57ecfa712bcc66950bb635b31aff41c188e8a4cfc758eca374a3f" +dependencies = [ + "proc-macro2", + "quote", +] + [[package]] name = "indexmap" version = "2.2.6" @@ -1835,12 +1888,12 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jaq-core" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03d6a5713b8f33675abfac79d1db0022a3f28764b2a6b96a185c199ad8dab86d" +checksum = "eaadb25ab6563759089aa897f8b8d609d108a8531d742b17a2a80220de685440" dependencies = [ "aho-corasick", - "base64 0.21.7", + "base64 0.22.1", "hifijson", "jaq-interpret", "libm", @@ -1852,9 +1905,9 @@ dependencies = [ [[package]] name = "jaq-interpret" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f569e38e5fc677db8dfda89ee0b4c25b3f53e811b16434fd14bdc5b43fc362ac" +checksum = "1bbde0d83ef94bb43e862a845e9ecc3fb87210d11b1a844850a9d1a678a496cf" dependencies = [ "ahash", "dyn-clone", @@ -1877,9 +1930,9 @@ dependencies = [ [[package]] name = "jaq-std" -version = "1.2.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d7871c59297cbfdd18f6f1bbbafaad24e97fd555ee1e2a1be7a40a5a20f551a" +checksum = "6d80737e5c3dfdc0cb0ef05ec35ac1eaccd7e79ad712d1f934995ccf6a2cda39" dependencies = [ "bincode", "jaq-parse", @@ -2260,9 +2313,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.2" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" dependencies = [ "memchr", ] @@ -2273,6 +2326,21 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "opentelemetry" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900d57987be3f2aeb70d385fff9b27fb74c5723cc9a52d904d4f9c807a0667bf" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "once_cell", + "pin-project-lite", + "thiserror", + "urlencoding", +] + [[package]] name = "opentelemetry" version = "0.23.0" @@ -2287,6 +2355,44 @@ dependencies = [ "thiserror", ] +[[package]] +name = "opentelemetry-stdout" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bdf28b381f23afcd150afc0b38a4183dd321fc96320c1554752b6b761648f78" +dependencies = [ + "async-trait", + "chrono", + "futures-util", + "opentelemetry 0.22.0", + "opentelemetry_sdk", + "ordered-float", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e90c7113be649e31e9a0f8b5ee24ed7a16923b322c3c5ab6367469c049d6b7e" +dependencies = [ + "async-trait", + "crossbeam-channel", + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "once_cell", + "opentelemetry 0.22.0", + "ordered-float", + "percent-encoding", + "rand 0.8.5", + "serde_json", + "thiserror", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -2318,9 +2424,9 @@ checksum = "8fecab3723493c7851f292cb060f3ee1c42f19b8d749345d0d7eaf3fd19aa62d" [[package]] name = "parking_lot" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e4af0ca4f6caed20e900d564c242b8e5d4903fdacf31d3daf527b66fe6f42fb" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" dependencies = [ "lock_api", "parking_lot_core", @@ -2484,9 +2590,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.83" +version = "1.0.84" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" dependencies = [ "unicode-ident", ] @@ -3076,9 +3182,9 @@ checksum = "b7401a30af6cb5818bb64852270bb722533397edcfc7344954a38f420819ece2" [[package]] name = "syn" -version = "2.0.65" +version = "2.0.66" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" dependencies = [ "proc-macro2", "quote", @@ -3316,7 +3422,6 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -3337,7 +3442,6 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", "pin-project-lite", "tracing-core", ] @@ -3617,14 +3721,19 @@ version = "0.1.0" dependencies = [ "assert_cmd", "clap", + "include_dir", + "miette", "rayon", "serde", "serde_json", "serde_yaml", + "tempdir", + "thiserror", "walkdir", "weaver_cache", "weaver_checker", "weaver_common", + "weaver_diff", "weaver_forge", "weaver_resolved_schema", "weaver_resolver", @@ -3638,9 +3747,11 @@ version = "0.1.0" dependencies = [ "dirs", "gix", + "miette", "serde", "tempdir", "thiserror", + "weaver_common", ] [[package]] @@ -3662,7 +3773,7 @@ dependencies = [ name = "weaver_codegen_test" version = "0.1.0" dependencies = [ - "opentelemetry", + "opentelemetry 0.23.0", "walkdir", "weaver_cache", "weaver_common", @@ -3687,6 +3798,7 @@ name = "weaver_diff" version = "0.1.0" dependencies = [ "similar", + "walkdir", ] [[package]] @@ -3695,6 +3807,7 @@ version = "0.1.0" dependencies = [ "convert_case", "globset", + "include_dir", "indexmap", "itertools 0.12.1", "jaq-core", @@ -3704,6 +3817,9 @@ dependencies = [ "markdown", "miette", "minijinja", + "opentelemetry 0.22.0", + "opentelemetry-stdout", + "opentelemetry_sdk", "rayon", "regex", "serde", @@ -3990,9 +4106,9 @@ checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" [[package]] name = "winnow" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3c52e9c97a68071b23e836c9380edae937f17b9c4667bd021973efc689f618d" +checksum = "86c949fede1d13936a99f14fafd3e76fd642b556dd2ce96287fbe2e0151bfac6" dependencies = [ "memchr", ] @@ -4037,6 +4153,6 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" diff --git a/Cargo.toml b/Cargo.toml index 79496511..7e7e51b0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,8 @@ anyhow = "1.0.83" itertools = "0.12.1" globset = { version = "0.4.14", features = ["serde1"] } miette = { version = "7.2.0", features = ["fancy", "serde"] } +include_dir = "0.7.3" +tempdir = "0.3.7" # Features definition ========================================================= [features] @@ -69,10 +71,15 @@ serde.workspace = true serde_yaml.workspace = true serde_json.workspace = true walkdir.workspace = true +include_dir.workspace = true +thiserror.workspace = true +miette.workspace = true rayon = "1.10.0" [dev-dependencies] +weaver_diff = { path = "crates/weaver_diff" } +tempdir.workspace = true assert_cmd = "2.0.14" [profile.release] diff --git a/Dockerfile b/Dockerfile index 58387c05..5cc108aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,14 +3,16 @@ FROM rust:1.76.0-alpine3.18 as weaver-build RUN apk add musl-dev WORKDIR /build + # list out directories to avoid pulling local cargo `target/` COPY Cargo.toml /build/Cargo.toml COPY Cargo.lock /build/Cargo.lock COPY crates /build/crates COPY data /build/data COPY src /build/src -COPY tests build/tests -COPY diagnostic_templates /build/diagnostic_templates +COPY tests /build/tests +COPY default_diagnostic_templates /build/default_diagnostic_templates + # Don't build release, so we get template debugging output. RUN cargo build diff --git a/README.md b/README.md index fc6aede9..e0f51c98 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![codecov](https://codecov.io/gh/open-telemetry/weaver/graph/badge.svg?token=tmWKFoMT2G)](https://codecov.io/gh/open-telemetry/weaver) [![build](https://github.com/open-telemetry/weaver/actions/workflows/audit.yml/badge.svg)](https://github.com/open-telemetry/weaver/actions/workflows/audit.yml) [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Slack](https://img.shields.io/badge/Slack-OpenTelemetry_Weaver-purple)](https://cloud-native.slack.com/archives/C0697EXNTL3) ---- [Getting started](#getting-started) | [Main commands](#main-commands) | [Generate Doc & Code](crates/weaver_forge/README.md) | [Architecture](docs/architecture.md) | [Change log](CHANGELOG.md) | [Contributing](CONTRIBUTING.md) | [Links](#links) | @@ -130,6 +131,7 @@ Telemetry Schemas. ## Documentation - [Weaver Architecture](docs/architecture.md): A document detailing the architecture of the project. +- [Weaver Configuration](docs/weaver-config.md): A document detailing the configuration options available. - [Weaver Forge](crates/weaver_forge/README.md): An integrated template engine designed to generate documentation and code based on semantic conventions. - [Weaver Checker](crates/weaver_policy_engine/README.md): An integrated policy diff --git a/crates/weaver_cache/Cargo.toml b/crates/weaver_cache/Cargo.toml index 77ac6ec9..cfa5c5f0 100644 --- a/crates/weaver_cache/Cargo.toml +++ b/crates/weaver_cache/Cargo.toml @@ -12,6 +12,8 @@ rust-version.workspace = true workspace = true [dependencies] +weaver_common = { path = "../weaver_common" } + tempdir = "0.3.7" dirs = "5.0.1" gix = { version = "0.63.0", default-features = false, features = [ @@ -24,4 +26,5 @@ gix = { version = "0.63.0", default-features = false, features = [ thiserror.workspace = true serde.workspace = true +miette.workspace = true diff --git a/crates/weaver_cache/allowed-external-types.toml b/crates/weaver_cache/allowed-external-types.toml index 95fadf37..9cb59a98 100644 --- a/crates/weaver_cache/allowed-external-types.toml +++ b/crates/weaver_cache/allowed-external-types.toml @@ -4,4 +4,5 @@ # the public API. Ideally this can have a few exceptions as possible. allowed_external_types = [ "serde::ser::Serialize", + "miette::protocol::Diagnostic", ] \ No newline at end of file diff --git a/crates/weaver_cache/src/lib.rs b/crates/weaver_cache/src/lib.rs index 4d8701ff..2d605192 100644 --- a/crates/weaver_cache/src/lib.rs +++ b/crates/weaver_cache/src/lib.rs @@ -17,11 +17,13 @@ use gix::clone::PrepareFetch; use gix::create::Kind; use gix::remote::fetch::Shallow; use gix::{create, open, progress}; +use miette::Diagnostic; use serde::Serialize; use tempdir::TempDir; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; /// An error that can occur while creating or using a cache. -#[derive(thiserror::Error, Debug, Serialize)] +#[derive(thiserror::Error, Debug, Clone, Serialize, Diagnostic)] #[non_exhaustive] pub enum Error { /// Home directory not found. @@ -54,6 +56,12 @@ pub enum Error { }, } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(vec![DiagnosticMessage::new(error)]) + } +} + /// A cache system for OTel Weaver. #[derive(Default)] pub struct Cache { diff --git a/crates/weaver_checker/src/lib.rs b/crates/weaver_checker/src/lib.rs index 821d4934..705dc4df 100644 --- a/crates/weaver_checker/src/lib.rs +++ b/crates/weaver_checker/src/lib.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 +#![allow(rustdoc::broken_intra_doc_links)] #![doc = include_str!("../README.md")] use std::fmt::{Display, Formatter}; @@ -10,6 +11,7 @@ use miette::Diagnostic; use serde::Serialize; use serde_json::to_value; use walkdir::DirEntry; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, handle_errors, WeaverError}; @@ -27,7 +29,6 @@ pub enum Error { /// An invalid policy. #[error("Invalid policy file '{file}', error: {error})")] #[diagnostic( - severity = "error", url("https://www.openpolicyagent.org/docs/latest/policy-language/"), help("Check the policy file for syntax errors.") )] @@ -41,7 +42,6 @@ pub enum Error { /// An invalid policy glob pattern. #[error("Invalid policy glob pattern '{pattern}', error: {error})")] #[diagnostic( - severity = "error", url("https://docs.rs/globset/latest/globset/"), help("Check the glob pattern for syntax errors.") )] @@ -70,7 +70,6 @@ pub enum Error { /// Violation evaluation error. #[error("Violation evaluation error: {error}")] - #[diagnostic(severity = "error")] ViolationEvaluationError { /// The error that occurred. error: String, @@ -78,7 +77,6 @@ pub enum Error { /// A policy violation error. #[error("Policy violation: {violation}, provenance: {provenance}")] - #[diagnostic(severity = "error")] PolicyViolation { /// The provenance of the violation (URL or path). provenance: String, @@ -93,13 +91,6 @@ pub enum Error { } impl WeaverError for Error { - /// Retrieves a list of error messages associated with this error. - fn errors(&self) -> Vec { - match self { - CompoundError(errors) => errors.iter().flat_map(WeaverError::errors).collect(), - _ => vec![self.to_string()], - } - } fn compound(errors: Vec) -> Error { Self::CompoundError( errors @@ -113,6 +104,21 @@ impl WeaverError for Error { } } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(match error { + CompoundError(errors) => errors + .into_iter() + .flat_map(|e| { + let diag_msgs: DiagnosticMessages = e.into(); + diag_msgs.into_inner() + }) + .collect(), + _ => vec![DiagnosticMessage::new(error)], + }) + } +} + /// A list of supported policy stages. pub enum PolicyStage { /// Policies that are evaluated before resolution. diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 45564ff3..e1974fc2 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -13,14 +13,15 @@ use std::process::exit; use weaver_cache::Cache; use weaver_common::in_memory::LogMessage; use weaver_common::{in_memory, Logger}; +use weaver_forge::file_loader::FileSystemFileLoader; use weaver_forge::registry::TemplateRegistry; -use weaver_forge::{GeneratorConfig, OutputDirective, TemplateEngine}; +use weaver_forge::{OutputDirective, TemplateEngine}; use weaver_resolver::SchemaResolver; use weaver_semconv::path::RegistryPath; use weaver_semconv::registry::SemConvRegistry; const SEMCONV_REGISTRY_PATH: &str = "./semconv_registry/"; -const TEMPLATES_PATH: &str = "./templates/"; +const TEMPLATES_PATH: &str = "./templates/registry/"; const REGISTRY_ID: &str = "test"; const TARGET: &str = "rust"; @@ -47,9 +48,9 @@ fn main() { let schema = SchemaResolver::resolve_semantic_convention_registry(&mut registry) .unwrap_or_else(|e| process_error(&logger, e)); - let config = GeneratorConfig::new(TEMPLATES_PATH.into()); - let engine = TemplateEngine::try_new(&format!("registry/{}", TARGET), config) + let loader = FileSystemFileLoader::try_new(TEMPLATES_PATH.into(), TARGET) .unwrap_or_else(|e| process_error(&logger, e)); + let engine = TemplateEngine::try_new(loader).unwrap_or_else(|e| process_error(&logger, e)); let template_registry = TemplateRegistry::try_from_resolved_registry( schema .registry(REGISTRY_ID) diff --git a/crates/weaver_common/src/diagnostic.rs b/crates/weaver_common/src/diagnostic.rs index a3b7b6b6..e1f9ce26 100644 --- a/crates/weaver_common/src/diagnostic.rs +++ b/crates/weaver_common/src/diagnostic.rs @@ -2,7 +2,7 @@ //! A generic diagnostic message -use crate::error::WeaverDiagnostic; +use crate::Logger; use miette::{Diagnostic, LabeledSpan, Report, Severity}; use serde::Serialize; use std::error::Error; @@ -42,9 +42,9 @@ pub struct MietteDiagnosticExt { #[derive(Debug, serde::Serialize)] pub struct DiagnosticMessage { /// The error - error: serde_json::Value, + pub(crate) error: serde_json::Value, /// The diagnostic message - diagnostic: MietteDiagnosticExt, + pub(crate) diagnostic: MietteDiagnosticExt, } /// A list of diagnostic messages @@ -80,6 +80,31 @@ impl DiagnosticMessage { } impl DiagnosticMessages { + /// Creates a new list of diagnostic messages + #[must_use] + pub fn new(diag_msgs: Vec) -> Self { + Self(diag_msgs) + } + + /// Logs all the diagnostic messages + pub fn log(&self, logger: impl Logger) { + self.0 + .iter() + .for_each(|msg| logger.error(&msg.diagnostic.message)); + } + + /// Returns the number of diagnostic messages + #[must_use] + pub fn len(&self) -> usize { + self.0.len() + } + + /// Returns the diagnostic messages + #[must_use] + pub fn into_inner(self) -> Vec { + self.0 + } + /// Creates a new list of diagnostic messages for a list of errors pub fn from_errors( errors: Vec, @@ -92,22 +117,25 @@ impl DiagnosticMessages { Self(vec![DiagnosticMessage::new(error)]) } - /// Returns true if any of the diagnostic messages have an error severity - /// level. + /// Returns true if all the diagnostic messages are explicitly marked as + /// warnings or advices. #[must_use] pub fn has_error(&self) -> bool { - self.0 + let non_error_count = self + .0 .iter() - .any(|message| message.diagnostic.severity == Some(Severity::Error)) + .filter(|message| { + message.diagnostic.severity == Some(Severity::Warning) + || message.diagnostic.severity == Some(Severity::Advice) + }) + .count(); + self.0.len() - non_error_count > 0 } -} -impl From - for DiagnosticMessages -{ - /// Convert errors marked with the DiagnosticMessage trait into a DiagnosticMessages. - fn from(error: T) -> Self { - Self(vec![DiagnosticMessage::new(error)]) + /// Returns true if there are no diagnostic messages + #[must_use] + pub fn is_empty(&self) -> bool { + self.0.is_empty() } } @@ -161,6 +189,8 @@ mod tests { }; let diagnostic_messages = DiagnosticMessages::from_error(error.clone()); assert_eq!(diagnostic_messages.0.len(), 1); + assert!(diagnostic_messages.has_error()); + assert!(!diagnostic_messages.is_empty()); assert_eq!( diagnostic_messages.0[0].diagnostic.message, "This is a test error" diff --git a/crates/weaver_common/src/error.rs b/crates/weaver_common/src/error.rs index a651979f..6e78e8fa 100644 --- a/crates/weaver_common/src/error.rs +++ b/crates/weaver_common/src/error.rs @@ -4,7 +4,6 @@ //! This trait is used by the logging infrastructure to print errors in //! a consistent way. -use crate::Logger; use miette::Diagnostic; use serde::Serialize; @@ -20,15 +19,6 @@ impl WeaverDiagnostic for T where T: Serialize + Diagnostic + Send + Sync + ? /// A trait for custom error handling in the `weaver` crates. pub trait WeaverError: Serialize + Diagnostic + Send + Sync { - /// Retrieves a list of error messages associated with this error. - /// For compound errors, this method should return a list of all - /// error messages. For simple errors, this method should return - /// a list with a single error message. - /// - /// # Returns - /// A `Vec` containing human-readable error messages. - fn errors(&self) -> Vec; - /// Constructs a single compound error from a collection of individuals. #[must_use] fn compound(errors: Vec) -> T; @@ -58,45 +48,3 @@ pub fn format_errors + std::fmt::Display>(errors: &[E]) -> Str .collect::>() .join("\n\n") } - -/// A trait for types that can cleanly exit the application if an error -/// is encountered. -pub trait ExitIfError { - /// Processes the `Result` and panics if it is an `Err`. - /// If `Ok`, the contained value is returned. - /// - /// # Arguments - /// * `self` - The `Result` to process. - /// * `logger` - An object implementing the `Logger` trait used to log any - /// errors. - /// - /// # Returns - /// The contained value if the result is `Ok`. - /// Panics if the result is `Err`. - fn panic_if_error(self, logger: impl Logger) -> T; -} - -/// Provides default implementations of the `ExitIfError` trait for any -/// `Result` where `E` implements `WeaverError`. -impl> ExitIfError for Result { - /// Processes the `Result` and panics if it is an `Err`. - /// If `Ok`, the contained value is returned. - /// - /// # Arguments - /// * `self` - The `Result` to process. - /// * `logger` - An object implementing the `Logger` trait used to log any - /// errors. - /// - /// # Returns - /// The contained value if the result is `Ok`. - /// Panics if the result is `Err`. - fn panic_if_error(self, logger: impl Logger) -> T { - match self { - Ok(value) => value, - Err(e) => { - e.errors().iter().for_each(|msg| logger.error(msg)); - panic!("One or several errors occurred (see above)"); - } - } - } -} diff --git a/crates/weaver_diff/Cargo.toml b/crates/weaver_diff/Cargo.toml index 8b97ae83..eb63ed6b 100644 --- a/crates/weaver_diff/Cargo.toml +++ b/crates/weaver_diff/Cargo.toml @@ -9,6 +9,7 @@ edition.workspace = true rust-version.workspace = true [dependencies] +walkdir.workspace = true similar = "2.5.0" diff --git a/crates/weaver_diff/src/lib.rs b/crates/weaver_diff/src/lib.rs index 65bc8ed4..d708e85a 100644 --- a/crates/weaver_diff/src/lib.rs +++ b/crates/weaver_diff/src/lib.rs @@ -3,6 +3,10 @@ //! This crate provides bare minimum support for colorized string differencing. use similar::TextDiff; +use std::collections::HashSet; +use std::fs; +use std::path::Path; +use walkdir::WalkDir; const GREEN: &str = "\x1b[32m"; const RED: &str = "\x1b[31m"; @@ -30,3 +34,118 @@ pub fn diff_output(original: &str, updated: &str) -> String { result.push_str(RESET); result } + +/// Displays differences between two directories and returns whether they are identical. +/// The function will print differences to stderr. +#[allow(clippy::print_stderr)] +pub fn diff_dir>(expected_dir: P, observed_dir: P) -> std::io::Result { + let mut expected_files = HashSet::new(); + let mut observed_files = HashSet::new(); + + // Walk through the first directory and add files to files1 set + for entry in WalkDir::new(&expected_dir) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.is_file() { + let relative_path = path + .strip_prefix(&expected_dir) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + _ = expected_files.insert(relative_path.to_path_buf()); + } + } + + // Walk through the second directory and add files to files2 set + for entry in WalkDir::new(&observed_dir) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if path.is_file() { + let relative_path = path + .strip_prefix(&observed_dir) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + _ = observed_files.insert(relative_path.to_path_buf()); + } + } + + // Assume directories are identical until proven otherwise + let mut are_identical = true; + + // Compare files in both sets + for file in expected_files.intersection(&observed_files) { + let file1_content = + fs::read_to_string(expected_dir.as_ref().join(file))?.replace("\r\n", "\n"); + let file2_content = + fs::read_to_string(observed_dir.as_ref().join(file))?.replace("\r\n", "\n"); + + if file1_content != file2_content { + are_identical = false; + eprintln!( + "Files {:?} and {:?} are different", + expected_dir.as_ref().join(file), + observed_dir.as_ref().join(file) + ); + + eprintln!( + "Found differences:\n{}", + diff_output(&file1_content, &file2_content) + ); + break; + } + } + // If any file is unique to one directory, they are not identical + let not_in_observed = expected_files + .difference(&observed_files) + .collect::>(); + if !not_in_observed.is_empty() { + are_identical = false; + eprintln!("Observed output is missing files: {:?}", not_in_observed); + } + let not_in_expected = observed_files + .difference(&expected_files) + .collect::>(); + if !not_in_expected.is_empty() { + are_identical = false; + eprintln!( + "Observed output has unexpected files: {:?}", + not_in_expected + ); + } + + Ok(are_identical) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_diff_output() { + let original = "Hello, world!"; + let updated = "Hello, there!"; + let diff = diff_output(original, updated); + assert_eq!( + diff, + "\u{1b}[31m- Hello, world!\n\u{1b}[32m+ Hello, there!\n\u{1b}[0m" + ); + } + + #[test] + fn test_diff_dir() { + let expected_dir = "./src"; + let observed_dir = "./src"; + + let are_identical = + diff_dir(&expected_dir, &observed_dir).expect("Failed to diff directories"); + assert!(are_identical); + + let expected_dir = "./src"; + let observed_dir = "../weaver_cache/src"; + + let are_identical = + diff_dir(&expected_dir, &observed_dir).expect("Failed to diff directories"); + assert!(!are_identical); + } +} diff --git a/crates/weaver_forge/Cargo.toml b/crates/weaver_forge/Cargo.toml index 2f109949..7d04b8d9 100644 --- a/crates/weaver_forge/Cargo.toml +++ b/crates/weaver_forge/Cargo.toml @@ -36,4 +36,11 @@ serde_json.workspace = true rayon.workspace = true walkdir.workspace = true globset.workspace = true -miette.workspace = true \ No newline at end of file +miette.workspace = true +include_dir.workspace = true + +[dev-dependencies] +opentelemetry = { version = "0.22.0", features = ["trace", "metrics", "logs", "otel_unstable"] } +opentelemetry_sdk = { version = "0.22.1", features = ["trace", "metrics", "logs"] } +opentelemetry-stdout = { version = "0.3.0", features = ["trace", "metrics", "logs"] } + diff --git a/crates/weaver_forge/README.md b/crates/weaver_forge/README.md index 7e177f19..0654b07a 100644 --- a/crates/weaver_forge/README.md +++ b/crates/weaver_forge/README.md @@ -72,116 +72,8 @@ its choice. ## Configuration File - `weaver.yaml` -The configuration file `weaver.yaml` is optional. It allows configuring the -following options: - -```yaml -# Uncomment this section to specify the configuration of the `text_map` filter. -#text_maps: -# java_types: -# int: int -# double: double -# boolean: boolean -# string: String -# java_keys: -# int: intKey -# double: doubleKey -# boolean: booleanKey -# string: stringKey - -# Deprecated, please use text_maps instead -# Configuration of the type mapping. This is useful to generate code in a -# specific language. This is optional. -# Example: {{ attribute.type | type_mapping }} will be evaluated as int64 -# if the semconv attribute type is int. -#type_mapping: -# int: int64 -# double: double -# boolean: bool -# string: string -# "int[]": "[]int64" -# "double[]": "[]double" -# "boolean[]": "[]bool" -# "string[]": "[]string" -# ... - -# Configuration of the template engine (optional) -#template_syntax: -# block_start: "{%" -# block_end: "%}" -# variable_start: "{{" -# variable_end: "}}" -# comment_start: "{#" -# comment_end: "#}" - -# Please uncomment the following section to specify a list of acronyms that -# will be interpreted by the acronym filter. This is optional. -# acronyms: ["iOS", "HTTP", "API", "SDK", "CLI", "URL", "JSON", "XML", "HTML"] - -# Please uncomment the following templates to override the default template -# mapping. Each template mapping specifies a jaq filter (compatible with jq) -# to apply to every file matching the pattern. The application_mode specifies -# how the template should be applied. The application_mode can be `each` or -# `single`. The `each` mode will evaluate the template for each object selected -# by the jaq filter. The `single` mode will evaluate the template once with all -# the objects selected by the jq filter. -# -# Note: jaq is a Rust reimplementation of jq. Most of the jq filters are -# supported. For more information, see https://github.com/01mf02/jaq -# -# templates: -# - pattern: "**/registry.md" -# filter: "." -# application_mode: single -# - pattern: "**/attribute_group.md" -# filter: ".groups[] | select(.type == \"attribute_group\")" -# application_mode: each -# - pattern: "**/attribute_groups.md" -# filter: ".groups[] | select(.type == \"attribute_group\")" -# application_mode: single -# - pattern: "**/event.md" -# filter: ".groups[] | select(.type == \"event\")" -# application_mode: each -# - pattern: "**/events.md" -# filter: ".groups[] | select(.type == \"event\")" -# application_mode: single -# - pattern: "**/group.md" -# filter: ".groups[] | select(.type == \"group\")" -# application_mode: each -# - pattern: "**/groups.md" -# filter: ".groups[] | select(.type == \"group\")" -# application_mode: single -# - pattern: "**/metric.md" -# filter: ".groups[] | select(.type == \"metric\")" -# application_mode: each -# - pattern: "**/metrics.md" -# filter: ".groups[] | select(.type == \"metric\")" -# application_mode: single -# - pattern: "**/metric_group.md" -# filter: ".groups[] | select(.type == \"metric_group\")" -# application_mode: each -# - pattern: "**/metric_groups.md" -# filter: ".groups[] | select(.type == \"metric_group\")" -# application_mode: single -# - pattern: "**/resource.md" -# filter: ".groups[] | select(.type == \"resource\")" -# application_mode: each -# - pattern: "**/resources.md" -# filter: ".groups[] | select(.type == \"resource\")" -# application_mode: single -# - pattern: "**/scope.md" -# filter: ".groups[] | select(.type == \"scope\")" -# application_mode: each -# - pattern: "**/scopes.md" -# filter: ".groups[] | select(.type == \"scope\")" -# application_mode: single -# - pattern: "**/span.md" -# filter: ".groups[] | select(.type == \"span\")" -# application_mode: each -# - pattern: "**/spans.md" -# filter: ".groups[] | select(.type == \"span\")" -# application_mode: single -``` +The configuration file `weaver.yaml` is optional. See the [Weaver Configuration File](/docs/weaver-config.md) +documentation for more details. ## Jinja Filters diff --git a/crates/weaver_forge/allowed-external-types.toml b/crates/weaver_forge/allowed-external-types.toml index 72d1780e..4994cbd8 100644 --- a/crates/weaver_forge/allowed-external-types.toml +++ b/crates/weaver_forge/allowed-external-types.toml @@ -11,4 +11,5 @@ allowed_external_types = [ "weaver_semconv::*", "minijinja::value::Value", "miette::protocol::Diagnostic", + "include_dir::dir::Dir", ] \ No newline at end of file diff --git a/crates/weaver_forge/overloaded-templates/test/group.md b/crates/weaver_forge/overloaded-templates/test/group.md new file mode 100644 index 00000000..a817d676 --- /dev/null +++ b/crates/weaver_forge/overloaded-templates/test/group.md @@ -0,0 +1,58 @@ +{%- set file_name = ctx.id | file_name -%} +{{- template.set_file_name("group/" ~ file_name ~ ".md") -}} + +# Overloaded Group `{{ ctx.id }}` ({{ ctx.type }}) + +## Brief + +{{ ctx.brief | trim }} + +prefix: {{ ctx.prefix }} + +## Attributes + +{% for attribute in ctx.attributes %} +### Attribute `{{ attribute.name }}` + +{{ attribute.brief }} + +{% if attribute.note %} +{{ attribute.note | trim }} +{% endif %} + +{%- if attribute.requirement_level == "required" %} +- Requirement Level: Required +{%- elif attribute.requirement_level.conditionally_required %} +- Requirement Level: Conditionally Required - {{ attribute.requirement_level.conditionally_required }} +{%- elif attribute.requirement_level == "recommended" %} +- Requirement Level: Recommended +{%- else %} +- Requirement Level: Optional +{%- endif %} +{% if attribute.tag %} +- Tag: {{ attribute.tag }} +{% endif %} +{%- include "attribute_type.j2" %} +{%- include "examples.j2" -%} +{%- if attribute.sampling_relevant %} +- Sampling relevant: {{ attribute.sampling_relevant }} +{%- endif %} +{%- if attribute.deprecated %} +- Deprecated: {{ attribute.deprecated }} +{%- endif %} +{% if attribute.stability %} +- Stability: {{ attribute.stability | capitalize }} +{% endif %} +{% endfor %} + +## Lineage + +Source file: {{ ctx.lineage.source_file | replace("\\", "/") }} + +{% for item in ctx.lineage.attributes -%} +attribute: {{ item.id }} + - source group: {{ item.source_group }} + - inherited fields: {{ item.inherited_fields | join(", ") }} + - locally overridden fields: {{ item.locally_overridden_fields | join(", ") }} + +{% endfor -%} \ No newline at end of file diff --git a/crates/weaver_forge/src/config.rs b/crates/weaver_forge/src/config.rs index a9e2f207..bc7dfa32 100644 --- a/crates/weaver_forge/src/config.rs +++ b/crates/weaver_forge/src/config.rs @@ -13,6 +13,7 @@ use serde::Deserialize; use crate::error::Error; use crate::error::Error::InvalidConfigFile; +use crate::file_loader::FileLoader; use crate::filter::Filter; use crate::WEAVER_YAML; @@ -353,16 +354,16 @@ impl CaseConvention { } impl TargetConfig { - pub fn try_new(lang_path: &Path) -> Result { - let config_file = lang_path.join(WEAVER_YAML); - if config_file.exists() { - let reader = - std::fs::File::open(config_file.clone()).map_err(|e| InvalidConfigFile { - config_file: config_file.clone(), - error: e.to_string(), - })?; - serde_yaml::from_reader(reader).map_err(|e| InvalidConfigFile { - config_file: config_file.clone(), + pub fn try_new(loader: &dyn FileLoader) -> Result { + let weaver_file = loader + .load_file(WEAVER_YAML) + .map_err(|e| InvalidConfigFile { + config_file: WEAVER_YAML.into(), + error: e.to_string(), + })?; + if let Some(weaver_file) = weaver_file { + serde_yaml::from_str(&weaver_file).map_err(|e| InvalidConfigFile { + config_file: WEAVER_YAML.into(), error: e.to_string(), }) } else { diff --git a/crates/weaver_forge/src/debug.rs b/crates/weaver_forge/src/debug.rs index 0941b145..df151b01 100644 --- a/crates/weaver_forge/src/debug.rs +++ b/crates/weaver_forge/src/debug.rs @@ -117,19 +117,23 @@ mod tests { // <-- These 3 errors are deduplicated root_path: "target".to_owned(), target: "target".to_owned(), + error: "error".to_owned(), }, TargetNotSupported { root_path: "target".to_owned(), target: "target".to_owned(), + error: "error".to_owned(), }, TargetNotSupported { root_path: "target".to_owned(), target: "target".to_owned(), + error: "error".to_owned(), }, TargetNotSupported { // <-- This error is not deduplicated root_path: "target".to_owned(), target: "other_target".to_owned(), + error: "error".to_owned(), }, ]); print_dedup_errors(logger.clone(), error); diff --git a/crates/weaver_forge/src/error.rs b/crates/weaver_forge/src/error.rs index 40923a78..78706abe 100644 --- a/crates/weaver_forge/src/error.rs +++ b/crates/weaver_forge/src/error.rs @@ -2,19 +2,27 @@ //! Error types and utilities. -use crate::error::Error::CompoundError; +use std::{path::PathBuf, str::FromStr}; + use miette::Diagnostic; use serde::Serialize; -use std::{path::PathBuf, str::FromStr}; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; + use weaver_common::error::WeaverError; use weaver_resolved_schema::attribute::AttributeRef; +use crate::error::Error::CompoundError; + /// Errors emitted by this crate. #[derive(thiserror::Error, Debug, Clone, Diagnostic, Serialize)] #[non_exhaustive] pub enum Error { /// Invalid config file. #[error("Invalid config file `{config_file}`: {error}")] + #[diagnostic( + help("Please check the syntax of the weaver.yaml file."), + url("https://github.com/open-telemetry/weaver/blob/main/docs/weaver-config.md") + )] InvalidConfigFile { /// Config file. config_file: PathBuf, @@ -23,14 +31,18 @@ pub enum Error { }, /// Target not found. - #[error( - "Target `{target}` not found in `{root_path}`. Use the command `targets` to list supported targets." + #[error("Target `{target}` not found in `{root_path}`. Error: {error}")] + #[diagnostic( + help("Please check the subdirectories of the template path for the target."), + url("https://github.com/open-telemetry/weaver/blob/main/crates/weaver_forge/README.md") )] TargetNotSupported { /// Root path. root_path: String, /// Target name. target: String, + /// Error message. + error: String, }, /// Invalid template directory. @@ -60,6 +72,15 @@ pub enum Error { error: String, }, + /// Error loading a file content from the file loader. + #[error("Error loading the file '{file}': {error}")] + FileLoaderError { + /// File path. + file: PathBuf, + /// Error message. + error: String, + }, + /// Template evaluation failed. #[error("Template evaluation error -> {error}")] TemplateEvaluationFailed { @@ -76,7 +97,8 @@ pub enum Error { InvalidTemplateDirectory(PathBuf), /// Template file name undefined. - #[error("File name undefined in the template `{template}`. To resolve this, use the function `config(file_name = )` to set the file name.")] + #[error("File name undefined in the template `{template}`. To resolve this, use the function `config(file_name = )` to set the file name." + )] TemplateFileNameUndefined { /// Template path. template: PathBuf, @@ -129,18 +151,26 @@ pub enum Error { } impl WeaverError for Error { - /// Retrieves a list of error messages associated with this error. - fn errors(&self) -> Vec { - match self { - CompoundError(errors) => errors.iter().flat_map(|e| e.errors()).collect(), - _ => vec![self.to_string()], - } - } fn compound(errors: Vec) -> Error { Self::compound_error(errors) } } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(match error { + CompoundError(errors) => errors + .into_iter() + .flat_map(|e| { + let diag_msgs: DiagnosticMessages = e.into(); + diag_msgs.into_inner() + }) + .collect(), + _ => vec![DiagnosticMessage::new(error)], + }) + } +} + #[must_use] pub(crate) fn jinja_err_convert(e: minijinja::Error) -> Error { Error::WriteGeneratedCodeFailed { diff --git a/crates/weaver_forge/src/file_loader.rs b/crates/weaver_forge/src/file_loader.rs new file mode 100644 index 00000000..b9485426 --- /dev/null +++ b/crates/weaver_forge/src/file_loader.rs @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Set of supported template loaders + +use std::path::{Path, PathBuf}; +use std::{fs, io}; + +use minijinja::ErrorKind; +use walkdir::WalkDir; + +use crate::error::Error; +use crate::error::Error::TargetNotSupported; + +/// An abstraction for loading files from a file system, embedded directory, etc. +pub trait FileLoader { + /// Returns a textual representation of the root path of the loader. + /// This representation is mostly used for debugging and logging purposes. + fn root(&self) -> &Path; + + /// Returns a list of all files in the loader's root directory. + fn all_files(&self) -> Vec; + + /// Returns the content of a file from a given name. + fn load_file(&self, file: &str) -> Result, Error>; +} + +/// A loader that loads files from the embedded directory in the binary of Weaver or +/// from the file system if the `local_dir/target` directory exists. +/// +/// This is useful for loading templates and other files that are embedded in the binary but +/// can be overridden by the user. +pub struct EmbeddedFileLoader { + target: String, + embedded_dir: &'static include_dir::Dir<'static>, + fs_loader: Option, +} + +impl EmbeddedFileLoader { + /// Create a new embedded file loader. + /// + /// If the `local_dir/target` directory exists, the loader will use the file system loader. + /// Otherwise, it will use the embedded directory. + pub fn try_new( + embedded_dir: &'static include_dir::Dir<'static>, + local_dir: PathBuf, + target: &str, + ) -> Result { + let target_embedded_dir = embedded_dir.get_dir(target); + let target_local_dir = local_dir.join(target); + if let Some(dir) = target_embedded_dir { + Ok(Self { + target: target.to_owned(), + embedded_dir: dir, + fs_loader: if target_local_dir.exists() { + Some(FileSystemFileLoader::try_new(local_dir, target)?) + } else { + None + }, + }) + } else { + Err(TargetNotSupported { + root_path: embedded_dir.path().to_string_lossy().to_string(), + target: target.to_owned(), + error: "Target not found".to_owned(), + }) + } + } +} + +impl FileLoader for EmbeddedFileLoader { + /// Returns a textual representation of the root path of the loader. + /// This representation is mostly used for debugging and logging purposes. + fn root(&self) -> &Path { + if let Some(fs_loader) = &self.fs_loader { + return fs_loader.root(); + } + self.embedded_dir.path() + } + + /// Returns a list of all files in the loader's root directory. + fn all_files(&self) -> Vec { + fn collect_files<'a>(dir: &'a include_dir::Dir<'a>, paths: &mut Vec) { + for entry in dir.entries() { + match entry { + include_dir::DirEntry::Dir(d) => collect_files(d, paths), + include_dir::DirEntry::File(f) => { + let relative_path = f.path().strip_prefix(dir.path()).expect("Failed to strip prefix. Should never happen as `dir.path()` is initial root."); + paths.push(relative_path.to_owned()); + } + } + } + } + + if let Some(fs_loader) = &self.fs_loader { + return fs_loader.all_files(); + } + + let mut files = vec![]; + collect_files(self.embedded_dir, &mut files); + files + } + + /// Returns the content of a file from a given name. + fn load_file(&self, file: &str) -> Result, Error> { + if let Some(fs_loader) = &self.fs_loader { + return fs_loader.load_file(file); + } + + let name = format!("{}/{}", self.target, file); + match self.embedded_dir.get_file(name) { + Some(file) => Ok(Some( + file.contents_utf8() + .ok_or_else(|| Error::FileLoaderError { + file: file.path().to_owned(), + error: "Failed to read file contents".to_owned(), + })? + .to_owned(), + )), + None => Ok(None), + } + } +} + +/// A loader that loads files from the file system. +pub struct FileSystemFileLoader { + dir: PathBuf, +} + +impl FileSystemFileLoader { + /// Create a new file system loader + pub fn try_new(dir: PathBuf, target: &str) -> Result { + let dir = safe_join(&dir, target).map_err(|e| TargetNotSupported { + root_path: dir.to_string_lossy().to_string(), + target: target.to_owned(), + error: e.to_string(), + })?; + Ok(Self { dir }) + } +} + +impl FileLoader for FileSystemFileLoader { + /// Returns a textual representation of the root path of the loader. + /// This representation is mostly used for debugging and logging purposes. + fn root(&self) -> &Path { + self.dir.as_path() + } + + /// Returns a list of all files in the loader's root directory. + fn all_files(&self) -> Vec { + // List all files in the target directory and its subdirectories + let files: Vec = WalkDir::new(self.dir.clone()) + .into_iter() + .filter_map(|e| { + // Skip directories that the owner of the running process does not + // have permission to access + e.ok() + }) + .filter(|dir_entry| dir_entry.path().is_file()) + .map(|dir_entry| dir_entry.into_path() + .strip_prefix(&self.dir) + .expect("Failed to strip prefix. Should never happen as `self.dir` is initial root.").to_owned()) + .collect(); + + files + } + + /// Returns a function that loads a file from a given name + /// Based on MiniJinja loader semantics, the function should return `Ok(None)` if the template + /// does not exist. + fn load_file(&self, file: &str) -> Result, Error> { + let path = if let Ok(path) = safe_join(&self.dir, file) { + path + } else { + return Ok(None); + }; + match fs::read_to_string(path.clone()) { + Ok(result) => Ok(Some(result)), + Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(Error::FileLoaderError { + file: path, + error: err.to_string(), + }), + } + } +} + +// Combine a root path and a template name, ensuring that the combined path is +// a subdirectory of the base path. +fn safe_join(root: &Path, template: &str) -> Result { + let mut path = root.to_path_buf(); + path.push(template); + + // Canonicalize the paths to resolve any `..` or `.` components + let canonical_root = root.canonicalize().map_err(|e| { + minijinja::Error::new( + ErrorKind::InvalidOperation, + format!("Failed to canonicalize root path: {}", e), + ) + })?; + let canonical_combined = path.canonicalize().map_err(|e| { + let curr_dir = std::env::current_dir().expect("Failed to get current directory"); + minijinja::Error::new( + ErrorKind::InvalidOperation, + format!("Failed to canonicalize the path '{}' (error: {}). The current working directory is '{}'", path.display(), e, curr_dir.display()), + ) + })?; + + // Verify that the canonical combined path starts with the canonical root path + if canonical_combined.starts_with(&canonical_root) { + Ok(canonical_combined) + } else { + Err(minijinja::Error::new( + ErrorKind::InvalidOperation, + format!( + "The combined path is not a subdirectory of the root path: {:?} -> {:?}", + canonical_root, canonical_combined + ), + )) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + + use include_dir::{include_dir, Dir}; + + use super::*; + + static EMBEDDED_TEMPLATES: Dir<'_> = include_dir!("crates/weaver_forge/templates"); + + #[test] + fn test_template_loader() { + let embedded_loader = EmbeddedFileLoader::try_new( + &EMBEDDED_TEMPLATES, + PathBuf::from("./does-not-exist"), + "test", + ) + .unwrap(); + let embedded_content = embedded_loader.load_file("group.md"); + assert!(embedded_content.is_ok()); + let embedded_content = embedded_content.unwrap().unwrap(); + assert!(embedded_content.contains("# Group `{{ ctx.id }}` ({{ ctx.type }})")); + + let overloaded_embedded_loader = EmbeddedFileLoader::try_new( + &EMBEDDED_TEMPLATES, + PathBuf::from("./overloaded-templates"), + "test", + ) + .unwrap(); + let overloaded_embedded_content = overloaded_embedded_loader.load_file("group.md"); + assert!(overloaded_embedded_content.is_ok()); + let overloaded_embedded_content = overloaded_embedded_content.unwrap().unwrap(); + assert!(overloaded_embedded_content + .contains("# Overloaded Group `{{ ctx.id }}` ({{ ctx.type }})")); + + let fs_loader = + FileSystemFileLoader::try_new(PathBuf::from("./templates"), "test").unwrap(); + let fs_content = fs_loader.load_file("group.md"); + assert!(fs_content.is_ok()); + let fs_content = fs_content.unwrap().unwrap(); + assert!(fs_content.contains("# Group `{{ ctx.id }}` ({{ ctx.type }})")); + + // Test content equality between embedded and file system loaders + assert_eq!(embedded_content, fs_content); + + // Test root path + assert_eq!( + embedded_loader + .root() + .file_name() + .unwrap() + .to_string_lossy() + .to_string(), + "test".to_owned() + ); + assert_eq!( + fs_loader + .root() + .file_name() + .unwrap() + .to_string_lossy() + .to_string(), + "test".to_owned() + ); + + // Test all files + let embedded_files: HashSet = embedded_loader.all_files().into_iter().collect(); + assert_eq!(embedded_files.len(), 17); + let fs_files: HashSet = fs_loader.all_files().into_iter().collect(); + assert_eq!(fs_files.len(), 17); + // Test that the files are the same between the embedded and file system loaders + assert_eq!(embedded_files, fs_files); + // Test that all the files can be loaded from the embedded loader + for file in embedded_files { + let content = embedded_loader.load_file(&file.to_string_lossy()).unwrap(); + assert!(content.is_some()); + } + // Test that all the files can be loaded from the file system loader + for file in fs_files { + let content = fs_loader.load_file(&file.to_string_lossy()).unwrap(); + assert!(content.is_some()); + } + + // Test case where the file does not exist + let embedded_content = embedded_loader.load_file("missing_file.md"); + assert!(embedded_content.is_ok()); + assert!(embedded_content.unwrap().is_none()); + + let fs_content = fs_loader.load_file("missing_file.md"); + assert!(fs_content.is_ok()); + assert!(fs_content.unwrap().is_none()); + + // Test case where the file is outside the root directory + // This should return no content although the file exists + // This is because the file is not a subdirectory of the root directory + // and is considered unsafe to load. This is a security measure to prevent + // loading files outside the root directory. + // It's None instead of an error because the contract of the loader is to return None + // if the file does not exist (cf MiniJinja doc). + let fs_content = fs_loader.load_file("../../Cargo.toml").unwrap(); + assert!(fs_content.is_none()); + } + + #[test] + fn test_embedded_loader_error() { + let embedded_loader = EmbeddedFileLoader::try_new( + &EMBEDDED_TEMPLATES, + PathBuf::from("./does-not-exist"), + "doesn't-exist", + ); + + assert!(embedded_loader.is_err()); + } + + #[test] + fn test_file_system_loader_error() { + let fs_loader = + FileSystemFileLoader::try_new(PathBuf::from("./templates"), "doesn't-exist"); + + assert!(fs_loader.is_err()); + } +} diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index 96ed97a5..605c1b13 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -3,10 +3,11 @@ #![doc = include_str!("../README.md")] use std::borrow::Cow; +use std::ffi::OsString; use std::fmt::{Debug, Display, Formatter}; +use std::fs; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; -use std::{fs, io}; use minijinja::syntax::SyntaxConfig; use minijinja::value::{from_args, Object}; @@ -14,12 +15,11 @@ use minijinja::{Environment, ErrorKind, State, Value}; use rayon::iter::IntoParallelIterator; use rayon::iter::ParallelIterator; use serde::Serialize; -use walkdir::{DirEntry, WalkDir}; use error::Error; use error::Error::{ - ContextSerializationFailed, InvalidTemplateDir, InvalidTemplateFile, TargetNotSupported, - TemplateEvaluationFailed, WriteGeneratedCodeFailed, + ContextSerializationFailed, InvalidTemplateFile, TemplateEvaluationFailed, + WriteGeneratedCodeFailed, }; use weaver_common::error::handle_errors; use weaver_common::Logger; @@ -28,41 +28,20 @@ use crate::config::{ApplicationMode, TargetConfig}; use crate::debug::error_summary; use crate::error::Error::InvalidConfigFile; use crate::extensions::{ansi, case, code, otel, util}; +use crate::file_loader::FileLoader; use crate::registry::{TemplateGroup, TemplateRegistry}; mod config; pub mod debug; pub mod error; pub mod extensions; +pub mod file_loader; mod filter; pub mod registry; /// Name of the Weaver configuration file. pub const WEAVER_YAML: &str = "weaver.yaml"; -/// General configuration for the generator. -pub struct GeneratorConfig { - /// Root directory for the templates. - root_dir: PathBuf, -} - -impl Default for GeneratorConfig { - /// Create a new generator configuration with default values. - fn default() -> Self { - Self { - root_dir: PathBuf::from("templates"), - } - } -} - -impl GeneratorConfig { - /// Create a new generator configuration with the given root directory. - #[must_use] - pub fn new(root_dir: PathBuf) -> Self { - Self { root_dir } - } -} - /// Enumeration defining where the output of program execution should be directed. #[derive(Debug, Clone)] pub enum OutputDirective { @@ -119,8 +98,8 @@ impl Object for TemplateObject { /// Template engine for generating artifacts from a semantic convention /// registry and telemetry schema. pub struct TemplateEngine { - /// Template path - path: PathBuf, + /// File loader used by the engine. + file_loader: Arc, /// Target configuration target_config: TargetConfig, @@ -158,21 +137,11 @@ impl TryInto for NewContext<'_> { impl TemplateEngine { /// Create a new template engine for the given target or return an error if /// the target does not exist or is not a directory. - pub fn try_new(target: &str, config: GeneratorConfig) -> Result { - // Check if the target is supported. - // A target is supported if a template directory exists for it. - let target_path = config.root_dir.join(target); - - if !target_path.exists() { - return Err(TargetNotSupported { - root_path: config.root_dir.to_string_lossy().to_string(), - target: target.to_owned(), - }); - } - + pub fn try_new(loader: impl FileLoader + Send + Sync + 'static) -> Result { + let target_config = TargetConfig::try_new(&loader)?; Ok(Self { - path: target_path.clone(), - target_config: TargetConfig::try_new(&target_path)?, + file_loader: Arc::new(loader), + target_config, }) } @@ -223,17 +192,7 @@ impl TemplateEngine { output_dir: &Path, output_directive: &OutputDirective, ) -> Result<(), Error> { - // List all files in the target directory and its subdirectories - let files: Vec = WalkDir::new(self.path.clone()) - .into_iter() - .filter_map(|e| { - // Skip directories that the owner of the running process does not - // have permission to access - e.ok() - }) - .filter(|dir_entry| dir_entry.path().is_file()) - .collect(); - + let files = self.file_loader.all_files(); let tmpl_matcher = self.target_config.template_matcher()?; // Create a read-only context for the filter evaluations @@ -253,18 +212,8 @@ impl TemplateEngine { // independently and in parallel with the same template. let errs = files .into_par_iter() - .filter_map(|file| { - let relative_path = match file.path().strip_prefix(&self.path) { - Ok(relative_path) => relative_path, - Err(e) => { - return Some(InvalidTemplateDir { - template_dir: self.path.clone(), - error: e.to_string(), - }); - } - }; - - for template in tmpl_matcher.matches(relative_path) { + .filter_map(|relative_path| { + for template in tmpl_matcher.matches(relative_path.clone()) { let filtered_result = match template.filter.apply(context.clone()) { Ok(result) => result, Err(e) => return Some(e), @@ -280,7 +229,7 @@ impl TemplateEngine { } .try_into() .ok()?, - relative_path, + relative_path.as_path(), output_directive, output_dir, ) { @@ -298,7 +247,7 @@ impl TemplateEngine { if let Err(e) = self.evaluate_template( log.clone(), NewContext { ctx: result }.try_into().ok()?, - relative_path, + relative_path.as_path(), output_directive, output_dir, ) { @@ -317,7 +266,7 @@ impl TemplateEngine { } .try_into() .ok()?, - relative_path, + relative_path.as_path(), output_directive, output_dir, ) { @@ -361,8 +310,6 @@ impl TemplateEngine { engine.add_global("template", Value::from_object(template_object.clone())); - log.loading(&format!("Rendering template {}", template_file)); - let template = engine.get_template(template_file).map_err(|e| { let templates = engine .templates() @@ -418,11 +365,17 @@ impl TemplateEngine { ) .build() .map_err(|e| InvalidConfigFile { - config_file: self.path.join(WEAVER_YAML), + config_file: PathBuf::from(OsString::from(&self.file_loader.root())) + .join(WEAVER_YAML), error: e.to_string(), })?; - env.set_loader(cross_platform_loader(&self.path)); + let file_loader = self.file_loader.clone(); + env.set_loader(move |name| { + file_loader + .load_file(name) + .map_err(|e| minijinja::Error::new(ErrorKind::InvalidOperation, e.to_string())) + }); env.set_syntax(syntax); code::add_filters(&mut env, &self.target_config); @@ -463,76 +416,21 @@ impl TemplateEngine { } } -// The template loader provided by MiniJinja is not cross-platform. -fn cross_platform_loader<'x, P: AsRef + 'x>( - dir: P, -) -> impl for<'a> Fn(&'a str) -> Result, minijinja::Error> + Send + Sync + 'static { - let dir = dir.as_ref().to_path_buf(); - move |name| { - let path = safe_join(&dir, name)?; - match fs::read_to_string(path) { - Ok(result) => Ok(Some(result)), - Err(err) if err.kind() == io::ErrorKind::NotFound => Ok(None), - Err(err) => Err(minijinja::Error::new( - ErrorKind::InvalidOperation, - "could not read template", - ) - .with_source(err)), - } - } -} - -// Combine a root path and a template name, ensuring that the combined path is -// a subdirectory of the base path. -fn safe_join(root: &Path, template: &str) -> Result { - let mut path = root.to_path_buf(); - path.push(template); - - // Canonicalize the paths to resolve any `..` or `.` components - let canonical_root = root.canonicalize().map_err(|e| { - minijinja::Error::new( - ErrorKind::InvalidOperation, - format!("Failed to canonicalize root path: {}", e), - ) - })?; - let canonical_combined = path.canonicalize().map_err(|e| { - minijinja::Error::new( - ErrorKind::InvalidOperation, - format!("Failed to canonicalize combined path: {}", e), - ) - })?; - - // Verify that the canonical combined path starts with the canonical root path - if canonical_combined.starts_with(&canonical_root) { - Ok(canonical_combined) - } else { - Err(minijinja::Error::new( - ErrorKind::InvalidOperation, - format!( - "The combined path is not a subdirectory of the root path: {:?} -> {:?}", - canonical_root, canonical_combined - ), - )) - } -} - #[cfg(test)] mod tests { - use std::collections::HashSet; - use std::fs; use std::path::Path; use globset::Glob; - use walkdir::WalkDir; use weaver_common::TestLogger; - use weaver_diff::diff_output; + use weaver_diff::diff_dir; use weaver_resolver::SchemaResolver; use weaver_semconv::registry::SemConvRegistry; use crate::config::{ApplicationMode, CaseConvention, TemplateConfig}; use crate::debug::print_dedup_errors; use crate::extensions::case::case_converter; + use crate::file_loader::FileSystemFileLoader; use crate::filter::Filter; use crate::registry::TemplateRegistry; use crate::OutputDirective; @@ -662,8 +560,10 @@ mod tests { #[test] fn test() { let logger = TestLogger::default(); - let mut engine = super::TemplateEngine::try_new("test", super::GeneratorConfig::default()) - .expect("Failed to create template engine"); + let loader = FileSystemFileLoader::try_new("templates".into(), "test") + .expect("Failed to create file system loader"); + let mut engine = + super::TemplateEngine::try_new(loader).expect("Failed to create template engine"); // Add a template configuration for converter.md on top // of the default template configuration. This is useful @@ -703,82 +603,6 @@ mod tests { }) .expect("Failed to generate registry assets"); - assert!(cmp_dir("expected_output", "observed_output").unwrap()); - } - - #[allow(clippy::print_stderr)] - fn cmp_dir>(expected_dir: P, observed_dir: P) -> std::io::Result { - let mut expected_files = HashSet::new(); - let mut observed_files = HashSet::new(); - - // Walk through the first directory and add files to files1 set - for entry in WalkDir::new(&expected_dir) - .into_iter() - .filter_map(|e| e.ok()) - { - let path = entry.path(); - if path.is_file() { - let relative_path = path.strip_prefix(&expected_dir).unwrap(); - _ = expected_files.insert(relative_path.to_path_buf()); - } - } - - // Walk through the second directory and add files to files2 set - for entry in WalkDir::new(&observed_dir) - .into_iter() - .filter_map(|e| e.ok()) - { - let path = entry.path(); - if path.is_file() { - let relative_path = path.strip_prefix(&observed_dir).unwrap(); - _ = observed_files.insert(relative_path.to_path_buf()); - } - } - - // Assume directories are identical until proven otherwise - let mut are_identical = true; - - // Compare files in both sets - for file in expected_files.intersection(&observed_files) { - let file1_content = - fs::read_to_string(expected_dir.as_ref().join(file))?.replace("\r\n", "\n"); - let file2_content = - fs::read_to_string(observed_dir.as_ref().join(file))?.replace("\r\n", "\n"); - - if file1_content != file2_content { - are_identical = false; - eprintln!( - "Files {:?} and {:?} are different", - expected_dir.as_ref().join(file), - observed_dir.as_ref().join(file) - ); - - eprintln!( - "Found differences:\n{}", - diff_output(&file1_content, &file2_content) - ); - break; - } - } - // If any file is unique to one directory, they are not identical - let not_in_observed = expected_files - .difference(&observed_files) - .collect::>(); - if !not_in_observed.is_empty() { - are_identical = false; - eprintln!("Observed output is missing files: {:?}", not_in_observed); - } - let not_in_expected = observed_files - .difference(&expected_files) - .collect::>(); - if !not_in_expected.is_empty() { - are_identical = false; - eprintln!( - "Observed output has unexpected files: {:?}", - not_in_expected - ); - } - - Ok(are_identical) + assert!(diff_dir("expected_output", "observed_output").unwrap()); } } diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 9853b04c..6d8b1948 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -12,6 +12,7 @@ use serde::Serialize; use walkdir::DirEntry; use weaver_cache::Cache; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, handle_errors, WeaverError}; use weaver_common::Logger; use weaver_resolved_schema::catalog::Catalog; @@ -33,13 +34,13 @@ pub mod registry; pub struct SchemaResolver {} /// An error that can occur while resolving a telemetry schema. -#[derive(thiserror::Error, Debug, Serialize, Diagnostic)] +#[derive(thiserror::Error, Debug, Clone, Serialize, Diagnostic)] #[must_use] #[non_exhaustive] pub enum Error { /// An invalid URL. #[error("Invalid URL `{url:?}`, error: {error:?})")] - #[diagnostic(severity = "error", help("Check the URL and try again."))] + #[diagnostic(help("Check the URL and try again."))] InvalidUrl { /// The invalid URL. url: String, @@ -49,7 +50,6 @@ pub enum Error { /// A semantic convention error. #[error("{message}")] - #[diagnostic()] SemConvError { /// The error that occurred. message: String, @@ -146,13 +146,6 @@ pub enum Error { } impl WeaverError for Error { - /// Returns a list of human-readable error messages. - fn errors(&self) -> Vec { - match self { - Error::CompoundError(errors) => errors.iter().flat_map(|e| e.errors()).collect(), - _ => vec![self.to_string()], - } - } fn compound(errors: Vec) -> Error { Self::CompoundError( errors @@ -166,6 +159,21 @@ impl WeaverError for Error { } } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(match error { + Error::CompoundError(errors) => errors + .into_iter() + .flat_map(|e| { + let diag_msgs: DiagnosticMessages = e.into(); + diag_msgs.into_inner() + }) + .collect(), + _ => vec![DiagnosticMessage::new(error)], + }) + } +} + /// A constraint that is not satisfied and its missing attributes. #[derive(Debug)] pub struct UnsatisfiedAnyOfConstraint { diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index b549c758..4b52ae4d 100644 --- a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -2,8 +2,10 @@ #![doc = include_str!("../README.md")] +use crate::Error::CompoundError; use miette::Diagnostic; use serde::Serialize; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, WeaverError}; pub mod attribute; @@ -93,19 +95,12 @@ pub enum Error { } impl WeaverError for Error { - /// Returns a list of human-readable error messages. - fn errors(&self) -> Vec { - match self { - Error::CompoundError(errors) => errors.iter().flat_map(|e| e.errors()).collect(), - _ => vec![self.to_string()], - } - } fn compound(errors: Vec) -> Error { - Self::CompoundError( + CompoundError( errors .into_iter() .flat_map(|e| match e { - Self::CompoundError(errors) => errors, + CompoundError(errors) => errors, e => vec![e], }) .collect(), @@ -113,11 +108,26 @@ impl WeaverError for Error { } } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(match error { + CompoundError(errors) => errors + .into_iter() + .flat_map(|e| { + let diag_msgs: DiagnosticMessages = e.into(); + diag_msgs.into_inner() + }) + .collect(), + _ => vec![DiagnosticMessage::new(error)], + }) + } +} + #[cfg(test)] mod tests { use crate::registry::SemConvRegistry; use std::vec; - use weaver_common::error::WeaverError; + use weaver_common::diagnostic::DiagnosticMessages; /// Load multiple semantic convention files in the semantic convention registry. /// No error should be emitted. @@ -167,8 +177,9 @@ mod tests { let result = registry.add_semconv_spec_from_file(yaml); assert!(result.is_err(), "{:#?}", result.ok().unwrap()); if let Err(err) = result { - assert_eq!(err.errors().len(), 1); let output = format!("{}", err); + let diag_msgs: DiagnosticMessages = err.into(); + assert_eq!(diag_msgs.len(), 1); assert!(!output.is_empty()); } } diff --git a/crates/weaver_semconv_gen/src/lib.rs b/crates/weaver_semconv_gen/src/lib.rs index 866ef18f..8a09a9e7 100644 --- a/crates/weaver_semconv_gen/src/lib.rs +++ b/crates/weaver_semconv_gen/src/lib.rs @@ -9,6 +9,7 @@ use std::{fmt, fs}; use serde::Serialize; use weaver_cache::Cache; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; use weaver_common::error::{format_errors, WeaverError}; use weaver_diff::diff_output; use weaver_forge::registry::TemplateGroup; @@ -27,7 +28,7 @@ mod gen; mod parser; /// Errors emitted by this crate. -#[derive(thiserror::Error, Debug, Serialize, Diagnostic)] +#[derive(thiserror::Error, Debug, Clone, Serialize, Diagnostic)] #[non_exhaustive] pub enum Error { /// Thrown when we are unable to find a semconv by id. @@ -99,13 +100,6 @@ pub enum Error { } impl WeaverError for Error { - /// Retrieves a list of error messages associated with this error. - fn errors(&self) -> Vec { - match self { - Self::CompoundError(errors) => errors.iter().flat_map(|e| e.errors()).collect(), - _ => vec![self.to_string()], - } - } fn compound(errors: Vec) -> Error { Self::CompoundError( errors @@ -119,6 +113,27 @@ impl WeaverError for Error { } } +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + match error { + Error::CompoundError(errors) => DiagnosticMessages::new( + errors + .into_iter() + .flat_map(|e| { + let diag_msgs: DiagnosticMessages = e.into(); + diag_msgs.into_inner() + }) + .collect(), + ), + Error::SemconvError(e) => e.into(), + Error::ResolverError(e) => e.into(), + Error::CacheError(e) => e.into(), + Error::ForgeError(e) => e.into(), + _ => DiagnosticMessages::new(vec![DiagnosticMessage::new(error)]), + } + } +} + impl From for Error { fn from(e: fmt::Error) -> Self { Error::StdIoError(e.to_string()) @@ -409,7 +424,8 @@ mod tests { use std::fs; use std::path::PathBuf; - use weaver_forge::{GeneratorConfig, TemplateEngine}; + use weaver_forge::file_loader::FileSystemFileLoader; + use weaver_forge::TemplateEngine; use crate::{update_markdown, Error, SnippetGenerator}; @@ -422,7 +438,8 @@ mod tests { #[test] fn test_template_engine() -> Result<(), Error> { - let template = TemplateEngine::try_new("markdown", GeneratorConfig::default())?; + let loader = FileSystemFileLoader::try_new("templates/registry".into(), "markdown")?; + let template = TemplateEngine::try_new(loader)?; let generator = SnippetGenerator::try_from_path("data", Some(template))?; let attribute_registry_url = "/docs/attributes-registry"; // Now we should check a snippet. diff --git a/crates/weaver_semconv_gen/templates/markdown/snippet.md.j2 b/crates/weaver_semconv_gen/templates/registry/markdown/snippet.md.j2 similarity index 100% rename from crates/weaver_semconv_gen/templates/markdown/snippet.md.j2 rename to crates/weaver_semconv_gen/templates/registry/markdown/snippet.md.j2 diff --git a/data/update_markdown/markdown/templates.md b/data/update_markdown/markdown/templates.md new file mode 100644 index 00000000..771157cf --- /dev/null +++ b/data/update_markdown/markdown/templates.md @@ -0,0 +1,7 @@ + +attribute_table: registry.user_agent + + + +metric_table: metric.http.server.request.duration + diff --git a/data/update_markdown/registry/http-common.yaml b/data/update_markdown/registry/http-common.yaml new file mode 100644 index 00000000..f175215f --- /dev/null +++ b/data/update_markdown/registry/http-common.yaml @@ -0,0 +1,87 @@ +groups: + - id: attributes.http.common + type: attribute_group + brief: "Describes HTTP attributes." + attributes: + - ref: http.request.method + requirement_level: required + - ref: http.response.status_code + requirement_level: + conditionally_required: If and only if one was received/sent. + - ref: error.type + requirement_level: + conditionally_required: If request has ended with an error. + examples: ['timeout', 'java.net.UnknownHostException', 'server_certificate_invalid', '500'] + note: | + If the request fails with an error before response status code was sent or received, + `error.type` SHOULD be set to exception type (its fully-qualified class name, if applicable) + or a component-specific low cardinality error identifier. + + If response status code was sent or received and status indicates an error according to [HTTP span status definition](/docs/http/http-spans.md), + `error.type` SHOULD be set to the status code number (represented as a string), an exception type (if thrown) or a component-specific error identifier. + + The `error.type` value SHOULD be predictable and SHOULD have low cardinality. + Instrumentations SHOULD document the list of errors they report. + + The cardinality of `error.type` within one instrumentation library SHOULD be low, but + telemetry consumers that aggregate data from multiple instrumentation libraries and applications + should be prepared for `error.type` to have high cardinality at query time, when no + additional filters are applied. + + If the request has completed successfully, instrumentations SHOULD NOT set `error.type`. + - ref: network.protocol.name + examples: ['http', 'spdy'] + requirement_level: + conditionally_required: If not `http` and `network.protocol.version` is set. + - ref: network.protocol.version + examples: ['1.0', '1.1', '2', '3'] + + - id: attributes.http.client + type: attribute_group + brief: 'HTTP Client attributes' + extends: attributes.http.common + attributes: + - ref: server.address + requirement_level: required + brief: > + Host identifier of the ["URI origin"](https://www.rfc-editor.org/rfc/rfc9110.html#name-uri-origin) HTTP request is sent to. + note: > + If an HTTP client request is explicitly made to an IP address, e.g. `http://x.x.x.x:8080`, then + `server.address` SHOULD be the IP address `x.x.x.x`. A DNS lookup SHOULD NOT be used. + - ref: server.port + requirement_level: required + brief: > + Port identifier of the ["URI origin"](https://www.rfc-editor.org/rfc/rfc9110.html#name-uri-origin) HTTP request is sent to. + - ref: url.scheme + requirement_level: opt_in + examples: ["http", "https"] + + - id: attributes.http.server + type: attribute_group + brief: 'HTTP Server attributes' + extends: attributes.http.common + attributes: + - ref: http.route + requirement_level: + conditionally_required: If and only if it's available + - ref: server.address + brief: > + Name of the local HTTP server that received the request. + note: > + See [Setting `server.address` and `server.port` attributes](/docs/http/http-spans.md#setting-serveraddress-and-serverport-attributes). + - ref: server.port + brief: > + Port of the local HTTP server that received the request. + note: > + See [Setting `server.address` and `server.port` attributes](/docs/http/http-spans.md#setting-serveraddress-and-serverport-attributes). + requirement_level: + conditionally_required: If `server.address` is set. + - ref: url.scheme + requirement_level: required + examples: ["http", "https"] + note: > + The scheme of the original client request, if known + (e.g. from [Forwarded#proto](https://developer.mozilla.org/docs/Web/HTTP/Headers/Forwarded#proto), + [X-Forwarded-Proto](https://developer.mozilla.org/docs/Web/HTTP/Headers/X-Forwarded-Proto), + or a similar header). + Otherwise, the scheme of the immediate peer request. \ No newline at end of file diff --git a/data/update_markdown/registry/metric/http.yaml b/data/update_markdown/registry/metric/http.yaml new file mode 100644 index 00000000..a8e5eb56 --- /dev/null +++ b/data/update_markdown/registry/metric/http.yaml @@ -0,0 +1,33 @@ +groups: + - id: metric_attributes.http.server + type: attribute_group + brief: 'HTTP server attributes' + extends: attributes.http.server + attributes: + - ref: server.address + requirement_level: opt_in + note: | + See [Setting `server.address` and `server.port` attributes](/docs/http/http-spans.md#setting-serveraddress-and-serverport-attributes). + > **Warning** + > Since this attribute is based on HTTP headers, opting in to it may allow an attacker + > to trigger cardinality limits, degrading the usefulness of the metric. + - ref: server.port + requirement_level: opt_in + note: | + See [Setting `server.address` and `server.port` attributes](/docs/http/http-spans.md#setting-serveraddress-and-serverport-attributes). + > **Warning** + > Since this attribute is based on HTTP headers, opting in to it may allow an attacker + > to trigger cardinality limits, degrading the usefulness of the metric. + - id: metric_attributes.http.client + type: attribute_group + brief: 'HTTP client attributes' + extends: attributes.http.client + + - id: metric.http.server.request.duration + type: metric + metric_name: http.server.request.duration + brief: "Duration of HTTP server requests." + instrument: histogram + unit: "s" + stability: stable + extends: metric_attributes.http.server \ No newline at end of file diff --git a/data/update_markdown/registry/registry/error.yaml b/data/update_markdown/registry/registry/error.yaml new file mode 100644 index 00000000..6831a7a5 --- /dev/null +++ b/data/update_markdown/registry/registry/error.yaml @@ -0,0 +1,35 @@ +groups: + - id: registry.error + type: attribute_group + prefix: error + brief: > + This document defines the shared attributes used to report an error. + attributes: + - id: type + stability: stable + brief: > + Describes a class of error the operation ended with. + type: + allow_custom_values: true + members: + - id: other + value: "_OTHER" + brief: > + A fallback error value to be used when the instrumentation doesn't define a custom value. + examples: ['timeout', 'java.net.UnknownHostException', 'server_certificate_invalid', '500'] + note: | + The `error.type` SHOULD be predictable and SHOULD have low cardinality. + Instrumentations SHOULD document the list of errors they report. + + The cardinality of `error.type` within one instrumentation library SHOULD be low. + Telemetry consumers that aggregate data from multiple instrumentation libraries and applications + should be prepared for `error.type` to have high cardinality at query time when no + additional filters are applied. + + If the operation has completed successfully, instrumentations SHOULD NOT set `error.type`. + + If a specific domain defines its own set of error identifiers (such as HTTP or gRPC status codes), + it's RECOMMENDED to: + + * Use a domain-specific attribute + * Set `error.type` to capture all errors, regardless of whether they are defined within the domain-specific set or not. \ No newline at end of file diff --git a/data/update_markdown/registry/registry/http.yaml b/data/update_markdown/registry/registry/http.yaml new file mode 100644 index 00000000..2e09eb94 --- /dev/null +++ b/data/update_markdown/registry/registry/http.yaml @@ -0,0 +1,148 @@ +groups: + - id: registry.http + prefix: http + type: attribute_group + brief: 'This document defines semantic convention attributes in the HTTP namespace.' + attributes: + - id: request.body.size + type: int + brief: > + The size of the request payload body in bytes. This is the number of bytes transferred excluding headers and + is often, but not always, present as the [Content-Length](https://www.rfc-editor.org/rfc/rfc9110.html#field.content-length) + header. For requests using transport encoding, this should be the compressed size. + examples: 3495 + stability: experimental # this should not be marked stable with other HTTP attributes + - id: request.header + stability: stable + type: template[string[]] + brief: > + HTTP request headers, `` being the normalized HTTP Header name (lowercase), the value being the header values. + note: > + Instrumentations SHOULD require an explicit configuration of which headers are to be captured. + Including all request headers can be a security risk - explicit configuration helps avoid leaking sensitive information. + + The `User-Agent` header is already captured in the `user_agent.original` attribute. + Users MAY explicitly configure instrumentations to capture them even though it is not recommended. + + The attribute value MUST consist of either multiple header values as an array of strings + or a single-item array containing a possibly comma-concatenated string, depending on the way + the HTTP library provides access to headers. + examples: ['http.request.header.content-type=["application/json"]', 'http.request.header.x-forwarded-for=["1.2.3.4", "1.2.3.5"]'] + - id: request.method + stability: stable + type: + allow_custom_values: true + members: + - id: connect + value: "CONNECT" + brief: 'CONNECT method.' + - id: delete + value: "DELETE" + brief: 'DELETE method.' + - id: get + value: "GET" + brief: 'GET method.' + - id: head + value: "HEAD" + brief: 'HEAD method.' + - id: options + value: "OPTIONS" + brief: 'OPTIONS method.' + - id: patch + value: "PATCH" + brief: 'PATCH method.' + - id: post + value: "POST" + brief: 'POST method.' + - id: put + value: "PUT" + brief: 'PUT method.' + - id: trace + value: "TRACE" + brief: 'TRACE method.' + - id: other + value: "_OTHER" + brief: 'Any HTTP method that the instrumentation has no prior knowledge of.' + brief: 'HTTP request method.' + examples: ["GET", "POST", "HEAD"] + note: | + HTTP request method value SHOULD be "known" to the instrumentation. + By default, this convention defines "known" methods as the ones listed in [RFC9110](https://www.rfc-editor.org/rfc/rfc9110.html#name-methods) + and the PATCH method defined in [RFC5789](https://www.rfc-editor.org/rfc/rfc5789.html). + + If the HTTP request method is not known to instrumentation, it MUST set the `http.request.method` attribute to `_OTHER`. + + If the HTTP instrumentation could end up converting valid HTTP request methods to `_OTHER`, then it MUST provide a way to override + the list of known HTTP methods. If this override is done via environment variable, then the environment variable MUST be named + OTEL_INSTRUMENTATION_HTTP_KNOWN_METHODS and support a comma-separated list of case-sensitive known HTTP methods + (this list MUST be a full override of the default known method, it is not a list of known methods in addition to the defaults). + + HTTP method names are case-sensitive and `http.request.method` attribute value MUST match a known HTTP method name exactly. + Instrumentations for specific web frameworks that consider HTTP methods to be case insensitive, SHOULD populate a canonical equivalent. + Tracing instrumentations that do so, MUST also set `http.request.method_original` to the original value. + - id: request.method_original + stability: stable + type: string + brief: Original HTTP method sent by the client in the request line. + examples: ["GeT", "ACL", "foo"] + - id: request.resend_count + stability: stable + type: int + brief: > + The ordinal number of request resending attempt (for any reason, including redirects). + note: > + The resend count SHOULD be updated each time an HTTP request gets resent by the client, regardless of what + was the cause of the resending (e.g. redirection, authorization failure, 503 Server Unavailable, network issues, + or any other). + examples: 3 + - id: response.body.size + type: int + brief: > + The size of the response payload body in bytes. This is the number of bytes transferred excluding headers and + is often, but not always, present as the [Content-Length](https://www.rfc-editor.org/rfc/rfc9110.html#field.content-length) + header. For requests using transport encoding, this should be the compressed size. + examples: 3495 + stability: experimental # this should not be marked stable with other HTTP attributes + - id: response.header + stability: stable + type: template[string[]] + brief: > + HTTP response headers, `` being the normalized HTTP Header name (lowercase), the value being the header values. + note: > + Instrumentations SHOULD require an explicit configuration of which headers are to be captured. + Including all response headers can be a security risk - explicit configuration helps avoid leaking sensitive information. + + Users MAY explicitly configure instrumentations to capture them even though it is not recommended. + + The attribute value MUST consist of either multiple header values as an array of strings + or a single-item array containing a possibly comma-concatenated string, depending on the way + the HTTP library provides access to headers. + examples: ['http.response.header.content-type=["application/json"]', 'http.response.header.my-custom-header=["abc", "def"]'] + - id: response.status_code + stability: stable + type: int + brief: '[HTTP response status code](https://tools.ietf.org/html/rfc7231#section-6).' + examples: [200] + - id: route + stability: stable + type: string + brief: > + The matched route, that is, the path template in the format used by the respective server framework. + examples: ['/users/:userID?', '{controller}/{action}/{id?}'] + note: > + MUST NOT be populated when this is not supported by the HTTP server framework as the route attribute should have low-cardinality and the URI path can NOT substitute it. + + SHOULD include the [application root](/docs/http/http-spans.md#http-server-definitions) if there is one. + - id: connection.state + type: + allow_custom_values: true + members: + - id: active + value: "active" + brief: 'active state.' + - id: idle + value: "idle" + brief: 'idle state.' + brief: State of the HTTP connection in the HTTP connection pool. + stability: experimental + examples: ["active", "idle"] \ No newline at end of file diff --git a/data/update_markdown/registry/registry/network.yaml b/data/update_markdown/registry/registry/network.yaml new file mode 100644 index 00000000..fa0391e5 --- /dev/null +++ b/data/update_markdown/registry/registry/network.yaml @@ -0,0 +1,201 @@ +groups: + - id: registry.network + prefix: network + type: attribute_group + brief: > + These attributes may be used for any network related operation. + attributes: + - id: carrier.icc + type: string + stability: experimental + brief: "The ISO 3166-1 alpha-2 2-character country code associated with the mobile carrier network." + examples: "DE" + - id: carrier.mcc + type: string + stability: experimental + brief: "The mobile carrier country code." + examples: "310" + - id: carrier.mnc + type: string + stability: experimental + brief: "The mobile carrier network code." + examples: "001" + - id: carrier.name + type: string + stability: experimental + brief: "The name of the mobile carrier." + examples: "sprint" + - id: connection.subtype + type: + allow_custom_values: true + members: + - id: gprs + brief: GPRS + value: "gprs" + - id: edge + brief: EDGE + value: "edge" + - id: umts + brief: UMTS + value: "umts" + - id: cdma + brief: CDMA + value: "cdma" + - id: evdo_0 + brief: EVDO Rel. 0 + value: "evdo_0" + - id: evdo_a + brief: "EVDO Rev. A" + value: "evdo_a" + - id: cdma2000_1xrtt + brief: CDMA2000 1XRTT + value: "cdma2000_1xrtt" + - id: hsdpa + brief: HSDPA + value: "hsdpa" + - id: hsupa + brief: HSUPA + value: "hsupa" + - id: hspa + brief: HSPA + value: "hspa" + - id: iden + brief: IDEN + value: "iden" + - id: evdo_b + brief: "EVDO Rev. B" + value: "evdo_b" + - id: lte + brief: LTE + value: "lte" + - id: ehrpd + brief: EHRPD + value: "ehrpd" + - id: hspap + brief: HSPAP + value: "hspap" + - id: gsm + brief: GSM + value: "gsm" + - id: td_scdma + brief: TD-SCDMA + value: "td_scdma" + - id: iwlan + brief: IWLAN + value: "iwlan" + - id: nr + brief: "5G NR (New Radio)" + value: "nr" + - id: nrnsa + brief: "5G NRNSA (New Radio Non-Standalone)" + value: "nrnsa" + - id: lte_ca + brief: LTE CA + value: "lte_ca" + stability: experimental + brief: 'This describes more details regarding the connection.type. It may be the type of cell technology connection, but it could be used for describing details about a wifi connection.' + examples: 'LTE' + - id: connection.type + type: + allow_custom_values: true + members: + - id: wifi + value: "wifi" + - id: wired + value: "wired" + - id: cell + value: "cell" + - id: unavailable + value: "unavailable" + - id: unknown + value: "unknown" + stability: experimental + brief: 'The internet connection type.' + examples: 'wifi' + - id: local.address + stability: stable + type: string + brief: Local address of the network connection - IP address or Unix domain socket name. + examples: ['10.1.2.80', '/tmp/my.sock'] + - id: local.port + stability: stable + type: int + brief: Local port number of the network connection. + examples: [65123] + - id: peer.address + stability: stable + type: string + brief: Peer address of the network connection - IP address or Unix domain socket name. + examples: ['10.1.2.80', '/tmp/my.sock'] + - id: peer.port + stability: stable + type: int + brief: Peer port number of the network connection. + examples: [65123] + - id: protocol.name + stability: stable + type: string + brief: '[OSI application layer](https://osi-model.com/application-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['amqp', 'http', 'mqtt'] + - id: protocol.version + stability: stable + type: string + brief: Version of the protocol specified in `network.protocol.name`. + examples: '3.1.1' + note: > + `network.protocol.version` refers to the version of the protocol used and might be + different from the protocol client's version. If the HTTP client has a version + of `0.27.2`, but sends HTTP version `1.1`, this attribute should be set to `1.1`. + - id: transport + stability: stable + type: + allow_custom_values: true + members: + - id: tcp + value: 'tcp' + brief: "TCP" + - id: udp + value: 'udp' + brief: "UDP" + - id: pipe + value: "pipe" + brief: 'Named or anonymous pipe.' + - id: unix + value: 'unix' + brief: "Unix domain socket" + brief: > + [OSI transport layer](https://osi-model.com/transport-layer/) or + [inter-process communication method](https://wikipedia.org/wiki/Inter-process_communication). + note: | + The value SHOULD be normalized to lowercase. + + Consider always setting the transport when setting a port number, since + a port number is ambiguous without knowing the transport. For example + different processes could be listening on TCP port 12345 and UDP port 12345. + examples: ['tcp', 'udp'] + - id: type + stability: stable + type: + allow_custom_values: true + members: + - id: ipv4 + value: 'ipv4' + brief: "IPv4" + - id: ipv6 + value: 'ipv6' + brief: "IPv6" + brief: '[OSI network layer](https://osi-model.com/network-layer/) or non-OSI equivalent.' + note: The value SHOULD be normalized to lowercase. + examples: ['ipv4', 'ipv6'] + - id: io.direction + type: + allow_custom_values: false + members: + - id: transmit + value: 'transmit' + - id: receive + value: 'receive' + stability: experimental + brief: "The network IO operation direction." + examples: ["transmit"] \ No newline at end of file diff --git a/data/update_markdown/registry/registry/server.yaml b/data/update_markdown/registry/registry/server.yaml new file mode 100644 index 00000000..0523bb0d --- /dev/null +++ b/data/update_markdown/registry/registry/server.yaml @@ -0,0 +1,28 @@ +groups: + - id: server + prefix: server + type: attribute_group + brief: > + These attributes may be used to describe the server in a connection-based network interaction + where there is one side that initiates the connection (the client is the side that initiates the connection). + This covers all TCP network interactions since TCP is connection-based and one side initiates the + connection (an exception is made for peer-to-peer communication over TCP where the "user-facing" surface of the + protocol / API doesn't expose a clear notion of client and server). + This also covers UDP network interactions where one side initiates the interaction, e.g. QUIC (HTTP/3) and DNS. + attributes: + - id: address + stability: stable + type: string + brief: "Server domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name." + note: > + When observed from the client side, and when communicating through an intermediary, `server.address` SHOULD represent + the server address behind any intermediaries, for example proxies, if it's available. + examples: ['example.com', '10.1.2.80', '/tmp/my.sock'] + - id: port + stability: stable + type: int + brief: Server port number. + note: > + When observed from the client side, and when communicating through an intermediary, `server.port` SHOULD represent + the server port behind any intermediaries, for example proxies, if it's available. + examples: [80, 8080, 443] \ No newline at end of file diff --git a/data/update_markdown/registry/registry/url.yaml b/data/update_markdown/registry/registry/url.yaml new file mode 100644 index 00000000..1110c0d0 --- /dev/null +++ b/data/update_markdown/registry/registry/url.yaml @@ -0,0 +1,116 @@ +groups: + - id: registry.url + brief: Attributes describing URL. + type: attribute_group + prefix: url + attributes: + - id: domain + type: string + stability: experimental + brief: > + Domain extracted from the `url.full`, such as "opentelemetry.io". + note: > + In some cases a URL may refer to an IP and/or port directly, + without a domain name. In this case, the IP address would go to the domain field. + If the URL contains a [literal IPv6 address](https://www.rfc-editor.org/rfc/rfc2732#section-2) + enclosed by `[` and `]`, the `[` and `]` characters should also be captured in the domain field. + examples: ["www.foo.bar", "opentelemetry.io", "3.12.167.2", "[1080:0:0:0:8:800:200C:417A]"] + - id: extension + type: string + stability: experimental + brief: > + The file extension extracted from the `url.full`, excluding the leading dot. + note: > + The file extension is only set if it exists, as not every url has a file extension. + When the file name has multiple extensions `example.tar.gz`, only the last one should be captured `gz`, not `tar.gz`. + examples: [ "png", "gz" ] + - id: fragment + stability: stable + type: string + brief: > + The [URI fragment](https://www.rfc-editor.org/rfc/rfc3986#section-3.5) component + examples: ["SemConv"] + - id: full + stability: stable + type: string + brief: Absolute URL describing a network resource according to [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) + note: > + For network calls, URL usually has `scheme://host[:port][path][?query][#fragment]` format, where the fragment + is not transmitted over HTTP, but if it is known, it SHOULD be included nevertheless. + + `url.full` MUST NOT contain credentials passed via URL in form of `https://username:password@www.example.com/`. + In such case username and password SHOULD be redacted and attribute's value SHOULD be `https://REDACTED:REDACTED@www.example.com/`. + + `url.full` SHOULD capture the absolute URL when it is available (or can be reconstructed). + Sensitive content provided in `url.full` SHOULD be scrubbed when instrumentations can identify it. + examples: ['https://www.foo.bar/search?q=OpenTelemetry#SemConv', '//localhost'] + - id: original + type: string + stability: experimental + brief: > + Unmodified original URL as seen in the event source. + note: > + In network monitoring, the observed URL may be a full URL, whereas in access logs, the URL is often + just represented as a path. This field is meant to represent the URL as it was observed, complete or not. + + `url.original` might contain credentials passed via URL in form of `https://username:password@www.example.com/`. + In such case password and username SHOULD NOT be redacted and attribute's value SHOULD remain the same. + examples: ["https://www.foo.bar/search?q=OpenTelemetry#SemConv", "search?q=OpenTelemetry"] + - id: path + stability: stable + type: string + brief: > + The [URI path](https://www.rfc-editor.org/rfc/rfc3986#section-3.3) component + examples: ["/search"] + note: > + Sensitive content provided in `url.path` SHOULD be scrubbed when instrumentations can identify it. + - id: port + type: int + stability: experimental + brief: > + Port extracted from the `url.full` + examples: [443] + - id: query + stability: stable + type: string + brief: > + The [URI query](https://www.rfc-editor.org/rfc/rfc3986#section-3.4) component + examples: ["q=OpenTelemetry"] + note: > + Sensitive content provided in `url.query` SHOULD be scrubbed when instrumentations can identify it. + - id: registered_domain + type: string + stability: experimental + brief: > + The highest registered url domain, stripped of the subdomain. + examples: ["example.com", "foo.co.uk"] + note: > + This value can be determined precisely with the [public suffix list](http://publicsuffix.org). + For example, the registered domain for `foo.example.com` is `example.com`. + Trying to approximate this by simply taking the last two labels will not work well for TLDs such as `co.uk`. + - id: scheme + stability: stable + type: string + brief: > + The [URI scheme](https://www.rfc-editor.org/rfc/rfc3986#section-3.1) component identifying the used protocol. + examples: ["https", "ftp", "telnet"] + - id: subdomain + type: string + stability: experimental + brief: > + The subdomain portion of a fully qualified domain name includes all of the names except the host name + under the registered_domain. In a partially qualified domain, or if the qualification level of the + full name cannot be determined, subdomain contains all of the names below the registered domain. + examples: ["east", "sub2.sub1"] + note: > + The subdomain portion of `www.east.mydomain.co.uk` is `east`. If the domain has multiple levels of subdomain, + such as `sub2.sub1.example.com`, the subdomain field should contain `sub2.sub1`, with no trailing period. + - id: top_level_domain + type: string + stability: experimental + brief: > + The effective top level domain (eTLD), also known as the domain suffix, is the last part of the domain name. + For example, the top level domain for example.com is `com`. + examples: ["com", "co.uk"] + note: > + This value can be determined precisely with the [public suffix list](http://publicsuffix.org). \ No newline at end of file diff --git a/data/update_markdown/registry/registry/user_agent.yaml b/data/update_markdown/registry/registry/user_agent.yaml new file mode 100644 index 00000000..48fa48a6 --- /dev/null +++ b/data/update_markdown/registry/registry/user_agent.yaml @@ -0,0 +1,36 @@ +groups: + - id: registry.user_agent + prefix: user_agent + type: attribute_group + brief: "Describes user-agent attributes." + attributes: + - id: original + stability: stable + type: string + brief: > + Value of the [HTTP User-Agent](https://www.rfc-editor.org/rfc/rfc9110.html#field.user-agent) header sent by the client. + examples: ['CERN-LineMode/2.15 libwww/2.17b3', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1', + 'YourApp/1.0.0 grpc-java-okhttp/1.27.2'] + - id: name + type: string + stability: experimental + brief: > + Name of the user-agent extracted from original. Usually refers to the browser's name. + examples: ['Safari', 'YourApp'] + note: > + [Example](https://www.whatsmyua.info) of extracting browser's name from original string. In the case of using + a user-agent for non-browser products, such as microservices with multiple names/versions inside the + `user_agent.original`, the most significant name SHOULD be selected. In such a scenario it should align with + `user_agent.version` + - id: version + type: string + stability: experimental + brief: > + Version of the user-agent extracted from original. Usually refers to the browser's version + examples: ['14.1.2', '1.0.0'] + note: > + [Example](https://www.whatsmyua.info) of extracting browser's version from original string. In the case of + using a user-agent for non-browser products, such as microservices with multiple names/versions inside the + `user_agent.original`, the most significant version SHOULD be selected. In such a scenario it should align + with `user_agent.name` \ No newline at end of file diff --git a/data/update_markdown/templates/registry/markdown/snippet.md.j2 b/data/update_markdown/templates/registry/markdown/snippet.md.j2 new file mode 100644 index 00000000..fcdcf110 --- /dev/null +++ b/data/update_markdown/templates/registry/markdown/snippet.md.j2 @@ -0,0 +1 @@ +{{ snippet_type }}: {{ group.id }} diff --git a/diagnostic_templates/ansi/errors.txt.j2 b/default_diagnostic_templates/ansi/errors.txt.j2 similarity index 100% rename from diagnostic_templates/ansi/errors.txt.j2 rename to default_diagnostic_templates/ansi/errors.txt.j2 diff --git a/diagnostic_templates/ansi/weaver.yaml b/default_diagnostic_templates/ansi/weaver.yaml similarity index 100% rename from diagnostic_templates/ansi/weaver.yaml rename to default_diagnostic_templates/ansi/weaver.yaml diff --git a/diagnostic_templates/gh_workflow_command/errors.txt.j2 b/default_diagnostic_templates/gh_workflow_command/errors.txt.j2 similarity index 100% rename from diagnostic_templates/gh_workflow_command/errors.txt.j2 rename to default_diagnostic_templates/gh_workflow_command/errors.txt.j2 diff --git a/diagnostic_templates/gh_workflow_command/weaver.yaml b/default_diagnostic_templates/gh_workflow_command/weaver.yaml similarity index 100% rename from diagnostic_templates/gh_workflow_command/weaver.yaml rename to default_diagnostic_templates/gh_workflow_command/weaver.yaml diff --git a/diagnostic_templates/json/errors.json.j2 b/default_diagnostic_templates/json/errors.json.j2 similarity index 100% rename from diagnostic_templates/json/errors.json.j2 rename to default_diagnostic_templates/json/errors.json.j2 diff --git a/diagnostic_templates/json/weaver.yaml b/default_diagnostic_templates/json/weaver.yaml similarity index 100% rename from diagnostic_templates/json/weaver.yaml rename to default_diagnostic_templates/json/weaver.yaml diff --git a/docs/usage.md b/docs/usage.md index 46c6b605..5a3b5483 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -219,3 +219,24 @@ Options: > Note: The `-d` and `--registry-git-sub-dir` options are only used when the > registry is a Git URL otherwise these options are ignored. + +## diagnostic init + +``` +Initializes a `diagnostic_templates` directory to define or override diagnostic output formats + +Usage: weaver diagnostic init [OPTIONS] [TARGET] + +Arguments: + [TARGET] Optional target to initialize the diagnostic templates for. If empty, all default templates will be extracted [default: ] + +Options: + -t, --diagnostic-templates-dir + Optional path where the diagnostic templates directory should be created [default: diagnostic_templates] + --diagnostic-format + Format used to render the diagnostic messages. Predefined formats are: ansi, json, gh_workflow_command [default: ansi] + --diagnostic-template + Path to the directory where the diagnostic templates are located [default: diagnostic_templates] + -h, --help + Print help +``` \ No newline at end of file diff --git a/docs/weaver-config.md b/docs/weaver-config.md new file mode 100644 index 00000000..45bdede7 --- /dev/null +++ b/docs/weaver-config.md @@ -0,0 +1,71 @@ +# Weaver Configuration File - `weaver.yaml` + +The configuration file `weaver.yaml` is optional. It allows configuring the +following options: + +```yaml +# Uncomment this section to specify the configuration of the `text_map` filter. +#text_maps: +# java_types: +# int: int +# double: double +# boolean: boolean +# string: String +# java_keys: +# int: intKey +# double: doubleKey +# boolean: booleanKey +# string: stringKey + +# Deprecated, please use text_maps instead +# Configuration of the type mapping. This is useful to generate code in a +# specific language. This is optional. +# Example: {{ attribute.type | type_mapping }} will be evaluated as int64 +# if the semconv attribute type is int. +#type_mapping: +# int: int64 +# double: double +# boolean: bool +# string: string +# "int[]": "[]int64" +# "double[]": "[]double" +# "boolean[]": "[]bool" +# "string[]": "[]string" +# ... + +# Uncomment this section to specify the configuration of the Jinja template syntax. +# Note: The default syntax is strongly recommended. +#template_syntax: +# block_start: "{%" +# block_end: "%}" +# variable_start: "{{" +# variable_end: "}}" +# comment_start: "{#" +# comment_end: "#}" + +# Uncomment the following section to specify a list of acronyms that +# will be interpreted by the acronym filter. This is optional. +# acronyms: ["iOS", "HTTP", "API", "SDK", "CLI", "URL", "JSON", "XML", "HTML"] + +# Uncomment the following templates to override the default template +# mapping. Each template mapping specifies a jaq filter (compatible with jq) +# to apply to every file matching the pattern. The application_mode specifies +# how the template should be applied. The application_mode can be `each` or +# `single`. The `each` mode will evaluate the template for each object selected +# by the jaq filter. The `single` mode will evaluate the template once with all +# the objects selected by the jq filter. +# +# Note: jaq is a Rust reimplementation of jq. Most of the jq filters are +# supported. For more information, see https://github.com/01mf02/jaq +# +# templates: +# - pattern: "**/registry.md" +# filter: "." +# application_mode: single +# - pattern: "**/attribute_group.md" +# filter: ".groups[] | select(.type == \"attribute_group\")" +# application_mode: each +# - pattern: "**/attribute_groups.md" +# filter: ".groups[] | select(.type == \"attribute_group\")" +# application_mode: single +``` diff --git a/justfile b/justfile index c6f925fe..ffdd6cf6 100644 --- a/justfile +++ b/justfile @@ -4,7 +4,7 @@ install: cargo install cargo-machete cargo install cargo-depgraph cargo install cargo-edit - rustup install nightly-2023-10-10 # used by cargo-check-external-types + rustup install nightly-2024-05-01 # used by cargo-check-external-types cargo install cargo-check-external-types cargo install git-cliff cargo install cargo-tarpaulin diff --git a/scripts/check_external_types.sh b/scripts/check_external_types.sh index e8fd6ab4..87dc796b 100755 --- a/scripts/check_external_types.sh +++ b/scripts/check_external_types.sh @@ -13,7 +13,7 @@ for dir in crates/*/; do # Check if the public API is compliant with the allowed-external-types.toml echo "Checking the public API of $dir" - cargo +nightly-2024-02-07 check-external-types --all-features --manifest-path "$dir/Cargo.toml" --config "$dir/allowed-external-types.toml" || exit 1 + cargo +nightly-2024-05-01 check-external-types --all-features --manifest-path "$dir/Cargo.toml" --config "$dir/allowed-external-types.toml" || exit 1 done echo "The Cargo workspace is compliant with the 'allowed external types' policies." \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs index 2540f11f..e1f30fe7 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -2,6 +2,7 @@ //! Manage command line arguments +use crate::diagnostic::DiagnosticCommand; use crate::registry::RegistryCommand; use clap::{Parser, Subcommand}; @@ -27,4 +28,6 @@ pub struct Cli { pub enum Commands { /// Manage Semantic Convention Registry Registry(RegistryCommand), + /// Manage Diagnostic Messages + Diagnostic(DiagnosticCommand), } diff --git a/src/diagnostic/init.rs b/src/diagnostic/init.rs new file mode 100644 index 00000000..6b044796 --- /dev/null +++ b/src/diagnostic/init.rs @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Initializes a `diagnostic_templates` directory to define or override diagnostic output formats. + +use crate::diagnostic::{Error, DEFAULT_DIAGNOSTIC_TEMPLATES}; +use crate::DiagnosticArgs; +use clap::Args; +use include_dir::DirEntry; +use std::fs; +use std::path::{Path, PathBuf}; +use weaver_common::diagnostic::DiagnosticMessages; +use weaver_common::Logger; + +/// Parameters for the `diagnostic init` sub-command +#[derive(Debug, Args)] +pub struct DiagnosticInitArgs { + /// Optional target to initialize the diagnostic templates for. If empty, all default templates will be extracted. + #[arg(default_value = "")] + pub target: String, + + /// Optional path where the diagnostic templates directory should be created. + #[arg(short = 't', long, default_value = "diagnostic_templates")] + pub diagnostic_templates_dir: PathBuf, + + /// Parameters to specify the diagnostic format. + #[command(flatten)] + pub diagnostic: DiagnosticArgs, +} + +/// Initializes a `diagnostic_templates` directory to define or override diagnostic output formats. +#[cfg(not(tarpaulin_include))] +pub(crate) fn command( + logger: impl Logger + Sync + Clone, + args: &DiagnosticInitArgs, +) -> Result<(), DiagnosticMessages> { + extract(args.diagnostic_templates_dir.clone(), &args.target).map_err(|e| { + Error::InitDiagnosticError { + path: args.diagnostic_templates_dir.clone(), + error: e.to_string(), + } + })?; + + logger.success(&format!( + "Diagnostic templates initialized at {:?}", + args.diagnostic_templates_dir + )); + Ok(()) +} + +/// Extracts the diagnostic templates to the specified path for the given target. +/// If the target is empty, all templates will be extracted. +fn extract>(base_path: S, target: &str) -> std::io::Result<()> { + let base_path = base_path.as_ref(); + + for entry in DEFAULT_DIAGNOSTIC_TEMPLATES.entries() { + let path = base_path.join(entry.path()); + + match entry { + DirEntry::Dir(d) => { + if d.path().starts_with(target) { + fs::create_dir_all(&path)?; + d.extract(base_path)?; + } + } + DirEntry::File(f) => { + fs::write(path, f.contents())?; + } + } + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use std::fs; + + use tempdir::TempDir; + + use weaver_common::TestLogger; + + use crate::cli::{Cli, Commands}; + use crate::diagnostic::init::DiagnosticInitArgs; + use crate::diagnostic::{DiagnosticCommand, DiagnosticSubCommand}; + use crate::run_command; + + #[test] + fn test_diagnostic_init() { + let logger = TestLogger::new(); + let temp_output = TempDir::new("output") + .expect("Failed to create temporary directory") + .into_path(); + + // Let's init for all targets + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Diagnostic(DiagnosticCommand { + command: DiagnosticSubCommand::Init(DiagnosticInitArgs { + target: "".to_owned(), + diagnostic_templates_dir: temp_output.clone(), + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger.clone()); + // The command should succeed. + assert_eq!(exit_code, 0); + + // Check the presence of 3 subdirectories in the temp_output directory + let subdirs = fs::read_dir(&temp_output).unwrap().count(); + assert_eq!(subdirs, 3); + + // Let's init for a specific target + let temp_output = TempDir::new("output") + .expect("Failed to create temporary directory") + .into_path(); + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Diagnostic(DiagnosticCommand { + command: DiagnosticSubCommand::Init(DiagnosticInitArgs { + target: "json".to_owned(), + diagnostic_templates_dir: temp_output.clone(), + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger.clone()); + // The command should succeed. + assert_eq!(exit_code, 0); + + // Check the presence of 3 subdirectories in the temp_output directory + let subdirs = fs::read_dir(&temp_output).unwrap().count(); + assert_eq!(subdirs, 1); + } +} diff --git a/src/diagnostic/mod.rs b/src/diagnostic/mod.rs new file mode 100644 index 00000000..e1fe343e --- /dev/null +++ b/src/diagnostic/mod.rs @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Command to manage diagnostic messages + +mod init; + +use crate::CmdResult; +use clap::{Args, Subcommand}; +use include_dir::{include_dir, Dir}; +use miette::Diagnostic; +use serde::Serialize; +use std::path::PathBuf; +use weaver_common::diagnostic::{DiagnosticMessage, DiagnosticMessages}; +use weaver_common::Logger; + +/// Embedded default diagnostic templates +pub(crate) static DEFAULT_DIAGNOSTIC_TEMPLATES: Dir<'_> = + include_dir!("default_diagnostic_templates"); + +/// Errors emitted by the `diagnostic` sub-commands +#[derive(thiserror::Error, Debug, Serialize, Diagnostic)] +#[non_exhaustive] +pub enum Error { + /// Failed to initialize diagnostic templates + #[error("Failed to initialize diagnostic templates at {path}: {error}")] + InitDiagnosticError { path: PathBuf, error: String }, +} + +impl From for DiagnosticMessages { + fn from(error: Error) -> Self { + DiagnosticMessages::new(vec![DiagnosticMessage::new(error)]) + } +} + +/// Parameters for the `diagnostic` command +#[derive(Debug, Args)] +pub struct DiagnosticCommand { + /// Define the sub-commands for the `diagnostic` command + #[clap(subcommand)] + pub command: DiagnosticSubCommand, +} + +/// Sub-commands to manage `diagnostic` messages. +#[derive(Debug, Subcommand)] +#[clap(verbatim_doc_comment)] +pub enum DiagnosticSubCommand { + /// Initializes a `diagnostic_templates` directory to define or override diagnostic output + /// formats. + Init(init::DiagnosticInitArgs), +} + +/// Manage diagnostic messages. +#[cfg(not(tarpaulin_include))] +pub fn diagnostic(log: impl Logger + Sync + Clone, command: &DiagnosticCommand) -> CmdResult { + match &command.command { + DiagnosticSubCommand::Init(args) => { + CmdResult::new(init::command(log, args), Some(args.diagnostic.clone())) + } + } +} diff --git a/src/main.rs b/src/main.rs index e6b532cf..917482b4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,17 +2,66 @@ #![allow(clippy::print_stdout)] -use clap::Parser; +use std::path::PathBuf; + +use clap::{Args, Parser}; use registry::semconv_registry; +use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::quiet::QuietLogger; use weaver_common::{ConsoleLogger, Logger}; +use weaver_forge::file_loader::EmbeddedFileLoader; +use weaver_forge::{OutputDirective, TemplateEngine}; use crate::cli::{Cli, Commands}; +use crate::diagnostic::DEFAULT_DIAGNOSTIC_TEMPLATES; mod cli; +mod diagnostic; mod registry; +/// Set of parameters used to specify the diagnostic format. +#[derive(Args, Debug, Clone)] +pub(crate) struct DiagnosticArgs { + /// Format used to render the diagnostic messages. Predefined formats are: ansi, json, + /// gh_workflow_command. + #[arg(long, default_value = "ansi")] + pub(crate) diagnostic_format: String, + + /// Path to the directory where the diagnostic templates are located. + #[arg(long, default_value = "diagnostic_templates")] + pub(crate) diagnostic_template: PathBuf, +} + +impl Default for DiagnosticArgs { + fn default() -> Self { + Self { + diagnostic_format: "ansi".to_owned(), + diagnostic_template: PathBuf::from("diagnostic_templates"), + } + } +} + +/// Result of a command execution. +#[derive(Debug)] +pub(crate) struct CmdResult { + pub(crate) command_result: Result<(), DiagnosticMessages>, + pub(crate) diagnostic_args: Option, +} + +impl CmdResult { + /// Create a new command result. + pub(crate) fn new( + command_result: Result<(), DiagnosticMessages>, + diagnostic_args: Option, + ) -> Self { + Self { + command_result, + diagnostic_args, + } + } +} + #[cfg(not(tarpaulin_include))] fn main() { let cli = Cli::parse(); @@ -39,8 +88,61 @@ fn main() { /// Run the command specified by the CLI arguments and return the exit code. #[cfg(not(tarpaulin_include))] fn run_command(cli: &Cli, log: impl Logger + Sync + Clone) -> i32 { - match &cli.command { - Some(Commands::Registry(params)) => semconv_registry(log, params), - None => 0, + let cmd_result = match &cli.command { + Some(Commands::Registry(params)) => semconv_registry(log.clone(), params), + Some(Commands::Diagnostic(params)) => diagnostic::diagnostic(log.clone(), params), + None => return 0, + }; + + process_diagnostics(cmd_result, log.clone()) +} + +/// Render the diagnostic messages based on the diagnostic configuration and return the exit code +/// based on the diagnostic messages. +fn process_diagnostics(cmd_result: CmdResult, logger: impl Logger + Sync + Clone) -> i32 { + let diagnostic_args = if let Some(diagnostic_args) = cmd_result.diagnostic_args { + diagnostic_args + } else { + DiagnosticArgs::default() // Default diagnostic arguments; + }; + + if let Err(diagnostic_messages) = cmd_result.command_result { + let loader = EmbeddedFileLoader::try_new( + &DEFAULT_DIAGNOSTIC_TEMPLATES, + diagnostic_args.diagnostic_template, + &diagnostic_args.diagnostic_format, + ) + .expect("Failed to create the embedded file loader for the diagnostic templates"); + match TemplateEngine::try_new(loader) { + Ok(engine) => { + match engine.generate( + logger.clone(), + &diagnostic_messages, + PathBuf::new().as_path(), + &OutputDirective::Stdout, + ) { + Ok(_) => {} + Err(e) => { + logger.error(&format!( + "Failed to render the diagnostic messages. Error: {}", + e + )); + return 1; + } + } + } + Err(e) => { + logger.error(&format!("Failed to create the template engine to render the diagnostic messages. Error: {}", e)); + return 1; + } + } + return if diagnostic_messages.has_error() { + 1 + } else { + 0 + }; } + + // Return 0 if there are no diagnostic messages + 0 } diff --git a/src/registry/check.rs b/src/registry/check.rs index a61d7de4..c6bdf828 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -4,8 +4,9 @@ use crate::registry::{ check_policies, load_semconv_specs, resolve_semconv_specs, semconv_registry_path_from, - DiagnosticArgs, RegistryPath, + RegistryArgs, }; +use crate::DiagnosticArgs; use clap::Args; use std::path::PathBuf; use weaver_cache::Cache; @@ -16,24 +17,19 @@ use weaver_semconv::registry::SemConvRegistry; /// Parameters for the `registry check` sub-command #[derive(Debug, Args)] pub struct RegistryCheckArgs { - /// Local path or Git URL of the semantic convention registry to check. - #[arg( - short = 'r', - long, - default_value = "https://github.com/open-telemetry/semantic-conventions.git" - )] - pub registry: RegistryPath, - - /// Optional path in the Git repository where the semantic convention - /// registry is located - #[arg(short = 'd', long, default_value = "model")] - pub registry_git_sub_dir: Option, + /// Parameters to specify the semantic convention registry + #[command(flatten)] + registry: RegistryArgs, /// Optional list of policy files to check against the files of the semantic /// convention registry. #[arg(short = 'p', long = "policy")] pub policies: Vec, + /// Skip the policy checks. + #[arg(long, default_value = "false")] + pub skip_policies: bool, + /// Parameters to specify the diagnostic format. #[command(flatten)] pub diagnostic: DiagnosticArgs, @@ -46,25 +42,87 @@ pub(crate) fn command( cache: &Cache, args: &RegistryCheckArgs, ) -> Result<(), DiagnosticMessages> { - logger.loading(&format!("Checking registry `{}`", args.registry)); + logger.loading(&format!("Checking registry `{}`", args.registry.registry)); let registry_id = "default"; - let registry_path = semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir); + let registry_path = + semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); // Load the semantic convention registry into a local cache. // No parsing errors should be observed. let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; - check_policies( - ®istry_path, - cache, - &args.policies, - &semconv_specs, - logger.clone(), - )?; + if !args.skip_policies { + check_policies( + ®istry_path, + cache, + &args.policies, + &semconv_specs, + logger.clone(), + )?; + } let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); _ = resolve_semconv_specs(&mut registry, logger.clone())?; Ok(()) } + +#[cfg(test)] +mod tests { + use weaver_common::TestLogger; + + use crate::cli::{Cli, Commands}; + use crate::registry::check::RegistryCheckArgs; + use crate::registry::{RegistryArgs, RegistryCommand, RegistryPath, RegistrySubCommand}; + use crate::run_command; + + #[test] + fn test_registry_check() { + let logger = TestLogger::new(); + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Registry(RegistryCommand { + command: RegistrySubCommand::Check(RegistryCheckArgs { + registry: RegistryArgs { + registry: RegistryPath::Local( + "crates/weaver_codegen_test/semconv_registry/".to_owned(), + ), + registry_git_sub_dir: None, + }, + policies: vec![], + skip_policies: true, + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger.clone()); + // The command should succeed. + assert_eq!(exit_code, 0); + + // Now, let's run the command again with the policy checks enabled. + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Registry(RegistryCommand { + command: RegistrySubCommand::Check(RegistryCheckArgs { + registry: RegistryArgs { + registry: RegistryPath::Local( + "crates/weaver_codegen_test/semconv_registry/".to_owned(), + ), + registry_git_sub_dir: None, + }, + policies: vec![], + skip_policies: false, + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger); + // The command should exit with an error code. + assert_eq!(exit_code, 1); + } +} diff --git a/src/registry/generate.rs b/src/registry/generate.rs index ff01dc66..43f6c113 100644 --- a/src/registry/generate.rs +++ b/src/registry/generate.rs @@ -9,14 +9,16 @@ use clap::Args; use weaver_cache::Cache; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; +use weaver_forge::file_loader::FileSystemFileLoader; use weaver_forge::registry::TemplateRegistry; -use weaver_forge::{GeneratorConfig, OutputDirective, TemplateEngine}; +use weaver_forge::{OutputDirective, TemplateEngine}; use weaver_semconv::registry::SemConvRegistry; use crate::registry::{ check_policies, load_semconv_specs, resolve_semconv_specs, semconv_registry_path_from, - DiagnosticArgs, RegistryPath, + RegistryArgs, }; +use crate::DiagnosticArgs; /// Parameters for the `registry generate` sub-command #[derive(Debug, Args)] @@ -34,31 +36,25 @@ pub struct RegistryGenerateArgs { #[arg(short = 't', long, default_value = "templates")] pub templates: PathBuf, - /// Local path or Git URL of the semantic convention registry. - #[arg( - short = 'r', - long, - default_value = "https://github.com/open-telemetry/semantic-conventions.git" - )] - pub registry: RegistryPath, - - /// Optional path in the Git repository where the semantic convention - /// registry is located - #[arg(short = 'd', long, default_value = "model")] - pub registry_git_sub_dir: Option, + /// Parameters to specify the semantic convention registry + #[command(flatten)] + registry: RegistryArgs, /// Optional list of policy files to check against the files of the semantic /// convention registry. #[arg(short = 'p', long = "policy")] pub policies: Vec, + /// Skip the policy checks. + #[arg(long, default_value = "false")] + pub skip_policies: bool, + /// Parameters to specify the diagnostic format. #[command(flatten)] pub diagnostic: DiagnosticArgs, } /// Generate artifacts from a semantic convention registry. -#[cfg(not(tarpaulin_include))] pub(crate) fn command( logger: impl Logger + Sync + Clone, cache: &Cache, @@ -66,27 +62,30 @@ pub(crate) fn command( ) -> Result<(), DiagnosticMessages> { logger.loading(&format!( "Generating artifacts for the registry `{}`", - args.registry + args.registry.registry )); let registry_id = "default"; - let registry_path = semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir); + let registry_path = + semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir); // Load the semantic convention registry into a local cache. let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; - check_policies( - ®istry_path, - cache, - &args.policies, - &semconv_specs, - logger.clone(), - )?; + if !args.skip_policies { + check_policies( + ®istry_path, + cache, + &args.policies, + &semconv_specs, + logger.clone(), + )?; + } + let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let schema = resolve_semconv_specs(&mut registry, logger.clone())?; - let config = GeneratorConfig::new(args.templates.clone()); - - let engine = TemplateEngine::try_new(&format!("registry/{}", args.target), config)?; + let loader = FileSystemFileLoader::try_new(args.templates.join("registry"), &args.target)?; + let engine = TemplateEngine::try_new(loader)?; let template_registry = TemplateRegistry::try_from_resolved_registry( schema @@ -105,3 +104,116 @@ pub(crate) fn command( logger.success("Artifacts generated successfully"); Ok(()) } + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use tempdir::TempDir; + + use weaver_common::TestLogger; + + use crate::cli::{Cli, Commands}; + use crate::registry::generate::RegistryGenerateArgs; + use crate::registry::{RegistryArgs, RegistryCommand, RegistryPath, RegistrySubCommand}; + use crate::run_command; + + #[test] + fn test_registry_generate() { + let logger = TestLogger::new(); + let temp_output = TempDir::new("output") + .expect("Failed to create temporary directory") + .into_path(); + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Registry(RegistryCommand { + command: RegistrySubCommand::Generate(RegistryGenerateArgs { + target: "rust".to_owned(), + output: temp_output.clone(), + templates: PathBuf::from("crates/weaver_codegen_test/templates/"), + registry: RegistryArgs { + registry: RegistryPath::Local( + "crates/weaver_codegen_test/semconv_registry/".to_owned(), + ), + registry_git_sub_dir: None, + }, + policies: vec![], + skip_policies: true, + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger.clone()); + // The command should succeed. + assert_eq!(exit_code, 0); + + // Hashset containing recursively all the relative paths of rust files in the + // output directory. + let rust_files: std::collections::HashSet<_> = walkdir::WalkDir::new(&temp_output) + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| e.path().extension().map_or(false, |ext| ext == "rs")) + .map(|e| { + e.path() + .strip_prefix(&temp_output) + .unwrap() + .to_string_lossy() + .to_string() + }) + .collect(); + + let expected_rust_files = vec![ + "attributes/client.rs", + "metrics/system.rs", + "attributes/mod.rs", + "metrics/http.rs", + "attributes/exception.rs", + "attributes/server.rs", + "metrics/mod.rs", + "attributes/network.rs", + "attributes/url.rs", + "attributes/http.rs", + "attributes/system.rs", + "attributes/error.rs", + ] + .into_iter() + .map(|s| { + // Split the string by `/` and join the parts with the OS specific separator. + s.split('/') + .collect::() + .to_string_lossy() + .to_string() + }) + .collect::>(); + + assert_eq!(rust_files, expected_rust_files); + + // Now, let's run the command again with the policy checks enabled. + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Registry(RegistryCommand { + command: RegistrySubCommand::Generate(RegistryGenerateArgs { + target: "rust".to_owned(), + output: temp_output.clone(), + templates: PathBuf::from("crates/weaver_codegen_test/templates/"), + registry: RegistryArgs { + registry: RegistryPath::Local( + "crates/weaver_codegen_test/semconv_registry/".to_owned(), + ), + registry_git_sub_dir: None, + }, + policies: vec![], + skip_policies: false, + diagnostic: Default::default(), + }), + })), + }; + + let exit_code = run_command(&cli, logger); + // The command should exit with an error code. + assert_eq!(exit_code, 1); + } +} diff --git a/src/registry/mod.rs b/src/registry/mod.rs index 7114291d..bf34efdb 100644 --- a/src/registry/mod.rs +++ b/src/registry/mod.rs @@ -16,7 +16,6 @@ use weaver_checker::{Engine, Error, PolicyStage}; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::error::handle_errors; use weaver_common::Logger; -use weaver_forge::{GeneratorConfig, OutputDirective, TemplateEngine}; use weaver_resolved_schema::ResolvedTelemetrySchema; use weaver_resolver::SchemaResolver; use weaver_semconv::registry::SemConvRegistry; @@ -27,6 +26,7 @@ use crate::registry::resolve::RegistryResolveArgs; use crate::registry::search::RegistrySearchArgs; use crate::registry::stats::RegistryStatsArgs; use crate::registry::update_markdown::RegistryUpdateMarkdownArgs; +use crate::CmdResult; mod check; mod generate; @@ -80,7 +80,7 @@ pub enum RegistrySubCommand { Resolve(RegistryResolveArgs), /// Searches a registry (not yet implemented). Search(RegistrySearchArgs), - /// Calculate and display a set of general statistics on a registry (not yet implemented). + /// Calculate a set of general statistics on a semantic convention registry. Stats(RegistryStatsArgs), /// Update markdown files that contain markers indicating the templates used to update the specified sections. UpdateMarkdown(RegistryUpdateMarkdownArgs), @@ -138,98 +138,39 @@ pub struct RegistryArgs { pub registry_git_sub_dir: Option, } -/// Set of parameters used to specify the diagnostic format. -#[derive(Args, Debug, Clone)] -pub struct DiagnosticArgs { - /// Format used to render the diagnostic messages. Predefined formats are: ansi, json, - /// gh_workflow_command. - #[arg(long, default_value = "ansi")] - pub diagnostic_format: String, - - /// Path to the directory where the diagnostic templates are located. - #[arg(long, default_value = "diagnostic_templates")] - pub diagnostic_template: PathBuf, -} - /// Manage a semantic convention registry and return the exit code. -#[cfg(not(tarpaulin_include))] -pub fn semconv_registry(log: impl Logger + Sync + Clone, command: &RegistryCommand) -> i32 { +pub fn semconv_registry(log: impl Logger + Sync + Clone, command: &RegistryCommand) -> CmdResult { let cache = match Cache::try_new() { Ok(cache) => cache, - Err(e) => { - log.error(&format!("Failed to create cache: {}", e)); - return 1; - } + Err(e) => return CmdResult::new(Err(e.into()), None), }; - let (cmd_result, diag_args) = match &command.command { - RegistrySubCommand::Check(args) => ( + match &command.command { + RegistrySubCommand::Check(args) => CmdResult::new( check::command(log.clone(), &cache, args), - args.diagnostic.clone(), + Some(args.diagnostic.clone()), ), - RegistrySubCommand::Generate(args) => ( + RegistrySubCommand::Generate(args) => CmdResult::new( generate::command(log.clone(), &cache, args), - args.diagnostic.clone(), + Some(args.diagnostic.clone()), ), - RegistrySubCommand::Stats(args) => ( + RegistrySubCommand::Stats(args) => CmdResult::new( stats::command(log.clone(), &cache, args), - args.diagnostic.clone(), + Some(args.diagnostic.clone()), ), - RegistrySubCommand::Resolve(args) => ( + RegistrySubCommand::Resolve(args) => CmdResult::new( resolve::command(log.clone(), &cache, args), - args.diagnostic.clone(), + Some(args.diagnostic.clone()), ), RegistrySubCommand::Search(_) => unimplemented!(), - RegistrySubCommand::UpdateMarkdown(args) => ( + RegistrySubCommand::UpdateMarkdown(args) => CmdResult::new( update_markdown::command(log.clone(), &cache, args), - args.diagnostic.clone(), + Some(args.diagnostic.clone()), ), - }; - - process_diagnostics(cmd_result, diag_args, log.clone()) -} - -/// Render the diagnostic messages based on the diagnostic configuration and return the exit code -/// based on the diagnostic messages. -fn process_diagnostics( - cmd_result: Result<(), DiagnosticMessages>, - diagnostic_args: DiagnosticArgs, - logger: impl Logger + Sync + Clone, -) -> i32 { - if let Err(diag_msgs) = cmd_result { - let config = GeneratorConfig::new(diagnostic_args.diagnostic_template); - match TemplateEngine::try_new(&diagnostic_args.diagnostic_format, config) { - Ok(engine) => { - match engine.generate( - logger.clone(), - &diag_msgs, - PathBuf::new().as_path(), - &OutputDirective::Stdout, - ) { - Ok(_) => {} - Err(e) => { - logger.error(&format!( - "Failed to render the diagnostic messages. Error: {}", - e - )); - return 1; - } - } - } - Err(e) => { - logger.error(&format!("Failed to create the template engine to render the diagnostic messages. Error: {}", e)); - return 1; - } - } - return if diag_msgs.has_error() { 1 } else { 0 }; } - - // Return 0 if there are no diagnostic messages - 0 } /// Convert a `RegistryPath` to a `weaver_semconv::path::RegistryPath`. -#[cfg(not(tarpaulin_include))] pub(crate) fn semconv_registry_path_from( registry: &RegistryPath, path: &Option, @@ -252,7 +193,6 @@ pub(crate) fn semconv_registry_path_from( /// * `registry_path` - The path to the semantic convention registry. /// * `cache` - The cache to use for loading the registry. /// * `log` - The logger to use for logging messages. -#[cfg(not(tarpaulin_include))] pub(crate) fn load_semconv_specs( registry_path: &weaver_semconv::path::RegistryPath, cache: &Cache, @@ -272,7 +212,6 @@ pub(crate) fn load_semconv_specs( /// /// * `policy_engine` - The pre-configured policy engine to use for checking the policies. /// * `semconv_specs` - The semantic convention specifications to check. -#[cfg(not(tarpaulin_include))] pub fn check_policy( policy_engine: &Engine, semconv_specs: &[(String, SemConvSpec)], @@ -321,7 +260,6 @@ pub fn check_policy( /// * `policies` - The list of policy files to check. /// * `semconv_specs` - The semantic convention specifications to check. /// * `logger` - The logger to use for logging messages. -#[cfg(not(tarpaulin_include))] fn check_policies( registry_path: &weaver_semconv::path::RegistryPath, cache: &Cache, @@ -362,7 +300,6 @@ fn check_policies( /// * `registry_id` - The ID of the semantic convention registry. /// * `semconv_specs` - The semantic convention specifications to resolve. /// * `logger` - The logger to use for logging messages. -#[cfg(not(tarpaulin_include))] pub(crate) fn resolve_semconv_specs( registry: &mut SemConvRegistry, logger: impl Logger + Sync + Clone, diff --git a/src/registry/resolve.rs b/src/registry/resolve.rs index 4abb5c8f..6b0b8fda 100644 --- a/src/registry/resolve.rs +++ b/src/registry/resolve.rs @@ -7,6 +7,7 @@ use std::path::PathBuf; use clap::{Args, ValueEnum}; use serde::Serialize; +use crate::DiagnosticArgs; use weaver_cache::Cache; use weaver_common::diagnostic::DiagnosticMessages; use weaver_common::Logger; @@ -15,7 +16,7 @@ use weaver_semconv::registry::SemConvRegistry; use crate::registry::{ check_policies, load_semconv_specs, resolve_semconv_specs, semconv_registry_path_from, - DiagnosticArgs, RegistryArgs, + RegistryArgs, }; /// Supported output formats for the resolved schema @@ -61,6 +62,10 @@ pub struct RegistryResolveArgs { #[arg(short = 'p', long = "policy")] pub policies: Vec, + /// Skip the policy checks. + #[arg(long, default_value = "false")] + pub skip_policies: bool, + /// Parameters to specify the diagnostic format. #[command(flatten)] pub diagnostic: DiagnosticArgs, @@ -83,13 +88,15 @@ pub(crate) fn command( // Load the semantic convention registry into a local cache. let semconv_specs = load_semconv_specs(®istry_path, cache, logger.clone())?; - check_policies( - ®istry_path, - cache, - &args.policies, - &semconv_specs, - logger.clone(), - )?; + if !args.skip_policies { + check_policies( + ®istry_path, + cache, + &args.policies, + &semconv_specs, + logger.clone(), + )?; + } let mut registry = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); let schema = resolve_semconv_specs(&mut registry, logger.clone())?; @@ -144,3 +151,70 @@ fn apply_format(format: &Format, object: &T) -> Result, + /// Parameters to specify the semantic convention registry + #[command(flatten)] + registry: RegistryArgs, /// Whether or not to run updates in dry-run mode. #[arg(long, default_value = "false")] @@ -71,14 +64,17 @@ pub(crate) fn command( // Construct a generator if we were given a `--target` argument. let generator = match args.target.as_ref() { None => None, - Some(target) => Some(TemplateEngine::try_new( - &format!("registry/{}", target), - GeneratorConfig::default(), - )?), + Some(target) => { + let loader = FileSystemFileLoader::try_new( + format!("{}/registry", args.templates).into(), + target, + )?; + Some(TemplateEngine::try_new(loader)?) + } }; let generator = SnippetGenerator::try_from_url( - semconv_registry_path_from(&args.registry, &args.registry_git_sub_dir), + semconv_registry_path_from(&args.registry.registry, &args.registry.registry_git_sub_dir), cache, generator, )?; @@ -113,3 +109,40 @@ pub(crate) fn command( Ok(()) } + +#[cfg(test)] +mod tests { + use weaver_common::TestLogger; + + use crate::cli::{Cli, Commands}; + use crate::registry::update_markdown::RegistryUpdateMarkdownArgs; + use crate::registry::{RegistryArgs, RegistryCommand, RegistryPath, RegistrySubCommand}; + use crate::run_command; + + #[test] + fn test_registry_update_markdown() { + let logger = TestLogger::new(); + let cli = Cli { + debug: 0, + quiet: false, + command: Some(Commands::Registry(RegistryCommand { + command: RegistrySubCommand::UpdateMarkdown(RegistryUpdateMarkdownArgs { + markdown_dir: "data/update_markdown/markdown".to_owned(), + registry: RegistryArgs { + registry: RegistryPath::Local("data/update_markdown/registry".to_owned()), + registry_git_sub_dir: None, + }, + dry_run: true, + attribute_registry_base_url: Some("/docs/attributes-registry".to_owned()), + templates: "data/update_markdown/templates".to_owned(), + diagnostic: Default::default(), + target: Some("markdown".to_owned()), + }), + })), + }; + + let exit_code = run_command(&cli, logger.clone()); + // The command should succeed. + assert_eq!(exit_code, 0); + } +} diff --git a/tests/registry_check.rs b/tests/registry_check.rs index f06032c0..816fd0af 100644 --- a/tests/registry_check.rs +++ b/tests/registry_check.rs @@ -4,8 +4,10 @@ use assert_cmd::Command; +/// This test checks the CLI interface for the registry generate command. +/// This test doesn't count for the coverage report as it runs a separate process. #[test] -fn test_registry_check() { +fn test_cli_interface() { // Test OTel official semantic convention registry. // This test requires internet access to fetch the registry. // This registry should always be valid! diff --git a/tests/registry_generate.rs b/tests/registry_generate.rs index 1402addb..e8d32832 100644 --- a/tests/registry_generate.rs +++ b/tests/registry_generate.rs @@ -4,8 +4,10 @@ use assert_cmd::Command; +/// This test checks the CLI interface for the registry generate command. +/// This test doesn't count for the coverage report as it runs a separate process. #[test] -fn test_registry_generate() { +fn test_cli_interface() { let mut cmd = Command::cargo_bin("weaver").unwrap(); let output = cmd .arg("registry") diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 7dfbd0ee..395bde43 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -3,7 +3,6 @@ //! Integration tests for the resolution process. use weaver_cache::Cache; -use weaver_common::error::ExitIfError; use weaver_common::{Logger, TestLogger}; use weaver_resolver::attribute::AttributeCatalog; use weaver_resolver::registry::resolve_semconv_registry; @@ -16,6 +15,9 @@ const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-c /// The directory name of the official semantic convention registry. const SEMCONV_REGISTRY_MODEL: &str = "model"; +/// This test checks the CLI interface for the registry generate command. +/// This test doesn't count for the coverage report as it runs a separate process. +/// /// Test the resolution process for the official semantic convention registry. /// Success criteria: /// - All semconv files downloaded from the official semconv repo. @@ -23,7 +25,7 @@ const SEMCONV_REGISTRY_MODEL: &str = "model"; /// - The resolution process should not fail. /// - No warn or error messages should be reported by the logger. #[test] -fn test_semconv_registry_resolution() { +fn test_cli_interface() { let log = TestLogger::new(); let cache = Cache::try_new().unwrap_or_else(|e| { log.error(&e.to_string()); @@ -39,7 +41,9 @@ fn test_semconv_registry_resolution() { path: Some(SEMCONV_REGISTRY_MODEL.to_owned()), }; let semconv_specs = - SchemaResolver::load_semconv_specs(®istry_path, &cache).panic_if_error(log.clone()); + SchemaResolver::load_semconv_specs(®istry_path, &cache).unwrap_or_else(|e| { + panic!("Failed to load the semantic convention specs, error: {e}"); + }); let semconv_specs = SemConvRegistry::from_semconv_specs(registry_id, semconv_specs); // Check if the logger has reported any warnings or errors. @@ -62,10 +66,3 @@ fn test_semconv_registry_resolution() { assert_eq!(log.warn_count(), 0); assert_eq!(log.error_count(), 0); } - -/// Test the resolution process for the official Telemetry Schema. -/// Success criteria: The resolution process should not fail. -#[test] -fn test_telemetry_schema_resolution() { - // ToDo once the official Application Telemetry Schema is approved and implemented by this project. -}