diff --git a/Cargo.lock b/Cargo.lock index be1b278e..ca9f89ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -154,6 +154,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "ascii" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" + [[package]] name = "assert_cmd" version = "2.0.16" @@ -400,6 +406,12 @@ dependencies = [ "hashbrown 0.14.5", ] +[[package]] +name = "chunked_transfer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4de3bc4ea267985becf712dc6d9eed8b04c953b3fcfb339ebc87acd9804901" + [[package]] name = "cipher" version = "0.4.4" @@ -1828,6 +1840,12 @@ version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + [[package]] name = "human_format" version = "1.1.0" @@ -3737,6 +3755,18 @@ dependencies = [ "time-core", ] +[[package]] +name = "tiny_http" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "389915df6413a2e74fb181895f933386023c71110878cd0825588928e64cdc82" +dependencies = [ + "ascii", + "chunked_transfer", + "httpdate", + "log", +] + [[package]] name = "tinyvec" version = "1.8.0" @@ -4261,6 +4291,8 @@ dependencies = [ "serde", "serde_json", "thiserror", + "tiny_http", + "ureq", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 8be2ae36..9791debc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,6 +56,7 @@ schemars = "0.8.21" dirs = "5.0.1" once_cell = "1.20.2" opentelemetry = { version = "0.23.0", features = ["trace", "metrics", "logs", "otel_unstable"] } +tiny_http = "0.12.0" # Features definition ========================================================= [features] diff --git a/crates/weaver_cache/src/lib.rs b/crates/weaver_cache/src/lib.rs index 724be3dc..aa2b7bc4 100644 --- a/crates/weaver_cache/src/lib.rs +++ b/crates/weaver_cache/src/lib.rs @@ -531,6 +531,7 @@ impl RegistryRepo { #[cfg(test)] mod tests { use super::*; + use weaver_common::test::ServeStaticFiles; fn count_yaml_files(repo_path: &Path) -> usize { let count = walkdir::WalkDir::new(repo_path) @@ -608,15 +609,25 @@ mod tests { #[test] fn test_semconv_registry_remote_tar_gz_archive() { - let registry_path = "https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.26.0.tar.gz[model]" - .parse::().unwrap(); + let server = ServeStaticFiles::from("tests/test_data").unwrap(); + let registry_path = format!( + "{}[model]", + server.relative_path_to_url("semconv_registry_v1.26.0.tar.gz") + ) + .parse::() + .unwrap(); check_archive(registry_path, Some("general.yaml")); } #[test] fn test_semconv_registry_remote_zip_archive() { - let registry_path = "https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.26.0.zip[model]" - .parse::().unwrap(); + let server = ServeStaticFiles::from("tests/test_data").unwrap(); + let registry_path = format!( + "{}[model]", + server.relative_path_to_url("semconv_registry_v1.26.0.zip") + ) + .parse::() + .unwrap(); check_archive(registry_path, Some("general.yaml")); } } diff --git a/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.tar.gz b/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.tar.gz new file mode 100644 index 00000000..83c7c24a Binary files /dev/null and b/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.tar.gz differ diff --git a/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.zip b/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.zip new file mode 100644 index 00000000..6c5c4783 Binary files /dev/null and b/crates/weaver_cache/tests/test_data/semconv_registry_v1.26.0.zip differ diff --git a/crates/weaver_common/Cargo.toml b/crates/weaver_common/Cargo.toml index 50148990..63d26d14 100644 --- a/crates/weaver_common/Cargo.toml +++ b/crates/weaver_common/Cargo.toml @@ -16,7 +16,9 @@ paris = { version = "1.5.15", features = ["macros"] } serde.workspace = true serde_json.workspace = true miette.workspace = true +thiserror.workspace = true +tiny_http.workspace = true [dev-dependencies] -thiserror.workspace = true +ureq.workspace = true diff --git a/crates/weaver_common/src/lib.rs b/crates/weaver_common/src/lib.rs index e0237792..5adce191 100644 --- a/crates/weaver_common/src/lib.rs +++ b/crates/weaver_common/src/lib.rs @@ -7,6 +7,7 @@ pub mod error; pub mod in_memory; pub mod quiet; pub mod result; +pub mod test; use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; diff --git a/crates/weaver_common/src/test.rs b/crates/weaver_common/src/test.rs new file mode 100644 index 00000000..8550cdf5 --- /dev/null +++ b/crates/weaver_common/src/test.rs @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! HTTP server for testing purposes. + +use paris::error; +use std::ffi::OsStr; +use std::fs::File; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use std::{collections::HashMap, thread::JoinHandle}; +use tiny_http::{Header, Response, Server, StatusCode}; + +/// An error that can occur while starting the HTTP server. +#[derive(thiserror::Error, Debug, Clone)] +#[error("Internal HTTP server error: {error}")] +pub struct HttpServerError { + error: String, +} + +/// A struct that serves static files from a directory. +pub struct ServeStaticFiles { + server: Arc, + port: u16, + request_handler: JoinHandle<()>, +} + +impl Drop for ServeStaticFiles { + /// Stops the HTTP server. + fn drop(&mut self) { + // Test to see if we can force tiny_http to kill our thread, dropping the Arc + // before we continue to try to ensure `server` is dropped, cleaning + // open threads. + let mut attempts = 0; + while !self.request_handler.is_finished() && attempts < 10 { + self.server.unblock(); + std::thread::yield_now(); + attempts += 1; + } + } +} + +impl ServeStaticFiles { + /// Creates a new HTTP server that serves static files from a directory. + /// Note: This server is only available for testing purposes. + pub fn from(static_path: impl Into) -> Result { + let server = Server::http("127.0.0.1:0").map_err(|e| HttpServerError { + error: e.to_string(), + })?; + + let content_types: HashMap<&'static str, &'static str> = [ + ("yaml", "application/yaml"), + ("json", "application/json"), + ("zip", "application/zip"), + ("gz", "application/gzip"), + ] + .iter() + .cloned() + .collect(); + + let static_path = static_path.into(); + let server = Arc::new(server); + let server_clone = server.clone(); + let port = server + .server_addr() + .to_ip() + .map(|ip| ip.port()) + .unwrap_or(0); + + let request_handler = std::thread::spawn(move || { + for request in server_clone.incoming_requests() { + let mut file_path = static_path.clone(); + if request.url().len() > 1 { + for chunk in request.url().trim_start_matches('/').split('/') { + file_path.push(chunk); + } + } + + if !file_path.exists() { + let status = StatusCode(404); + request + .respond(Response::empty(status)) + .expect("Failed to respond"); + } else if let Ok(file) = File::open(&file_path) { + let mut response = Response::from_file(file); + let content_type = file_path + .extension() + .and_then(OsStr::to_str) + .and_then(|ext| content_types.get(ext).copied()) + .unwrap_or("text/plain"); + response.add_header( + Header::from_str(&format!("Content-Type: {}", content_type)) + .expect("Failed to parse header"), + ); + request.respond(response).expect("Failed to respond"); + } else { + let status = StatusCode(500); + request + .respond(Response::empty(status)) + .expect("Failed to respond"); + } + } + }); + + Ok(Self { + server, + port, + request_handler, + }) + } + + /// Returns the port of the server. + #[must_use] + pub fn port(&self) -> u16 { + self.port + } + + /// Returns the URL of a file. + /// The file path should be relative to the static path. + #[must_use] + pub fn relative_path_to_url(&self, file: &str) -> String { + format!("http://127.0.0.1:{}/{}", self.port, file) + } +} + +#[cfg(test)] +mod tests { + use crate::test::ServeStaticFiles; + + #[test] + fn test_http_server() { + let server = ServeStaticFiles::from("tests/test_data").unwrap(); + + assert!(server.port() > 0); + + let content = ureq::get(&server.relative_path_to_url("file_a.yaml")) + .call() + .unwrap(); + assert_eq!(content.status(), 200); + assert_eq!(content.header("Content-Type").unwrap(), "application/yaml"); + assert_eq!(content.into_string().unwrap(), "file: A"); + + let content = ureq::get(&server.relative_path_to_url("file_b.yaml")) + .call() + .unwrap(); + assert_eq!(content.status(), 200); + assert_eq!(content.header("Content-Type").unwrap(), "application/yaml"); + assert_eq!(content.into_string().unwrap(), "file: B"); + + let result = ureq::get(&server.relative_path_to_url("unknown_file.yaml")).call(); + assert!(result.is_err()); + assert!(matches!(result.unwrap_err(), ureq::Error::Status(404, _))); + } +} diff --git a/crates/weaver_common/tests/test_data/file_a.yaml b/crates/weaver_common/tests/test_data/file_a.yaml new file mode 100644 index 00000000..c1984c45 --- /dev/null +++ b/crates/weaver_common/tests/test_data/file_a.yaml @@ -0,0 +1 @@ +file: A \ No newline at end of file diff --git a/crates/weaver_common/tests/test_data/file_b.yaml b/crates/weaver_common/tests/test_data/file_b.yaml new file mode 100644 index 00000000..23ff5651 --- /dev/null +++ b/crates/weaver_common/tests/test_data/file_b.yaml @@ -0,0 +1 @@ +file: B \ No newline at end of file diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 352528bf..cc97654e 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -215,6 +215,7 @@ mod tests { use crate::group::{GroupSpec, GroupType}; use crate::registry::SemConvRegistry; use crate::Error; + use weaver_common::test::ServeStaticFiles; #[test] fn test_try_from_path_pattern() { @@ -237,9 +238,10 @@ mod tests { #[test] fn test_semconv_spec_from_url() { - let semconv_url = "https://raw.githubusercontent.com/open-telemetry/semantic-conventions/main/model/url/common.yaml"; + let server = ServeStaticFiles::from("tests/test_data").unwrap(); + let semconv_url = server.relative_path_to_url("url/common.yaml"); let result = - SemConvRegistry::semconv_spec_from_url(semconv_url).into_result_failing_non_fatal(); + SemConvRegistry::semconv_spec_from_url(&semconv_url).into_result_failing_non_fatal(); assert!(result.is_ok()); } diff --git a/crates/weaver_semconv/src/semconv.rs b/crates/weaver_semconv/src/semconv.rs index a37e2c7d..ee40e857 100644 --- a/crates/weaver_semconv/src/semconv.rs +++ b/crates/weaver_semconv/src/semconv.rs @@ -188,6 +188,7 @@ mod tests { InvalidAttribute, InvalidExampleWarning, InvalidSemConvSpec, RegistryNotFound, }; use std::path::PathBuf; + use weaver_common::test::ServeStaticFiles; #[test] fn test_semconv_spec_from_file() { @@ -303,26 +304,27 @@ mod tests { #[test] fn test_semconv_spec_from_url() { + let server = ServeStaticFiles::from("tests/test_data").unwrap(); // Existing URL. The URL is a raw file from the semantic conventions repository. // This file is expected to be available. - let semconv_url = "https://raw.githubusercontent.com/open-telemetry/semantic-conventions/main/model/url/common.yaml"; - let semconv_spec = SemConvSpec::from_url(semconv_url) + let semconv_url = server.relative_path_to_url("url/common.yaml"); + let semconv_spec = SemConvSpec::from_url(&semconv_url) .into_result_failing_non_fatal() .unwrap(); assert!(!semconv_spec.groups.is_empty()); // Invalid semconv file - let semconv_url = "https://raw.githubusercontent.com/open-telemetry/semantic-conventions/main/model/README.md"; - let semconv_spec = SemConvSpec::from_url(semconv_url).into_result_failing_non_fatal(); + let semconv_url = server.relative_path_to_url("README.md"); + let semconv_spec = SemConvSpec::from_url(&semconv_url).into_result_failing_non_fatal(); assert!(semconv_spec.is_err()); assert!(matches!( semconv_spec.unwrap_err(), InvalidSemConvSpec { .. } )); - // Non-existing URL (including both a leading underscore (which is not a valid domain) and a non-existing domain) - let semconv_url = "http://_unknown.com.invalid/unknown-semconv.yaml"; - let semconv_spec = SemConvSpec::from_url(semconv_url).into_result_failing_non_fatal(); + // Non-existing URL + let semconv_url = server.relative_path_to_url("unknown-semconv.yaml"); + let semconv_spec = SemConvSpec::from_url(&semconv_url).into_result_failing_non_fatal(); assert!(semconv_spec.is_err()); assert!(matches!(semconv_spec.unwrap_err(), RegistryNotFound { .. })); } diff --git a/crates/weaver_semconv/tests/test_data/README.md b/crates/weaver_semconv/tests/test_data/README.md new file mode 100644 index 00000000..cfaad196 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/README.md @@ -0,0 +1,7 @@ +# YAML Model for Semantic Conventions + +The YAML descriptions of semantic convention contained in this directory are intended to +be used by the various OpenTelemetry language implementations to aid in automatic +generation of semantics-related code. + +... \ No newline at end of file diff --git a/crates/weaver_semconv/tests/test_data/url/common.yaml b/crates/weaver_semconv/tests/test_data/url/common.yaml new file mode 100644 index 00000000..23674786 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/url/common.yaml @@ -0,0 +1,12 @@ +groups: + - id: url + brief: Attributes describing URL. + type: attribute_group + attributes: + - ref: url.scheme + - ref: url.full + tag: sensitive-information + - ref: url.path + - ref: url.query + tag: sensitive-information + - ref: url.fragment \ No newline at end of file