From 7aa85afa67c9f8bb4a30078f7f25c6b53d647e90 Mon Sep 17 00:00:00 2001 From: John Sonnenschein Date: Mon, 4 Dec 2023 15:10:28 -0800 Subject: [PATCH] feat(http_server source): add all headers to the namespace metadata (#18922) * feat(http_server source): add all headers to the namespace metadata * feat(http_server source): allow wildcard matching in headers * style: whitespace typo * rework header glob matching, add docs and tests * examples, docs, tests, error on misconfiguration * fmt & clippy cleanup * Generate docs Signed-off-by: Jesse Szwedko * docs grammar adjustment * add some code docs --------- Signed-off-by: Jesse Szwedko Co-authored-by: Jesse Szwedko Co-authored-by: neuronull --- src/sources/http_server.rs | 122 ++++++++++++++++-- .../components/sources/base/http.cue | 6 +- .../components/sources/base/http_server.cue | 6 +- 3 files changed, 119 insertions(+), 15 deletions(-) diff --git a/src/sources/http_server.rs b/src/sources/http_server.rs index 80e6c9b9ee5c5..a752358ad968c 100644 --- a/src/sources/http_server.rs +++ b/src/sources/http_server.rs @@ -89,10 +89,16 @@ pub struct SimpleHttpConfig { /// A list of HTTP headers to include in the log event. /// + /// Accepts the wildcard (`*`) character for headers matching a specified pattern. + /// + /// Specifying "*" results in all headers included in the log event. + /// /// These override any values included in the JSON payload with conflicting names. #[serde(default)] #[configurable(metadata(docs::examples = "User-Agent"))] #[configurable(metadata(docs::examples = "X-My-Custom-Header"))] + #[configurable(metadata(docs::examples = "X-*"))] + #[configurable(metadata(docs::examples = "*"))] headers: Vec, /// A list of URL query parameters to include in the log event. 
@@ -329,6 +335,21 @@ fn remove_duplicates(mut list: Vec, list_name: &str) -> Vec { list } +#[derive(Clone)] +enum HttpConfigParamKind { + Glob(glob::Pattern), + Exact(String), +} + +fn build_param_matcher(list: &[String]) -> crate::Result> { + list.iter() + .map(|s| match s.contains('*') { + true => Ok(HttpConfigParamKind::Glob(glob::Pattern::new(s)?)), + false => Ok(HttpConfigParamKind::Exact(s.to_string())), + }) + .collect::>>() +} + #[async_trait::async_trait] #[typetag::serde(name = "http_server")] impl SourceConfig for SimpleHttpConfig { @@ -337,7 +358,7 @@ impl SourceConfig for SimpleHttpConfig { let log_namespace = cx.log_namespace(self.log_namespace); let source = SimpleHttpSource { - headers: remove_duplicates(self.headers.clone(), "headers"), + headers: build_param_matcher(&remove_duplicates(self.headers.clone(), "headers"))?, query_parameters: remove_duplicates(self.query_parameters.clone(), "query_parameters"), path_key: self.path_key.clone(), decoder, @@ -384,7 +405,7 @@ impl SourceConfig for SimpleHttpConfig { #[derive(Clone)] struct SimpleHttpSource { - headers: Vec, + headers: Vec, query_parameters: Vec, path_key: OptionalValuePath, decoder: Decoder, @@ -414,17 +435,48 @@ impl HttpSource for SimpleHttpSource { request_path.to_owned(), ); - // add each header to each event - for header_name in &self.headers { - let value = headers_config.get(header_name).map(HeaderValue::as_bytes); - - self.log_namespace.insert_source_metadata( - SimpleHttpConfig::NAME, - log, - Some(LegacyKey::InsertIfEmpty(path!(header_name))), - path!("headers", header_name), - Value::from(value.map(Bytes::copy_from_slice)), - ); + for h in &self.headers { + match h { + // Add each non-wildcard containing header that was specified + // in the `headers` config option to the event if an exact match + // is found. 
+ HttpConfigParamKind::Exact(header_name) => { + let value = + headers_config.get(header_name).map(HeaderValue::as_bytes); + + self.log_namespace.insert_source_metadata( + SimpleHttpConfig::NAME, + log, + Some(LegacyKey::InsertIfEmpty(path!(header_name))), + path!("headers", header_name), + Value::from(value.map(Bytes::copy_from_slice)), + ); + } + // Add all headers that match against wildcard patterns specified + // in the `headers` config option to the event. + HttpConfigParamKind::Glob(header_pattern) => { + for header_name in headers_config.keys() { + if header_pattern.matches_with( + header_name.as_str(), + glob::MatchOptions::default(), + ) { + let value = headers_config + .get(header_name) + .map(HeaderValue::as_bytes); + + self.log_namespace.insert_source_metadata( + SimpleHttpConfig::NAME, + log, + Some(LegacyKey::InsertIfEmpty(path!( + header_name.as_str() + ))), + path!("headers", header_name.as_str()), + Value::from(value.map(Bytes::copy_from_slice)), + ); + } + } + } + }; } self.log_namespace.insert_standard_vector_source_metadata( @@ -983,11 +1035,13 @@ mod tests { let mut headers = HeaderMap::new(); headers.insert("User-Agent", "test_client".parse().unwrap()); headers.insert("Upgrade-Insecure-Requests", "false".parse().unwrap()); + headers.insert("X-Test-Header", "true".parse().unwrap()); let (rx, addr) = source( vec![ "User-Agent".to_string(), "Upgrade-Insecure-Requests".to_string(), + "X-*".to_string(), "AbsentHeader".to_string(), ], vec![], @@ -1018,11 +1072,53 @@ mod tests { assert_eq!(log["key1"], "value1".into()); assert_eq!(log["\"User-Agent\""], "test_client".into()); assert_eq!(log["\"Upgrade-Insecure-Requests\""], "false".into()); + assert_eq!(log["\"x-test-header\""], "true".into()); assert_eq!(log["AbsentHeader"], Value::Null); assert_event_metadata(log).await; } } + #[tokio::test] + async fn http_headers_wildcard() { + let mut events = assert_source_compliance(&HTTP_PUSH_SOURCE_TAGS, async { + let mut headers = HeaderMap::new(); + 
headers.insert("User-Agent", "test_client".parse().unwrap()); + headers.insert("X-Case-Sensitive-Value", "CaseSensitive".parse().unwrap()); + + let (rx, addr) = source( + vec!["*".to_string()], + vec![], + "http_path", + "/", + "POST", + StatusCode::OK, + true, + EventStatus::Delivered, + true, + None, + Some(JsonDeserializerConfig::default().into()), + ) + .await; + + spawn_ok_collect_n( + send_with_headers(addr, "{\"key1\":\"value1\"}", headers), + rx, + 1, + ) + .await + }) + .await; + + { + let event = events.remove(0); + let log = event.as_log(); + assert_eq!(log["key1"], "value1".into()); + assert_eq!(log["\"user-agent\""], "test_client".into()); + assert_eq!(log["\"x-case-sensitive-value\""], "CaseSensitive".into()); + assert_event_metadata(log).await; + } + } + #[tokio::test] async fn http_query() { let mut events = assert_source_compliance(&HTTP_PUSH_SOURCE_TAGS, async { diff --git a/website/cue/reference/components/sources/base/http.cue b/website/cue/reference/components/sources/base/http.cue index d97eb5b546e59..b92abd9053910 100644 --- a/website/cue/reference/components/sources/base/http.cue +++ b/website/cue/reference/components/sources/base/http.cue @@ -287,12 +287,16 @@ base: components: sources: http: configuration: { description: """ A list of HTTP headers to include in the log event. + Accepts the wildcard (`*`) character for headers matching a specified pattern. + + Specifying "*" results in all headers included in the log event. + These override any values included in the JSON payload with conflicting names. 
""" required: false type: array: { default: [] - items: type: string: examples: ["User-Agent", "X-My-Custom-Header"] + items: type: string: examples: ["User-Agent", "X-My-Custom-Header", "X-*", "*"] } } keepalive: { diff --git a/website/cue/reference/components/sources/base/http_server.cue b/website/cue/reference/components/sources/base/http_server.cue index 56d08d81a2523..130a7d1040c7c 100644 --- a/website/cue/reference/components/sources/base/http_server.cue +++ b/website/cue/reference/components/sources/base/http_server.cue @@ -287,12 +287,16 @@ base: components: sources: http_server: configuration: { description: """ A list of HTTP headers to include in the log event. + Accepts the wildcard (`*`) character for headers matching a specified pattern. + + Specifying "*" results in all headers included in the log event. + These override any values included in the JSON payload with conflicting names. """ required: false type: array: { default: [] - items: type: string: examples: ["User-Agent", "X-My-Custom-Header"] + items: type: string: examples: ["User-Agent", "X-My-Custom-Header", "X-*", "*"] } } keepalive: {