From 9f4f41a5585b96b292f39391a000b4d37e6b977c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 26 Jul 2022 13:50:55 -0600 Subject: [PATCH 01/50] take 1 - lifetime issue --- Cargo.toml | 2 + src/sources/http_scrape/config.rs | 367 ++++++++++++++++++++++++++++++ src/sources/http_scrape/mod.rs | 150 ++++++++++++ src/sources/http_scrape/source.rs | 0 src/sources/mod.rs | 6 + 5 files changed, 525 insertions(+) create mode 100644 src/sources/http_scrape/config.rs create mode 100644 src/sources/http_scrape/mod.rs create mode 100644 src/sources/http_scrape/source.rs diff --git a/Cargo.toml b/Cargo.toml index 05068a0949d7a..a6ba50b0e117a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -449,6 +449,7 @@ sources-logs = [ "sources-gcp_pubsub", "sources-heroku_logs", "sources-http", + "sources-http_scrape", "sources-internal_logs", "sources-journald", "sources-kafka", @@ -494,6 +495,7 @@ sources-gcp_pubsub = ["gcp", "dep:h2", "dep:prost-types", "protobuf-build", "dep sources-heroku_logs = ["sources-utils-http", "sources-utils-http-query", "sources-http"] sources-host_metrics = ["dep:heim"] sources-http = ["sources-utils-http", "sources-utils-http-query"] +sources-http_scrape = ["sources-utils-http", "sources-utils-http-query"] sources-internal_logs = [] sources-internal_metrics = [] sources-journald = [] diff --git a/src/sources/http_scrape/config.rs b/src/sources/http_scrape/config.rs new file mode 100644 index 0000000000000..ea44fcaf97f4c --- /dev/null +++ b/src/sources/http_scrape/config.rs @@ -0,0 +1,367 @@ +use crate::{ + codecs::{Decoder, DecodingConfig}, + config::{self, GenerateConfig, SourceConfig, SourceContext, SourceDescription}, + http::{Auth, HttpClient}, + internal_events::{EndpointBytesReceived, RequestCompleted, StreamClosedError}, + serde::default_decoding, + sources, + tls::{TlsConfig, TlsSettings}, + Error, SourceSender, +}; +use async_trait::async_trait; +use bytes::BytesMut; +use codecs::decoding::{DeserializerConfig, FramingConfig}; +use codecs::StreamDecodingError; +use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; +use hyper::{Body, Request}; +//use snafu::ResultExt; +use super::HttpScrape; +use std::collections::HashMap; +use std::future::ready; +use std::time::{Duration, Instant}; +use tokio_stream::wrappers::IntervalStream; +use tokio_util::codec::Decoder as _; +use vector_common::shutdown::ShutdownSignal; +use vector_config::configurable_component; +use vector_core::{ + config::{proxy::ProxyConfig, LogNamespace, Output}, + event::Event, +}; + +const NAME: &str = "http_scrape"; + +/// Configuration for the `http_scrape` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct HttpScrapeConfig { + /// Endpoints to scrape metrics from. + endpoint: String, + + /// Custom parameters for the scrape request query string. + /// + /// One or more values for the same parameter key can be provided. The parameters provided in this option are + /// appended to any parameters manually provided in the `endpoint` option. + query: Option>>, + + /// The interval between scrapes, in seconds. 
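+    /// Defaults to 15 seconds when not specified (see `default_scrape_interval_secs`).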
+ #[serde(default = "default_scrape_interval_secs")] + scrape_interval_secs: u64, + + /// TODO + #[configurable(derived)] + #[serde(default = "default_decoding")] + decoding: DeserializerConfig, + + /// TODO + #[configurable(derived)] + framing: Option, + + /// TODO + #[serde(default)] + headers: Option>, + + /// TODO + #[configurable(derived)] + tls: Option, + + /// TODO + #[configurable(derived)] + auth: Option, +} + +pub(crate) const fn default_scrape_interval_secs() -> u64 { + 15 +} + +inventory::submit! { + SourceDescription::new::(NAME) +} + +impl GenerateConfig for HttpScrapeConfig { + fn generate_config() -> toml::Value { + toml::Value::try_from(Self { + endpoint: "http://localhost:9090/metrics".to_string(), + query: None, + scrape_interval_secs: default_scrape_interval_secs(), + decoding: default_decoding(), + framing: None, + headers: None, + tls: None, + auth: None, + }) + .unwrap() + } +} + +#[async_trait::async_trait] +#[typetag::serde(name = "http_scrape")] +impl SourceConfig for HttpScrapeConfig { + async fn build(&self, cx: SourceContext) -> crate::Result { + // let url = self + // .endpoint + // .parse::() + // .context(sources::UriParseSnafu) + // .map(|uri| { + // let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + // if let Some(query) = uri.query() { + // serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); + // }; + // if let Some(query) = &self.query { + // for (k, l) in query { + // for v in l { + // serializer.append_pair(k, v); + // } + // } + // }; + // let mut builder = http::Uri::builder(); + // if let Some(scheme) = uri.scheme() { + // builder = builder.scheme(scheme.clone()); + // }; + // if let Some(authority) = uri.authority() { + // builder = builder.authority(authority.clone()); + // }; + // builder = builder.path_and_query(match serializer.finish() { + // query if !query.is_empty() => format!("{}?{}", uri.path(), query), + // _ => uri.path().to_string(), + // }); + // builder.build().expect("error building URI") + // })?; + let endpoints = vec![self.endpoint]; + //let urls = super::get_urls(&cx, &endpoints)?; + let tls = TlsSettings::from_options(&self.tls)?; + let decoder = DecodingConfig::new( + self.framing + .clone() + .unwrap_or_else(|| self.decoding.default_stream_framing()), + self.decoding.clone(), + LogNamespace::Vector, + ) + .build(); + + let urls = super::get_urls(&endpoints, self.query)?; + + Ok(HttpScrape::http_scrape( + self, + &urls, + self.scrape_interval_secs, + self.auth, + tls, + cx.proxy.clone(), + cx.shutdown, + cx.out, + ) + .boxed()) + + // Ok(http_scrape( + // self.clone(), + // decoder, + // url, + // tls, + // cx.proxy.clone(), + // cx.shutdown, + // cx.out, + // ) + // .boxed()) + } + + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] + } + + fn source_type(&self) -> &'static str { + NAME + } + + fn can_acknowledge(&self) -> bool { + false + } +} + +//struct HttpScrapeGeneric { +// config: HttpScrapeConfig, +// interval_secs: u64, +// decoder: Decoder, +// auth: Option, +// tls: TlsSettings, +// proxy: ProxyConfig, +// shutdown: ShutdownSignal, +// out: SourceSender, +// query: Option>>, +//} + +#[async_trait] +impl super::HttpScrape for HttpScrapeConfig { + async fn pre_request_context(&self, url: &http::Uri) {} + + async fn post_request( + &self, + response: Result< + (http::response::Parts, bytes::Bytes), + Box, + >, + ) -> Option>> { + None + } +} + +async fn http_scrape( + config: HttpScrapeConfig, + decoder: 
Decoder, + url: http::Uri, + tls: TlsSettings, + proxy: ProxyConfig, + shutdown: ShutdownSignal, + mut out: SourceSender, +) -> Result<(), ()> { + let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( + config.scrape_interval_secs, + ))) + .take_until(shutdown) + .map(move |_| stream::iter(vec![url.clone()])) + .flatten() + .map(move |url| { + let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); + let endpoint = url.to_string(); + let mut decoder = decoder.clone(); + + let mut request = Request::get(&url) + .header(http::header::ACCEPT, "text/plain") + .body(Body::empty()) + .expect("error creating request"); + if let Some(auth) = &config.auth { + auth.apply(&mut request); + } + + let start = Instant::now(); + client + .send(request) + .map_err(Error::from) + .and_then(|response| async move { + let (header, body) = response.into_parts(); + let body = hyper::body::to_bytes(body).await?; + emit!(EndpointBytesReceived { + byte_size: body.len(), + protocol: "http", + endpoint: endpoint.as_str(), + }); + Ok((header, body)) + }) + .into_stream() + .filter_map(move |response| { + ready(match response { + Ok((header, body)) if header.status == hyper::StatusCode::OK => { + emit!(RequestCompleted { + start, + end: Instant::now() + }); + let body = String::from_utf8_lossy(&body); + dbg!(&body); + + let mut events = Vec::new(); + let mut bytes = BytesMut::new(); + bytes.extend_from_slice(body.as_bytes()); + + loop { + match decoder.decode_eof(&mut bytes) { + Ok(Some((next, _))) => { + events.extend(next.into_iter()); + } + Ok(None) => break, + Err(error) => { + // Error is logged by `crate::codecs::Decoder`, no further + // handling is needed here. + if !error.can_continue() { + break; + } + break; + } + } + } + for event in &events { + dbg!(event); + } + // TODO emit EventsReceived (PrometheusEventsReceived) + Some(stream::iter(events)) + } + Ok((_header, _)) => { + // emit!(PrometheusHttpResponseError { + // code: header.status, + // url: url.clone(), + // }); + println!("error 1"); + None + } + Err(_error) => { + // emit!(PrometheusHttpError { + // error, + // url: url.clone(), + // }); + println!("error 2"); + None + } + }) + }) + .flatten() + }) + .flatten() + .boxed(); + + match out.send_event_stream(&mut stream).await { + Ok(()) => { + info!("Finished sending."); + Ok(()) + } + Err(error) => { + let (count, _) = stream.size_hint(); + emit!(StreamClosedError { error, count }); + Err(()) + } + } +} + +#[cfg(test)] +mod test { + use tokio::time::Duration; + use warp::Filter; + + use super::*; + use crate::test_util::{ + components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, + next_addr, + }; + + #[test] + fn generate_config() { + crate::test_util::test_generate_config::(); + } + + #[tokio::test] + async fn test_() { + let in_addr = next_addr(); + + let dummy_endpoint = warp::path!("metrics") + .and(warp::header::exact("Accept", "text/plain")) + .map(|| r#"A plain text event"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + let config = HttpScrapeConfig { + endpoint: format!("http://{}/metrics", in_addr), + scrape_interval_secs: 1, + query: None, + decoding: default_decoding(), + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } +} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs new file mode 100644 
index 0000000000000..5cbd223b7df48 --- /dev/null +++ b/src/sources/http_scrape/mod.rs @@ -0,0 +1,150 @@ +#[cfg(all(unix, feature = "sources-http_scrape"))] +pub mod config; +#[cfg(all(unix, feature = "sources-http_scrape"))] +pub mod source; + +pub use config::HttpScrapeConfig; + +//use crate::config::SinkContext; +use crate::{ + //codecs::{Decoder, DecodingConfig}, + //config::{GenerateConfig, SourceConfig, SourceContext, SourceDescription}, + //config::SourceConfig, + http::{Auth, HttpClient}, + internal_events::{EndpointBytesReceived, StreamClosedError}, + sources, + tls::TlsSettings, + Error, + SourceSender, +}; +use async_trait::async_trait; +//use bytes::BytesMut; +//use codecs::decoding::{DeserializerConfig, FramingConfig}; +use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; +use hyper::{Body, Request}; +use snafu::ResultExt; +use std::collections::HashMap; +//use std::future::ready; +use std::time::{Duration, Instant}; +use tokio_stream::wrappers::IntervalStream; +//use tokio_util::codec::Decoder as _; +use vector_common::shutdown::ShutdownSignal; +//use vector_config::configurable_component; +//use vector_core::config::{proxy::ProxyConfig, LogNamespace, Output}; +use vector_core::{config::proxy::ProxyConfig, event::Event}; + +fn get_urls( + endpoints: &Vec, + query: Option>>, +) -> Result, sources::BuildError> { + endpoints + .iter() + .map(|s| s.parse::().context(sources::UriParseSnafu)) + .map(|r| { + r.map(|uri| { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + if let Some(query) = uri.query() { + serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); + }; + if let Some(query) = &query { + for (k, l) in query { + for v in l { + serializer.append_pair(k, v); + } + } + }; + let mut builder = http::Uri::builder(); + if let Some(scheme) = uri.scheme() { + builder = builder.scheme(scheme.clone()); + }; + if let Some(authority) = uri.authority() { + builder = builder.authority(authority.clone()); + }; + builder = builder.path_and_query(match serializer.finish() { + query if !query.is_empty() => format!("{}?{}", uri.path(), query), + _ => uri.path().to_string(), + }); + builder.build().expect("error building URI") + }) + }) + .collect::, sources::BuildError>>() +} + +#[async_trait] +pub trait HttpScrape { + async fn pre_request_context(&self, url: &http::Uri); + + async fn post_request( + &self, + response: Result< + (http::response::Parts, bytes::Bytes), + Box, + >, + ) -> Option>>; + + async fn http_scrape( + &self, + urls: &Vec, + interval_secs: u64, + //decoder: Decoder, + //url: http::Uri, + auth: Option, + tls: TlsSettings, + proxy: ProxyConfig, + shutdown: ShutdownSignal, + mut out: SourceSender, + ) -> Result<(), ()> { + let mut stream = + IntervalStream::new(tokio::time::interval(Duration::from_secs(interval_secs))) + .take_until(shutdown) + .map(move |_| stream::iter(urls.clone())) + .flatten() + .map(move |url| { + let client = + HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); + let endpoint = url.to_string(); + + let mut request = Request::get(&url) + .header(http::header::ACCEPT, "text/plain") + .body(Body::empty()) + .expect("error creating request"); + if let Some(auth) = &auth { + auth.apply(&mut request); + } + + self.pre_request_context(&url); + + let start = Instant::now(); + client + .send(request) + .map_err(Error::from) + .and_then(|response| async move { + let (header, body) = response.into_parts(); + let body = hyper::body::to_bytes(body).await?; + 
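+                        // The full response body is buffered here so that its byte size can be
+                        // reported below before the parts are handed to `post_request`.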
emit!(EndpointBytesReceived { + byte_size: body.len(), + protocol: "http", + endpoint: endpoint.as_str(), + }); + Ok((header, body)) + }) + .into_stream() + .filter_map(move |response| self.post_request(response)) + .flatten() + }) + .flatten() + .boxed(); + + match out.send_event_stream(&mut stream).await { + Ok(()) => { + info!("Finished sending."); + Ok(()) + } + Err(error) => { + let (count, _) = stream.size_hint(); + emit!(StreamClosedError { error, count }); + Err(()) + } + } + } +} diff --git a/src/sources/http_scrape/source.rs b/src/sources/http_scrape/source.rs new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/src/sources/mod.rs b/src/sources/mod.rs index 9155a579cf3d8..6f47984a9b556 100644 --- a/src/sources/mod.rs +++ b/src/sources/mod.rs @@ -34,6 +34,8 @@ pub mod heroku_logs; pub mod host_metrics; #[cfg(feature = "sources-http")] pub mod http; +#[cfg(feature = "sources-http_scrape")] +pub mod http_scrape; #[cfg(feature = "sources-internal_logs")] pub mod internal_logs; #[cfg(feature = "sources-internal_metrics")] @@ -162,6 +164,10 @@ pub enum Sources { #[cfg(feature = "sources-http")] Http(#[configurable(derived)] http::SimpleHttpConfig), + /// HTTP Scrape. + #[cfg(feature = "sources-http_scrape")] + HttpScrape(#[configurable(derived)] http_scrape::HttpScrapeConfig), + /// Internal Logs. #[cfg(feature = "sources-internal_logs")] InternalLogs(#[configurable(derived)] internal_logs::InternalLogsConfig), From 12d9ea428d4368dac1bb93bbbf80b194203a404c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 12:11:05 -0600 Subject: [PATCH 02/50] in a working state, refactored prom scrape --- src/sources/http_scrape/config.rs | 384 ++++++++++++++---------------- src/sources/http_scrape/mod.rs | 265 +++++++++++---------- src/sources/http_scrape/scrape.rs | 347 +++++++++++++++++++++++++++ src/sources/http_scrape/source.rs | 0 src/sources/prometheus/scrape.rs | 323 +++++++++---------------- 5 files changed, 786 insertions(+), 533 deletions(-) create mode 100644 src/sources/http_scrape/scrape.rs delete mode 100644 src/sources/http_scrape/source.rs diff --git a/src/sources/http_scrape/config.rs b/src/sources/http_scrape/config.rs index ea44fcaf97f4c..d53f72fe531f6 100644 --- a/src/sources/http_scrape/config.rs +++ b/src/sources/http_scrape/config.rs @@ -1,30 +1,29 @@ +//! +//! 
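+//! Generic `http_scrape` source: on a fixed interval it issues an HTTP GET against the
+//! configured endpoint, decodes the response body with the configured framing and
+//! decoding options, and forwards the resulting events downstream.
+//!
+//! A minimal configuration might look like the sketch below (the component name and
+//! values are illustrative, mirroring `GenerateConfig` and the default interval):
+//!
+//! ```toml
+//! [sources.my_http_scrape]
+//! type = "http_scrape"
+//! endpoint = "http://localhost:9090/metrics"
+//! scrape_interval_secs = 15
+//! ```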
+ +use bytes::{Bytes, BytesMut}; +use futures_util::FutureExt; +use http::{response::Parts, Uri}; +use snafu::ResultExt; +use std::collections::HashMap; +use tokio_util::codec::Decoder as _; + use crate::{ codecs::{Decoder, DecodingConfig}, config::{self, GenerateConfig, SourceConfig, SourceContext, SourceDescription}, - http::{Auth, HttpClient}, - internal_events::{EndpointBytesReceived, RequestCompleted, StreamClosedError}, + http::Auth, serde::default_decoding, sources, tls::{TlsConfig, TlsSettings}, - Error, SourceSender, + Result, +}; +use codecs::{ + decoding::{DeserializerConfig, FramingConfig}, + StreamDecodingError, }; -use async_trait::async_trait; -use bytes::BytesMut; -use codecs::decoding::{DeserializerConfig, FramingConfig}; -use codecs::StreamDecodingError; -use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; -use hyper::{Body, Request}; -//use snafu::ResultExt; -use super::HttpScrape; -use std::collections::HashMap; -use std::future::ready; -use std::time::{Duration, Instant}; -use tokio_stream::wrappers::IntervalStream; -use tokio_util::codec::Decoder as _; -use vector_common::shutdown::ShutdownSignal; use vector_config::configurable_component; use vector_core::{ - config::{proxy::ProxyConfig, LogNamespace, Output}, + config::{LogNamespace, Output}, event::Event, }; @@ -96,39 +95,16 @@ impl GenerateConfig for HttpScrapeConfig { #[async_trait::async_trait] #[typetag::serde(name = "http_scrape")] impl SourceConfig for HttpScrapeConfig { - async fn build(&self, cx: SourceContext) -> crate::Result { - // let url = self - // .endpoint - // .parse::() - // .context(sources::UriParseSnafu) - // .map(|uri| { - // let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - // if let Some(query) = uri.query() { - // serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); - // }; - // if let Some(query) = &self.query { - // for (k, l) in query { - // for v in l { - // serializer.append_pair(k, v); - // } - // } - // }; - // let mut builder = http::Uri::builder(); - // if let Some(scheme) = uri.scheme() { - // builder = builder.scheme(scheme.clone()); - // }; - // if let Some(authority) = uri.authority() { - // builder = builder.authority(authority.clone()); - // }; - // builder = builder.path_and_query(match serializer.finish() { - // query if !query.is_empty() => format!("{}?{}", uri.path(), query), - // _ => uri.path().to_string(), - // }); - // builder.build().expect("error building URI") - // })?; - let endpoints = vec![self.endpoint]; - //let urls = super::get_urls(&cx, &endpoints)?; + async fn build(&self, cx: SourceContext) -> Result { + let endpoints = vec![self.endpoint.clone()]; + let urls = endpoints + .iter() + .map(|s| s.parse::().context(sources::UriParseSnafu)) + .map(|r| r.map(|uri| super::get_url(&uri, &self.query))) + .collect::, sources::BuildError>>()?; + let tls = TlsSettings::from_options(&self.tls)?; + let decoder = DecodingConfig::new( self.framing .clone() @@ -138,30 +114,19 @@ impl SourceConfig for HttpScrapeConfig { ) .build(); - let urls = super::get_urls(&endpoints, self.query)?; + let context = HttpScrapeContext { decoder }; - Ok(HttpScrape::http_scrape( - self, - &urls, + Ok(super::http_scrape( + context, + urls, self.scrape_interval_secs, - self.auth, + self.auth.clone(), tls, cx.proxy.clone(), cx.shutdown, cx.out, ) .boxed()) - - // Ok(http_scrape( - // self.clone(), - // decoder, - // url, - // tls, - // cx.proxy.clone(), - // cx.shutdown, - // cx.out, - // ) - // .boxed()) } fn outputs(&self, 
_global_log_namespace: LogNamespace) -> Vec { @@ -177,147 +142,162 @@ impl SourceConfig for HttpScrapeConfig { } } -//struct HttpScrapeGeneric { -// config: HttpScrapeConfig, -// interval_secs: u64, -// decoder: Decoder, -// auth: Option, -// tls: TlsSettings, -// proxy: ProxyConfig, -// shutdown: ShutdownSignal, -// out: SourceSender, -// query: Option>>, -//} - -#[async_trait] -impl super::HttpScrape for HttpScrapeConfig { - async fn pre_request_context(&self, url: &http::Uri) {} - - async fn post_request( - &self, - response: Result< - (http::response::Parts, bytes::Bytes), - Box, - >, - ) -> Option>> { - None - } -} - -async fn http_scrape( - config: HttpScrapeConfig, +#[derive(Clone)] +struct HttpScrapeContext { decoder: Decoder, - url: http::Uri, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, - mut out: SourceSender, -) -> Result<(), ()> { - let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( - config.scrape_interval_secs, - ))) - .take_until(shutdown) - .map(move |_| stream::iter(vec![url.clone()])) - .flatten() - .map(move |url| { - let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); - let endpoint = url.to_string(); - let mut decoder = decoder.clone(); - - let mut request = Request::get(&url) - .header(http::header::ACCEPT, "text/plain") - .body(Body::empty()) - .expect("error creating request"); - if let Some(auth) = &config.auth { - auth.apply(&mut request); - } +} - let start = Instant::now(); - client - .send(request) - .map_err(Error::from) - .and_then(|response| async move { - let (header, body) = response.into_parts(); - let body = hyper::body::to_bytes(body).await?; - emit!(EndpointBytesReceived { - byte_size: body.len(), - protocol: "http", - endpoint: endpoint.as_str(), - }); - Ok((header, body)) - }) - .into_stream() - .filter_map(move |response| { - ready(match response { - Ok((header, body)) if header.status == hyper::StatusCode::OK => { - emit!(RequestCompleted { - start, - end: Instant::now() - }); - let body = String::from_utf8_lossy(&body); - dbg!(&body); - - let mut events = Vec::new(); - let mut bytes = BytesMut::new(); - bytes.extend_from_slice(body.as_bytes()); - - loop { - match decoder.decode_eof(&mut bytes) { - Ok(Some((next, _))) => { - events.extend(next.into_iter()); - } - Ok(None) => break, - Err(error) => { - // Error is logged by `crate::codecs::Decoder`, no further - // handling is needed here. - if !error.can_continue() { - break; - } - break; - } - } - } - for event in &events { - dbg!(event); - } - // TODO emit EventsReceived (PrometheusEventsReceived) - Some(stream::iter(events)) - } - Ok((_header, _)) => { - // emit!(PrometheusHttpResponseError { - // code: header.status, - // url: url.clone(), - // }); - println!("error 1"); - None - } - Err(_error) => { - // emit!(PrometheusHttpError { - // error, - // url: url.clone(), - // }); - println!("error 2"); - None +impl super::HttpScraper for HttpScrapeContext { + /// + fn on_response( + &mut self, + _url: &http::Uri, + _header: &Parts, + body: &Bytes, + ) -> Option> { + let body = String::from_utf8_lossy(&body); + dbg!(&body); + + let mut events = Vec::new(); + let mut bytes = BytesMut::new(); + bytes.extend_from_slice(body.as_bytes()); + + loop { + match self.decoder.decode_eof(&mut bytes) { + Ok(Some((next, _))) => { + events.extend(next.into_iter()); + } + Ok(None) => break, + Err(error) => { + // Error is logged by `crate::codecs::Decoder`, no further + // handling is needed here. 
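+                        // Decoding of this response stops on the first error either way; a
+                        // fresh buffer is built from the next scraped response.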
+ if !error.can_continue() { + break; } - }) - }) - .flatten() - }) - .flatten() - .boxed(); - - match out.send_event_stream(&mut stream).await { - Ok(()) => { - info!("Finished sending."); - Ok(()) + break; + } + } } - Err(error) => { - let (count, _) = stream.size_hint(); - emit!(StreamClosedError { error, count }); - Err(()) + for event in &events { + dbg!(event); } + Some(events) } } +//async fn http_scrape( +// config: HttpScrapeConfig, +// decoder: Decoder, +// url: http::Uri, +// tls: TlsSettings, +// proxy: ProxyConfig, +// shutdown: ShutdownSignal, +// mut out: SourceSender, +//) -> Result<(), ()> { +// let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( +// config.scrape_interval_secs, +// ))) +// .take_until(shutdown) +// .map(move |_| stream::iter(vec![url.clone()])) +// .flatten() +// .map(move |url| { +// let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); +// let endpoint = url.to_string(); +// let mut decoder = decoder.clone(); +// +// let mut request = Request::get(&url) +// .header(http::header::ACCEPT, "text/plain") +// .body(Body::empty()) +// .expect("error creating request"); +// if let Some(auth) = &config.auth { +// auth.apply(&mut request); +// } +// +// let start = Instant::now(); +// client +// .send(request) +// .map_err(Error::from) +// .and_then(|response| async move { +// let (header, body) = response.into_parts(); +// let body = hyper::body::to_bytes(body).await?; +// emit!(EndpointBytesReceived { +// byte_size: body.len(), +// protocol: "http", +// endpoint: endpoint.as_str(), +// }); +// Ok((header, body)) +// }) +// .into_stream() +// .filter_map(move |response| { +// ready(match response { +// Ok((header, body)) if header.status == hyper::StatusCode::OK => { +// emit!(RequestCompleted { +// start, +// end: Instant::now() +// }); +// let body = String::from_utf8_lossy(&body); +// dbg!(&body); +// +// let mut events = Vec::new(); +// let mut bytes = BytesMut::new(); +// bytes.extend_from_slice(body.as_bytes()); +// +// loop { +// match decoder.decode_eof(&mut bytes) { +// Ok(Some((next, _))) => { +// events.extend(next.into_iter()); +// } +// Ok(None) => break, +// Err(error) => { +// // Error is logged by `crate::codecs::Decoder`, no further +// // handling is needed here. 
+// if !error.can_continue() { +// break; +// } +// break; +// } +// } +// } +// for event in &events { +// dbg!(event); +// } +// // TODO emit EventsReceived (PrometheusEventsReceived) +// Some(stream::iter(events)) +// } +// Ok((_header, _)) => { +// // emit!(PrometheusHttpResponseError { +// // code: header.status, +// // url: url.clone(), +// // }); +// println!("error 1"); +// None +// } +// Err(_error) => { +// // emit!(PrometheusHttpError { +// // error, +// // url: url.clone(), +// // }); +// println!("error 2"); +// None +// } +// }) +// }) +// .flatten() +// }) +// .flatten() +// .boxed(); +// +// match out.send_event_stream(&mut stream).await { +// Ok(()) => { +// info!("Finished sending."); +// Ok(()) +// } +// Err(error) => { +// let (count, _) = stream.size_hint(); +// emit!(StreamClosedError { error, count }); +// Err(()) +// } +// } +//} #[cfg(test)] mod test { @@ -327,12 +307,12 @@ mod test { use super::*; use crate::test_util::{ components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, + next_addr, test_generate_config, }; #[test] fn generate_config() { - crate::test_util::test_generate_config::(); + test_generate_config::(); } #[tokio::test] diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 5cbd223b7df48..4234fed2d1b73 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -1,3 +1,6 @@ +//! +//! + #[cfg(all(unix, feature = "sources-http_scrape"))] pub mod config; #[cfg(all(unix, feature = "sources-http_scrape"))] @@ -5,146 +8,162 @@ pub mod source; pub use config::HttpScrapeConfig; -//use crate::config::SinkContext; -use crate::{ - //codecs::{Decoder, DecodingConfig}, - //config::{GenerateConfig, SourceConfig, SourceContext, SourceDescription}, - //config::SourceConfig, - http::{Auth, HttpClient}, - internal_events::{EndpointBytesReceived, StreamClosedError}, - sources, - tls::TlsSettings, - Error, - SourceSender, -}; -use async_trait::async_trait; -//use bytes::BytesMut; -//use codecs::decoding::{DeserializerConfig, FramingConfig}; +use bytes::Bytes; use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; +use http::{response::Parts, Uri}; use hyper::{Body, Request}; -use snafu::ResultExt; -use std::collections::HashMap; -//use std::future::ready; use std::time::{Duration, Instant}; +use std::{collections::HashMap, future::ready}; use tokio_stream::wrappers::IntervalStream; -//use tokio_util::codec::Decoder as _; + +use crate::{ + http::{Auth, HttpClient}, + internal_events::{ + EndpointBytesReceived, PrometheusEventsReceived, PrometheusHttpError, + PrometheusHttpResponseError, RequestCompleted, StreamClosedError, + }, + tls::TlsSettings, + Error, SourceSender, +}; use vector_common::shutdown::ShutdownSignal; -//use vector_config::configurable_component; -//use vector_core::config::{proxy::ProxyConfig, LogNamespace, Output}; -use vector_core::{config::proxy::ProxyConfig, event::Event}; +use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; -fn get_urls( - endpoints: &Vec, - query: Option>>, -) -> Result, sources::BuildError> { - endpoints - .iter() - .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| { - r.map(|uri| { - let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - if let Some(query) = uri.query() { - serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); - }; - if let Some(query) = &query { - for (k, l) in query { - for v in l { - serializer.append_pair(k, v); - } - } - }; - let mut builder = 
http::Uri::builder(); - if let Some(scheme) = uri.scheme() { - builder = builder.scheme(scheme.clone()); - }; - if let Some(authority) = uri.authority() { - builder = builder.authority(authority.clone()); - }; - builder = builder.path_and_query(match serializer.finish() { - query if !query.is_empty() => format!("{}?{}", uri.path(), query), - _ => uri.path().to_string(), - }); - builder.build().expect("error building URI") - }) - }) - .collect::, sources::BuildError>>() +/// +pub trait HttpScraper { + /// + fn build(&mut self, _url: &Uri) {} + + /// + fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>; + + /// + fn on_http_response_error(&self, _uri: &Uri, _header: &Parts) {} } -#[async_trait] -pub trait HttpScrape { - async fn pre_request_context(&self, url: &http::Uri); +/// +pub(crate) fn get_url(uri: &Uri, query: &Option>>) -> Uri { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + if let Some(query) = uri.query() { + serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); + }; + if let Some(query) = &query { + for (k, l) in query { + for v in l { + serializer.append_pair(k, v); + } + } + }; + let mut builder = Uri::builder(); + if let Some(scheme) = uri.scheme() { + builder = builder.scheme(scheme.clone()); + }; + if let Some(authority) = uri.authority() { + builder = builder.authority(authority.clone()); + }; + builder = builder.path_and_query(match serializer.finish() { + query if !query.is_empty() => format!("{}?{}", uri.path(), query), + _ => uri.path().to_string(), + }); + builder.build().expect("error building URI") +} - async fn post_request( - &self, - response: Result< - (http::response::Parts, bytes::Bytes), - Box, - >, - ) -> Option>>; +/// +pub(crate) async fn http_scrape( + context: H, + urls: Vec, + interval_secs: u64, + auth: Option, + tls: TlsSettings, + proxy: ProxyConfig, + shutdown: ShutdownSignal, + mut out: SourceSender, +) -> Result<(), ()> { + let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs(interval_secs))) + .take_until(shutdown) + .map(move |_| stream::iter(urls.clone())) + .flatten() + .map(move |url| { + let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); + let endpoint = url.to_string(); - async fn http_scrape( - &self, - urls: &Vec, - interval_secs: u64, - //decoder: Decoder, - //url: http::Uri, - auth: Option, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, - mut out: SourceSender, - ) -> Result<(), ()> { - let mut stream = - IntervalStream::new(tokio::time::interval(Duration::from_secs(interval_secs))) - .take_until(shutdown) - .map(move |_| stream::iter(urls.clone())) - .flatten() - .map(move |url| { - let client = - HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); - let endpoint = url.to_string(); + let mut context = context.clone(); + context.build(&url); - let mut request = Request::get(&url) - .header(http::header::ACCEPT, "text/plain") - .body(Body::empty()) - .expect("error creating request"); - if let Some(auth) = &auth { - auth.apply(&mut request); - } + let mut request = Request::get(&url) + .header(http::header::ACCEPT, "text/plain") + .body(Body::empty()) + .expect("error creating request"); - self.pre_request_context(&url); + if let Some(auth) = &auth { + auth.apply(&mut request); + } - let start = Instant::now(); - client - .send(request) - .map_err(Error::from) - .and_then(|response| async move { - let (header, body) = response.into_parts(); - let body = 
hyper::body::to_bytes(body).await?; - emit!(EndpointBytesReceived { - byte_size: body.len(), - protocol: "http", - endpoint: endpoint.as_str(), + let start = Instant::now(); + client + .send(request) + .map_err(Error::from) + .and_then(|response| async move { + let (header, body) = response.into_parts(); + let body = hyper::body::to_bytes(body).await?; + emit!(EndpointBytesReceived { + byte_size: body.len(), + protocol: "http", + endpoint: endpoint.as_str(), + }); + Ok((header, body)) + }) + .into_stream() + .filter_map(move |response| { + ready(match response { + Ok((header, body)) if header.status == hyper::StatusCode::OK => { + emit!(RequestCompleted { + start, + end: Instant::now() + }); + match context.on_response(&url, &header, &body) { + Some(events) => { + // TODO emit EventsReceived (PrometheusEventsReceived) + emit!(PrometheusEventsReceived { + byte_size: events.size_of(), + count: events.len(), + uri: url.clone() + }); + Some(stream::iter(events)) + } + None => None, + } + } + Ok((header, _)) => { + context.on_http_response_error(&url, &header); + emit!(PrometheusHttpResponseError { + code: header.status, + url: url.clone(), }); - Ok((header, body)) - }) - .into_stream() - .filter_map(move |response| self.post_request(response)) - .flatten() + None + } + Err(error) => { + emit!(PrometheusHttpError { + error, + url: url.clone(), + }); + None + } + }) }) .flatten() - .boxed(); + }) + .flatten() + .boxed(); - match out.send_event_stream(&mut stream).await { - Ok(()) => { - info!("Finished sending."); - Ok(()) - } - Err(error) => { - let (count, _) = stream.size_hint(); - emit!(StreamClosedError { error, count }); - Err(()) - } + match out.send_event_stream(&mut stream).await { + Ok(()) => { + info!("Finished sending."); + Ok(()) + } + Err(error) => { + let (count, _) = stream.size_hint(); + emit!(StreamClosedError { error, count }); + Err(()) } } } diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs new file mode 100644 index 0000000000000..d53f72fe531f6 --- /dev/null +++ b/src/sources/http_scrape/scrape.rs @@ -0,0 +1,347 @@ +//! +//! + +use bytes::{Bytes, BytesMut}; +use futures_util::FutureExt; +use http::{response::Parts, Uri}; +use snafu::ResultExt; +use std::collections::HashMap; +use tokio_util::codec::Decoder as _; + +use crate::{ + codecs::{Decoder, DecodingConfig}, + config::{self, GenerateConfig, SourceConfig, SourceContext, SourceDescription}, + http::Auth, + serde::default_decoding, + sources, + tls::{TlsConfig, TlsSettings}, + Result, +}; +use codecs::{ + decoding::{DeserializerConfig, FramingConfig}, + StreamDecodingError, +}; +use vector_config::configurable_component; +use vector_core::{ + config::{LogNamespace, Output}, + event::Event, +}; + +const NAME: &str = "http_scrape"; + +/// Configuration for the `http_scrape` source. +#[configurable_component(source)] +#[derive(Clone, Debug)] +pub struct HttpScrapeConfig { + /// Endpoints to scrape metrics from. + endpoint: String, + + /// Custom parameters for the scrape request query string. + /// + /// One or more values for the same parameter key can be provided. The parameters provided in this option are + /// appended to any parameters manually provided in the `endpoint` option. + query: Option>>, + + /// The interval between scrapes, in seconds. 
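+    /// Defaults to 15 seconds when not specified (see `default_scrape_interval_secs`).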
+ #[serde(default = "default_scrape_interval_secs")] + scrape_interval_secs: u64, + + /// TODO + #[configurable(derived)] + #[serde(default = "default_decoding")] + decoding: DeserializerConfig, + + /// TODO + #[configurable(derived)] + framing: Option, + + /// TODO + #[serde(default)] + headers: Option>, + + /// TODO + #[configurable(derived)] + tls: Option, + + /// TODO + #[configurable(derived)] + auth: Option, +} + +pub(crate) const fn default_scrape_interval_secs() -> u64 { + 15 +} + +inventory::submit! { + SourceDescription::new::(NAME) +} + +impl GenerateConfig for HttpScrapeConfig { + fn generate_config() -> toml::Value { + toml::Value::try_from(Self { + endpoint: "http://localhost:9090/metrics".to_string(), + query: None, + scrape_interval_secs: default_scrape_interval_secs(), + decoding: default_decoding(), + framing: None, + headers: None, + tls: None, + auth: None, + }) + .unwrap() + } +} + +#[async_trait::async_trait] +#[typetag::serde(name = "http_scrape")] +impl SourceConfig for HttpScrapeConfig { + async fn build(&self, cx: SourceContext) -> Result { + let endpoints = vec![self.endpoint.clone()]; + let urls = endpoints + .iter() + .map(|s| s.parse::().context(sources::UriParseSnafu)) + .map(|r| r.map(|uri| super::get_url(&uri, &self.query))) + .collect::, sources::BuildError>>()?; + + let tls = TlsSettings::from_options(&self.tls)?; + + let decoder = DecodingConfig::new( + self.framing + .clone() + .unwrap_or_else(|| self.decoding.default_stream_framing()), + self.decoding.clone(), + LogNamespace::Vector, + ) + .build(); + + let context = HttpScrapeContext { decoder }; + + Ok(super::http_scrape( + context, + urls, + self.scrape_interval_secs, + self.auth.clone(), + tls, + cx.proxy.clone(), + cx.shutdown, + cx.out, + ) + .boxed()) + } + + fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { + vec![Output::default(config::DataType::Metric)] + } + + fn source_type(&self) -> &'static str { + NAME + } + + fn can_acknowledge(&self) -> bool { + false + } +} + +#[derive(Clone)] +struct HttpScrapeContext { + decoder: Decoder, +} + +impl super::HttpScraper for HttpScrapeContext { + /// + fn on_response( + &mut self, + _url: &http::Uri, + _header: &Parts, + body: &Bytes, + ) -> Option> { + let body = String::from_utf8_lossy(&body); + dbg!(&body); + + let mut events = Vec::new(); + let mut bytes = BytesMut::new(); + bytes.extend_from_slice(body.as_bytes()); + + loop { + match self.decoder.decode_eof(&mut bytes) { + Ok(Some((next, _))) => { + events.extend(next.into_iter()); + } + Ok(None) => break, + Err(error) => { + // Error is logged by `crate::codecs::Decoder`, no further + // handling is needed here. 
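+                        // Decoding of this response stops on the first error either way; a
+                        // fresh buffer is built from the next scraped response.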
+ if !error.can_continue() { + break; + } + break; + } + } + } + for event in &events { + dbg!(event); + } + Some(events) + } +} +//async fn http_scrape( +// config: HttpScrapeConfig, +// decoder: Decoder, +// url: http::Uri, +// tls: TlsSettings, +// proxy: ProxyConfig, +// shutdown: ShutdownSignal, +// mut out: SourceSender, +//) -> Result<(), ()> { +// let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( +// config.scrape_interval_secs, +// ))) +// .take_until(shutdown) +// .map(move |_| stream::iter(vec![url.clone()])) +// .flatten() +// .map(move |url| { +// let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); +// let endpoint = url.to_string(); +// let mut decoder = decoder.clone(); +// +// let mut request = Request::get(&url) +// .header(http::header::ACCEPT, "text/plain") +// .body(Body::empty()) +// .expect("error creating request"); +// if let Some(auth) = &config.auth { +// auth.apply(&mut request); +// } +// +// let start = Instant::now(); +// client +// .send(request) +// .map_err(Error::from) +// .and_then(|response| async move { +// let (header, body) = response.into_parts(); +// let body = hyper::body::to_bytes(body).await?; +// emit!(EndpointBytesReceived { +// byte_size: body.len(), +// protocol: "http", +// endpoint: endpoint.as_str(), +// }); +// Ok((header, body)) +// }) +// .into_stream() +// .filter_map(move |response| { +// ready(match response { +// Ok((header, body)) if header.status == hyper::StatusCode::OK => { +// emit!(RequestCompleted { +// start, +// end: Instant::now() +// }); +// let body = String::from_utf8_lossy(&body); +// dbg!(&body); +// +// let mut events = Vec::new(); +// let mut bytes = BytesMut::new(); +// bytes.extend_from_slice(body.as_bytes()); +// +// loop { +// match decoder.decode_eof(&mut bytes) { +// Ok(Some((next, _))) => { +// events.extend(next.into_iter()); +// } +// Ok(None) => break, +// Err(error) => { +// // Error is logged by `crate::codecs::Decoder`, no further +// // handling is needed here. 
+// if !error.can_continue() { +// break; +// } +// break; +// } +// } +// } +// for event in &events { +// dbg!(event); +// } +// // TODO emit EventsReceived (PrometheusEventsReceived) +// Some(stream::iter(events)) +// } +// Ok((_header, _)) => { +// // emit!(PrometheusHttpResponseError { +// // code: header.status, +// // url: url.clone(), +// // }); +// println!("error 1"); +// None +// } +// Err(_error) => { +// // emit!(PrometheusHttpError { +// // error, +// // url: url.clone(), +// // }); +// println!("error 2"); +// None +// } +// }) +// }) +// .flatten() +// }) +// .flatten() +// .boxed(); +// +// match out.send_event_stream(&mut stream).await { +// Ok(()) => { +// info!("Finished sending."); +// Ok(()) +// } +// Err(error) => { +// let (count, _) = stream.size_hint(); +// emit!(StreamClosedError { error, count }); +// Err(()) +// } +// } +//} + +#[cfg(test)] +mod test { + use tokio::time::Duration; + use warp::Filter; + + use super::*; + use crate::test_util::{ + components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, + next_addr, test_generate_config, + }; + + #[test] + fn generate_config() { + test_generate_config::(); + } + + #[tokio::test] + async fn test_() { + let in_addr = next_addr(); + + let dummy_endpoint = warp::path!("metrics") + .and(warp::header::exact("Accept", "text/plain")) + .map(|| r#"A plain text event"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + let config = HttpScrapeConfig { + endpoint: format!("http://{}/metrics", in_addr), + scrape_interval_secs: 1, + query: None, + decoding: default_decoding(), + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } +} diff --git a/src/sources/http_scrape/source.rs b/src/sources/http_scrape/source.rs deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index c02bc234e1e88..640b89580f625 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -1,32 +1,24 @@ -use std::{ - collections::HashMap, - future::ready, - time::{Duration, Instant}, -}; +use std::collections::HashMap; -use futures::{stream, FutureExt, StreamExt, TryFutureExt}; -use hyper::{Body, Request}; +use bytes::Bytes; +use futures::FutureExt; +use http::{response::Parts, Uri}; use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; -use tokio_stream::wrappers::IntervalStream; use vector_config::configurable_component; -use vector_core::config::LogNamespace; -use vector_core::ByteSizeOf; +use vector_core::{config::LogNamespace, event::Event, ByteSizeOf}; use super::parser; use crate::{ - config::{ - self, GenerateConfig, Output, ProxyConfig, SourceConfig, SourceContext, SourceDescription, - }, - http::{Auth, HttpClient}, - internal_events::{ - EndpointBytesReceived, PrometheusEventsReceived, PrometheusHttpError, - PrometheusHttpResponseError, PrometheusParseError, RequestCompleted, StreamClosedError, + config::{self, GenerateConfig, Output, SourceConfig, SourceContext, SourceDescription}, + http::Auth, + internal_events::{PrometheusEventsReceived, PrometheusParseError}, + sources::{ + self, + http_scrape::{get_url, http_scrape, HttpScraper}, }, - shutdown::ShutdownSignal, - sources, tls::{TlsConfig, TlsSettings}, - SourceSender, + Result, }; // pulled up, and split over multiple lines, because the long lines trip up rustfmt 
such that it @@ -123,43 +115,28 @@ impl GenerateConfig for PrometheusScrapeConfig { #[async_trait::async_trait] #[typetag::serde(name = "prometheus_scrape")] impl SourceConfig for PrometheusScrapeConfig { - async fn build(&self, cx: SourceContext) -> crate::Result { + async fn build(&self, cx: SourceContext) -> Result { let urls = self .endpoints .iter() - .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| { - r.map(|uri| { - let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - if let Some(query) = uri.query() { - serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); - }; - if let Some(query) = &self.query { - for (k, l) in query { - for v in l { - serializer.append_pair(k, v); - } - } - }; - let mut builder = http::Uri::builder(); - if let Some(scheme) = uri.scheme() { - builder = builder.scheme(scheme.clone()); - }; - if let Some(authority) = uri.authority() { - builder = builder.authority(authority.clone()); - }; - builder = builder.path_and_query(match serializer.finish() { - query if !query.is_empty() => format!("{}?{}", uri.path(), query), - _ => uri.path().to_string(), - }); - builder.build().expect("error building URI") - }) - }) - .collect::, sources::BuildError>>()?; + .map(|s| s.parse::().context(sources::UriParseSnafu)) + .map(|r| r.map(|uri| get_url(&uri, &self.query))) + .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; - Ok(prometheus( - self.clone(), + + let context = HttpScrapeContext { + honor_labels: self.honor_labels, + instance_tag: self.instance_tag.clone(), + endpoint_tag: self.endpoint_tag.clone(), + instance_info: None, + endpoint_info: None, + }; + + Ok(http_scrape( + context, urls, + self.scrape_interval_secs, + self.auth.clone(), tls, cx.proxy.clone(), cx.shutdown, @@ -203,7 +180,7 @@ struct PrometheusCompatConfig { #[async_trait::async_trait] #[typetag::serde(name = "prometheus")] impl SourceConfig for PrometheusCompatConfig { - async fn build(&self, cx: SourceContext) -> crate::Result { + async fn build(&self, cx: SourceContext) -> Result { // Workaround for serde bug // https://github.com/serde-rs/serde/issues/1504 let config = PrometheusScrapeConfig { @@ -252,33 +229,19 @@ struct EndpointInfo { honor_label: bool, } -async fn prometheus( - config: PrometheusScrapeConfig, - urls: Vec, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, - mut out: SourceSender, -) -> Result<(), ()> { - let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( - config.scrape_interval_secs, - ))) - .take_until(shutdown) - .map(move |_| stream::iter(urls.clone())) - .flatten() - .map(move |url| { - let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); - let endpoint = url.to_string(); - - let mut request = Request::get(&url) - .header(http::header::ACCEPT, "text/plain") - .body(Body::empty()) - .expect("error creating request"); - if let Some(auth) = &config.auth { - auth.apply(&mut request); - } +#[derive(Clone)] +struct HttpScrapeContext { + honor_labels: bool, + instance_tag: Option, + endpoint_tag: Option, + instance_info: Option, + endpoint_info: Option, +} - let instance_info = config.instance_tag.as_ref().map(|tag| { +impl HttpScraper for HttpScrapeContext { + /// + fn build(&mut self, url: &Uri) { + self.instance_info = self.instance_tag.as_ref().map(|tag| { let instance = format!( "{}:{}", url.host().unwrap_or_default(), @@ -291,149 +254,93 @@ async fn prometheus( InstanceInfo { tag: tag.to_string(), 
instance, - honor_label: config.honor_labels, + honor_label: self.honor_labels, } }); - let endpoint_info = config.endpoint_tag.as_ref().map(|tag| EndpointInfo { + + self.endpoint_info = self.endpoint_tag.as_ref().map(|tag| EndpointInfo { tag: tag.to_string(), endpoint: url.to_string(), - honor_label: config.honor_labels, + honor_label: self.honor_labels, }); + } - let start = Instant::now(); - client - .send(request) - .map_err(crate::Error::from) - .and_then(|response| async move { - let (header, body) = response.into_parts(); - let body = hyper::body::to_bytes(body).await?; - emit!(EndpointBytesReceived { - byte_size: body.len(), - protocol: "http", - endpoint: endpoint.as_str(), + /// + fn on_response(&mut self, url: &Uri, _header: &Parts, body: &Bytes) -> Option> { + let body = String::from_utf8_lossy(&body); + + match parser::parse_text(&body) { + Ok(mut events) => { + emit!(PrometheusEventsReceived { + byte_size: events.size_of(), + count: events.len(), + uri: url.clone() }); - Ok((header, body)) - }) - .into_stream() - .filter_map(move |response| { - let instance_info = instance_info.clone(); - let endpoint_info = endpoint_info.clone(); - - ready(match response { - Ok((header, body)) if header.status == hyper::StatusCode::OK => { - emit!(RequestCompleted { - start, - end: Instant::now() - }); - - let body = String::from_utf8_lossy(&body); - - match parser::parse_text(&body) { - Ok(events) => { - emit!(PrometheusEventsReceived { - byte_size: events.size_of(), - count: events.len(), - uri: url.clone() - }); - Some(stream::iter(events).map(move |mut event| { - let metric = event.as_mut_metric(); - if let Some(InstanceInfo { - tag, - instance, - honor_label, - }) = &instance_info - { - match (honor_label, metric.tag_value(tag)) { - (false, Some(old_instance)) => { - metric.insert_tag( - format!("exported_{}", tag), - old_instance, - ); - metric.insert_tag(tag.clone(), instance.clone()); - } - (true, Some(_)) => {} - (_, None) => { - metric.insert_tag(tag.clone(), instance.clone()); - } - } - } - if let Some(EndpointInfo { - tag, - endpoint, - honor_label, - }) = &endpoint_info - { - match (honor_label, metric.tag_value(tag)) { - (false, Some(old_endpoint)) => { - metric.insert_tag( - format!("exported_{}", tag), - old_endpoint, - ); - metric.insert_tag(tag.clone(), endpoint.clone()); - } - (true, Some(_)) => {} - (_, None) => { - metric.insert_tag(tag.clone(), endpoint.clone()); - } - } - } - event - })) + for event in events.iter_mut() { + let metric = event.as_mut_metric(); + if let Some(InstanceInfo { + tag, + instance, + honor_label, + }) = &self.instance_info + { + match (honor_label, metric.tag_value(tag)) { + (false, Some(old_instance)) => { + metric.insert_tag(format!("exported_{}", tag), old_instance); + metric.insert_tag(tag.clone(), instance.clone()); } - Err(error) => { - if url.path() == "/" { - // https://github.com/vectordotdev/vector/pull/3801#issuecomment-700723178 - warn!( - message = PARSE_ERROR_NO_PATH, - endpoint = %url, - ); - } - emit!(PrometheusParseError { - error, - url: url.clone(), - body, - }); - None + (true, Some(_)) => {} + (_, None) => { + metric.insert_tag(tag.clone(), instance.clone()); } } } - Ok((header, _)) => { - if header.status == hyper::StatusCode::NOT_FOUND && url.path() == "/" { - // https://github.com/vectordotdev/vector/pull/3801#issuecomment-700723178 - warn!( - message = NOT_FOUND_NO_PATH, - endpoint = %url, - ); + if let Some(EndpointInfo { + tag, + endpoint, + honor_label, + }) = &self.endpoint_info + { + match (honor_label, 
metric.tag_value(tag)) { + (false, Some(old_endpoint)) => { + metric.insert_tag(format!("exported_{}", tag), old_endpoint); + metric.insert_tag(tag.clone(), endpoint.clone()); + } + (true, Some(_)) => {} + (_, None) => { + metric.insert_tag(tag.clone(), endpoint.clone()); + } } - emit!(PrometheusHttpResponseError { - code: header.status, - url: url.clone(), - }); - None - } - Err(error) => { - emit!(PrometheusHttpError { - error, - url: url.clone(), - }); - None } - }) - }) - .flatten() - }) - .flatten() - .boxed(); - - match out.send_event_stream(&mut stream).await { - Ok(()) => { - info!("Finished sending."); - Ok(()) + } + Some(events) + } + Err(error) => { + if url.path() == "/" { + // https://github.com/vectordotdev/vector/pull/3801#issuecomment-700723178 + warn!( + message = PARSE_ERROR_NO_PATH, + endpoint = %url, + ); + } + emit!(PrometheusParseError { + error, + url: url.clone(), + body, + }); + None + } } - Err(error) => { - let (count, _) = stream.size_hint(); - emit!(StreamClosedError { error, count }); - Err(()) + } + + /// + fn on_http_response_error(&self, url: &Uri, header: &Parts) { + if header.status == hyper::StatusCode::NOT_FOUND && url.path() == "/" { + // https://github.com/vectordotdev/vector/pull/3801#issuecomment-700723178 + warn!( + message = NOT_FOUND_NO_PATH, + endpoint = %url, + ); } } } From 44db3827d88e2dfabc609b63505e25ccb80b3ca0 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 12:20:06 -0600 Subject: [PATCH 03/50] cleanup --- src/sources/http_scrape/config.rs | 347 ------------------------------ src/sources/http_scrape/mod.rs | 6 +- src/sources/http_scrape/scrape.rs | 114 ---------- 3 files changed, 2 insertions(+), 465 deletions(-) delete mode 100644 src/sources/http_scrape/config.rs diff --git a/src/sources/http_scrape/config.rs b/src/sources/http_scrape/config.rs deleted file mode 100644 index d53f72fe531f6..0000000000000 --- a/src/sources/http_scrape/config.rs +++ /dev/null @@ -1,347 +0,0 @@ -//! -//! - -use bytes::{Bytes, BytesMut}; -use futures_util::FutureExt; -use http::{response::Parts, Uri}; -use snafu::ResultExt; -use std::collections::HashMap; -use tokio_util::codec::Decoder as _; - -use crate::{ - codecs::{Decoder, DecodingConfig}, - config::{self, GenerateConfig, SourceConfig, SourceContext, SourceDescription}, - http::Auth, - serde::default_decoding, - sources, - tls::{TlsConfig, TlsSettings}, - Result, -}; -use codecs::{ - decoding::{DeserializerConfig, FramingConfig}, - StreamDecodingError, -}; -use vector_config::configurable_component; -use vector_core::{ - config::{LogNamespace, Output}, - event::Event, -}; - -const NAME: &str = "http_scrape"; - -/// Configuration for the `http_scrape` source. -#[configurable_component(source)] -#[derive(Clone, Debug)] -pub struct HttpScrapeConfig { - /// Endpoints to scrape metrics from. - endpoint: String, - - /// Custom parameters for the scrape request query string. - /// - /// One or more values for the same parameter key can be provided. The parameters provided in this option are - /// appended to any parameters manually provided in the `endpoint` option. - query: Option>>, - - /// The interval between scrapes, in seconds. 
- #[serde(default = "default_scrape_interval_secs")] - scrape_interval_secs: u64, - - /// TODO - #[configurable(derived)] - #[serde(default = "default_decoding")] - decoding: DeserializerConfig, - - /// TODO - #[configurable(derived)] - framing: Option, - - /// TODO - #[serde(default)] - headers: Option>, - - /// TODO - #[configurable(derived)] - tls: Option, - - /// TODO - #[configurable(derived)] - auth: Option, -} - -pub(crate) const fn default_scrape_interval_secs() -> u64 { - 15 -} - -inventory::submit! { - SourceDescription::new::(NAME) -} - -impl GenerateConfig for HttpScrapeConfig { - fn generate_config() -> toml::Value { - toml::Value::try_from(Self { - endpoint: "http://localhost:9090/metrics".to_string(), - query: None, - scrape_interval_secs: default_scrape_interval_secs(), - decoding: default_decoding(), - framing: None, - headers: None, - tls: None, - auth: None, - }) - .unwrap() - } -} - -#[async_trait::async_trait] -#[typetag::serde(name = "http_scrape")] -impl SourceConfig for HttpScrapeConfig { - async fn build(&self, cx: SourceContext) -> Result { - let endpoints = vec![self.endpoint.clone()]; - let urls = endpoints - .iter() - .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| r.map(|uri| super::get_url(&uri, &self.query))) - .collect::, sources::BuildError>>()?; - - let tls = TlsSettings::from_options(&self.tls)?; - - let decoder = DecodingConfig::new( - self.framing - .clone() - .unwrap_or_else(|| self.decoding.default_stream_framing()), - self.decoding.clone(), - LogNamespace::Vector, - ) - .build(); - - let context = HttpScrapeContext { decoder }; - - Ok(super::http_scrape( - context, - urls, - self.scrape_interval_secs, - self.auth.clone(), - tls, - cx.proxy.clone(), - cx.shutdown, - cx.out, - ) - .boxed()) - } - - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] - } - - fn source_type(&self) -> &'static str { - NAME - } - - fn can_acknowledge(&self) -> bool { - false - } -} - -#[derive(Clone)] -struct HttpScrapeContext { - decoder: Decoder, -} - -impl super::HttpScraper for HttpScrapeContext { - /// - fn on_response( - &mut self, - _url: &http::Uri, - _header: &Parts, - body: &Bytes, - ) -> Option> { - let body = String::from_utf8_lossy(&body); - dbg!(&body); - - let mut events = Vec::new(); - let mut bytes = BytesMut::new(); - bytes.extend_from_slice(body.as_bytes()); - - loop { - match self.decoder.decode_eof(&mut bytes) { - Ok(Some((next, _))) => { - events.extend(next.into_iter()); - } - Ok(None) => break, - Err(error) => { - // Error is logged by `crate::codecs::Decoder`, no further - // handling is needed here. 
- if !error.can_continue() { - break; - } - break; - } - } - } - for event in &events { - dbg!(event); - } - Some(events) - } -} -//async fn http_scrape( -// config: HttpScrapeConfig, -// decoder: Decoder, -// url: http::Uri, -// tls: TlsSettings, -// proxy: ProxyConfig, -// shutdown: ShutdownSignal, -// mut out: SourceSender, -//) -> Result<(), ()> { -// let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( -// config.scrape_interval_secs, -// ))) -// .take_until(shutdown) -// .map(move |_| stream::iter(vec![url.clone()])) -// .flatten() -// .map(move |url| { -// let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); -// let endpoint = url.to_string(); -// let mut decoder = decoder.clone(); -// -// let mut request = Request::get(&url) -// .header(http::header::ACCEPT, "text/plain") -// .body(Body::empty()) -// .expect("error creating request"); -// if let Some(auth) = &config.auth { -// auth.apply(&mut request); -// } -// -// let start = Instant::now(); -// client -// .send(request) -// .map_err(Error::from) -// .and_then(|response| async move { -// let (header, body) = response.into_parts(); -// let body = hyper::body::to_bytes(body).await?; -// emit!(EndpointBytesReceived { -// byte_size: body.len(), -// protocol: "http", -// endpoint: endpoint.as_str(), -// }); -// Ok((header, body)) -// }) -// .into_stream() -// .filter_map(move |response| { -// ready(match response { -// Ok((header, body)) if header.status == hyper::StatusCode::OK => { -// emit!(RequestCompleted { -// start, -// end: Instant::now() -// }); -// let body = String::from_utf8_lossy(&body); -// dbg!(&body); -// -// let mut events = Vec::new(); -// let mut bytes = BytesMut::new(); -// bytes.extend_from_slice(body.as_bytes()); -// -// loop { -// match decoder.decode_eof(&mut bytes) { -// Ok(Some((next, _))) => { -// events.extend(next.into_iter()); -// } -// Ok(None) => break, -// Err(error) => { -// // Error is logged by `crate::codecs::Decoder`, no further -// // handling is needed here. 
-// if !error.can_continue() { -// break; -// } -// break; -// } -// } -// } -// for event in &events { -// dbg!(event); -// } -// // TODO emit EventsReceived (PrometheusEventsReceived) -// Some(stream::iter(events)) -// } -// Ok((_header, _)) => { -// // emit!(PrometheusHttpResponseError { -// // code: header.status, -// // url: url.clone(), -// // }); -// println!("error 1"); -// None -// } -// Err(_error) => { -// // emit!(PrometheusHttpError { -// // error, -// // url: url.clone(), -// // }); -// println!("error 2"); -// None -// } -// }) -// }) -// .flatten() -// }) -// .flatten() -// .boxed(); -// -// match out.send_event_stream(&mut stream).await { -// Ok(()) => { -// info!("Finished sending."); -// Ok(()) -// } -// Err(error) => { -// let (count, _) = stream.size_hint(); -// emit!(StreamClosedError { error, count }); -// Err(()) -// } -// } -//} - -#[cfg(test)] -mod test { - use tokio::time::Duration; - use warp::Filter; - - use super::*; - use crate::test_util::{ - components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, test_generate_config, - }; - - #[test] - fn generate_config() { - test_generate_config::(); - } - - #[tokio::test] - async fn test_() { - let in_addr = next_addr(); - - let dummy_endpoint = warp::path!("metrics") - .and(warp::header::exact("Accept", "text/plain")) - .map(|| r#"A plain text event"#); - - tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - let config = HttpScrapeConfig { - endpoint: format!("http://{}/metrics", in_addr), - scrape_interval_secs: 1, - query: None, - decoding: default_decoding(), - framing: None, - headers: None, - auth: None, - tls: None, - }; - - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) - .await; - assert!(!events.is_empty()); - } -} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 4234fed2d1b73..72f336d6cca5a 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -2,11 +2,9 @@ //! 
#[cfg(all(unix, feature = "sources-http_scrape"))] -pub mod config; -#[cfg(all(unix, feature = "sources-http_scrape"))] -pub mod source; +pub mod scrape; -pub use config::HttpScrapeConfig; +pub use scrape::HttpScrapeConfig; use bytes::Bytes; use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index d53f72fe531f6..a74f6455ad309 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -184,120 +184,6 @@ impl super::HttpScraper for HttpScrapeContext { Some(events) } } -//async fn http_scrape( -// config: HttpScrapeConfig, -// decoder: Decoder, -// url: http::Uri, -// tls: TlsSettings, -// proxy: ProxyConfig, -// shutdown: ShutdownSignal, -// mut out: SourceSender, -//) -> Result<(), ()> { -// let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( -// config.scrape_interval_secs, -// ))) -// .take_until(shutdown) -// .map(move |_| stream::iter(vec![url.clone()])) -// .flatten() -// .map(move |url| { -// let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); -// let endpoint = url.to_string(); -// let mut decoder = decoder.clone(); -// -// let mut request = Request::get(&url) -// .header(http::header::ACCEPT, "text/plain") -// .body(Body::empty()) -// .expect("error creating request"); -// if let Some(auth) = &config.auth { -// auth.apply(&mut request); -// } -// -// let start = Instant::now(); -// client -// .send(request) -// .map_err(Error::from) -// .and_then(|response| async move { -// let (header, body) = response.into_parts(); -// let body = hyper::body::to_bytes(body).await?; -// emit!(EndpointBytesReceived { -// byte_size: body.len(), -// protocol: "http", -// endpoint: endpoint.as_str(), -// }); -// Ok((header, body)) -// }) -// .into_stream() -// .filter_map(move |response| { -// ready(match response { -// Ok((header, body)) if header.status == hyper::StatusCode::OK => { -// emit!(RequestCompleted { -// start, -// end: Instant::now() -// }); -// let body = String::from_utf8_lossy(&body); -// dbg!(&body); -// -// let mut events = Vec::new(); -// let mut bytes = BytesMut::new(); -// bytes.extend_from_slice(body.as_bytes()); -// -// loop { -// match decoder.decode_eof(&mut bytes) { -// Ok(Some((next, _))) => { -// events.extend(next.into_iter()); -// } -// Ok(None) => break, -// Err(error) => { -// // Error is logged by `crate::codecs::Decoder`, no further -// // handling is needed here. 
-// if !error.can_continue() { -// break; -// } -// break; -// } -// } -// } -// for event in &events { -// dbg!(event); -// } -// // TODO emit EventsReceived (PrometheusEventsReceived) -// Some(stream::iter(events)) -// } -// Ok((_header, _)) => { -// // emit!(PrometheusHttpResponseError { -// // code: header.status, -// // url: url.clone(), -// // }); -// println!("error 1"); -// None -// } -// Err(_error) => { -// // emit!(PrometheusHttpError { -// // error, -// // url: url.clone(), -// // }); -// println!("error 2"); -// None -// } -// }) -// }) -// .flatten() -// }) -// .flatten() -// .boxed(); -// -// match out.send_event_stream(&mut stream).await { -// Ok(()) => { -// info!("Finished sending."); -// Ok(()) -// } -// Err(error) => { -// let (count, _) = stream.size_hint(); -// emit!(StreamClosedError { error, count }); -// Err(()) -// } -// } -//} #[cfg(test)] mod test { From 6e2971d9e5088b3aa5fa297442cec03596da1252 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 15:28:59 -0600 Subject: [PATCH 04/50] more tests, cleanup, clippy --- src/sources/http_scrape/mod.rs | 191 ++++++++++++++++++------------ src/sources/http_scrape/scrape.rs | 129 ++++++++++++++++---- src/sources/prometheus/scrape.rs | 26 ++-- 3 files changed, 231 insertions(+), 115 deletions(-) diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 72f336d6cca5a..23ddd853a6b59 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -26,8 +26,43 @@ use crate::{ use vector_common::shutdown::ShutdownSignal; use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; +/// TODO +pub(crate) struct GenericHttpScrapeInputs { + urls: Vec, + interval_secs: u64, + auth: Option, + tls: TlsSettings, + proxy: ProxyConfig, + shutdown: ShutdownSignal, +} + +impl GenericHttpScrapeInputs { + pub fn new( + urls: Vec, + interval_secs: u64, + auth: Option, + tls: TlsSettings, + proxy: ProxyConfig, + shutdown: ShutdownSignal, + ) -> Self { + Self { + urls, + interval_secs, + auth, + tls, + proxy, + shutdown, + } + } +} + +/// TODO +pub(crate) const fn default_scrape_interval_secs() -> u64 { + 15 +} + /// -pub trait HttpScraper { +pub(crate) trait HttpScraper { /// fn build(&mut self, _url: &Uri) {} @@ -67,91 +102,89 @@ pub(crate) fn get_url(uri: &Uri, query: &Option>>) - /// pub(crate) async fn http_scrape( + inputs: GenericHttpScrapeInputs, context: H, - urls: Vec, - interval_secs: u64, - auth: Option, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, mut out: SourceSender, ) -> Result<(), ()> { - let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs(interval_secs))) - .take_until(shutdown) - .map(move |_| stream::iter(urls.clone())) - .flatten() - .map(move |url| { - let client = HttpClient::new(tls.clone(), &proxy).expect("Building HTTP client failed"); - let endpoint = url.to_string(); - - let mut context = context.clone(); - context.build(&url); - - let mut request = Request::get(&url) - .header(http::header::ACCEPT, "text/plain") - .body(Body::empty()) - .expect("error creating request"); - - if let Some(auth) = &auth { - auth.apply(&mut request); - } + let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( + inputs.interval_secs, + ))) + .take_until(inputs.shutdown) + .map(move |_| stream::iter(inputs.urls.clone())) + .flatten() + .map(move |url| { + let client = HttpClient::new(inputs.tls.clone(), &inputs.proxy) + .expect("Building HTTP client failed"); + let endpoint = 
url.to_string(); - let start = Instant::now(); - client - .send(request) - .map_err(Error::from) - .and_then(|response| async move { - let (header, body) = response.into_parts(); - let body = hyper::body::to_bytes(body).await?; - emit!(EndpointBytesReceived { - byte_size: body.len(), - protocol: "http", - endpoint: endpoint.as_str(), - }); - Ok((header, body)) - }) - .into_stream() - .filter_map(move |response| { - ready(match response { - Ok((header, body)) if header.status == hyper::StatusCode::OK => { - emit!(RequestCompleted { - start, - end: Instant::now() - }); - match context.on_response(&url, &header, &body) { - Some(events) => { - // TODO emit EventsReceived (PrometheusEventsReceived) - emit!(PrometheusEventsReceived { - byte_size: events.size_of(), - count: events.len(), - uri: url.clone() - }); - Some(stream::iter(events)) - } - None => None, + let mut context = context.clone(); + context.build(&url); + + let mut request = Request::get(&url) + .header(http::header::ACCEPT, "text/plain") + .body(Body::empty()) + .expect("error creating request"); + + if let Some(auth) = &inputs.auth { + auth.apply(&mut request); + } + + let start = Instant::now(); + client + .send(request) + .map_err(Error::from) + .and_then(|response| async move { + let (header, body) = response.into_parts(); + let body = hyper::body::to_bytes(body).await?; + emit!(EndpointBytesReceived { + byte_size: body.len(), + protocol: "http", + endpoint: endpoint.as_str(), + }); + Ok((header, body)) + }) + .into_stream() + .filter_map(move |response| { + ready(match response { + Ok((header, body)) if header.status == hyper::StatusCode::OK => { + emit!(RequestCompleted { + start, + end: Instant::now() + }); + match context.on_response(&url, &header, &body) { + Some(events) => { + // TODO emit EventsReceived (PrometheusEventsReceived) + emit!(PrometheusEventsReceived { + byte_size: events.size_of(), + count: events.len(), + uri: url.clone() + }); + Some(stream::iter(events)) } + None => None, } - Ok((header, _)) => { - context.on_http_response_error(&url, &header); - emit!(PrometheusHttpResponseError { - code: header.status, - url: url.clone(), - }); - None - } - Err(error) => { - emit!(PrometheusHttpError { - error, - url: url.clone(), - }); - None - } - }) + } + Ok((header, _)) => { + context.on_http_response_error(&url, &header); + emit!(PrometheusHttpResponseError { + code: header.status, + url: url.clone(), + }); + None + } + Err(error) => { + emit!(PrometheusHttpError { + error, + url: url.clone(), + }); + None + } }) - .flatten() - }) - .flatten() - .boxed(); + }) + .flatten() + }) + .flatten() + .boxed(); match out.send_event_stream(&mut stream).await { Ok(()) => { diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index a74f6455ad309..7ef43d87661fc 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -43,7 +43,7 @@ pub struct HttpScrapeConfig { query: Option>>, /// The interval between scrapes, in seconds. - #[serde(default = "default_scrape_interval_secs")] + #[serde(default = "super::default_scrape_interval_secs")] scrape_interval_secs: u64, /// TODO @@ -68,10 +68,6 @@ pub struct HttpScrapeConfig { auth: Option, } -pub(crate) const fn default_scrape_interval_secs() -> u64 { - 15 -} - inventory::submit! 
{ SourceDescription::new::(NAME) } @@ -81,7 +77,7 @@ impl GenerateConfig for HttpScrapeConfig { toml::Value::try_from(Self { endpoint: "http://localhost:9090/metrics".to_string(), query: None, - scrape_interval_secs: default_scrape_interval_secs(), + scrape_interval_secs: super::default_scrape_interval_secs(), decoding: default_decoding(), framing: None, headers: None, @@ -116,17 +112,16 @@ impl SourceConfig for HttpScrapeConfig { let context = HttpScrapeContext { decoder }; - Ok(super::http_scrape( - context, + let inputs = super::GenericHttpScrapeInputs::new( urls, self.scrape_interval_secs, self.auth.clone(), tls, cx.proxy.clone(), cx.shutdown, - cx.out, - ) - .boxed()) + ); + + Ok(super::http_scrape(inputs, context, cx.out).boxed()) } fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { @@ -155,13 +150,11 @@ impl super::HttpScraper for HttpScrapeContext { _header: &Parts, body: &Bytes, ) -> Option> { - let body = String::from_utf8_lossy(&body); - dbg!(&body); - - let mut events = Vec::new(); let mut bytes = BytesMut::new(); + let body = String::from_utf8_lossy(body); bytes.extend_from_slice(body.as_bytes()); + let mut events = Vec::new(); loop { match self.decoder.decode_eof(&mut bytes) { Ok(Some((next, _))) => { @@ -178,9 +171,6 @@ impl super::HttpScraper for HttpScrapeContext { } } } - for event in &events { - dbg!(event); - } Some(events) } } @@ -197,22 +187,22 @@ mod test { }; #[test] - fn generate_config() { + fn test_http_scrape_generate_config() { test_generate_config::(); } #[tokio::test] - async fn test_() { + async fn test_http_scrape_bytes_decoding() { let in_addr = next_addr(); - let dummy_endpoint = warp::path!("metrics") + let dummy_endpoint = warp::path!("endpoint") .and(warp::header::exact("Accept", "text/plain")) .map(|| r#"A plain text event"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); let config = HttpScrapeConfig { - endpoint: format!("http://{}/metrics", in_addr), + endpoint: format!("http://{}/endpoint", in_addr), scrape_interval_secs: 1, query: None, decoding: default_decoding(), @@ -230,4 +220,99 @@ mod test { .await; assert!(!events.is_empty()); } + + #[tokio::test] + async fn test_http_scrape_json_decoding() { + let in_addr = next_addr(); + + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "text/plain")) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + let config = HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } + + #[tokio::test] + async fn test_http_scrape_request_query() { + let in_addr = next_addr(); + + let dummy_endpoint = warp::path!("endpoint") + .and(warp::query::raw()) + .map(|query| format!(r#"{{"data" : "{}"}}"#, query)); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + let config = HttpScrapeConfig { + endpoint: format!("http://{}/endpoint?key1=val1", in_addr), + scrape_interval_secs: 1, + query: Some(HashMap::from([ + ("key1".to_string(), vec!["val2".to_string()]), + ( + "key2".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ])), + decoding: DeserializerConfig::Json, + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = 
run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + + let logs: Vec<_> = events.into_iter().map(|event| event.into_log()).collect(); + + let expected = HashMap::from([ + ( + "key1".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ( + "key2".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ]); + + for log in logs { + let query = log.get("data").expect("data must be available"); + let mut got: HashMap> = HashMap::new(); + for (k, v) in url::form_urlencoded::parse( + query.as_bytes().expect("byte conversion should succeed"), + ) { + got.entry(k.to_string()) + .or_insert_with(Vec::new) + .push(v.to_string()); + } + for v in got.values_mut() { + v.sort(); + } + assert_eq!(got, expected); + } + } } diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 640b89580f625..3c049bda958d8 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -15,7 +15,10 @@ use crate::{ internal_events::{PrometheusEventsReceived, PrometheusParseError}, sources::{ self, - http_scrape::{get_url, http_scrape, HttpScraper}, + http_scrape::{ + default_scrape_interval_secs, get_url, http_scrape, GenericHttpScrapeInputs, + HttpScraper, + }, }, tls::{TlsConfig, TlsSettings}, Result, @@ -84,10 +87,6 @@ pub struct PrometheusScrapeConfig { auth: Option, } -pub(crate) const fn default_scrape_interval_secs() -> u64 { - 15 -} - inventory::submit! { SourceDescription::new::("prometheus") } @@ -124,7 +123,7 @@ impl SourceConfig for PrometheusScrapeConfig { .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; - let context = HttpScrapeContext { + let context = PrometheusScrapeContext { honor_labels: self.honor_labels, instance_tag: self.instance_tag.clone(), endpoint_tag: self.endpoint_tag.clone(), @@ -132,17 +131,16 @@ impl SourceConfig for PrometheusScrapeConfig { endpoint_info: None, }; - Ok(http_scrape( - context, + let inputs = GenericHttpScrapeInputs::new( urls, self.scrape_interval_secs, self.auth.clone(), tls, cx.proxy.clone(), cx.shutdown, - cx.out, - ) - .boxed()) + ); + + Ok(http_scrape(inputs, context, cx.out).boxed()) } fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { @@ -230,7 +228,7 @@ struct EndpointInfo { } #[derive(Clone)] -struct HttpScrapeContext { +struct PrometheusScrapeContext { honor_labels: bool, instance_tag: Option, endpoint_tag: Option, @@ -238,7 +236,7 @@ struct HttpScrapeContext { endpoint_info: Option, } -impl HttpScraper for HttpScrapeContext { +impl HttpScraper for PrometheusScrapeContext { /// fn build(&mut self, url: &Uri) { self.instance_info = self.instance_tag.as_ref().map(|tag| { @@ -267,7 +265,7 @@ impl HttpScraper for HttpScrapeContext { /// fn on_response(&mut self, url: &Uri, _header: &Parts, body: &Bytes) -> Option> { - let body = String::from_utf8_lossy(&body); + let body = String::from_utf8_lossy(body); match parser::parse_text(&body) { Ok(mut events) => { From 32e9cf4607f356931e4b4d62b2100ac761a59391 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 15:47:24 -0600 Subject: [PATCH 05/50] extract http_scrape internal events --- src/internal_events/http_scrape.rs | 90 ++++++++++++++++++++++++++++++ src/internal_events/mod.rs | 4 ++ src/sources/http_scrape/mod.rs | 11 ++-- 3 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 src/internal_events/http_scrape.rs diff --git a/src/internal_events/http_scrape.rs 
b/src/internal_events/http_scrape.rs new file mode 100644 index 0000000000000..801e859d8f714 --- /dev/null +++ b/src/internal_events/http_scrape.rs @@ -0,0 +1,90 @@ +use metrics::counter; +use vector_core::internal_event::InternalEvent; + +use super::prelude::{error_stage, error_type, http_error_code}; + +#[derive(Debug)] +pub struct HttpScrapeEventsReceived { + pub byte_size: usize, + pub count: usize, + pub uri: http::Uri, +} + +impl InternalEvent for HttpScrapeEventsReceived { + fn emit(self) { + trace!( + message = "Events received.", + count = %self.count, + byte_size = %self.byte_size, + uri = %self.uri, + ); + counter!( + "component_received_events_total", self.count as u64, + "uri" => self.uri.to_string(), + ); + counter!( + "component_received_event_bytes_total", self.byte_size as u64, + "uri" => self.uri.to_string(), + ); + // deprecated + counter!( + "events_in_total", self.count as u64, + "uri" => self.uri.to_string(), + ); + } +} + +#[derive(Debug)] +pub struct HttpScrapeHttpResponseError { + pub code: hyper::StatusCode, + pub url: http::Uri, +} + +impl InternalEvent for HttpScrapeHttpResponseError { + fn emit(self) { + error!( + message = "HTTP error response.", + url = %self.url, + stage = error_stage::RECEIVING, + error_type = error_type::REQUEST_FAILED, + error_code = %http_error_code(self.code.as_u16()), + internal_log_rate_secs = 10, + ); + counter!( + "component_errors_total", 1, + "url" => self.url.to_string(), + "stage" => error_stage::RECEIVING, + "error_type" => error_type::REQUEST_FAILED, + "error_code" => http_error_code(self.code.as_u16()), + ); + // deprecated + counter!("http_error_response_total", 1); + } +} + +#[derive(Debug)] +pub struct HttpScrapeHttpError { + pub error: crate::Error, + pub url: http::Uri, +} + +impl InternalEvent for HttpScrapeHttpError { + fn emit(self) { + error!( + message = "HTTP request processing error.", + url = %self.url, + error = ?self.error, + error_type = error_type::REQUEST_FAILED, + stage = error_stage::RECEIVING, + internal_log_rate_secs = 10, + ); + counter!( + "component_errors_total", 1, + "url" => self.url.to_string(), + "error_type" => error_type::REQUEST_FAILED, + "stage" => error_stage::RECEIVING, + ); + // deprecated + counter!("http_request_errors_total", 1); + } +} diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index a63212f1d958e..55e98d4768307 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -53,6 +53,8 @@ mod geoip; mod heartbeat; mod http; pub mod http_client; +#[cfg(feature = "sources-http_scrape")] +mod http_scrape; #[cfg(feature = "sources-internal_logs")] mod internal_logs; #[cfg(all(unix, feature = "sources-journald"))] @@ -187,6 +189,8 @@ pub(crate) use self::geoip::*; feature = "sources-splunk_hec", ))] pub(crate) use self::http::*; +#[cfg(feature = "sources-http_scrape")] +pub(crate) use self::http_scrape::*; #[cfg(feature = "sources-internal_logs")] pub(crate) use self::internal_logs::*; #[cfg(all(unix, feature = "sources-journald"))] diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 23ddd853a6b59..886bda652b228 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -17,8 +17,8 @@ use tokio_stream::wrappers::IntervalStream; use crate::{ http::{Auth, HttpClient}, internal_events::{ - EndpointBytesReceived, PrometheusEventsReceived, PrometheusHttpError, - PrometheusHttpResponseError, RequestCompleted, StreamClosedError, + EndpointBytesReceived, HttpScrapeEventsReceived, HttpScrapeHttpError, + 
HttpScrapeHttpResponseError, RequestCompleted, StreamClosedError, }, tls::TlsSettings, Error, SourceSender, @@ -153,8 +153,7 @@ pub(crate) async fn http_scrape( }); match context.on_response(&url, &header, &body) { Some(events) => { - // TODO emit EventsReceived (PrometheusEventsReceived) - emit!(PrometheusEventsReceived { + emit!(HttpScrapeEventsReceived { byte_size: events.size_of(), count: events.len(), uri: url.clone() @@ -166,14 +165,14 @@ pub(crate) async fn http_scrape( } Ok((header, _)) => { context.on_http_response_error(&url, &header); - emit!(PrometheusHttpResponseError { + emit!(HttpScrapeHttpResponseError { code: header.status, url: url.clone(), }); None } Err(error) => { - emit!(PrometheusHttpError { + emit!(HttpScrapeHttpError { error, url: url.clone(), }); From c4d42ad074046b87d3b912d34ae87065de00cd55 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 16:29:58 -0600 Subject: [PATCH 06/50] documenting --- src/sources/http_scrape/mod.rs | 28 +++++++++++++++++++--------- src/sources/http_scrape/scrape.rs | 28 ++++++++++++++++++---------- src/sources/prometheus/scrape.rs | 5 ++--- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 886bda652b228..c02e6b9677353 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -1,5 +1,12 @@ +//! Common logic for sources that are HTTP scrapers. //! -//! +//! Specific HTTP scraping sources will: +//! - Call get_url() to build the URL(s) to scrape. +//! - Implmement a specific context struct which: +//! - Contains the data that source needs in order to process the HTTP responses into internal_events +//! - Implements the HttpScraper trait +//! - Call http_scrape() supplying the generic inputs for scraping and the source-specific +//! context. #[cfg(all(unix, feature = "sources-http_scrape"))] pub mod scrape; @@ -26,7 +33,7 @@ use crate::{ use vector_common::shutdown::ShutdownSignal; use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; -/// TODO +/// Contains the inputs generic to any http scrape. pub(crate) struct GenericHttpScrapeInputs { urls: Vec, interval_secs: u64, @@ -56,24 +63,24 @@ impl GenericHttpScrapeInputs { } } -/// TODO +/// The default interval to scrape the http endpoint if none is configured. pub(crate) const fn default_scrape_interval_secs() -> u64 { 15 } -/// +/// Methods that allow context-specific behavior during the scraping procedure. pub(crate) trait HttpScraper { - /// + /// (Optional) Called before the HTTP request is made, allows building context. fn build(&mut self, _url: &Uri) {} - /// + /// Called after the HTTP request succeeds and returns the decoded/parsed Event array. fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>; - /// + /// (Optional) Called if the HTTP response is not 200 ('OK'). fn on_http_response_error(&self, _uri: &Uri, _header: &Parts) {} } -/// +/// Builds a url for the HTTP requests. pub(crate) fn get_url(uri: &Uri, query: &Option>>) -> Uri { let mut serializer = url::form_urlencoded::Serializer::new(String::new()); if let Some(query) = uri.query() { @@ -100,7 +107,10 @@ pub(crate) fn get_url(uri: &Uri, query: &Option>>) - builder.build().expect("error building URI") } -/// +/// Scrapes one or more urls at an interval. +/// - The HTTP request is built per the options in provided generic inputs. +/// - The HTTP response is decoded/parsed into events by the specific context. 
+/// - The events are then sent to the output stream. pub(crate) async fn http_scrape( inputs: GenericHttpScrapeInputs, context: H, diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 7ef43d87661fc..5461e6e0598bc 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -1,5 +1,5 @@ -//! -//! +//! Generalized HTTP scrape source. +//! Scrapes an endpoint at an interval, decoding the HTTP responses into events. use bytes::{Bytes, BytesMut}; use futures_util::FutureExt; @@ -27,13 +27,18 @@ use vector_core::{ event::Event, }; +/// The name of this source const NAME: &str = "http_scrape"; +// TODO: +// - request headers +// - framing for the decoding? + /// Configuration for the `http_scrape` source. #[configurable_component(source)] #[derive(Clone, Debug)] pub struct HttpScrapeConfig { - /// Endpoints to scrape metrics from. + /// Endpoint to scrape events from. endpoint: String, /// Custom parameters for the scrape request query string. @@ -46,24 +51,24 @@ pub struct HttpScrapeConfig { #[serde(default = "super::default_scrape_interval_secs")] scrape_interval_secs: u64, - /// TODO + /// Decoder to use on the HTTP responses. #[configurable(derived)] #[serde(default = "default_decoding")] decoding: DeserializerConfig, - /// TODO + /// Framing to use in the decoding. #[configurable(derived)] framing: Option, - /// TODO + /// Headers to apply to the HTTP requests. #[serde(default)] headers: Option>, - /// TODO + /// TLS configuration. #[configurable(derived)] tls: Option, - /// TODO + /// HTTP Authentication. #[configurable(derived)] auth: Option, } @@ -75,7 +80,7 @@ inventory::submit! { impl GenerateConfig for HttpScrapeConfig { fn generate_config() -> toml::Value { toml::Value::try_from(Self { - endpoint: "http://localhost:9090/metrics".to_string(), + endpoint: "http://localhost:9898/logs".to_string(), query: None, scrape_interval_secs: super::default_scrape_interval_secs(), decoding: default_decoding(), @@ -92,6 +97,7 @@ impl GenerateConfig for HttpScrapeConfig { #[typetag::serde(name = "http_scrape")] impl SourceConfig for HttpScrapeConfig { async fn build(&self, cx: SourceContext) -> Result { + // build the url let endpoints = vec![self.endpoint.clone()]; let urls = endpoints .iter() @@ -101,6 +107,7 @@ impl SourceConfig for HttpScrapeConfig { let tls = TlsSettings::from_options(&self.tls)?; + // build the decoder let decoder = DecodingConfig::new( self.framing .clone() @@ -110,6 +117,7 @@ impl SourceConfig for HttpScrapeConfig { ) .build(); + // the only specific context needed is the ability to decode let context = HttpScrapeContext { decoder }; let inputs = super::GenericHttpScrapeInputs::new( @@ -143,7 +151,7 @@ struct HttpScrapeContext { } impl super::HttpScraper for HttpScrapeContext { - /// + /// Decodes the HTTP response body into events per the decoder configured. 
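+    ///
+    /// Rough usage sketch (illustrative only, not compiled as a doctest; `url`
+    /// and `parts` stand for the request URI and the response head):
+    ///
+    /// ```ignore
+    /// let mut ctx = HttpScrapeContext { decoder };
+    /// // With a JSON `decoding` codec, a body of `{ "foo": "baz" }` yields one
+    /// // log event carrying a `foo` field; the default `bytes` codec would
+    /// // instead put the whole body into the event's `message` field.
+    /// let events = ctx.on_response(&url, &parts, &Bytes::from(r#"{ "foo": "baz" }"#));
+    /// ```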
fn on_response( &mut self, _url: &http::Uri, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 3c049bda958d8..9b0aa2ea2d1d9 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -237,7 +237,7 @@ struct PrometheusScrapeContext { } impl HttpScraper for PrometheusScrapeContext { - /// + /// Builds the instance info and endpoint info for the current request fn build(&mut self, url: &Uri) { self.instance_info = self.instance_tag.as_ref().map(|tag| { let instance = format!( @@ -263,7 +263,7 @@ impl HttpScraper for PrometheusScrapeContext { }); } - /// + /// Parses the Prometheus HTTP response into metric events fn on_response(&mut self, url: &Uri, _header: &Parts, body: &Bytes) -> Option> { let body = String::from_utf8_lossy(body); @@ -331,7 +331,6 @@ impl HttpScraper for PrometheusScrapeContext { } } - /// fn on_http_response_error(&self, url: &Uri, header: &Parts) { if header.status == hyper::StatusCode::NOT_FOUND && url.path() == "/" { // https://github.com/vectordotdev/vector/pull/3801#issuecomment-700723178 From 702f1e227231bf83a185b5866881bbe5644b9dad Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 27 Jul 2022 16:30:22 -0600 Subject: [PATCH 07/50] todo --- src/sources/http_scrape/scrape.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 5461e6e0598bc..95b5b05e27ca1 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -33,6 +33,7 @@ const NAME: &str = "http_scrape"; // TODO: // - request headers // - framing for the decoding? +// - cue files /// Configuration for the `http_scrape` source. #[configurable_component(source)] From fff0cee5e511e6d22e16ade5c7358f2babcd468b Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Thu, 28 Jul 2022 11:10:38 -0600 Subject: [PATCH 08/50] added request headers --- .github/workflows/integration-test.yml | 1 + Cargo.toml | 2 ++ Makefile | 2 +- .../docker-compose.http-scrape.yml | 36 +++++++++++++++++++ src/sources/http_scrape/mod.rs | 16 ++++++--- src/sources/http_scrape/scrape.rs | 26 +++++++------- src/sources/prometheus/scrape.rs | 1 + 7 files changed, 67 insertions(+), 17 deletions(-) create mode 100644 scripts/integration/docker-compose.http-scrape.yml diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 01fca15b5038c..4c216a00c2dfa 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -70,6 +70,7 @@ jobs: - test: 'fluent' - test: 'gcp' - test: 'humio' + - test: 'http-scrape' - test: 'influxdb' - test: 'kafka' - test: 'logstash' diff --git a/Cargo.toml b/Cargo.toml index a6ba50b0e117a..bdc119df74028 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -705,6 +705,7 @@ all-integration-tests = [ "gcp-cloud-storage-integration-tests", "gcp-integration-tests", "gcp-pubsub-integration-tests", + "http-scrape-integration-tests", "humio-integration-tests", "influxdb-integration-tests", "kafka-integration-tests", @@ -760,6 +761,7 @@ gcp-cloud-storage-integration-tests = ["sinks-gcp"] gcp-integration-tests = ["sinks-gcp"] gcp-pubsub-integration-tests = ["sinks-gcp", "sources-gcp_pubsub"] humio-integration-tests = ["sinks-humio"] +http-scrape-integration-tests = ["sources-http_scrape"] influxdb-integration-tests = ["sinks-influxdb"] kafka-integration-tests = ["sinks-kafka", "sources-kafka"] logstash-integration-tests = ["docker", "sources-logstash"] diff --git a/Makefile b/Makefile index 
8b3aa4ba0bd0f..2ab8464d9febf 100644 --- a/Makefile +++ b/Makefile @@ -334,7 +334,7 @@ test-enterprise: ## Runs enterprise related behavioral tests test-integration: ## Runs all integration tests test-integration: test-integration-aws test-integration-axiom test-integration-azure test-integration-clickhouse test-integration-docker-logs test-integration-elasticsearch test-integration: test-integration-azure test-integration-clickhouse test-integration-docker-logs test-integration-elasticsearch -test-integration: test-integration-eventstoredb test-integration-fluent test-integration-gcp test-integration-humio test-integration-influxdb +test-integration: test-integration-eventstoredb test-integration-fluent test-integration-gcp test-integration-humio test-integration-http-scrape test-integration-influxdb test-integration: test-integration-kafka test-integration-logstash test-integration-loki test-integration-mongodb test-integration-nats test-integration: test-integration-nginx test-integration-postgres test-integration-prometheus test-integration-pulsar test-integration: test-integration-redis test-integration-splunk test-integration-dnstap test-integration-datadog-agent test-integration-datadog-logs diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml new file mode 100644 index 0000000000000..1944841aa9069 --- /dev/null +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -0,0 +1,36 @@ +version: "3" + +services: + runner: + build: + context: ${PWD} + dockerfile: scripts/integration/Dockerfile + args: + - RUST_VERSION=${RUST_VERSION} + working_dir: /code + command: + - "cargo" + - "nextest" + - "run" + - "--no-fail-fast" + - "--no-default-features" + - "--features" + - "http-scrape-integration-tests" + # depends_on: + # environment: + networks: + - public + volumes: + - ${PWD}:/code + - target:/code/target + - cargogit:/usr/local/cargo/git + - cargoregistry:/usr/local/cargo/registry + +networks: + public: {} + proxy: {} + +volumes: + target: {} + cargogit: {} + cargoregistry: {} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index c02e6b9677353..e592f53bf0fb0 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -37,6 +37,7 @@ use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; pub(crate) struct GenericHttpScrapeInputs { urls: Vec, interval_secs: u64, + headers: Option>, auth: Option, tls: TlsSettings, proxy: ProxyConfig, @@ -47,6 +48,7 @@ impl GenericHttpScrapeInputs { pub fn new( urls: Vec, interval_secs: u64, + headers: Option>, auth: Option, tls: TlsSettings, proxy: ProxyConfig, @@ -55,6 +57,7 @@ impl GenericHttpScrapeInputs { Self { urls, interval_secs, + headers, auth, tls, proxy, @@ -130,10 +133,15 @@ pub(crate) async fn http_scrape( let mut context = context.clone(); context.build(&url); - let mut request = Request::get(&url) - .header(http::header::ACCEPT, "text/plain") - .body(Body::empty()) - .expect("error creating request"); + let mut builder = Request::get(&url).header(http::header::ACCEPT, "text/plain"); + + // add user supplied headers + if let Some(headers) = &inputs.headers { + for header in headers { + builder = builder.header(header.0, header.1); + } + } + let mut request = builder.body(Body::empty()).expect("error creating request"); if let Some(auth) = &inputs.auth { auth.apply(&mut request); diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 95b5b05e27ca1..9d7385daa1bb8 100644 --- 
a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -10,7 +10,7 @@ use tokio_util::codec::Decoder as _; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{self, GenerateConfig, SourceConfig, SourceContext, SourceDescription}, + config::{self, SourceConfig, SourceContext, SourceDescription}, http::Auth, serde::default_decoding, sources, @@ -31,7 +31,7 @@ use vector_core::{ const NAME: &str = "http_scrape"; // TODO: -// - request headers +// - integration tests // - framing for the decoding? // - cue files @@ -63,7 +63,7 @@ pub struct HttpScrapeConfig { /// Headers to apply to the HTTP requests. #[serde(default)] - headers: Option>, + headers: Option>, /// TLS configuration. #[configurable(derived)] @@ -74,13 +74,9 @@ pub struct HttpScrapeConfig { auth: Option, } -inventory::submit! { - SourceDescription::new::(NAME) -} - -impl GenerateConfig for HttpScrapeConfig { - fn generate_config() -> toml::Value { - toml::Value::try_from(Self { +impl Default for HttpScrapeConfig { + fn default() -> Self { + Self { endpoint: "http://localhost:9898/logs".to_string(), query: None, scrape_interval_secs: super::default_scrape_interval_secs(), @@ -89,11 +85,16 @@ impl GenerateConfig for HttpScrapeConfig { headers: None, tls: None, auth: None, - }) - .unwrap() + } } } +inventory::submit! { + SourceDescription::new::(NAME) +} + +impl_generate_config_from_default!(HttpScrapeConfig); + #[async_trait::async_trait] #[typetag::serde(name = "http_scrape")] impl SourceConfig for HttpScrapeConfig { @@ -124,6 +125,7 @@ impl SourceConfig for HttpScrapeConfig { let inputs = super::GenericHttpScrapeInputs::new( urls, self.scrape_interval_secs, + self.headers.clone(), self.auth.clone(), tls, cx.proxy.clone(), diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 9b0aa2ea2d1d9..5dc29805856ee 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -134,6 +134,7 @@ impl SourceConfig for PrometheusScrapeConfig { let inputs = GenericHttpScrapeInputs::new( urls, self.scrape_interval_secs, + None, self.auth.clone(), tls, cx.proxy.clone(), From f557d0eef80f9cb377873531058968cc78f0598e Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Thu, 28 Jul 2022 23:37:23 +0000 Subject: [PATCH 09/50] starting integration test --- Cargo.toml | 2 +- .../docker-compose.http-scrape.yml | 36 +++++-- src/sources/http_scrape/scrape.rs | 96 +++++++++++++++---- tests/data/http-scrape/logs/foo | 1 + tests/data/http-scrape/logs/foo.json | 1 + 5 files changed, 109 insertions(+), 27 deletions(-) create mode 100644 tests/data/http-scrape/logs/foo create mode 100644 tests/data/http-scrape/logs/foo.json diff --git a/Cargo.toml b/Cargo.toml index bdc119df74028..8a8cbf93d2659 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -495,7 +495,7 @@ sources-gcp_pubsub = ["gcp", "dep:h2", "dep:prost-types", "protobuf-build", "dep sources-heroku_logs = ["sources-utils-http", "sources-utils-http-query", "sources-http"] sources-host_metrics = ["dep:heim"] sources-http = ["sources-utils-http", "sources-utils-http-query"] -sources-http_scrape = ["sources-utils-http", "sources-utils-http-query"] +sources-http_scrape = ["sources-utils-http", "sources-http"] sources-internal_logs = [] sources-internal_metrics = [] sources-journald = [] diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 1944841aa9069..22374241d81a8 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ 
b/scripts/integration/docker-compose.http-scrape.yml @@ -1,6 +1,17 @@ version: "3" services: + dufs: + image: docker.io/sigoden/dufs:latest + #network_mode: host + # networks: + #- backend + ports: + - 5000:5000 + command: + - "/data" + volumes: + - ${PWD}/tests/data/http-scrape/:/data runner: build: context: ${PWD} @@ -8,6 +19,9 @@ services: args: - RUST_VERSION=${RUST_VERSION} working_dir: /code + #network_mode: host + #networks: + #- backend command: - "cargo" - "nextest" @@ -16,21 +30,27 @@ services: - "--no-default-features" - "--features" - "http-scrape-integration-tests" - # depends_on: + - "--lib" + - "sources::http_scrape::scrape::integration_tests::" + depends_on: + - dufs # environment: - networks: - - public + #networks: + #- public volumes: - ${PWD}:/code - - target:/code/target + #- target:/code/target - cargogit:/usr/local/cargo/git - cargoregistry:/usr/local/cargo/registry -networks: - public: {} - proxy: {} + #networks: + #backend: {} + + #networks: + # public: {} + # proxy: {} volumes: - target: {} + # target: {} cargogit: {} cargoregistry: {} diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 9d7385daa1bb8..e697705f399fb 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -2,6 +2,7 @@ //! Scrapes an endpoint at an interval, decoding the HTTP responses into events. use bytes::{Bytes, BytesMut}; +use chrono::Utc; use futures_util::FutureExt; use http::{response::Parts, Uri}; use snafu::ResultExt; @@ -23,7 +24,7 @@ use codecs::{ }; use vector_config::configurable_component; use vector_core::{ - config::{LogNamespace, Output}, + config::{log_schema, LogNamespace, Output}, event::Event, }; @@ -153,21 +154,12 @@ struct HttpScrapeContext { decoder: Decoder, } -impl super::HttpScraper for HttpScrapeContext { - /// Decodes the HTTP response body into events per the decoder configured. - fn on_response( - &mut self, - _url: &http::Uri, - _header: &Parts, - body: &Bytes, - ) -> Option> { - let mut bytes = BytesMut::new(); - let body = String::from_utf8_lossy(body); - bytes.extend_from_slice(body.as_bytes()); - +impl HttpScrapeContext { + /// Decode the events from the byte buffer + fn decode_events(&mut self, buf: &mut BytesMut) -> Vec { let mut events = Vec::new(); loop { - match self.decoder.decode_eof(&mut bytes) { + match self.decoder.decode_eof(buf) { Ok(Some((next, _))) => { events.extend(next.into_iter()); } @@ -182,6 +174,44 @@ impl super::HttpScraper for HttpScrapeContext { } } } + events + } + + /// Enriches log events + fn enrich_events(&self, events: &mut Vec) { + for event in events { + if let Event::Log(ref mut log) = event { + log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); + log.try_insert(log_schema().timestamp_key(), Utc::now()); + } + } + } +} + +impl super::HttpScraper for HttpScrapeContext { + /// Decodes the HTTP response body into events per the decoder configured. 
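+    ///
+    /// The work is now split across the helpers above: `decode_events` runs the
+    /// configured `Decoder` over the buffered body until it is drained, and
+    /// `enrich_events` adds `source_type` and `timestamp` fields (when not
+    /// already present) to any log events before they are handed back to the
+    /// generic scrape loop in `mod.rs`.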
+ fn on_response( + &mut self, + _url: &http::Uri, + _header: &Parts, + body: &Bytes, + ) -> Option> { + // get the body into a byte array + let mut buf = BytesMut::new(); + let body = String::from_utf8_lossy(body); + buf.extend_from_slice(body.as_bytes()); + + //TODO delete + dbg!(body); + + // decode and enrich + let mut events = self.decode_events(&mut buf); + self.enrich_events(&mut events); + + // TODO delete + for event in &events { + dbg!(event); + } Some(events) } } @@ -198,12 +228,12 @@ mod test { }; #[test] - fn test_http_scrape_generate_config() { + fn http_scrape_generate_config() { test_generate_config::(); } #[tokio::test] - async fn test_http_scrape_bytes_decoding() { + async fn http_scrape_bytes_decoding() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -233,7 +263,7 @@ mod test { } #[tokio::test] - async fn test_http_scrape_json_decoding() { + async fn http_scrape_json_decoding() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -263,7 +293,7 @@ mod test { } #[tokio::test] - async fn test_http_scrape_request_query() { + async fn http_scrape_request_query() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -327,3 +357,33 @@ mod test { } } } + +#[cfg(all(test, feature = "http-scrape-integration-tests"))] +mod integration_tests { + use tokio::time::Duration; + + use super::*; + use crate::test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}; + + #[tokio::test] + async fn http_scrape_logs_json() { + let config = HttpScrapeConfig { + endpoint: format!("http://localhost:5000/logs/foo.json"), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } +} diff --git a/tests/data/http-scrape/logs/foo b/tests/data/http-scrape/logs/foo new file mode 100644 index 0000000000000..257cc5642cb1a --- /dev/null +++ b/tests/data/http-scrape/logs/foo @@ -0,0 +1 @@ +foo diff --git a/tests/data/http-scrape/logs/foo.json b/tests/data/http-scrape/logs/foo.json new file mode 100644 index 0000000000000..9672096ad8861 --- /dev/null +++ b/tests/data/http-scrape/logs/foo.json @@ -0,0 +1 @@ +{ "foo" : "baz" } From a5366ecb4cd4e4a5f9a9ef695cb44b3594da9881 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 29 Jul 2022 17:21:38 +0000 Subject: [PATCH 10/50] fixed integration test configuration --- .../docker-compose.http-scrape.yml | 25 +++------ src/sources/http_scrape/scrape.rs | 53 +++++++++++++++++-- tests/data/http-scrape/logs/{foo => 1} | 0 .../http-scrape/logs/{foo.json => 1.json} | 0 tests/data/http-scrape/metrics/1.json | 9 ++++ 5 files changed, 65 insertions(+), 22 deletions(-) rename tests/data/http-scrape/logs/{foo => 1} (100%) rename tests/data/http-scrape/logs/{foo.json => 1.json} (100%) create mode 100644 tests/data/http-scrape/metrics/1.json diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 22374241d81a8..d37dc9de261e9 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -3,11 +3,8 @@ version: "3" services: dufs: image: docker.io/sigoden/dufs:latest - #network_mode: host - # networks: - #- backend - ports: - - 5000:5000 + networks: + - backend command: - "/data" volumes: @@ -19,9 +16,8 @@ 
services: args: - RUST_VERSION=${RUST_VERSION} working_dir: /code - #network_mode: host - #networks: - #- backend + networks: + - backend command: - "cargo" - "nextest" @@ -34,23 +30,14 @@ services: - "sources::http_scrape::scrape::integration_tests::" depends_on: - dufs - # environment: - #networks: - #- public volumes: - ${PWD}:/code - #- target:/code/target - cargogit:/usr/local/cargo/git - cargoregistry:/usr/local/cargo/registry - #networks: - #backend: {} - - #networks: - # public: {} - # proxy: {} +networks: + backend: {} volumes: - # target: {} cargogit: {} cargoregistry: {} diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index e697705f399fb..0458c2c8b42ec 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -11,7 +11,7 @@ use tokio_util::codec::Decoder as _; use crate::{ codecs::{Decoder, DecodingConfig}, - config::{self, SourceConfig, SourceContext, SourceDescription}, + config::{SourceConfig, SourceContext, SourceDescription}, http::Auth, serde::default_decoding, sources, @@ -137,7 +137,7 @@ impl SourceConfig for HttpScrapeConfig { } fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(config::DataType::Metric)] + vec![Output::default(self.decoding.output_type())] } fn source_type(&self) -> &'static str { @@ -360,6 +360,7 @@ mod test { #[cfg(all(test, feature = "http-scrape-integration-tests"))] mod integration_tests { + use codecs::decoding::NewlineDelimitedDecoderOptions; use tokio::time::Duration; use super::*; @@ -368,7 +369,7 @@ mod integration_tests { #[tokio::test] async fn http_scrape_logs_json() { let config = HttpScrapeConfig { - endpoint: format!("http://localhost:5000/logs/foo.json"), + endpoint: format!("http://dufs:5000/logs/1.json"), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, @@ -386,4 +387,50 @@ mod integration_tests { .await; assert!(!events.is_empty()); } + + #[tokio::test] + async fn http_scrape_logs_text() { + let config = HttpScrapeConfig { + endpoint: format!("http://dufs:5000/logs/1"), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Bytes, + framing: None, + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } + + #[tokio::test] + async fn http_scrape_metrics_json() { + let config = HttpScrapeConfig { + endpoint: format!("http://dufs:5000/metrics/1.json"), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: Some(FramingConfig::NewlineDelimited { + newline_delimited: NewlineDelimitedDecoderOptions::new_with_max_length(10), + }), + headers: None, + auth: None, + tls: None, + }; + + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + } } diff --git a/tests/data/http-scrape/logs/foo b/tests/data/http-scrape/logs/1 similarity index 100% rename from tests/data/http-scrape/logs/foo rename to tests/data/http-scrape/logs/1 diff --git a/tests/data/http-scrape/logs/foo.json b/tests/data/http-scrape/logs/1.json similarity index 100% rename from tests/data/http-scrape/logs/foo.json rename to tests/data/http-scrape/logs/1.json diff --git a/tests/data/http-scrape/metrics/1.json b/tests/data/http-scrape/metrics/1.json new file mode 100644 index 0000000000000..e4bdabd8d4a7e --- /dev/null +++ 
b/tests/data/http-scrape/metrics/1.json @@ -0,0 +1,9 @@ +{ + "histogram" : { + "name" : "login.time", + "val" : 22.0, + "lables" : { + "host" : "0.0.0.0" + } + } +} From c30256d34aa96b64bbf37bded525445e242ff76b Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 29 Jul 2022 21:56:43 +0000 Subject: [PATCH 11/50] more int tests --- .../docker-compose.http-scrape.yml | 31 ++- src/internal_events/http_scrape.rs | 14 ++ src/sources/http_scrape/mod.rs | 17 ++ src/sources/http_scrape/scrape.rs | 223 +++++++++++------- .../{logs/1.json => auth/json.json} | 0 tests/data/http-scrape/logs/{1 => bytes} | 0 tests/data/http-scrape/logs/json.json | 1 + tests/data/http-scrape/metrics/1.json | 9 - tests/data/http-scrape/metrics/native.json | 1 + 9 files changed, 204 insertions(+), 92 deletions(-) rename tests/data/http-scrape/{logs/1.json => auth/json.json} (100%) rename tests/data/http-scrape/logs/{1 => bytes} (100%) create mode 100644 tests/data/http-scrape/logs/json.json delete mode 100644 tests/data/http-scrape/metrics/1.json create mode 100644 tests/data/http-scrape/metrics/native.json diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index d37dc9de261e9..042c59b895484 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -1,6 +1,8 @@ version: "3" services: + # https://github.com/sigoden/dufs + # serves static files at an HTTP endpoint dufs: image: docker.io/sigoden/dufs:latest networks: @@ -9,6 +11,32 @@ services: - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data + # the project suggests that two services should not be necessary + # yet I was not able to only auth protect one path for view/download access. + dufs-auth: + image: docker.io/sigoden/dufs:latest + networks: + - backend + command: + - "-a" + - "/@user:pass" + - "--auth-method" + - "basic" + - "/data" + volumes: + - ${PWD}/tests/data/http-scrape/:/data + dufs-https: + image: docker.io/sigoden/dufs:latest + networks: + - backend + command: + - "--tls-cert" + - "dufs.crt" + - "--tls-key" + - "dufs.key" + - "/data" + volumes: + - ${PWD}/tests/data/http-scrape/:/data runner: build: context: ${PWD} @@ -27,9 +55,10 @@ services: - "--features" - "http-scrape-integration-tests" - "--lib" - - "sources::http_scrape::scrape::integration_tests::" + - "sources::http_scrape::scrape::" depends_on: - dufs + - dufs-auth volumes: - ${PWD}:/code - cargogit:/usr/local/cargo/git diff --git a/src/internal_events/http_scrape.rs b/src/internal_events/http_scrape.rs index 801e859d8f714..04ee0ebf5c6b8 100644 --- a/src/internal_events/http_scrape.rs +++ b/src/internal_events/http_scrape.rs @@ -88,3 +88,17 @@ impl InternalEvent for HttpScrapeHttpError { counter!("http_request_errors_total", 1); } } + +#[derive(Debug)] +pub struct HttpScrapeEventsSent { + pub count: u64, + pub byte_size: usize, +} + +impl InternalEvent for HttpScrapeEventsSent { + fn emit(self) { + trace!(message = "Events sent.", count = %self.count, byte_size = %self.byte_size); + counter!("component_sent_events_total", self.count as u64); + counter!("component_sent_event_bytes_total", self.byte_size as u64); + } +} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index e592f53bf0fb0..573428451ab30 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -21,6 +21,7 @@ use std::time::{Duration, Instant}; use std::{collections::HashMap, future::ready}; use tokio_stream::wrappers::IntervalStream; +use 
crate::internal_events::HttpScrapeEventsSent; use crate::{ http::{Auth, HttpClient}, internal_events::{ @@ -144,6 +145,7 @@ pub(crate) async fn http_scrape( let mut request = builder.body(Body::empty()).expect("error creating request"); if let Some(auth) = &inputs.auth { + dbg!(auth); auth.apply(&mut request); } @@ -176,6 +178,21 @@ pub(crate) async fn http_scrape( count: events.len(), uri: url.clone() }); + + // TODO the below seems wrong placement. It seems should happen + // after the stream is written ? Yet I'm not seeing how to go about + // that. + // emit EventsSent if metrics + if events.len() > 0 { + if let Event::Metric(ref _metric) = + events.first().expect("should have event") + { + emit!(HttpScrapeEventsSent { + count: events.len() as u64, + byte_size: events.size_of() + }); + } + } Some(stream::iter(events)) } None => None, diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 0458c2c8b42ec..502c658e0b5d8 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -14,6 +14,7 @@ use crate::{ config::{SourceConfig, SourceContext, SourceDescription}, http::Auth, serde::default_decoding, + serde::default_framing_message_based, sources, tls::{TlsConfig, TlsSettings}, Result, @@ -33,7 +34,6 @@ const NAME: &str = "http_scrape"; // TODO: // - integration tests -// - framing for the decoding? // - cue files /// Configuration for the `http_scrape` source. @@ -60,7 +60,8 @@ pub struct HttpScrapeConfig { /// Framing to use in the decoding. #[configurable(derived)] - framing: Option, + #[serde(default = "default_framing_message_based")] + framing: FramingConfig, /// Headers to apply to the HTTP requests. #[serde(default)] @@ -82,7 +83,7 @@ impl Default for HttpScrapeConfig { query: None, scrape_interval_secs: super::default_scrape_interval_secs(), decoding: default_decoding(), - framing: None, + framing: default_framing_message_based(), headers: None, tls: None, auth: None, @@ -112,9 +113,7 @@ impl SourceConfig for HttpScrapeConfig { // build the decoder let decoder = DecodingConfig::new( - self.framing - .clone() - .unwrap_or_else(|| self.decoding.default_stream_framing()), + self.framing.clone(), self.decoding.clone(), LogNamespace::Vector, ) @@ -218,13 +217,18 @@ impl super::HttpScraper for HttpScrapeContext { #[cfg(test)] mod test { - use tokio::time::Duration; + use tokio::time::{sleep, Duration}; use warp::Filter; use super::*; - use crate::test_util::{ - components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, test_generate_config, + use crate::{ + test_util::{ + //collect_ready, + components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, + next_addr, + test_generate_config, + }, + SourceSender, }; #[test] @@ -233,7 +237,48 @@ mod test { } #[tokio::test] - async fn http_scrape_bytes_decoding() { + async fn invalid_endpoint() { + let (tx, _rx) = SourceSender::new_test(); + + let source = HttpScrapeConfig { + endpoint: "http://nope".to_string(), + scrape_interval_secs: 1, + query: None, + decoding: default_decoding(), + framing: default_framing_message_based(), + headers: None, + auth: None, + tls: None, + } + .build(SourceContext::new_test(tx, None)) + .await + .unwrap(); + tokio::spawn(source); + + sleep(Duration::from_secs(1)).await; + + // TODO how to verify there was an error + + // let _ = collect_ready(rx) + // .await + // .into_iter() + // .map(|e| e.into_metric()) + // .collect::>(); + } + + async fn run_test(config: HttpScrapeConfig) -> Vec { + let events = 
run_and_assert_source_compliance( + config, + Duration::from_secs(1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; + assert!(!events.is_empty()); + events + } + + #[tokio::test] + async fn bytes_decoding() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -242,28 +287,21 @@ mod test { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - let config = HttpScrapeConfig { + run_test(HttpScrapeConfig { endpoint: format!("http://{}/endpoint", in_addr), scrape_interval_secs: 1, query: None, decoding: default_decoding(), - framing: None, + framing: default_framing_message_based(), headers: None, auth: None, tls: None, - }; - - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) + }) .await; - assert!(!events.is_empty()); } #[tokio::test] - async fn http_scrape_json_decoding() { + async fn json_decoding() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -272,28 +310,21 @@ mod test { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - let config = HttpScrapeConfig { + run_test(HttpScrapeConfig { endpoint: format!("http://{}/endpoint", in_addr), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, - framing: None, + framing: default_framing_message_based(), headers: None, auth: None, tls: None, - }; - - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) + }) .await; - assert!(!events.is_empty()); } #[tokio::test] - async fn http_scrape_request_query() { + async fn request_query_applied() { let in_addr = next_addr(); let dummy_endpoint = warp::path!("endpoint") @@ -302,7 +333,7 @@ mod test { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - let config = HttpScrapeConfig { + let events = run_test(HttpScrapeConfig { endpoint: format!("http://{}/endpoint?key1=val1", in_addr), scrape_interval_secs: 1, query: Some(HashMap::from([ @@ -313,19 +344,12 @@ mod test { ), ])), decoding: DeserializerConfig::Json, - framing: None, + framing: default_framing_message_based(), headers: None, auth: None, tls: None, - }; - - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) + }) .await; - assert!(!events.is_empty()); let logs: Vec<_> = events.into_iter().map(|event| event.into_log()).collect(); @@ -360,25 +384,12 @@ mod test { #[cfg(all(test, feature = "http-scrape-integration-tests"))] mod integration_tests { - use codecs::decoding::NewlineDelimitedDecoderOptions; use tokio::time::Duration; use super::*; use crate::test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}; - #[tokio::test] - async fn http_scrape_logs_json() { - let config = HttpScrapeConfig { - endpoint: format!("http://dufs:5000/logs/1.json"), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: None, - headers: None, - auth: None, - tls: None, - }; - + async fn run_test(config: HttpScrapeConfig) { let events = run_and_assert_source_compliance( config, Duration::from_secs(1), @@ -389,48 +400,96 @@ mod integration_tests { } #[tokio::test] - async fn http_scrape_logs_text() { - let config = HttpScrapeConfig { - endpoint: format!("http://dufs:5000/logs/1"), + async fn scraped_logs_bytes() { + run_test(HttpScrapeConfig { + endpoint: format!("http://dufs:5000/logs/bytes"), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Bytes, - framing: None, + framing: default_framing_message_based(), headers: None, 
auth: None, tls: None, - }; - - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) + }) .await; - assert!(!events.is_empty()); } #[tokio::test] - async fn http_scrape_metrics_json() { - let config = HttpScrapeConfig { - endpoint: format!("http://dufs:5000/metrics/1.json"), + async fn scraped_logs_json() { + run_test(HttpScrapeConfig { + endpoint: format!("http://dufs:5000/logs/json.json"), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, - framing: Some(FramingConfig::NewlineDelimited { - newline_delimited: NewlineDelimitedDecoderOptions::new_with_max_length(10), - }), + framing: default_framing_message_based(), headers: None, auth: None, tls: None, - }; + }) + .await; + } - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) + #[tokio::test] + async fn scraped_metrics_native_json() { + run_test(HttpScrapeConfig { + endpoint: format!("http://dufs:5000/metrics/native.json"), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::NativeJson, + framing: default_framing_message_based(), + headers: None, + auth: None, + tls: None, + }) .await; - assert!(!events.is_empty()); + } + + #[tokio::test] + async fn unauthorized() { + // TODO how to assert failure + + // let config = HttpScrapeConfig { + // endpoint: format!("http://dufs:5000/auth/json.json"), + // scrape_interval_secs: 1, + // query: None, + // decoding: DeserializerConfig::NativeJson, + // framing: default_framing_message_based(), + // headers: None, + // auth: None, + // tls: None, + // }; + } + + #[tokio::test] + async fn authorized() { + run_test(HttpScrapeConfig { + endpoint: format!("http://dufs-auth:5000/logs/json.json"), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: None, + auth: Some(Auth::Basic { + user: "user".to_string(), + password: "pass".to_string(), + }), + tls: None, + }) + .await; + } + + #[tokio::test] + async fn headers() { + // TODO - is this worthy of testing and how to verify + } + + #[tokio::test] + async fn tls() { + // TODO - is this worthy of testing and how to verify + } + + #[tokio::test] + async fn shutdown() { + // TODO - is this worthy of testing and how to verify } } diff --git a/tests/data/http-scrape/logs/1.json b/tests/data/http-scrape/auth/json.json similarity index 100% rename from tests/data/http-scrape/logs/1.json rename to tests/data/http-scrape/auth/json.json diff --git a/tests/data/http-scrape/logs/1 b/tests/data/http-scrape/logs/bytes similarity index 100% rename from tests/data/http-scrape/logs/1 rename to tests/data/http-scrape/logs/bytes diff --git a/tests/data/http-scrape/logs/json.json b/tests/data/http-scrape/logs/json.json new file mode 100644 index 0000000000000..9672096ad8861 --- /dev/null +++ b/tests/data/http-scrape/logs/json.json @@ -0,0 +1 @@ +{ "foo" : "baz" } diff --git a/tests/data/http-scrape/metrics/1.json b/tests/data/http-scrape/metrics/1.json deleted file mode 100644 index e4bdabd8d4a7e..0000000000000 --- a/tests/data/http-scrape/metrics/1.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "histogram" : { - "name" : "login.time", - "val" : 22.0, - "lables" : { - "host" : "0.0.0.0" - } - } -} diff --git a/tests/data/http-scrape/metrics/native.json b/tests/data/http-scrape/metrics/native.json new file mode 100644 index 0000000000000..4aea8048614d2 --- /dev/null +++ b/tests/data/http-scrape/metrics/native.json @@ -0,0 +1 @@ +{ 
"metric" : { "name" : "a_metric", "kind" : "absolute" , "counter" : { "value" : 1 } } } From d55aee10cb952758a4361eef29c09882a1c4dfab Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 29 Jul 2022 22:20:13 +0000 Subject: [PATCH 12/50] remove unused file --- tests/data/http-scrape/auth/json.json | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tests/data/http-scrape/auth/json.json diff --git a/tests/data/http-scrape/auth/json.json b/tests/data/http-scrape/auth/json.json deleted file mode 100644 index 9672096ad8861..0000000000000 --- a/tests/data/http-scrape/auth/json.json +++ /dev/null @@ -1 +0,0 @@ -{ "foo" : "baz" } From d58abaa3956ca5b3ebc268c2d31d5bfdd033585c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 29 Jul 2022 22:21:42 +0000 Subject: [PATCH 13/50] todo --- src/sources/http_scrape/scrape.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 502c658e0b5d8..dc1bbc2af5a06 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -444,6 +444,11 @@ mod integration_tests { .await; } + #[tokio::test] + async fn scraped_trace_native_json() { + // TODO - add a trace + } + #[tokio::test] async fn unauthorized() { // TODO how to assert failure @@ -485,7 +490,7 @@ mod integration_tests { #[tokio::test] async fn tls() { - // TODO - is this worthy of testing and how to verify + // TODO - use dufs with tls settings } #[tokio::test] From 2e0ed24a84e24613ebe9aaef38fbcf067047429e Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 1 Aug 2022 20:22:09 +0000 Subject: [PATCH 14/50] incomplete int tests.. add external docs --- .../docker-compose.http-scrape.yml | 7 +- src/sources/http_scrape/mod.rs | 1 - src/sources/http_scrape/scrape.rs | 53 ++++--- .../components/sources/http_scrape.cue | 142 ++++++++++++++++++ .../cue/reference/services/http_scrape.cue | 8 + website/cue/reference/urls.cue | 1 + 6 files changed, 191 insertions(+), 21 deletions(-) create mode 100644 website/cue/reference/components/sources/http_scrape.cue create mode 100644 website/cue/reference/services/http_scrape.cue diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 042c59b895484..8f5a0b6d7456c 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -31,12 +31,14 @@ services: - backend command: - "--tls-cert" - - "dufs.crt" + - "/certs/ca.cert.pem" - "--tls-key" - - "dufs.key" + - "/certs/ca.key.pem" - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data + - ${PWD}/tests/data/ca/certs/ca.cert.pem:/certs/ca.cert.pem + - ${PWD}/tests/data/ca/private/ca.key.pem:/certs/ca.key.pem runner: build: context: ${PWD} @@ -59,6 +61,7 @@ services: depends_on: - dufs - dufs-auth + - dufs-https volumes: - ${PWD}:/code - cargogit:/usr/local/cargo/git diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 573428451ab30..348ad1fde7b71 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -145,7 +145,6 @@ pub(crate) async fn http_scrape( let mut request = builder.body(Body::empty()).expect("error creating request"); if let Some(auth) = &inputs.auth { - dbg!(auth); auth.apply(&mut request); } diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index dc1bbc2af5a06..08834645e10c3 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -33,7 
+33,7 @@ use vector_core::{ const NAME: &str = "http_scrape"; // TODO: -// - integration tests +// - finish the TODOs in the unit and integration tests // - cue files /// Configuration for the `http_scrape` source. @@ -43,16 +43,16 @@ pub struct HttpScrapeConfig { /// Endpoint to scrape events from. endpoint: String, + /// The interval between scrapes, in seconds. + #[serde(default = "super::default_scrape_interval_secs")] + scrape_interval_secs: u64, + /// Custom parameters for the scrape request query string. /// /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoint` option. query: Option>>, - /// The interval between scrapes, in seconds. - #[serde(default = "super::default_scrape_interval_secs")] - scrape_interval_secs: u64, - /// Decoder to use on the HTTP responses. #[configurable(derived)] #[serde(default = "default_decoding")] @@ -200,17 +200,10 @@ impl super::HttpScraper for HttpScrapeContext { let body = String::from_utf8_lossy(body); buf.extend_from_slice(body.as_bytes()); - //TODO delete - dbg!(body); - // decode and enrich let mut events = self.decode_events(&mut buf); self.enrich_events(&mut events); - // TODO delete - for event in &events { - dbg!(event); - } Some(events) } } @@ -223,10 +216,8 @@ mod test { use super::*; use crate::{ test_util::{ - //collect_ready, components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, - test_generate_config, + next_addr, test_generate_config, }, SourceSender, }; @@ -454,7 +445,7 @@ mod integration_tests { // TODO how to assert failure // let config = HttpScrapeConfig { - // endpoint: format!("http://dufs:5000/auth/json.json"), + // endpoint: format!("http://dufs-auth:5000/auth/json.json"), // scrape_interval_secs: 1, // query: None, // decoding: DeserializerConfig::NativeJson, @@ -485,12 +476,38 @@ mod integration_tests { #[tokio::test] async fn headers() { - // TODO - is this worthy of testing and how to verify + // TODO - is this worthy of testing and how to verify ? } #[tokio::test] async fn tls() { - // TODO - use dufs with tls settings + + // TODO fix this, as it is there is an error from dufs with "Sending fatal alert + // BadRecordMac" + + // and in vector error is: + // + // 2022-08-01T19:11:23.382932Z ERROR vector::internal_events::http_client: HTTP error. error=error trying to connect: error:1416F086:SSL routines:tls_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1919:: self signed certificate error_type="request_failed" stage="processing" + // 2022-08-01T19:11:23.383435Z ERROR vector::internal_events::http_scrape: HTTP request processing error. 
url=https://dufs-https:5000/logs/json.json error=CallRequest { source: hyper::Error(Connect, Custom { kind: Other, error: ConnectError { error: Error { code: ErrorCode(1), cause: Some(Ssl(ErrorStack([Error { code: 337047686, library: "SSL routines", function: "tls_process_server_certificate", reason: "certificate verify failed", file: "ssl/statem/statem_clnt.c", line: 1919 }]))) }, verify_result: X509VerifyResult { code: 18, error: "self signed certificate" } } }) } error_type="request_failed" stage="receiving" internal_log_rate_secs=10 + + // let cert_path = "tests/data/ca/certs/ca.cert.pem"; + // let key_path = "tests/data/ca/private/ca.key.pem"; + + // run_test(HttpScrapeConfig { + // endpoint: format!("https://dufs-https:5000/logs/json.json"), + // scrape_interval_secs: 1, + // query: None, + // decoding: DeserializerConfig::Json, + // framing: default_framing_message_based(), + // headers: None, + // auth: None, + // tls: Some(TlsConfig { + // crt_file: Some(cert_path.into()), + // key_file: Some(key_path.into()), + // ..Default::default() + // }), + // }) + // .await; } #[tokio::test] diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue new file mode 100644 index 0000000000000..f8e3a3fa31925 --- /dev/null +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -0,0 +1,142 @@ +package metadata + +components: sources: http_scrape: { + title: "HTTP Scrape" + alias: "http_scrape" + + classes: { + commonly_used: false + delivery: "at_least_once" + deployment_roles: ["daemon", "sidecar"] + development: "beta" + egress_method: "batch" + stateful: false + } + + features: { + acknowledgements: false + codecs: { + enabled: true + default_framing: "`bytes`" + } + collect: { + checkpoint: enabled: false + from: { + service: services.http_scrape + + interface: socket: { + direction: "outgoing" + protocols: ["http"] + ssl: "optional" + } + } + proxy: enabled: true + tls: { + enabled: true + can_verify_certificate: true + can_verify_hostname: true + enabled_default: false + } + } + multiline: enabled: false + } + + support: { + requirements: [] + warnings: [] + notices: [] + } + + installation: { + platform_name: null + } + + configuration: { + auth: configuration._http_auth & {_args: { + password_example: "${PASSWORD}" + username_example: "${USERNAME}" + }} + endpoint: { + description: "Endpoint to scrape observability data from." + required: true + warnings: ["You must explicitly add the path to your endpoint."] + type: string: { + examples: ["http://127.0.0.1:9898/logs"] + } + } + headers: { + common: false + description: "A list of HTTP headers to include in request." + required: false + type: object: { + examples: [{"Your-Custom-Header": "it's-value"}] + } + } + query: { + common: false + description: """ + Custom parameters for the scrape request query string. + One or more values for the same parameter key can be provided. + The parameters provided in this option are appended to the `endpoint` option. 
+ """ + required: false + type: object: { + examples: [{"match[]": [#"{job="somejob"}"#, #"{__name__=~"job:.*"}"#]}] + options: { + "*": { + common: false + description: "Any query key" + required: false + type: array: { + default: null + examples: [[ + #"{job="somejob"}"#, + #"{__name__=~"job:.*"}"#, + ]] + items: type: string: { + examples: [ + #"{job="somejob"}"#, + #"{__name__=~"job:.*"}"#, + ] + syntax: "literal" + } + } + } + } + } + } + scrape_interval_secs: { + common: true + description: "The interval between scrapes, in seconds." + required: false + type: uint: { + default: 15 + unit: "seconds" + } + } + } + + output: metrics: { + counter: output._passthrough_counter + distribution: output._passthrough_distribution + gauge: output._passthrough_gauge + histogram: output._passthrough_histogram + set: output._passthrough_set + } + + telemetry: metrics: { + events_in_total: components.sources.internal_metrics.output.metrics.events_in_total + http_error_response_total: components.sources.internal_metrics.output.metrics.http_error_response_total + http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total + parse_errors_total: components.sources.internal_metrics.output.metrics.parse_errors_total + processed_bytes_total: components.sources.internal_metrics.output.metrics.processed_bytes_total + processed_events_total: components.sources.internal_metrics.output.metrics.processed_events_total + component_discarded_events_total: components.sources.internal_metrics.output.metrics.component_discarded_events_total + component_errors_total: components.sources.internal_metrics.output.metrics.component_errors_total + component_received_bytes_total: components.sources.internal_metrics.output.metrics.component_received_bytes_total + component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total + component_received_events_total: components.sources.internal_metrics.output.metrics.component_received_events_total + requests_completed_total: components.sources.internal_metrics.output.metrics.requests_completed_total + request_duration_seconds: components.sources.internal_metrics.output.metrics.request_duration_seconds + } +} diff --git a/website/cue/reference/services/http_scrape.cue b/website/cue/reference/services/http_scrape.cue new file mode 100644 index 0000000000000..ed36e51be8a90 --- /dev/null +++ b/website/cue/reference/services/http_scrape.cue @@ -0,0 +1,8 @@ +package metadata + +services: http_scrape: { + name: "HTTP scrape" + thing: "a \(name)" + url: urls.http_scrape + versions: null +} diff --git a/website/cue/reference/urls.cue b/website/cue/reference/urls.cue index 6482c77b4e0f8..7db46e97d52e5 100644 --- a/website/cue/reference/urls.cue +++ b/website/cue/reference/urls.cue @@ -240,6 +240,7 @@ urls: { http: "https://www.w3.org/Protocols/" http_client: "\(wikipedia)/wiki/Hypertext_Transfer_Protocol#Client_request" http_server: "\(wikipedia)/wiki/Web_server" + http_scrape: "https://www.rfc-editor.org/rfc/rfc9110.html" humio: "https://humio.com" humio_hec: "https://docs.humio.com/integrations/data-shippers/hec/" humio_hec_format_of_data: "https://docs.humio.com/integrations/data-shippers/hec/#format-of-data" From f4cee3868b82a7dc2b6090dde94f9487c346581c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 1 Aug 2022 20:39:43 +0000 Subject: [PATCH 15/50] remove redundant events --- Cargo.toml | 2 +- src/internal_events/prometheus.rs | 88 +------------------------------ 
src/sources/prometheus/scrape.rs | 9 +--- 3 files changed, 4 insertions(+), 95 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8a8cbf93d2659..d091dfc098633 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -507,7 +507,7 @@ sources-nats = ["dep:nats", "dep:nkeys"] sources-nginx_metrics = ["dep:nom"] sources-opentelemetry = ["sources-vector", "opentelemetry"] sources-postgresql_metrics = ["dep:postgres-openssl", "dep:tokio-postgres"] -sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-http", "sources-utils-http"] +sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-http", "sources-utils-http", "sources-http_scrape"] sources-redis= ["dep:redis"] sources-socket = ["listenfd", "tokio-util/net", "sources-utils-udp", "sources-utils-tcp-keepalive", "sources-utils-tcp-socket", "sources-utils-tls", "sources-utils-unix"] sources-splunk_hec = ["sources-utils-tls", "dep:roaring"] diff --git a/src/internal_events/prometheus.rs b/src/internal_events/prometheus.rs index b1e40ae639460..3eeb86714a2e2 100644 --- a/src/internal_events/prometheus.rs +++ b/src/internal_events/prometheus.rs @@ -7,38 +7,7 @@ use metrics::counter; use prometheus_parser::ParserError; use vector_core::internal_event::InternalEvent; -use super::prelude::{error_stage, error_type, http_error_code}; - -#[derive(Debug)] -pub struct PrometheusEventsReceived { - pub byte_size: usize, - pub count: usize, - pub uri: http::Uri, -} - -impl InternalEvent for PrometheusEventsReceived { - fn emit(self) { - trace!( - message = "Events received.", - count = %self.count, - byte_size = %self.byte_size, - uri = %self.uri, - ); - counter!( - "component_received_events_total", self.count as u64, - "uri" => self.uri.to_string(), - ); - counter!( - "component_received_event_bytes_total", self.byte_size as u64, - "uri" => self.uri.to_string(), - ); - // deprecated - counter!( - "events_in_total", self.count as u64, - "uri" => self.uri.to_string(), - ); - } -} +use super::prelude::{error_stage, error_type}; #[cfg(feature = "sources-prometheus")] #[derive(Debug)] @@ -75,61 +44,6 @@ impl<'a> InternalEvent for PrometheusParseError<'a> { } } -#[derive(Debug)] -pub struct PrometheusHttpResponseError { - pub code: hyper::StatusCode, - pub url: http::Uri, -} - -impl InternalEvent for PrometheusHttpResponseError { - fn emit(self) { - error!( - message = "HTTP error response.", - url = %self.url, - stage = error_stage::RECEIVING, - error_type = error_type::REQUEST_FAILED, - error_code = %http_error_code(self.code.as_u16()), - internal_log_rate_secs = 10, - ); - counter!( - "component_errors_total", 1, - "url" => self.url.to_string(), - "stage" => error_stage::RECEIVING, - "error_type" => error_type::REQUEST_FAILED, - "error_code" => http_error_code(self.code.as_u16()), - ); - // deprecated - counter!("http_error_response_total", 1); - } -} - -#[derive(Debug)] -pub struct PrometheusHttpError { - pub error: crate::Error, - pub url: http::Uri, -} - -impl InternalEvent for PrometheusHttpError { - fn emit(self) { - error!( - message = "HTTP request processing error.", - url = %self.url, - error = ?self.error, - error_type = error_type::REQUEST_FAILED, - stage = error_stage::RECEIVING, - internal_log_rate_secs = 10, - ); - counter!( - "component_errors_total", 1, - "url" => self.url.to_string(), - "error_type" => error_type::REQUEST_FAILED, - "stage" => error_stage::RECEIVING, - ); - // deprecated - counter!("http_request_errors_total", 1); - } -} - #[derive(Debug)] pub struct PrometheusRemoteWriteParseError { pub error: 
prost::DecodeError, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 5dc29805856ee..e2707b883e542 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -6,13 +6,13 @@ use http::{response::Parts, Uri}; use serde::{Deserialize, Serialize}; use snafu::{ResultExt, Snafu}; use vector_config::configurable_component; -use vector_core::{config::LogNamespace, event::Event, ByteSizeOf}; +use vector_core::{config::LogNamespace, event::Event}; use super::parser; use crate::{ config::{self, GenerateConfig, Output, SourceConfig, SourceContext, SourceDescription}, http::Auth, - internal_events::{PrometheusEventsReceived, PrometheusParseError}, + internal_events::PrometheusParseError, sources::{ self, http_scrape::{ @@ -270,11 +270,6 @@ impl HttpScraper for PrometheusScrapeContext { match parser::parse_text(&body) { Ok(mut events) => { - emit!(PrometheusEventsReceived { - byte_size: events.size_of(), - count: events.len(), - uri: url.clone() - }); for event in events.iter_mut() { let metric = event.as_mut_metric(); if let Some(InstanceInfo { From e6d6d1b038ef2009d65a23157d93e84e7841cb89 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 1 Aug 2022 21:01:13 +0000 Subject: [PATCH 16/50] clippy / cleanup --- src/sources/http_scrape/mod.rs | 2 +- src/sources/http_scrape/scrape.rs | 16 ++++++++-------- website/cue/reference/urls.cue | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 348ad1fde7b71..968f258ea7e38 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -182,7 +182,7 @@ pub(crate) async fn http_scrape( // after the stream is written ? Yet I'm not seeing how to go about // that. // emit EventsSent if metrics - if events.len() > 0 { + if !events.is_empty() { if let Event::Metric(ref _metric) = events.first().expect("should have event") { diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 08834645e10c3..6a966a649b36a 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -34,7 +34,7 @@ const NAME: &str = "http_scrape"; // TODO: // - finish the TODOs in the unit and integration tests -// - cue files +// - solve the placement of the HttpScrapeEventsSent /// Configuration for the `http_scrape` source. 
#[configurable_component(source)] @@ -119,7 +119,7 @@ impl SourceConfig for HttpScrapeConfig { ) .build(); - // the only specific context needed is the ability to decode + // the only specific context needed is the codec decoding let context = HttpScrapeContext { decoder }; let inputs = super::GenericHttpScrapeInputs::new( @@ -393,7 +393,7 @@ mod integration_tests { #[tokio::test] async fn scraped_logs_bytes() { run_test(HttpScrapeConfig { - endpoint: format!("http://dufs:5000/logs/bytes"), + endpoint: "http://dufs:5000/logs/bytes".to_string(), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Bytes, @@ -408,7 +408,7 @@ mod integration_tests { #[tokio::test] async fn scraped_logs_json() { run_test(HttpScrapeConfig { - endpoint: format!("http://dufs:5000/logs/json.json"), + endpoint: "http://dufs:5000/logs/json.json".to_string(), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, @@ -423,7 +423,7 @@ mod integration_tests { #[tokio::test] async fn scraped_metrics_native_json() { run_test(HttpScrapeConfig { - endpoint: format!("http://dufs:5000/metrics/native.json"), + endpoint: "http://dufs:5000/metrics/native.json".to_string(), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::NativeJson, @@ -437,7 +437,7 @@ mod integration_tests { #[tokio::test] async fn scraped_trace_native_json() { - // TODO - add a trace + // TODO - add a trace ? Or is it not really helpful since basically the same as a log ? } #[tokio::test] @@ -459,7 +459,7 @@ mod integration_tests { #[tokio::test] async fn authorized() { run_test(HttpScrapeConfig { - endpoint: format!("http://dufs-auth:5000/logs/json.json"), + endpoint: "http://dufs-auth:5000/logs/json.json".to_string(), scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, @@ -494,7 +494,7 @@ mod integration_tests { // let key_path = "tests/data/ca/private/ca.key.pem"; // run_test(HttpScrapeConfig { - // endpoint: format!("https://dufs-https:5000/logs/json.json"), + // endpoint: "https://dufs-https:5000/logs/json.json".to_string(), // scrape_interval_secs: 1, // query: None, // decoding: DeserializerConfig::Json, diff --git a/website/cue/reference/urls.cue b/website/cue/reference/urls.cue index 7db46e97d52e5..07f2a1cfb3d78 100644 --- a/website/cue/reference/urls.cue +++ b/website/cue/reference/urls.cue @@ -240,7 +240,7 @@ urls: { http: "https://www.w3.org/Protocols/" http_client: "\(wikipedia)/wiki/Hypertext_Transfer_Protocol#Client_request" http_server: "\(wikipedia)/wiki/Web_server" - http_scrape: "https://www.rfc-editor.org/rfc/rfc9110.html" + http_scrape: "https://www.rfc-editor.org/rfc/rfc9110.html" humio: "https://humio.com" humio_hec: "https://docs.humio.com/integrations/data-shippers/hec/" humio_hec_format_of_data: "https://docs.humio.com/integrations/data-shippers/hec/#format-of-data" From c1ad50f87c942378961bd5409f41c2aea03d4821 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 1 Aug 2022 22:07:17 +0000 Subject: [PATCH 17/50] fix cue fmt, enable on windows --- src/sources/http_scrape/mod.rs | 2 +- src/sources/http_scrape/scrape.rs | 1 - website/cue/reference/components/sources/http_scrape.cue | 2 +- website/cue/reference/urls.cue | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 968f258ea7e38..28472df937f8a 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -8,7 +8,7 @@ //! 
- Call http_scrape() supplying the generic inputs for scraping and the source-specific //! context. -#[cfg(all(unix, feature = "sources-http_scrape"))] +#[cfg(feature = "sources-http_scrape")] pub mod scrape; pub use scrape::HttpScrapeConfig; diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 6a966a649b36a..f78a8a2abb6a0 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -481,7 +481,6 @@ mod integration_tests { #[tokio::test] async fn tls() { - // TODO fix this, as it is there is an error from dufs with "Sending fatal alert // BadRecordMac" diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index f8e3a3fa31925..39f2d09cd219f 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -117,7 +117,7 @@ components: sources: http_scrape: { } output: metrics: { - counter: output._passthrough_counter + counter: output._passthrough_counter distribution: output._passthrough_distribution gauge: output._passthrough_gauge histogram: output._passthrough_histogram diff --git a/website/cue/reference/urls.cue b/website/cue/reference/urls.cue index 07f2a1cfb3d78..02ef968b7b030 100644 --- a/website/cue/reference/urls.cue +++ b/website/cue/reference/urls.cue @@ -240,7 +240,7 @@ urls: { http: "https://www.w3.org/Protocols/" http_client: "\(wikipedia)/wiki/Hypertext_Transfer_Protocol#Client_request" http_server: "\(wikipedia)/wiki/Web_server" - http_scrape: "https://www.rfc-editor.org/rfc/rfc9110.html" + http_scrape: "https://www.rfc-editor.org/rfc/rfc9110.html" humio: "https://humio.com" humio_hec: "https://docs.humio.com/integrations/data-shippers/hec/" humio_hec_format_of_data: "https://docs.humio.com/integrations/data-shippers/hec/#format-of-data" From 09dbf431b0690ef73beaeb942a62e50be5d7adf6 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 1 Aug 2022 23:10:02 +0000 Subject: [PATCH 18/50] fix one component bug --- src/internal_events/prelude.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/internal_events/prelude.rs b/src/internal_events/prelude.rs index ee191793534cd..b5d11e43537b8 100644 --- a/src/internal_events/prelude.rs +++ b/src/internal_events/prelude.rs @@ -47,11 +47,10 @@ pub mod error_type { #[cfg(any( feature = "sinks-azure_blob", feature = "sinks-elasticsearch", - feature = "sinks-prometheus", feature = "sources-apache_metrics", feature = "sources-aws_ecs_metrics", feature = "sources-aws_kinesis_firehose", - feature = "sources-prometheus", + feature = "sources-http-scrape", feature = "sources-utils-http", ))] pub(crate) fn http_error_code(code: u16) -> String { From 32acc20f385ee0dcfbd3f122b51a44dd4ba793f8 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 2 Aug 2022 18:25:05 +0000 Subject: [PATCH 19/50] feedback from sg, add more testing --- .../docker-compose.http-scrape.yml | 4 +- src/sources/http_scrape/scrape.rs | 75 +++++++++++----- tests/data/http-scrape/traces/native.json | 1 + .../configuration/sources/http_scrape.md | 14 +++ .../components/sources/http_scrape.cue | 86 +++++++++++++++++-- 5 files changed, 151 insertions(+), 29 deletions(-) create mode 100644 tests/data/http-scrape/traces/native.json create mode 100644 website/content/en/docs/reference/configuration/sources/http_scrape.md diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 
8f5a0b6d7456c..4b77da454d5f8 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -11,8 +11,7 @@ services: - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data - # the project suggests that two services should not be necessary - # yet I was not able to only auth protect one path for view/download access. + # To validate Basic HTTP authentication option dufs-auth: image: docker.io/sigoden/dufs:latest networks: @@ -25,6 +24,7 @@ services: - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data + # To validate TLS options dufs-https: image: docker.io/sigoden/dufs:latest networks: diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index f78a8a2abb6a0..ffb4730388804 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -176,12 +176,20 @@ impl HttpScrapeContext { events } - /// Enriches log events + /// Enriches events with source_type, timestamp fn enrich_events(&self, events: &mut Vec) { for event in events { - if let Event::Log(ref mut log) = event { - log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); - log.try_insert(log_schema().timestamp_key(), Utc::now()); + match event { + Event::Log(ref mut log) => { + log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); + log.try_insert(log_schema().timestamp_key(), Utc::now()); + } + Event::Metric(ref mut metric) => { + metric.insert_tag(log_schema().source_type_key().to_string(), NAME.to_string()); + } + Event::Trace(ref mut trace) => { + trace.insert(log_schema().source_type_key(), Bytes::from(NAME)); + } } } } @@ -210,6 +218,8 @@ impl super::HttpScraper for HttpScrapeContext { #[cfg(test)] mod test { + use futures::{poll, StreamExt}; + use std::task::Poll; use tokio::time::{sleep, Duration}; use warp::Filter; @@ -227,10 +237,10 @@ mod test { test_generate_config::(); } + // I haven't seen a better way to validate an error occurred, but it seems like there should be + // a way, since if this is run live it generates an HTTP error. #[tokio::test] async fn invalid_endpoint() { - let (tx, _rx) = SourceSender::new_test(); - let source = HttpScrapeConfig { endpoint: "http://nope".to_string(), scrape_interval_secs: 1, @@ -240,21 +250,22 @@ mod test { headers: None, auth: None, tls: None, - } - .build(SourceContext::new_test(tx, None)) - .await - .unwrap(); - tokio::spawn(source); + }; + + // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. 
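        // Note: the built source future is never spawned in this version of the test; dropping it
        // is what closes the sender half of the test channel, so the final poll on `rx` is
        // expected to yield `Ready(None)` rather than any events.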
+ let (tx, mut rx) = SourceSender::new_test(); + let context = SourceContext::new_test(tx, None); + + let source = source + .build(context) + .await + .expect("source should not fail to build"); sleep(Duration::from_secs(1)).await; - // TODO how to verify there was an error + drop(source); - // let _ = collect_ready(rx) - // .await - // .into_iter() - // .map(|e| e.into_metric()) - // .collect::>(); + assert_eq!(poll!(rx.next()), Poll::Ready(None)); } async fn run_test(config: HttpScrapeConfig) -> Vec { @@ -380,7 +391,7 @@ mod integration_tests { use super::*; use crate::test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}; - async fn run_test(config: HttpScrapeConfig) { + async fn run_test(config: HttpScrapeConfig) -> Vec { let events = run_and_assert_source_compliance( config, Duration::from_secs(1), @@ -388,6 +399,7 @@ mod integration_tests { ) .await; assert!(!events.is_empty()); + events } #[tokio::test] @@ -407,7 +419,7 @@ mod integration_tests { #[tokio::test] async fn scraped_logs_json() { - run_test(HttpScrapeConfig { + let events = run_test(HttpScrapeConfig { endpoint: "http://dufs:5000/logs/json.json".to_string(), scrape_interval_secs: 1, query: None, @@ -418,11 +430,13 @@ mod integration_tests { tls: None, }) .await; + let log = events[0].as_log(); + assert_eq!(log[log_schema().source_type_key()], NAME.into()); } #[tokio::test] async fn scraped_metrics_native_json() { - run_test(HttpScrapeConfig { + let events = run_test(HttpScrapeConfig { endpoint: "http://dufs:5000/metrics/native.json".to_string(), scrape_interval_secs: 1, query: None, @@ -433,11 +447,30 @@ mod integration_tests { tls: None, }) .await; + + let metric = events[0].as_metric(); + assert_eq!( + metric.tags().unwrap()[log_schema().source_type_key()], + NAME.to_string() + ); } #[tokio::test] async fn scraped_trace_native_json() { - // TODO - add a trace ? Or is it not really helpful since basically the same as a log ? + let events = run_test(HttpScrapeConfig { + endpoint: "http://dufs:5000/traces/native.json".to_string(), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::NativeJson, + framing: default_framing_message_based(), + headers: None, + auth: None, + tls: None, + }) + .await; + + let trace = events[0].as_trace(); + assert_eq!(trace.as_map()[log_schema().source_type_key()], NAME.into()); } #[tokio::test] diff --git a/tests/data/http-scrape/traces/native.json b/tests/data/http-scrape/traces/native.json new file mode 100644 index 0000000000000..cc755b92f3bea --- /dev/null +++ b/tests/data/http-scrape/traces/native.json @@ -0,0 +1 @@ +{ "trace" : { "name" : "a_trace", "foo" : 42 } } diff --git a/website/content/en/docs/reference/configuration/sources/http_scrape.md b/website/content/en/docs/reference/configuration/sources/http_scrape.md new file mode 100644 index 0000000000000..2cea00dc99f0d --- /dev/null +++ b/website/content/en/docs/reference/configuration/sources/http_scrape.md @@ -0,0 +1,14 @@ +--- +title: HTTP +description: Collect observability data emitted by an [HTTP](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Client_request) server +kind: source +layout: component +tags: ["http", "scrape", "component", "source", "logs", "metrics", traces"] +--- + +{{/* +This doc is generated using: + +1. The template in layouts/docs/component.html +2. The relevant CUE data in cue/reference/components/... 
+*/}} diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index 39f2d09cd219f..6d26d08d83583 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -116,12 +116,86 @@ components: sources: http_scrape: { } } - output: metrics: { - counter: output._passthrough_counter - distribution: output._passthrough_distribution - gauge: output._passthrough_gauge - histogram: output._passthrough_histogram - set: output._passthrough_set + output: { + logs: { + text: { + description: "An individual line from a `text/plain` HTTP request" + fields: { + message: { + description: "The raw line line from the incoming payload." + relevant_when: "encoding == \"text\"" + required: true + type: string: { + examples: ["Hello world"] + } + } + source_type: { + description: "The name of the source type." + required: true + type: string: { + examples: ["http_scrape"] + } + } + timestamp: fields._current_timestamp + } + } + structured: { + description: "An individual line from an `application/json` request" + fields: { + "*": { + common: false + description: "Any field contained in your JSON payload" + relevant_when: "encoding != \"text\"" + required: false + type: "*": {} + } + source_type: { + description: "The name of the source type." + required: true + type: string: { + examples: ["http_scrape"] + } + } + timestamp: fields._current_timestamp + } + } + } + metrics: { + _extra_tags: { + "source_type": { + description: "The name of the source type." + examples: ["http_scrape"] + required: true + } + } + counter: output._passthrough_counter & { + tags: _extra_tags + } + distribution: output._passthrough_distribution & { + tags: _extra_tags + } + gauge: output._passthrough_gauge & { + tags: _extra_tags + } + histogram: output._passthrough_histogram & { + tags: _extra_tags + } + set: output._passthrough_set & { + tags: _extra_tags + } + } + traces: { + description: "A trace received through an HTTP request." + fields: { + source_type: { + description: "The name of the source type." 
+ required: true + type: string: { + examples: ["http_scrape"] + } + } + } + } } telemetry: metrics: { From 36761c6008d5e11cdf3c57b6f1c01c8c0b82b76f Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 2 Aug 2022 23:13:27 +0000 Subject: [PATCH 20/50] partial feedback from js & sg --- Cargo.toml | 2 +- .../decoding/framing/character_delimited.rs | 4 +- src/codecs/decoding/decoder.rs | 35 ++++++++++- src/internal_events/http_scrape.rs | 14 ----- src/sources/http_scrape/mod.rs | 63 +++++-------------- src/sources/http_scrape/scrape.rs | 21 +++---- src/sources/prometheus/scrape.rs | 15 ++--- .../components/sources/http_scrape.cue | 2 +- 8 files changed, 71 insertions(+), 85 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 982b3e3dae67a..abe2dcd6485d3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -768,7 +768,7 @@ gcp-cloud-storage-integration-tests = ["sinks-gcp"] gcp-integration-tests = ["sinks-gcp"] gcp-pubsub-integration-tests = ["sinks-gcp", "sources-gcp_pubsub"] humio-integration-tests = ["sinks-humio"] -http-scrape-integration-tests = ["sources-http_scrape"] +http-scrape-integration-tests = ["sources-http_scrape", "codecs/syslog"] influxdb-integration-tests = ["sinks-influxdb"] kafka-integration-tests = ["sinks-kafka", "sources-kafka"] logstash-integration-tests = ["docker", "sources-logstash"] diff --git a/lib/codecs/src/decoding/framing/character_delimited.rs b/lib/codecs/src/decoding/framing/character_delimited.rs index 5eb86205a876c..8905693b90cfb 100644 --- a/lib/codecs/src/decoding/framing/character_delimited.rs +++ b/lib/codecs/src/decoding/framing/character_delimited.rs @@ -56,9 +56,9 @@ impl CharacterDelimitedDecoderOptions { #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub struct CharacterDelimitedDecoder { /// The delimiter used to separate byte sequences. - delimiter: u8, + pub delimiter: u8, /// The maximum length of the byte buffer. - max_length: usize, + pub max_length: usize, } impl CharacterDelimitedDecoder { diff --git a/src/codecs/decoding/decoder.rs b/src/codecs/decoding/decoder.rs index 867c9781428b6..b23164a5ec303 100644 --- a/src/codecs/decoding/decoder.rs +++ b/src/codecs/decoding/decoder.rs @@ -1,7 +1,10 @@ use bytes::{Bytes, BytesMut}; -use codecs::decoding::{ - format::Deserializer as _, BoxedFramingError, BytesDeserializer, Deserializer, Error, Framer, - NewlineDelimitedDecoder, +use codecs::{ + decoding::{ + format::Deserializer as _, BoxedFramingError, BytesDeserializer, Deserializer, Error, + Framer, NewlineDelimitedDecoder, + }, + CharacterDelimitedDecoder, }; use smallvec::SmallVec; use vector_core::config::LogNamespace; @@ -76,6 +79,32 @@ impl Decoder { Error::ParsingError(error) }) } + + /// Get the HTTP content type. 
+ pub const fn content_type(&self) -> &str { + match (&self.deserializer, &self.framer) { + (Deserializer::Json(_) | Deserializer::NativeJson(_), Framer::NewlineDelimited(_)) => { + "application/x-ndjson" + } + ( + Deserializer::Gelf(_) | Deserializer::Json(_) | Deserializer::NativeJson(_), + Framer::CharacterDelimited(CharacterDelimitedDecoder { + delimiter: b',', + max_length: usize::MAX, + }), + ) => "application/json", + (Deserializer::Native(_), _) => "application/octet-stream", + ( + Deserializer::Json(_) + | Deserializer::Syslog(_) + | Deserializer::NativeJson(_) + | Deserializer::Bytes(_) + | Deserializer::Gelf(_) + | Deserializer::Boxed(_), + _, + ) => "text/plain", + } + } } impl tokio_util::codec::Decoder for Decoder { diff --git a/src/internal_events/http_scrape.rs b/src/internal_events/http_scrape.rs index 04ee0ebf5c6b8..801e859d8f714 100644 --- a/src/internal_events/http_scrape.rs +++ b/src/internal_events/http_scrape.rs @@ -88,17 +88,3 @@ impl InternalEvent for HttpScrapeHttpError { counter!("http_request_errors_total", 1); } } - -#[derive(Debug)] -pub struct HttpScrapeEventsSent { - pub count: u64, - pub byte_size: usize, -} - -impl InternalEvent for HttpScrapeEventsSent { - fn emit(self) { - trace!(message = "Events sent.", count = %self.count, byte_size = %self.byte_size); - counter!("component_sent_events_total", self.count as u64); - counter!("component_sent_event_bytes_total", self.byte_size as u64); - } -} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 28472df937f8a..aaa7d6e056aa5 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -21,7 +21,6 @@ use std::time::{Duration, Instant}; use std::{collections::HashMap, future::ready}; use tokio_stream::wrappers::IntervalStream; -use crate::internal_events::HttpScrapeEventsSent; use crate::{ http::{Auth, HttpClient}, internal_events::{ @@ -36,35 +35,18 @@ use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; /// Contains the inputs generic to any http scrape. pub(crate) struct GenericHttpScrapeInputs { - urls: Vec, - interval_secs: u64, - headers: Option>, - auth: Option, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, -} - -impl GenericHttpScrapeInputs { - pub fn new( - urls: Vec, - interval_secs: u64, - headers: Option>, - auth: Option, - tls: TlsSettings, - proxy: ProxyConfig, - shutdown: ShutdownSignal, - ) -> Self { - Self { - urls, - interval_secs, - headers, - auth, - tls, - proxy, - shutdown, - } - } + /// Array of URLs to scrape + pub urls: Vec, + /// Interval to scrape on in seconds + pub interval_secs: u64, + /// Map of Header+Value to apply to HTTP request + pub headers: Option>, + /// Content type of the HTTP request, determined by the source + pub content_type: String, + pub auth: Option, + pub tls: TlsSettings, + pub proxy: ProxyConfig, + pub shutdown: ShutdownSignal, } /// The default interval to scrape the http endpoint if none is configured. @@ -127,6 +109,8 @@ pub(crate) async fn http_scrape( .map(move |_| stream::iter(inputs.urls.clone())) .flatten() .map(move |url| { + // Building the HttpClient should not fail as it is just setting up the client with the + // proxy and tls settings. 
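            // Each interval tick issues one GET per configured URL with a freshly built client and
            // a clone of the source-specific context; the Accept header is derived from the
            // configured codec via `inputs.content_type` instead of being hard-coded to text/plain.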
let client = HttpClient::new(inputs.tls.clone(), &inputs.proxy) .expect("Building HTTP client failed"); let endpoint = url.to_string(); @@ -134,7 +118,7 @@ pub(crate) async fn http_scrape( let mut context = context.clone(); context.build(&url); - let mut builder = Request::get(&url).header(http::header::ACCEPT, "text/plain"); + let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); // add user supplied headers if let Some(headers) = &inputs.headers { @@ -142,6 +126,8 @@ pub(crate) async fn http_scrape( builder = builder.header(header.0, header.1); } } + + // building an empty request should be infallible let mut request = builder.body(Body::empty()).expect("error creating request"); if let Some(auth) = &inputs.auth { @@ -177,21 +163,6 @@ pub(crate) async fn http_scrape( count: events.len(), uri: url.clone() }); - - // TODO the below seems wrong placement. It seems should happen - // after the stream is written ? Yet I'm not seeing how to go about - // that. - // emit EventsSent if metrics - if !events.is_empty() { - if let Event::Metric(ref _metric) = - events.first().expect("should have event") - { - emit!(HttpScrapeEventsSent { - count: events.len() as u64, - byte_size: events.size_of() - }); - } - } Some(stream::iter(events)) } None => None, diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index ffb4730388804..852ea07b1ef1c 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -32,10 +32,6 @@ use vector_core::{ /// The name of this source const NAME: &str = "http_scrape"; -// TODO: -// - finish the TODOs in the unit and integration tests -// - solve the placement of the HttpScrapeEventsSent - /// Configuration for the `http_scrape` source. #[configurable_component(source)] #[derive(Clone, Debug)] @@ -119,18 +115,21 @@ impl SourceConfig for HttpScrapeConfig { ) .build(); + let content_type = decoder.content_type().to_string(); + // the only specific context needed is the codec decoding let context = HttpScrapeContext { decoder }; - let inputs = super::GenericHttpScrapeInputs::new( + let inputs = super::GenericHttpScrapeInputs { urls, - self.scrape_interval_secs, - self.headers.clone(), - self.auth.clone(), + interval_secs: self.scrape_interval_secs, + headers: self.headers.clone(), + content_type, + auth: self.auth.clone(), tls, - cx.proxy.clone(), - cx.shutdown, - ); + proxy: cx.proxy.clone(), + shutdown: cx.shutdown, + }; Ok(super::http_scrape(inputs, context, cx.out).boxed()) } diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 85059d7b4c5f3..e41581ab8532d 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -131,15 +131,16 @@ impl SourceConfig for PrometheusScrapeConfig { endpoint_info: None, }; - let inputs = GenericHttpScrapeInputs::new( + let inputs = GenericHttpScrapeInputs { urls, - self.scrape_interval_secs, - None, - self.auth.clone(), + interval_secs: self.scrape_interval_secs, + headers: None, + content_type: "text/plain".to_string(), + auth: self.auth.clone(), tls, - cx.proxy.clone(), - cx.shutdown, - ); + proxy: cx.proxy.clone(), + shutdown: cx.shutdown, + }; Ok(http_scrape(inputs, context, cx.out).boxed()) } diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index 6d26d08d83583..bf748f8dfff04 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue 
@@ -7,7 +7,7 @@ components: sources: http_scrape: { classes: { commonly_used: false delivery: "at_least_once" - deployment_roles: ["daemon", "sidecar"] + deployment_roles: ["daemon", "sidecar", "aggregator"] development: "beta" egress_method: "batch" stateful: false From 3d6906e8c5d450b5fdcf23537c9fecb173b94401 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 16:34:52 +0000 Subject: [PATCH 21/50] move the get_content fn --- Cargo.toml | 2 +- .../decoding/framing/character_delimited.rs | 4 +-- lib/codecs/src/decoding/mod.rs | 32 +++++++++++++++++ src/codecs/decoding/decoder.rs | 35 ++----------------- src/sources/http_scrape/scrape.rs | 2 +- 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index abe2dcd6485d3..982b3e3dae67a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -768,7 +768,7 @@ gcp-cloud-storage-integration-tests = ["sinks-gcp"] gcp-integration-tests = ["sinks-gcp"] gcp-pubsub-integration-tests = ["sinks-gcp", "sources-gcp_pubsub"] humio-integration-tests = ["sinks-humio"] -http-scrape-integration-tests = ["sources-http_scrape", "codecs/syslog"] +http-scrape-integration-tests = ["sources-http_scrape"] influxdb-integration-tests = ["sinks-influxdb"] kafka-integration-tests = ["sinks-kafka", "sources-kafka"] logstash-integration-tests = ["docker", "sources-logstash"] diff --git a/lib/codecs/src/decoding/framing/character_delimited.rs b/lib/codecs/src/decoding/framing/character_delimited.rs index 8905693b90cfb..d1dcdf72967f6 100644 --- a/lib/codecs/src/decoding/framing/character_delimited.rs +++ b/lib/codecs/src/decoding/framing/character_delimited.rs @@ -34,12 +34,12 @@ impl CharacterDelimitedDecoderConfig { pub struct CharacterDelimitedDecoderOptions { /// The character that delimits byte sequences. #[serde(with = "vector_core::serde::ascii_char")] - delimiter: u8, + pub delimiter: u8, /// The maximum length of the byte buffer. /// /// This length does *not* include the trailing delimiter. #[serde(skip_serializing_if = "vector_core::serde::skip_serializing_if_default")] - max_length: Option, + pub max_length: Option, } impl CharacterDelimitedDecoderOptions { diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index 4bd8b7bcb5a06..b8226853823e3 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -324,6 +324,38 @@ impl DeserializerConfig { DeserializerConfig::Gelf => GelfDeserializerConfig.schema_definition(log_namespace), } } + + /// Get the HTTP content type. + pub const fn content_type(&self, framer: &FramingConfig) -> &str { + match (&self, framer) { + ( + DeserializerConfig::Json | DeserializerConfig::NativeJson, + FramingConfig::NewlineDelimited { .. }, + ) => "application/x-ndjson", + ( + DeserializerConfig::Gelf + | DeserializerConfig::Json + | DeserializerConfig::NativeJson, + FramingConfig::CharacterDelimited { + character_delimited: + CharacterDelimitedDecoderOptions { + delimiter: b',', + max_length: None, + }, + }, + ) => "application/json", + (DeserializerConfig::Native, _) => "application/octet-stream", + ( + DeserializerConfig::Json + | DeserializerConfig::NativeJson + | DeserializerConfig::Bytes + | DeserializerConfig::Gelf, + _, + ) => "text/plain", + #[cfg(feature = "syslog")] + (DeserializerConfig::Syslog, _) => "text/plain", + } + } } /// Parse structured events from bytes. 
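A quick sanity sketch of the new `DeserializerConfig::content_type` mapping, using only the `codecs::decoding` exports already imported elsewhere in this series (the free function name below is illustrative, not part of the patch):

use codecs::decoding::{DeserializerConfig, FramingConfig, NewlineDelimitedDecoderOptions};

fn content_type_examples() {
    let ndjson_framing = FramingConfig::NewlineDelimited {
        newline_delimited: NewlineDelimitedDecoderOptions::new_with_max_length(10),
    };
    // JSON-family deserializers behind newline-delimited framing are requested as NDJSON.
    assert_eq!(
        DeserializerConfig::NativeJson.content_type(&ndjson_framing),
        "application/x-ndjson"
    );
    // Combinations that fall through the earlier match arms are requested as plain text.
    assert_eq!(
        DeserializerConfig::Bytes.content_type(&ndjson_framing),
        "text/plain"
    );
}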
diff --git a/src/codecs/decoding/decoder.rs b/src/codecs/decoding/decoder.rs index b23164a5ec303..867c9781428b6 100644 --- a/src/codecs/decoding/decoder.rs +++ b/src/codecs/decoding/decoder.rs @@ -1,10 +1,7 @@ use bytes::{Bytes, BytesMut}; -use codecs::{ - decoding::{ - format::Deserializer as _, BoxedFramingError, BytesDeserializer, Deserializer, Error, - Framer, NewlineDelimitedDecoder, - }, - CharacterDelimitedDecoder, +use codecs::decoding::{ + format::Deserializer as _, BoxedFramingError, BytesDeserializer, Deserializer, Error, Framer, + NewlineDelimitedDecoder, }; use smallvec::SmallVec; use vector_core::config::LogNamespace; @@ -79,32 +76,6 @@ impl Decoder { Error::ParsingError(error) }) } - - /// Get the HTTP content type. - pub const fn content_type(&self) -> &str { - match (&self.deserializer, &self.framer) { - (Deserializer::Json(_) | Deserializer::NativeJson(_), Framer::NewlineDelimited(_)) => { - "application/x-ndjson" - } - ( - Deserializer::Gelf(_) | Deserializer::Json(_) | Deserializer::NativeJson(_), - Framer::CharacterDelimited(CharacterDelimitedDecoder { - delimiter: b',', - max_length: usize::MAX, - }), - ) => "application/json", - (Deserializer::Native(_), _) => "application/octet-stream", - ( - Deserializer::Json(_) - | Deserializer::Syslog(_) - | Deserializer::NativeJson(_) - | Deserializer::Bytes(_) - | Deserializer::Gelf(_) - | Deserializer::Boxed(_), - _, - ) => "text/plain", - } - } } impl tokio_util::codec::Decoder for Decoder { diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 852ea07b1ef1c..9fd09d4460064 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -115,7 +115,7 @@ impl SourceConfig for HttpScrapeConfig { ) .build(); - let content_type = decoder.content_type().to_string(); + let content_type = self.decoding.content_type(&self.framing).to_string(); // the only specific context needed is the codec decoding let context = HttpScrapeContext { decoder }; From 815224bd20ae30532bd3d18d88ee2dd01a38c2cc Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 17:35:36 +0000 Subject: [PATCH 22/50] trying more with tls --- scripts/integration/docker-compose.http-scrape.yml | 6 ++++-- src/sources/http_scrape/scrape.rs | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 4b77da454d5f8..8101c4b06fb22 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -37,8 +37,10 @@ services: - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data - - ${PWD}/tests/data/ca/certs/ca.cert.pem:/certs/ca.cert.pem - - ${PWD}/tests/data/ca/private/ca.key.pem:/certs/ca.key.pem + - ${PWD}/tests/data/ca/intermediate_server/certs/localhost.cert.pem:/certs/ca.cert.pem + - ${PWD}/tests/data/ca/intermediate_server/private/localhost.key.pem:/certs/ca.key.pem + # - ${PWD}/tests/data/ca/certs/ca.cert.pem:/certs/ca.cert.pem + # - ${PWD}/tests/data/ca/private/ca.key.pem:/certs/ca.key.pem runner: build: context: ${PWD} diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 9fd09d4460064..f30982253b284 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -521,8 +521,10 @@ mod integration_tests { // 2022-08-01T19:11:23.382932Z ERROR vector::internal_events::http_client: HTTP error. 
error=error trying to connect: error:1416F086:SSL routines:tls_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1919:: self signed certificate error_type="request_failed" stage="processing" // 2022-08-01T19:11:23.383435Z ERROR vector::internal_events::http_scrape: HTTP request processing error. url=https://dufs-https:5000/logs/json.json error=CallRequest { source: hyper::Error(Connect, Custom { kind: Other, error: ConnectError { error: Error { code: ErrorCode(1), cause: Some(Ssl(ErrorStack([Error { code: 337047686, library: "SSL routines", function: "tls_process_server_certificate", reason: "certificate verify failed", file: "ssl/statem/statem_clnt.c", line: 1919 }]))) }, verify_result: X509VerifyResult { code: 18, error: "self signed certificate" } } }) } error_type="request_failed" stage="receiving" internal_log_rate_secs=10 - // let cert_path = "tests/data/ca/certs/ca.cert.pem"; - // let key_path = "tests/data/ca/private/ca.key.pem"; + //let cert_path = "tests/data/ca/certs/ca.cert.pem"; + //let key_path = "tests/data/ca/private/ca.key.pem"; + // let cert_path = "tests/data/ca/intermediate_server/certs/localhost.cert.pem"; + // let key_path = "tests/data/ca/intermediate_server/private/localhost.key.pem"; // run_test(HttpScrapeConfig { // endpoint: "https://dufs-https:5000/logs/json.json".to_string(), From 3f4dca36828264a6232f1db7025dc27eb1cd5838 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 19:50:26 +0000 Subject: [PATCH 23/50] use ca cert --- src/sources/http_scrape/scrape.rs | 36 +++++++++++++++++-------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index f30982253b284..37f52801b9bd5 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -388,7 +388,10 @@ mod integration_tests { use tokio::time::Duration; use super::*; - use crate::test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}; + use crate::{ + test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, + tls, + }; async fn run_test(config: HttpScrapeConfig) -> Vec { let events = run_and_assert_source_compliance( @@ -526,21 +529,22 @@ mod integration_tests { // let cert_path = "tests/data/ca/intermediate_server/certs/localhost.cert.pem"; // let key_path = "tests/data/ca/intermediate_server/private/localhost.key.pem"; - // run_test(HttpScrapeConfig { - // endpoint: "https://dufs-https:5000/logs/json.json".to_string(), - // scrape_interval_secs: 1, - // query: None, - // decoding: DeserializerConfig::Json, - // framing: default_framing_message_based(), - // headers: None, - // auth: None, - // tls: Some(TlsConfig { - // crt_file: Some(cert_path.into()), - // key_file: Some(key_path.into()), - // ..Default::default() - // }), - // }) - // .await; + run_test(HttpScrapeConfig { + endpoint: "https://dufs-https:5000/logs/json.json".to_string(), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: None, + auth: None, + tls: Some(TlsConfig { + crt_file: Some(tls::TEST_PEM_CRT_PATH.into()), + key_file: Some(tls::TEST_PEM_KEY_PATH.into()), + ca_file: Some(tls::TEST_PEM_CA_PATH.into()), + ..Default::default() + }), + }) + .await; } #[tokio::test] From 8b2fdd57b01af64a4cf8ee2b9e3fac4205078ca5 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 20:34:54 +0000 Subject: [PATCH 24/50] tls test working --- 
.../docker-compose.http-scrape.yml | 6 +- src/sources/http_scrape/scrape.rs | 87 +++++++++++----- tests/data/Makefile | 18 ++++ .../certs/dufs-https-chain.cert.pem | 98 +++++++++++++++++++ .../certs/dufs-https.cert.pem | 32 ++++++ .../csr/dufs-https.csr.pem | 17 ++++ tests/data/ca/intermediate_server/index.txt | 1 + .../data/ca/intermediate_server/index.txt.old | 1 + .../ca/intermediate_server/newcerts/1006.pem | 32 ++++++ .../private/dufs-https.key.pem | 28 ++++++ tests/data/ca/intermediate_server/serial | 2 +- tests/data/ca/intermediate_server/serial.old | 2 +- 12 files changed, 295 insertions(+), 29 deletions(-) create mode 100644 tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem create mode 100644 tests/data/ca/intermediate_server/certs/dufs-https.cert.pem create mode 100644 tests/data/ca/intermediate_server/csr/dufs-https.csr.pem create mode 100644 tests/data/ca/intermediate_server/newcerts/1006.pem create mode 100644 tests/data/ca/intermediate_server/private/dufs-https.key.pem diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 8101c4b06fb22..6f233ee0bd467 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -37,10 +37,8 @@ services: - "/data" volumes: - ${PWD}/tests/data/http-scrape/:/data - - ${PWD}/tests/data/ca/intermediate_server/certs/localhost.cert.pem:/certs/ca.cert.pem - - ${PWD}/tests/data/ca/intermediate_server/private/localhost.key.pem:/certs/ca.key.pem - # - ${PWD}/tests/data/ca/certs/ca.cert.pem:/certs/ca.cert.pem - # - ${PWD}/tests/data/ca/private/ca.key.pem:/certs/ca.key.pem + - ${PWD}/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem:/certs/ca.cert.pem + - ${PWD}/tests/data/ca/intermediate_server/private/dufs-https.key.pem:/certs/ca.key.pem runner: build: context: ${PWD} diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 37f52801b9bd5..938f923090010 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -217,9 +217,11 @@ impl super::HttpScraper for HttpScrapeContext { #[cfg(test)] mod test { - use futures::{poll, StreamExt}; - use std::task::Poll; + //use futures::{poll, StreamExt}; + use futures::StreamExt; + //use std::task::Poll; use tokio::time::{sleep, Duration}; + use tokio::{pin, select}; use warp::Filter; use super::*; @@ -260,11 +262,51 @@ mod test { .await .expect("source should not fail to build"); - sleep(Duration::from_secs(1)).await; + // If a timeout was given, use that, otherwise, use an infinitely long one. + let source_timeout = sleep(Duration::from_millis(3000)); + pin!(source_timeout); - drop(source); + let _source_handle = tokio::spawn(source); - assert_eq!(poll!(rx.next()), Poll::Ready(None)); + loop { + select! 
{ + _ = &mut source_timeout => { + assert!(false, "should error before timing out"); + break + }, + Some(_event) = rx.next() => { + assert!(false, "should not be a valid endpoint"); + break + }, + //result = &mut source => { + // match result { + // Ok(_) => { + // assert!(false, "should not be a valid endpoint"); + // } + // Err(e) => { + // dbg!(e); + // } + // } + // break + //}, + } + } + + //drop(source); + + //sleep(Duration::from_secs(1)).await; + + //let option = source.now_or_never(); + + //assert!(option.is_some()); + + //let result = option.unwrap(); + + //assert!(result.is_err()); + + //drop(source); + + //assert_eq!(poll!(rx.next()), Poll::Ready(None)); } async fn run_test(config: HttpScrapeConfig) -> Vec { @@ -479,16 +521,30 @@ mod integration_tests { async fn unauthorized() { // TODO how to assert failure - // let config = HttpScrapeConfig { - // endpoint: format!("http://dufs-auth:5000/auth/json.json"), + // let source = HttpScrapeConfig { + // endpoint: format!("http://dufs-auth:5000/logs/json.json"), // scrape_interval_secs: 1, // query: None, - // decoding: DeserializerConfig::NativeJson, + // decoding: DeserializerConfig::Json, // framing: default_framing_message_based(), // headers: None, // auth: None, // tls: None, // }; + // // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. + // let (tx, mut rx) = SourceSender::new_test(); + // let context = SourceContext::new_test(tx, None); + + // let source = source + // .build(context) + // .await + // .expect("source should not fail to build"); + + // sleep(Duration::from_secs(1)).await; + + // drop(source); + + // assert_eq!(poll!(rx.next()), Poll::Ready(None)); } #[tokio::test] @@ -516,19 +572,6 @@ mod integration_tests { #[tokio::test] async fn tls() { - // TODO fix this, as it is there is an error from dufs with "Sending fatal alert - // BadRecordMac" - - // and in vector error is: - // - // 2022-08-01T19:11:23.382932Z ERROR vector::internal_events::http_client: HTTP error. error=error trying to connect: error:1416F086:SSL routines:tls_process_server_certificate:certificate verify failed:ssl/statem/statem_clnt.c:1919:: self signed certificate error_type="request_failed" stage="processing" - // 2022-08-01T19:11:23.383435Z ERROR vector::internal_events::http_scrape: HTTP request processing error. 
url=https://dufs-https:5000/logs/json.json error=CallRequest { source: hyper::Error(Connect, Custom { kind: Other, error: ConnectError { error: Error { code: ErrorCode(1), cause: Some(Ssl(ErrorStack([Error { code: 337047686, library: "SSL routines", function: "tls_process_server_certificate", reason: "certificate verify failed", file: "ssl/statem/statem_clnt.c", line: 1919 }]))) }, verify_result: X509VerifyResult { code: 18, error: "self signed certificate" } } }) } error_type="request_failed" stage="receiving" internal_log_rate_secs=10 - - //let cert_path = "tests/data/ca/certs/ca.cert.pem"; - //let key_path = "tests/data/ca/private/ca.key.pem"; - // let cert_path = "tests/data/ca/intermediate_server/certs/localhost.cert.pem"; - // let key_path = "tests/data/ca/intermediate_server/private/localhost.key.pem"; - run_test(HttpScrapeConfig { endpoint: "https://dufs-https:5000/logs/json.json".to_string(), scrape_interval_secs: 1, @@ -538,8 +581,6 @@ mod integration_tests { headers: None, auth: None, tls: Some(TlsConfig { - crt_file: Some(tls::TEST_PEM_CRT_PATH.into()), - key_file: Some(tls::TEST_PEM_KEY_PATH.into()), ca_file: Some(tls::TEST_PEM_CA_PATH.into()), ..Default::default() }), diff --git a/tests/data/Makefile b/tests/data/Makefile index 3dcb7c8beed6b..bba075eeecffc 100644 --- a/tests/data/Makefile +++ b/tests/data/Makefile @@ -79,6 +79,24 @@ ca/intermediate_server/certs/elasticsearch-secure.cert.pem: ca/intermediate_serv ca/intermediate_server/certs/elasticsearch-secure-chain.cert.pem: ca/intermediate_server/certs/ca-chain.cert.pem ca/intermediate_server/certs/elasticsearch-secure.cert.pem cat ca/intermediate_server/certs/elasticsearch-secure.cert.pem ca/intermediate_server/certs/ca-chain.cert.pem > ca/intermediate_server/certs/elasticsearch-secure-chain.cert.pem +ca/intermediate_server/private/dufs-https.key.pem: + openssl genrsa -out ca/intermediate_server/private/dufs-https.key.pem 2048 + +ca/intermediate_server/csr/dufs-https.csr.pem: ca/intermediate_server/private/dufs-https.key.pem + openssl req -config ca/intermediate_server/openssl.cnf \ + -key ca/intermediate_server/private/dufs-https.key.pem \ + -subj '/CN=dufs-https/OU=Vector/O=Datadog/ST=New York/L=New York/C=US' \ + -new -sha256 -out ca/intermediate_server/csr/dufs-https.csr.pem + +ca/intermediate_server/certs/dufs-https.cert.pem: ca/intermediate_server/csr/dufs-https.csr.pem + openssl ca -batch -config ca/intermediate_server/openssl.cnf \ + -extensions server_cert -days 3650 -notext -md sha256 \ + -in ca/intermediate_server/csr/dufs-https.csr.pem \ + -out ca/intermediate_server/certs/dufs-https.cert.pem + +ca/intermediate_server/certs/dufs-https-chain.cert.pem: ca/intermediate_server/certs/ca-chain.cert.pem ca/intermediate_server/certs/dufs-https.cert.pem + cat ca/intermediate_server/certs/dufs-https.cert.pem ca/intermediate_server/certs/ca-chain.cert.pem > ca/intermediate_server/certs/dufs-https-chain.cert.pem + ca/intermediate_server/private/influxdb-v1-tls.key.pem: openssl genrsa -out ca/intermediate_server/private/influxdb-v1-tls.key.pem 2048 diff --git a/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem b/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem new file mode 100644 index 0000000000000..8448fa2055afc --- /dev/null +++ b/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem @@ -0,0 +1,98 @@ +-----BEGIN CERTIFICATE----- +MIIFhzCCA2+gAwIBAgICEAYwDQYJKoZIhvcNAQELBQAwazELMAkGA1UEBhMCVVMx +ETAPBgNVBAgMCE5ldyBZb3JrMRAwDgYDVQQKDAdEYXRhZG9nMQ8wDQYDVQQLDAZW 
+ZWN0b3IxJjAkBgNVBAMMHVZlY3RvciBJbnRlcm1lZGlhdGUgU2VydmVyIENBMB4X +DTIyMDgwMzIwMDgzN1oXDTMyMDczMTIwMDgzN1owazELMAkGA1UEBhMCVVMxETAP +BgNVBAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazEQMA4GA1UECgwHRGF0 +YWRvZzEPMA0GA1UECwwGVmVjdG9yMRMwEQYDVQQDDApkdWZzLWh0dHBzMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmeEAPC9Cn8SC4qubIOfGZxJ8dxHi +cqfx33nJLSaRUiucgYQOzopSjyDRGPU8HjM+sr0G5rlCFskLZ21Cn97S++esKo2I +NO5lUS6erYtzRnZ6a67jHMKQoGnMe0Tf42uXY9aGlt81MCE5R0p7nQbWl++z+4Pn +yKcyzIySbA/Z1DWIo9xlqhjUUTKp9oRUpNo62t3r44jWzk6pigB9ClBUSSWftuMi +QVRIyKUZCOliteJbl2OiyaJaaNCam5XpUcEPeVlbk4cqZU35zEznm8SfQaZnMAo0 +08b97uL7LynFB6iSmsnRi0Z9ODCX7ccMWTFev8m4poKG5UOUdaYSGVIpYQIDAQAB +o4IBMzCCAS8wCQYDVR0TBAIwADARBglghkgBhvhCAQEEBAMCBkAwMwYJYIZIAYb4 +QgENBCYWJE9wZW5TU0wgR2VuZXJhdGVkIFNlcnZlciBDZXJ0aWZpY2F0ZTAdBgNV +HQ4EFgQU59iYQDPf0h4VrUrNQZc/WXPVyHowgZUGA1UdIwSBjTCBioAUPD06L8zV +ggN9mcRY8eHbNu+tDUGhbqRsMGoxEjAQBgNVBAMMCVZlY3RvciBDQTEPMA0GA1UE +CwwGVmVjdG9yMRAwDgYDVQQKDAdEYXRhZG9nMREwDwYDVQQIDAhOZXcgWW9yazER +MA8GA1UEBwwITmV3IFlvcmsxCzAJBgNVBAYTAlVTggIQADAOBgNVHQ8BAf8EBAMC +BaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDQYJKoZIhvcNAQELBQADggIBAH0zSEgM +zMZudeXx0brNog9yKeRHf7fwVAwaUHo9qrlvCPcwA33YDkNofqRTgknQB2PMklis +ekHrp8MUnHIj2OoWNd8Wg5pr+poxuz9l9LTdDjNLGAJt5NHNYFN3PWrzu6+3BHPM +tspIPkQcxwYwZJjbhbu+SRxDMN++GFojkqssJQTG3yz7ZPBu78nFZBBhW1UT4N+X +p76Qqx09SG7tXUcgqcfszTYZpIHwGqhrg57ps+ATWt8V4GqnZPl2w2mLS+fW9IpQ +WcNvXIpjzjrLDMQom+Ml+V8ZOM5jg5f/1WHR+SZij8W3C+e1sQ1qlCBqPlUS3Cqs +u4zCPYuSWiUAJ/dmm/k4N66n/GgczlN2T7TDFnM+RScDzJJklUO7Fo01nQ6Us4WI +xzGuaQ8eHzsJeEAGSoCh8crA01Lm29Ks8qj3Mlztl7fD21VvJJGqPSiCQYpXgP4y +NuI28fWjmBYlNH4REKAr3DE0d6Pi2CuikBFosh9iCRMxpoTBp7sysK1zBlKaQTtE +4R/vqoTSNg/uAt3+QlILFHEp9x3ieRI7LOu64Ly23welR3aoEtxjR7mklFsfXTZ8 +Z6+2Y3Qt4WYlfAJXNpmCqpH8gjWeRTynZWtYyJJI0PHC1Dn0zHwLjgcBgk94Xk+S +QBJYZLvZXxN/Pjfig2geBTG5DvjvAIWH06J4 +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIFtzCCA5+gAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwajESMBAGA1UEAwwJVmVj +dG9yIENBMQ8wDQYDVQQLDAZWZWN0b3IxEDAOBgNVBAoMB0RhdGFkb2cxETAPBgNV +BAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazELMAkGA1UEBhMCVVMwHhcN +MjIwNjA3MjIyNzUzWhcNMzIwNjA0MjIyNzUzWjBrMQswCQYDVQQGEwJVUzERMA8G +A1UECAwITmV3IFlvcmsxEDAOBgNVBAoMB0RhdGFkb2cxDzANBgNVBAsMBlZlY3Rv +cjEmMCQGA1UEAwwdVmVjdG9yIEludGVybWVkaWF0ZSBTZXJ2ZXIgQ0EwggIiMA0G +CSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCy/mB0/ZwfgKrSZPQIFaGPtRA9xL2N +o2SsHndZ8k2XOCV225Hb2fzNH+o2WGNSjwmGjLP/uXb47KH0cHCAyFGzSjp++8/O +zoZaFiO0P5El02hQxmoabO3Cqu/N62EFsLfpSM828JM6YOn9p+WXUDn1+YPNoOOE +H142p4/RjFnXNHkzR3geXU4Pfi3KXDrMi8vK42lDqXPLPs6rhreBAfQ2dsYyqhz6 +tg6FzZuXxxzEYyYtNgGh+zTji99WCBMLbCmRcDurRjdTDO7m4O3PrwbGUy0xdLeb +HJiNGvUDCPH4bfwLiNqwVIZY38RBCAqbCnrqRhDaZIfAUev4mq3Kqh6KUeO/U7Vx +/5J5rL5ApREKOfWPATHMprBuEU2rs3N+MPBA04HoiFlu311urCxVEA1qsZCTkoCg +GHuDIVSU4E4hT4co95/J0to4zWgPlfPg1+cXyU8lAIMe7JdCGkG9cDe7Umw/GSbt +ZdoCMQZ6WyyiW2Hw+7sFD3V3VzYa5YA/rjKZRduPmGWKrs+mAa5J5pM2M22rrjbd +EpfTHWLS9s6cPN3/jxpCxn6Hv/KhIYRAcIterugag1+clvS1ajVjxBRavOxPBsf+ +hYdh7S5NTZnT98gjkc3yOuGQm7BPtXau+IYZRlWcB0dJ4/E2P69hmWQezSo9VVWh +5/K1RkbPvqTGZQIDAQABo2YwZDAdBgNVHQ4EFgQUPD06L8zVggN9mcRY8eHbNu+t +DUEwHwYDVR0jBBgwFoAURTWK6ARqnZkz8rktUc5PrtasIh8wEgYDVR0TAQH/BAgw +BgEB/wIBADAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAGqaGBuL +2J6Na8RHx/GmSeuZFiVcWhmd/I9bVpeMSYHSZujA2nay6OGaUYs0Lq/G5OKgsuT9 +AIHnsn7VUE1zqoDfXac/K8sXlOig8if7rTb+06jgymaP1YSELg3R+pBsdkZnXVil +izh/9FvzoyV+QQlIhojqCIybVFgxa1XFHq4QCPhDfwkg+tp9RctfwNmWgsJ63H19 +RmxN+H2xIrySvObwXnB4j6D4wvgu468QXQMEuSsnLcIQFg6Zteqe8fixbqTiOTBf +Dk1k+EpB9VMEkIPvMdfa48vseXdBEe6Ma9zGuJC76q4q1ZapVLTvOUP5Y24khlgd 
+cj5tfP7o7yc6HqymfXAcD1lzP2JQhqaRxA4I18Nrd+aHi+G1EM2c3cicuD3n6Iw9 +9oqdCwmMfS25fv5cyA5B6hRusIZ9wRopTi7at+JHl0GIt/FelaTYI7kRmAqgakQe +oEKLpXcH8lRJW802DmXm7ka4eQzwxa7Ngyf8O+JOFtGO0+EshuLJovxiPl6IyLyG +NJ/dHq3ad+46YVManbHdyjHxgT5PSvJFkq0Yluvf44NIyP5QRTCAvfH76bu7hXgS +QoQj5t5ILn6meQRTR79r2iwpQTanPLTEdoZvmrE4TeUBev9BA5KpiPPA3i3ZF/oV +0EYorXCNri7M/jylGW7AuWvNUyaVR6xgxAn6 +-----END CERTIFICATE----- +-----BEGIN CERTIFICATE----- +MIIFujCCA6KgAwIBAgIJAKhPL9BkNaFGMA0GCSqGSIb3DQEBCwUAMGoxEjAQBgNV +BAMMCVZlY3RvciBDQTEPMA0GA1UECwwGVmVjdG9yMRAwDgYDVQQKDAdEYXRhZG9n +MREwDwYDVQQIDAhOZXcgWW9yazERMA8GA1UEBwwITmV3IFlvcmsxCzAJBgNVBAYT +AlVTMB4XDTIyMDYwNzIyMjc1MloXDTQyMDYwMjIyMjc1MlowajESMBAGA1UEAwwJ +VmVjdG9yIENBMQ8wDQYDVQQLDAZWZWN0b3IxEDAOBgNVBAoMB0RhdGFkb2cxETAP +BgNVBAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazELMAkGA1UEBhMCVVMw +ggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC9c1T+NXTNmqiiV36NSEJt +7mo0cyv8Byk2ZGdC85vHBm45QDY5USoh0vgonzPpWgSMggPn1WbR0f1y+LBwXdlM ++ZyZh2RVVeUrSjJ88lLHVn4DfywpdDkwQaFj1VmOsj2I9rMMrgc5x5n1Hj7lwZ+t +uPVSAGmgKp4iFfzLph9r/rjP1TUAnVUComfTUVS+Gd7zoGPOc14cMJXG6g2P2aAU +P6dg5uQlTxRmagnlx7bwm3lRwv6LMtnAdnjwBDBxr933nucAnk21GgE92GejiO3Z +OwlzIdzBI23lPcWi5pq+vCTgAArNq24W1Ha+7Jn5QewNTGKFyyYAJetZAwCUR8QS +Ip++2GE2pNhaGqcV5u1Tbwl02eD6p2qRqjfgLxmb+aC6xfl0n9kiFGPZppjCqDEW +sw+gX66nf+qxZVRWpJon2kWcFvhTnLqoa3T3+9+KIeamz2lW6wxMnki/Co2EA1Wa +mmedaUUcRPCgMx9aCktRkMyH6bEY8/vfJ07juxUsszOc46T00Scmn6Vkuo9Uc3Kf +2Q2N6Wo4jtyAiMO4gAwq5kzzpBAhNgRfLHOb83r2gAUj2Y4Vln/UUR/KR8ZbJi4i +r1BjX16Lz3yblJXXb1lp4uZynlbHNaAevXyGlRqHddM2ykKtAX/vgJcZRGSvms11 +uce/cqzrzx60AhpLRma5CwIDAQABo2MwYTAdBgNVHQ4EFgQURTWK6ARqnZkz8rkt +Uc5PrtasIh8wHwYDVR0jBBgwFoAURTWK6ARqnZkz8rktUc5PrtasIh8wDwYDVR0T +AQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQADggIBAEf5 +TR3hq/DtSAmsYotu1lAWz/OlTpG+7AdqSOHB878X4ETN3xaQ+KWvSwvf0K70ZDTV +tFOTh/r43cpzPifPKd1P+2ctnQEzrBtAacvyETLq1ABRK9VJOtfJ6Xk5KZXPhKdY +t353PQgBgW8YzQ2adq2B7FtgIlX7f1DIndjcMZBbolETR6xt9QwB/UnPI7Mwt01T ++bCBhr1fWAbZ4YAMlQ0xRam4qUOTjxgfmePrmSrv4HO7cXHMsRMLiXk+BLcx959/ +K/B6xzpzn6366Eqnqlo/uDiMpo5ud2I/Snz5PduB6oLztPMEf/8RmkG5tpHXYdWr +tM64WqNGO+ikluIrrtYvtyZS4DfsLAMfMYZcxX/Uw56gHo0i2c8I6+6JvGWdvOJ0 +FjrsKeIQoRlV77z025kI4V9jKi3XNMEsAIH+W7KNSut0X80yX7SugvQGoe0GDkXu +0fy8hMC3uTN2LEycYFRRfoIeKPLi6OZFK0PdS2E15d8PEU3n3W4eBCPgMtmiOKLY +d8QNBC8XLAuBoK9R8luCJpOJWUcFXjLpjcDab4V2hKTuAs+GQyDh/Xx4wF1yHX0r +zIkyN0EkOD/SvD8X4uFaM4mdsAh+ucn4ryUV7i5PgvDM9z4InHAMAee1ebBl0U+h ++NzMWF5c5OwxD5o6/Wh1HopmzJiVNT2v9u0kHT/f +-----END CERTIFICATE----- diff --git a/tests/data/ca/intermediate_server/certs/dufs-https.cert.pem b/tests/data/ca/intermediate_server/certs/dufs-https.cert.pem new file mode 100644 index 0000000000000..ee90be5c2b664 --- /dev/null +++ b/tests/data/ca/intermediate_server/certs/dufs-https.cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFhzCCA2+gAwIBAgICEAYwDQYJKoZIhvcNAQELBQAwazELMAkGA1UEBhMCVVMx +ETAPBgNVBAgMCE5ldyBZb3JrMRAwDgYDVQQKDAdEYXRhZG9nMQ8wDQYDVQQLDAZW +ZWN0b3IxJjAkBgNVBAMMHVZlY3RvciBJbnRlcm1lZGlhdGUgU2VydmVyIENBMB4X +DTIyMDgwMzIwMDgzN1oXDTMyMDczMTIwMDgzN1owazELMAkGA1UEBhMCVVMxETAP +BgNVBAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazEQMA4GA1UECgwHRGF0 +YWRvZzEPMA0GA1UECwwGVmVjdG9yMRMwEQYDVQQDDApkdWZzLWh0dHBzMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmeEAPC9Cn8SC4qubIOfGZxJ8dxHi +cqfx33nJLSaRUiucgYQOzopSjyDRGPU8HjM+sr0G5rlCFskLZ21Cn97S++esKo2I +NO5lUS6erYtzRnZ6a67jHMKQoGnMe0Tf42uXY9aGlt81MCE5R0p7nQbWl++z+4Pn +yKcyzIySbA/Z1DWIo9xlqhjUUTKp9oRUpNo62t3r44jWzk6pigB9ClBUSSWftuMi +QVRIyKUZCOliteJbl2OiyaJaaNCam5XpUcEPeVlbk4cqZU35zEznm8SfQaZnMAo0 
+08b97uL7LynFB6iSmsnRi0Z9ODCX7ccMWTFev8m4poKG5UOUdaYSGVIpYQIDAQAB +o4IBMzCCAS8wCQYDVR0TBAIwADARBglghkgBhvhCAQEEBAMCBkAwMwYJYIZIAYb4 +QgENBCYWJE9wZW5TU0wgR2VuZXJhdGVkIFNlcnZlciBDZXJ0aWZpY2F0ZTAdBgNV +HQ4EFgQU59iYQDPf0h4VrUrNQZc/WXPVyHowgZUGA1UdIwSBjTCBioAUPD06L8zV +ggN9mcRY8eHbNu+tDUGhbqRsMGoxEjAQBgNVBAMMCVZlY3RvciBDQTEPMA0GA1UE +CwwGVmVjdG9yMRAwDgYDVQQKDAdEYXRhZG9nMREwDwYDVQQIDAhOZXcgWW9yazER +MA8GA1UEBwwITmV3IFlvcmsxCzAJBgNVBAYTAlVTggIQADAOBgNVHQ8BAf8EBAMC +BaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDQYJKoZIhvcNAQELBQADggIBAH0zSEgM +zMZudeXx0brNog9yKeRHf7fwVAwaUHo9qrlvCPcwA33YDkNofqRTgknQB2PMklis +ekHrp8MUnHIj2OoWNd8Wg5pr+poxuz9l9LTdDjNLGAJt5NHNYFN3PWrzu6+3BHPM +tspIPkQcxwYwZJjbhbu+SRxDMN++GFojkqssJQTG3yz7ZPBu78nFZBBhW1UT4N+X +p76Qqx09SG7tXUcgqcfszTYZpIHwGqhrg57ps+ATWt8V4GqnZPl2w2mLS+fW9IpQ +WcNvXIpjzjrLDMQom+Ml+V8ZOM5jg5f/1WHR+SZij8W3C+e1sQ1qlCBqPlUS3Cqs +u4zCPYuSWiUAJ/dmm/k4N66n/GgczlN2T7TDFnM+RScDzJJklUO7Fo01nQ6Us4WI +xzGuaQ8eHzsJeEAGSoCh8crA01Lm29Ks8qj3Mlztl7fD21VvJJGqPSiCQYpXgP4y +NuI28fWjmBYlNH4REKAr3DE0d6Pi2CuikBFosh9iCRMxpoTBp7sysK1zBlKaQTtE +4R/vqoTSNg/uAt3+QlILFHEp9x3ieRI7LOu64Ly23welR3aoEtxjR7mklFsfXTZ8 +Z6+2Y3Qt4WYlfAJXNpmCqpH8gjWeRTynZWtYyJJI0PHC1Dn0zHwLjgcBgk94Xk+S +QBJYZLvZXxN/Pjfig2geBTG5DvjvAIWH06J4 +-----END CERTIFICATE----- diff --git a/tests/data/ca/intermediate_server/csr/dufs-https.csr.pem b/tests/data/ca/intermediate_server/csr/dufs-https.csr.pem new file mode 100644 index 0000000000000..bf0b64de3a129 --- /dev/null +++ b/tests/data/ca/intermediate_server/csr/dufs-https.csr.pem @@ -0,0 +1,17 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIICsDCCAZgCAQAwazETMBEGA1UEAwwKZHVmcy1odHRwczEPMA0GA1UECwwGVmVj +dG9yMRAwDgYDVQQKDAdEYXRhZG9nMREwDwYDVQQIDAhOZXcgWW9yazERMA8GA1UE +BwwITmV3IFlvcmsxCzAJBgNVBAYTAlVTMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A +MIIBCgKCAQEAmeEAPC9Cn8SC4qubIOfGZxJ8dxHicqfx33nJLSaRUiucgYQOzopS +jyDRGPU8HjM+sr0G5rlCFskLZ21Cn97S++esKo2INO5lUS6erYtzRnZ6a67jHMKQ +oGnMe0Tf42uXY9aGlt81MCE5R0p7nQbWl++z+4PnyKcyzIySbA/Z1DWIo9xlqhjU +UTKp9oRUpNo62t3r44jWzk6pigB9ClBUSSWftuMiQVRIyKUZCOliteJbl2OiyaJa +aNCam5XpUcEPeVlbk4cqZU35zEznm8SfQaZnMAo008b97uL7LynFB6iSmsnRi0Z9 +ODCX7ccMWTFev8m4poKG5UOUdaYSGVIpYQIDAQABoAAwDQYJKoZIhvcNAQELBQAD +ggEBAGPTk8jUFhgfFOiy/aK7DlRoHPQfbwir84ittwehgHNcmXF9ijmDWfpZk42a +lCcmGRr7m6IHtgbb6WRTts0FDsX1qk3iFCSaLyu2HdTqnscmg7MMniTKpa2JK0nC +/Hy72EvEnb2YiP6iVFVykaNDGJqreG6KqZs+OIkbAW7BLPzazMGgFnsVV/DSx60W ++0FieL+Suun+gBp5mDe6W9K1BeJwun3UPN196gTgNGJWvobo1kzVKiPBu3JBtR+c +4njLlOgSWDh97kQn7XmPFcvNa3P4zd3c71M7H4ZZlk70EBCWOyLL5pByYFbLhSTp +IdAGKEEsWEmIJYeAQXRsrMBtLtg= +-----END CERTIFICATE REQUEST----- diff --git a/tests/data/ca/intermediate_server/index.txt b/tests/data/ca/intermediate_server/index.txt index d266d78d10d97..e05b3ec98edb8 100644 --- a/tests/data/ca/intermediate_server/index.txt +++ b/tests/data/ca/intermediate_server/index.txt @@ -4,3 +4,4 @@ V 320613194628Z 1002 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/C V 320613194901Z 1003 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=influxdb-v1-tls V 320613195026Z 1004 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=postgres V 320613195253Z 1005 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=kafka +V 320731200837Z 1006 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=dufs-https diff --git a/tests/data/ca/intermediate_server/index.txt.old b/tests/data/ca/intermediate_server/index.txt.old index a75e5703009e6..d266d78d10d97 100644 --- a/tests/data/ca/intermediate_server/index.txt.old +++ b/tests/data/ca/intermediate_server/index.txt.old @@ -3,3 +3,4 @@ V 
320613192819Z 1001 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/C V 320613194628Z 1002 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=infuxdb-v1-tls V 320613194901Z 1003 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=influxdb-v1-tls V 320613195026Z 1004 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=postgres +V 320613195253Z 1005 unknown /C=US/ST=New York/L=New York/O=Datadog/OU=Vector/CN=kafka diff --git a/tests/data/ca/intermediate_server/newcerts/1006.pem b/tests/data/ca/intermediate_server/newcerts/1006.pem new file mode 100644 index 0000000000000..ee90be5c2b664 --- /dev/null +++ b/tests/data/ca/intermediate_server/newcerts/1006.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFhzCCA2+gAwIBAgICEAYwDQYJKoZIhvcNAQELBQAwazELMAkGA1UEBhMCVVMx +ETAPBgNVBAgMCE5ldyBZb3JrMRAwDgYDVQQKDAdEYXRhZG9nMQ8wDQYDVQQLDAZW +ZWN0b3IxJjAkBgNVBAMMHVZlY3RvciBJbnRlcm1lZGlhdGUgU2VydmVyIENBMB4X +DTIyMDgwMzIwMDgzN1oXDTMyMDczMTIwMDgzN1owazELMAkGA1UEBhMCVVMxETAP +BgNVBAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazEQMA4GA1UECgwHRGF0 +YWRvZzEPMA0GA1UECwwGVmVjdG9yMRMwEQYDVQQDDApkdWZzLWh0dHBzMIIBIjAN +BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAmeEAPC9Cn8SC4qubIOfGZxJ8dxHi +cqfx33nJLSaRUiucgYQOzopSjyDRGPU8HjM+sr0G5rlCFskLZ21Cn97S++esKo2I +NO5lUS6erYtzRnZ6a67jHMKQoGnMe0Tf42uXY9aGlt81MCE5R0p7nQbWl++z+4Pn +yKcyzIySbA/Z1DWIo9xlqhjUUTKp9oRUpNo62t3r44jWzk6pigB9ClBUSSWftuMi +QVRIyKUZCOliteJbl2OiyaJaaNCam5XpUcEPeVlbk4cqZU35zEznm8SfQaZnMAo0 +08b97uL7LynFB6iSmsnRi0Z9ODCX7ccMWTFev8m4poKG5UOUdaYSGVIpYQIDAQAB +o4IBMzCCAS8wCQYDVR0TBAIwADARBglghkgBhvhCAQEEBAMCBkAwMwYJYIZIAYb4 +QgENBCYWJE9wZW5TU0wgR2VuZXJhdGVkIFNlcnZlciBDZXJ0aWZpY2F0ZTAdBgNV +HQ4EFgQU59iYQDPf0h4VrUrNQZc/WXPVyHowgZUGA1UdIwSBjTCBioAUPD06L8zV +ggN9mcRY8eHbNu+tDUGhbqRsMGoxEjAQBgNVBAMMCVZlY3RvciBDQTEPMA0GA1UE +CwwGVmVjdG9yMRAwDgYDVQQKDAdEYXRhZG9nMREwDwYDVQQIDAhOZXcgWW9yazER +MA8GA1UEBwwITmV3IFlvcmsxCzAJBgNVBAYTAlVTggIQADAOBgNVHQ8BAf8EBAMC +BaAwEwYDVR0lBAwwCgYIKwYBBQUHAwEwDQYJKoZIhvcNAQELBQADggIBAH0zSEgM +zMZudeXx0brNog9yKeRHf7fwVAwaUHo9qrlvCPcwA33YDkNofqRTgknQB2PMklis +ekHrp8MUnHIj2OoWNd8Wg5pr+poxuz9l9LTdDjNLGAJt5NHNYFN3PWrzu6+3BHPM +tspIPkQcxwYwZJjbhbu+SRxDMN++GFojkqssJQTG3yz7ZPBu78nFZBBhW1UT4N+X +p76Qqx09SG7tXUcgqcfszTYZpIHwGqhrg57ps+ATWt8V4GqnZPl2w2mLS+fW9IpQ +WcNvXIpjzjrLDMQom+Ml+V8ZOM5jg5f/1WHR+SZij8W3C+e1sQ1qlCBqPlUS3Cqs +u4zCPYuSWiUAJ/dmm/k4N66n/GgczlN2T7TDFnM+RScDzJJklUO7Fo01nQ6Us4WI +xzGuaQ8eHzsJeEAGSoCh8crA01Lm29Ks8qj3Mlztl7fD21VvJJGqPSiCQYpXgP4y +NuI28fWjmBYlNH4REKAr3DE0d6Pi2CuikBFosh9iCRMxpoTBp7sysK1zBlKaQTtE +4R/vqoTSNg/uAt3+QlILFHEp9x3ieRI7LOu64Ly23welR3aoEtxjR7mklFsfXTZ8 +Z6+2Y3Qt4WYlfAJXNpmCqpH8gjWeRTynZWtYyJJI0PHC1Dn0zHwLjgcBgk94Xk+S +QBJYZLvZXxN/Pjfig2geBTG5DvjvAIWH06J4 +-----END CERTIFICATE----- diff --git a/tests/data/ca/intermediate_server/private/dufs-https.key.pem b/tests/data/ca/intermediate_server/private/dufs-https.key.pem new file mode 100644 index 0000000000000..9a9d704cb1244 --- /dev/null +++ b/tests/data/ca/intermediate_server/private/dufs-https.key.pem @@ -0,0 +1,28 @@ +-----BEGIN PRIVATE KEY----- +MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQCZ4QA8L0KfxILi +q5sg58ZnEnx3EeJyp/HfecktJpFSK5yBhA7OilKPINEY9TweMz6yvQbmuUIWyQtn +bUKf3tL756wqjYg07mVRLp6ti3NGdnprruMcwpCgacx7RN/ja5dj1oaW3zUwITlH +SnudBtaX77P7g+fIpzLMjJJsD9nUNYij3GWqGNRRMqn2hFSk2jra3evjiNbOTqmK +AH0KUFRJJZ+24yJBVEjIpRkI6WK14luXY6LJolpo0JqblelRwQ95WVuThyplTfnM +TOebxJ9BpmcwCjTTxv3u4vsvKcUHqJKaydGLRn04MJftxwxZMV6/ybimgoblQ5R1 +phIZUilhAgMBAAECggEAAUX8Y4Mcm7KGRsFBdQyV9rpkxJ4lmpSgisc44/fCvMRW 
+9OIWi/JXFo919i0+rx1Lcoff677dY3o3GZxXhc0pP85op/192A+fpQQBaibnmvir +W1gaJwhxJp1ZhNegEVi4QFTrzjR7UzJN2rTXawg9ybTBGjQQPKKC6BsYXG6RJea8 +ORboOxpVfywjLKymRjwXqKoLdZD5Hp7PQQQ0gWwmDgI1V4zM/+JmIQ+z1XUstjle +KDBiEJ3y5H5/qYy8tyKfWiikW3cYXDpSaSlMGUpEdDWCEnp5mJeAGwQ5GC+80/nd +Eio+Oq5o1nxXjhWCwVWqAtq9CTOgiPqRtabNc0LztQKBgQDMdaUJa1+VsU70qSz3 +G3VAFOoPUSTrjy+XKqmiIo5af/zV9jmcMYPlu+kpPrvdyF82+vhdN6uRQSln4SxX +yTTIVoT365dHBQWka9nNgL6j7Qb832vOXlJpBXhHdP1OLrl6ege5EvN7xsHEIsto +e01GJwviHwAUDrpC2nCng0XnewKBgQDAqz5ybQfy3Cr6n2am6f0GX9GAjSaz5U+X +4giQ0HoAUezU4XrY+UhvQ0FB2R8e7/4Bu3nI8/u81Iw35c7uRQt/QQrm1LlfNhtY +nDiasZY6/Necq+9lzEOiCsayRnJ50JU6ameX9HuDTVDpnrzvdxqwWNIHpjGKT6SH +eKpuzd9t0wKBgG9zQX8c9IGYoT1kaopYgVVpER1AUivFZV+1dbpXs9lJIv1OtqCZ +rRgxkitVcvq2GpDRxUiq6+7F4lgpug4wM4FxCBJMba3c4vJ5h8QphcG9lq2I7C8V +4z40KRhqgRl5BsnocwVDe03hTgYoIVDWxYyUBDpCQ7yrOoTupbHqMNS5AoGAO4+0 +ymLyZh2yG7BTZ55GXkYY2gjIwixNosJ5q2sHjDnHYTgjZhgKUHsWHWklucavW5G9 +YGYsmH+MqWsdwYMOvtQ4rotskwaDBa2lwgzMSIgJj6GHbIztC5cRPeORWB8nQtvK +Equ90yV/sfkkp1LGqWWL6Fl0TzG3xzmCVqbGP4kCgYEAoRByZusXWJ261WOocXlw +n7eHzn5OnxHkica6MqRgVYOtQ0sbYTT7mVh3/2sm18Kufy7h40VyvBdHQZQXdSxf +AHgJwEO65iKJkGr7PPDUNgRUmtahQ5T08y2l2ibUnNoLwEyeqoE0hV6U8pBDrTIv +bIns5WH3+D0+AIMN8Zig2IQ= +-----END PRIVATE KEY----- diff --git a/tests/data/ca/intermediate_server/serial b/tests/data/ca/intermediate_server/serial index 9540e56f97ca2..fb35a14c02716 100644 --- a/tests/data/ca/intermediate_server/serial +++ b/tests/data/ca/intermediate_server/serial @@ -1 +1 @@ -1006 +1007 diff --git a/tests/data/ca/intermediate_server/serial.old b/tests/data/ca/intermediate_server/serial.old index 49bc2728c7a08..9540e56f97ca2 100644 --- a/tests/data/ca/intermediate_server/serial.old +++ b/tests/data/ca/intermediate_server/serial.old @@ -1 +1 @@ -1005 +1006 From 4f1184c35958f8f45f1db0b0957baa8738d03fbc Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 22:24:41 +0000 Subject: [PATCH 25/50] headers and shutdown int tests --- lib/codecs/src/decoding/mod.rs | 2 +- src/sources/http_scrape/scrape.rs | 221 +++++++++++++++++++++--------- 2 files changed, 155 insertions(+), 68 deletions(-) diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index b8226853823e3..5c4e9aac5a860 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -340,7 +340,7 @@ impl DeserializerConfig { character_delimited: CharacterDelimitedDecoderOptions { delimiter: b',', - max_length: None, + max_length: Some(usize::MAX), }, }, ) => "application/json", diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 938f923090010..37d4255360663 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -217,20 +217,19 @@ impl super::HttpScraper for HttpScrapeContext { #[cfg(test)] mod test { + use codecs::decoding::{CharacterDelimitedDecoderOptions, NewlineDelimitedDecoderOptions}; //use futures::{poll, StreamExt}; - use futures::StreamExt; + //use futures::StreamExt; //use std::task::Poll; - use tokio::time::{sleep, Duration}; - use tokio::{pin, select}; + //use tokio::time::{sleep, Duration}; + use tokio::time::Duration; + //use tokio::{pin, select}; use warp::Filter; use super::*; - use crate::{ - test_util::{ - components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, test_generate_config, - }, - SourceSender, + use crate::test_util::{ + components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, + next_addr, test_generate_config, }; #[test] @@ -242,55 +241,55 @@ mod test { // a 
way, since if this is run live it generates an HTTP error. #[tokio::test] async fn invalid_endpoint() { - let source = HttpScrapeConfig { - endpoint: "http://nope".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: default_decoding(), - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }; - - // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. - let (tx, mut rx) = SourceSender::new_test(); - let context = SourceContext::new_test(tx, None); - - let source = source - .build(context) - .await - .expect("source should not fail to build"); + // let source = HttpScrapeConfig { + // endpoint: "http://nope".to_string(), + // scrape_interval_secs: 1, + // query: None, + // decoding: default_decoding(), + // framing: default_framing_message_based(), + // headers: None, + // auth: None, + // tls: None, + // }; - // If a timeout was given, use that, otherwise, use an infinitely long one. - let source_timeout = sleep(Duration::from_millis(3000)); - pin!(source_timeout); + // // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. + // let (tx, mut rx) = SourceSender::new_test(); + // let context = SourceContext::new_test(tx, None); - let _source_handle = tokio::spawn(source); + // let source = source + // .build(context) + // .await + // .expect("source should not fail to build"); - loop { - select! { - _ = &mut source_timeout => { - assert!(false, "should error before timing out"); - break - }, - Some(_event) = rx.next() => { - assert!(false, "should not be a valid endpoint"); - break - }, - //result = &mut source => { - // match result { - // Ok(_) => { - // assert!(false, "should not be a valid endpoint"); - // } - // Err(e) => { - // dbg!(e); - // } - // } - // break - //}, - } - } + // // If a timeout was given, use that, otherwise, use an infinitely long one. + // let source_timeout = sleep(Duration::from_millis(3000)); + // pin!(source_timeout); + + // let _source_handle = tokio::spawn(source); + + // loop { + // select! 
{ + // _ = &mut source_timeout => { + // assert!(false, "should error before timing out"); + // break + // }, + // Some(_event) = rx.next() => { + // assert!(false, "should not be a valid endpoint"); + // break + // }, + // //result = &mut source => { + // // match result { + // // Ok(_) => { + // // assert!(false, "should not be a valid endpoint"); + // // } + // // Err(e) => { + // // dbg!(e); + // // } + // // } + // // break + // //}, + // } + // } //drop(source); @@ -324,6 +323,7 @@ mod test { async fn bytes_decoding() { let in_addr = next_addr(); + // validates the Accept header is set correctly for the Bytes codec let dummy_endpoint = warp::path!("endpoint") .and(warp::header::exact("Accept", "text/plain")) .map(|| r#"A plain text event"#); @@ -344,11 +344,12 @@ mod test { } #[tokio::test] - async fn json_decoding() { + async fn json_decoding_newline_delimited() { let in_addr = next_addr(); + // validates the Content-Type is set correctly for the Json codec let dummy_endpoint = warp::path!("endpoint") - .and(warp::header::exact("Accept", "text/plain")) + .and(warp::header::exact("Accept", "application/x-ndjson")) .map(|| r#"{"data" : "foo"}"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); @@ -358,7 +359,38 @@ mod test { scrape_interval_secs: 1, query: None, decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), + framing: FramingConfig::NewlineDelimited { + newline_delimited: NewlineDelimitedDecoderOptions::default(), + }, + headers: None, + auth: None, + tls: None, + }) + .await; + } + + #[tokio::test] + async fn json_decoding_character_delimited() { + let in_addr = next_addr(); + + // validates the Content-Type is set correctly for the Json codec + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "application/json")) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_test(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: FramingConfig::CharacterDelimited { + character_delimited: CharacterDelimitedDecoderOptions { + delimiter: b',', + max_length: Some(usize::MAX), + }, + }, headers: None, auth: None, tls: None, @@ -423,16 +455,46 @@ mod test { assert_eq!(got, expected); } } + + #[tokio::test] + async fn headers_applied() { + let in_addr = next_addr(); + let header_key = "f00"; + let header_val = "bazz"; + + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "text/plain")) + .and(warp::header::exact(header_key, header_val)) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_test(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: 1, + query: None, + decoding: default_decoding(), + framing: default_framing_message_based(), + headers: Some(HashMap::from([( + header_key.to_string(), + header_val.to_string(), + )])), + auth: None, + tls: None, + }) + .await; + } } #[cfg(all(test, feature = "http-scrape-integration-tests"))] mod integration_tests { - use tokio::time::Duration; + use tokio::time::{Duration, Instant}; use super::*; use crate::{ + config::ComponentKey, test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - tls, + tls, SourceSender, }; async fn run_test(config: HttpScrapeConfig) -> Vec { @@ -519,7 +581,7 @@ mod integration_tests { #[tokio::test] async fn unauthorized() { - // TODO how to assert 
failure + // TODO how to surface the failure for validation // let source = HttpScrapeConfig { // endpoint: format!("http://dufs-auth:5000/logs/json.json"), @@ -565,11 +627,6 @@ mod integration_tests { .await; } - #[tokio::test] - async fn headers() { - // TODO - is this worthy of testing and how to verify ? - } - #[tokio::test] async fn tls() { run_test(HttpScrapeConfig { @@ -590,6 +647,36 @@ mod integration_tests { #[tokio::test] async fn shutdown() { - // TODO - is this worthy of testing and how to verify + let source_id = ComponentKey::from("http_scrape_shutdown"); + let source = HttpScrapeConfig { + endpoint: "http://dufs:5000/logs/json.json".to_string(), + scrape_interval_secs: 1, + query: None, + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: None, + auth: None, + tls: None, + }; + + // build the context for the source and get a SourceShutdownCoordinator to signal with + let (tx, _rx) = SourceSender::new_test(); + let (context, mut shutdown) = SourceContext::new_shutdown(&source_id, tx); + + // start source + let source = source + .build(context) + .await + .expect("source should not fail to build"); + let source_handle = tokio::spawn(source); + + // signal the source to shut down + let deadline = Instant::now() + Duration::from_secs(1); + let shutdown_complete = shutdown.shutdown_source(&source_id, deadline); + let shutdown_success = shutdown_complete.await; + assert!(shutdown_success); + + // Ensure source actually shut down successfully. + let _ = source_handle.await.unwrap(); } } From 18f74482443a54c84d53f34af2e4f3ed8e112133 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Wed, 3 Aug 2022 22:44:56 +0000 Subject: [PATCH 26/50] add missingtarget volumes to int tests --- scripts/integration/docker-compose.axiom.yml | 2 ++ scripts/integration/docker-compose.chronicle.yml | 2 ++ scripts/integration/docker-compose.http-scrape.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/scripts/integration/docker-compose.axiom.yml b/scripts/integration/docker-compose.axiom.yml index 5d56354f9699d..8b6a4d6b68f6a 100644 --- a/scripts/integration/docker-compose.axiom.yml +++ b/scripts/integration/docker-compose.axiom.yml @@ -51,10 +51,12 @@ services: - axiom-db volumes: - ${PWD}:/code + - target:/code/target - cargogit:/usr/local/cargo/git - cargoregistry:/usr/local/cargo/registry volumes: + target: {} cargogit: {} cargoregistry: {} postgres_data: {} diff --git a/scripts/integration/docker-compose.chronicle.yml b/scripts/integration/docker-compose.chronicle.yml index d28095030fa93..6a1ea6bf77af9 100644 --- a/scripts/integration/docker-compose.chronicle.yml +++ b/scripts/integration/docker-compose.chronicle.yml @@ -31,11 +31,13 @@ services: - CHRONICLE_ADDRESS=http://chronicle-emulator:3000 volumes: - ${PWD}:/code + - target:/code/target - cargogit:/usr/local/cargo/git - cargoregistry:/usr/local/cargo/registry - ${PWD}/scripts/integration/chronicleauth.json:/chronicleauth.json - ${PWD}/scripts/integration/invalidchronicleauth.json:/invalidchronicleauth.json volumes: + target: {} cargogit: {} cargoregistry: {} diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 6f233ee0bd467..777d241611d27 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -64,6 +64,7 @@ services: - dufs-https volumes: - ${PWD}:/code + - target:/code/target - cargogit:/usr/local/cargo/git - cargoregistry:/usr/local/cargo/registry @@ -71,5 
+72,6 @@ networks: backend: {} volumes: + target: {} cargogit: {} cargoregistry: {} From aaeac7482c81db4f72755164e977bdef61fa35b5 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Thu, 4 Aug 2022 21:32:25 +0000 Subject: [PATCH 27/50] added error path integration tests --- .../docker-compose.http-scrape.yml | 2 +- src/sources/http_scrape/integration_tests.rs | 229 ++++++++ src/sources/http_scrape/mod.rs | 6 + src/sources/http_scrape/scrape.rs | 493 +----------------- src/sources/http_scrape/tests.rs | 232 +++++++++ src/test_util/components.rs | 71 ++- src/tls/mod.rs | 8 +- src/tls/settings.rs | 8 +- 8 files changed, 573 insertions(+), 476 deletions(-) create mode 100644 src/sources/http_scrape/integration_tests.rs create mode 100644 src/sources/http_scrape/tests.rs diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index 777d241611d27..b62612d8abc06 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -57,7 +57,7 @@ services: - "--features" - "http-scrape-integration-tests" - "--lib" - - "sources::http_scrape::scrape::" + - "sources::http_scrape::" depends_on: - dufs - dufs-auth diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs new file mode 100644 index 0000000000000..211153ae4829c --- /dev/null +++ b/src/sources/http_scrape/integration_tests.rs @@ -0,0 +1,229 @@ +//! Integration tests for http_scrape source. +//! The container configuration file is `docker-compose-.http_scrape.yml` +//! It leverages a static file server which serves the files in tests/data/http-scrape + +use tokio::time::{Duration, Instant}; + +use crate::{ + config::{ComponentKey, SourceConfig, SourceContext}, + http::Auth, + serde::default_framing_message_based, + sources::http_scrape::scrape::NAME, + tls, + tls::TlsConfig, + SourceSender, +}; +use codecs::decoding::DeserializerConfig; +use vector_core::config::log_schema; + +use super::{ + tests::{run_compliance, run_error, INTERVAL_SECS}, + HttpScrapeConfig, +}; + +/// Logs (raw bytes) should be scraped and decoded successfully. +#[tokio::test] +async fn scraped_logs_bytes() { + let events = run_compliance(HttpScrapeConfig::new( + "http://dufs:5000/logs/bytes".to_string(), + INTERVAL_SECS, + None, + DeserializerConfig::Bytes, + default_framing_message_based(), + None, + None, + None, + )) + .await; + let log = events[0].as_log(); + assert_eq!(log[log_schema().source_type_key()], NAME.into()); +} + +/// Logs (json) should be scraped and decoded successfully. +#[tokio::test] +async fn scraped_logs_json() { + let events = run_compliance(HttpScrapeConfig::new( + "http://dufs:5000/logs/json.json".to_string(), + INTERVAL_SECS, + None, + DeserializerConfig::Json, + default_framing_message_based(), + None, + None, + None, + )) + .await; + let log = events[0].as_log(); + assert_eq!(log[log_schema().source_type_key()], NAME.into()); +} + +/// Metrics should be scraped and decoded successfully. +#[tokio::test] +async fn scraped_metrics_native_json() { + let events = run_compliance(HttpScrapeConfig::new( + "http://dufs:5000/metrics/native.json".to_string(), + INTERVAL_SECS, + None, + DeserializerConfig::NativeJson, + default_framing_message_based(), + None, + None, + None, + )) + .await; + + let metric = events[0].as_metric(); + assert_eq!( + metric.tags().unwrap()[log_schema().source_type_key()], + NAME.to_string() + ); +} + +/// Traces should be scraped and decoded successfully. 
+#[tokio::test]
+async fn scraped_trace_native_json() {
+    let events = run_compliance(HttpScrapeConfig::new(
+        "http://dufs:5000/traces/native.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::NativeJson,
+        default_framing_message_based(),
+        None,
+        None,
+        None,
+    ))
+    .await;
+
+    let trace = events[0].as_trace();
+    assert_eq!(trace.as_map()[log_schema().source_type_key()], NAME.into());
+}
+
+/// Passing no authentication for the auth-gated endpoint should yield errors.
+#[tokio::test]
+async fn unauthorized_no_auth() {
+    run_error(HttpScrapeConfig::new(
+        "http://dufs-auth:5000/logs/json.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::Json,
+        default_framing_message_based(),
+        None,
+        None,
+        None,
+    ))
+    .await;
+}
+
+/// Passing incorrect credentials for the auth-gated endpoint should yield errors.
+#[tokio::test]
+async fn unauthorized_wrong_auth() {
+    run_error(HttpScrapeConfig::new(
+        "http://dufs-auth:5000/logs/json.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::Json,
+        default_framing_message_based(),
+        None,
+        None,
+        Some(Auth::Basic {
+            user: "white_rabbit".to_string(),
+            password: "morpheus".to_string(),
+        }),
+    ))
+    .await;
+}
+
+/// Passing the correct credentials for the auth-gated endpoint should succeed.
+#[tokio::test]
+async fn authorized() {
+    run_compliance(HttpScrapeConfig::new(
+        "http://dufs-auth:5000/logs/json.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::Json,
+        default_framing_message_based(),
+        None,
+        None,
+        Some(Auth::Basic {
+            user: "user".to_string(),
+            password: "pass".to_string(),
+        }),
+    ))
+    .await;
+}
+
+/// Passing an incomplete CA file (intermediate CA only) for TLS should yield errors.
+#[tokio::test]
+async fn tls_invalid_ca() {
+    run_error(HttpScrapeConfig::new(
+        "https://dufs-https:5000/logs/json.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::Json,
+        default_framing_message_based(),
+        None,
+        Some(TlsConfig {
+            ca_file: Some(tls::TEST_PEM_INTERMEDIATE_CA_PATH.into()),
+            ..Default::default()
+        }),
+        None,
+    ))
+    .await;
+}
+
+/// Passing the correct CA file for TLS should succeed.
+#[tokio::test]
+async fn tls_valid() {
+    run_compliance(HttpScrapeConfig::new(
+        "https://dufs-https:5000/logs/json.json".to_string(),
+        INTERVAL_SECS,
+        None,
+        DeserializerConfig::Json,
+        default_framing_message_based(),
+        None,
+        Some(TlsConfig {
+            ca_file: Some(tls::TEST_PEM_CA_PATH.into()),
+            ..Default::default()
+        }),
+        None,
+    ))
+    .await;
+}
+
+/// The source should shut down cleanly when the shutdown signal is received.
+/// TODO this can probably be extracted into the test_utils and generalized for other sources to
+/// use.
+#[tokio::test] +async fn shutdown() { + let source_id = ComponentKey::from("http_scrape_shutdown"); + let source = HttpScrapeConfig::new( + "http://dufs:5000/logs/json.json".to_string(), + INTERVAL_SECS, + None, + DeserializerConfig::Json, + default_framing_message_based(), + None, + None, + None, + ); + + // build the context for the source and get a SourceShutdownCoordinator to signal with + let (tx, _rx) = SourceSender::new_test(); + let (context, mut shutdown) = SourceContext::new_shutdown(&source_id, tx); + + // start source + let source = source + .build(context) + .await + .expect("source should not fail to build"); + let source_handle = tokio::spawn(source); + + // signal the source to shut down + let deadline = Instant::now() + Duration::from_secs(1); + let shutdown_complete = shutdown.shutdown_source(&source_id, deadline); + let shutdown_success = shutdown_complete.await; + assert!(shutdown_success); + + // Ensure source actually shut down successfully. + let _ = source_handle.await.unwrap(); +} diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index aaa7d6e056aa5..ca264a786bbb1 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -11,6 +11,12 @@ #[cfg(feature = "sources-http_scrape")] pub mod scrape; +#[cfg(test)] +mod tests; + +#[cfg(all(test, feature = "http-scrape-integration-tests"))] +mod integration_tests; + pub use scrape::HttpScrapeConfig; use bytes::Bytes; diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 37d4255360663..1b9310a8b5b71 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -30,7 +30,7 @@ use vector_core::{ }; /// The name of this source -const NAME: &str = "http_scrape"; +pub(crate) const NAME: &str = "http_scrape"; /// Configuration for the `http_scrape` source. #[configurable_component(source)] @@ -87,6 +87,31 @@ impl Default for HttpScrapeConfig { } } +#[allow(clippy::too_many_arguments)] +impl HttpScrapeConfig { + pub const fn new( + endpoint: String, + scrape_interval_secs: u64, + query: Option>>, + decoding: DeserializerConfig, + framing: FramingConfig, + headers: Option>, + tls: Option, + auth: Option, + ) -> Self { + Self { + endpoint, + scrape_interval_secs, + query, + decoding, + framing, + headers, + tls, + auth, + } + } +} + inventory::submit! { SourceDescription::new::(NAME) } @@ -214,469 +239,3 @@ impl super::HttpScraper for HttpScrapeContext { Some(events) } } - -#[cfg(test)] -mod test { - use codecs::decoding::{CharacterDelimitedDecoderOptions, NewlineDelimitedDecoderOptions}; - //use futures::{poll, StreamExt}; - //use futures::StreamExt; - //use std::task::Poll; - //use tokio::time::{sleep, Duration}; - use tokio::time::Duration; - //use tokio::{pin, select}; - use warp::Filter; - - use super::*; - use crate::test_util::{ - components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, test_generate_config, - }; - - #[test] - fn http_scrape_generate_config() { - test_generate_config::(); - } - - // I haven't seen a better way to validate an error occurred, but it seems like there should be - // a way, since if this is run live it generates an HTTP error. 
- #[tokio::test] - async fn invalid_endpoint() { - // let source = HttpScrapeConfig { - // endpoint: "http://nope".to_string(), - // scrape_interval_secs: 1, - // query: None, - // decoding: default_decoding(), - // framing: default_framing_message_based(), - // headers: None, - // auth: None, - // tls: None, - // }; - - // // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. - // let (tx, mut rx) = SourceSender::new_test(); - // let context = SourceContext::new_test(tx, None); - - // let source = source - // .build(context) - // .await - // .expect("source should not fail to build"); - - // // If a timeout was given, use that, otherwise, use an infinitely long one. - // let source_timeout = sleep(Duration::from_millis(3000)); - // pin!(source_timeout); - - // let _source_handle = tokio::spawn(source); - - // loop { - // select! { - // _ = &mut source_timeout => { - // assert!(false, "should error before timing out"); - // break - // }, - // Some(_event) = rx.next() => { - // assert!(false, "should not be a valid endpoint"); - // break - // }, - // //result = &mut source => { - // // match result { - // // Ok(_) => { - // // assert!(false, "should not be a valid endpoint"); - // // } - // // Err(e) => { - // // dbg!(e); - // // } - // // } - // // break - // //}, - // } - // } - - //drop(source); - - //sleep(Duration::from_secs(1)).await; - - //let option = source.now_or_never(); - - //assert!(option.is_some()); - - //let result = option.unwrap(); - - //assert!(result.is_err()); - - //drop(source); - - //assert_eq!(poll!(rx.next()), Poll::Ready(None)); - } - - async fn run_test(config: HttpScrapeConfig) -> Vec { - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) - .await; - assert!(!events.is_empty()); - events - } - - #[tokio::test] - async fn bytes_decoding() { - let in_addr = next_addr(); - - // validates the Accept header is set correctly for the Bytes codec - let dummy_endpoint = warp::path!("endpoint") - .and(warp::header::exact("Accept", "text/plain")) - .map(|| r#"A plain text event"#); - - tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - run_test(HttpScrapeConfig { - endpoint: format!("http://{}/endpoint", in_addr), - scrape_interval_secs: 1, - query: None, - decoding: default_decoding(), - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - } - - #[tokio::test] - async fn json_decoding_newline_delimited() { - let in_addr = next_addr(); - - // validates the Content-Type is set correctly for the Json codec - let dummy_endpoint = warp::path!("endpoint") - .and(warp::header::exact("Accept", "application/x-ndjson")) - .map(|| r#"{"data" : "foo"}"#); - - tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - run_test(HttpScrapeConfig { - endpoint: format!("http://{}/endpoint", in_addr), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: FramingConfig::NewlineDelimited { - newline_delimited: NewlineDelimitedDecoderOptions::default(), - }, - headers: None, - auth: None, - tls: None, - }) - .await; - } - - #[tokio::test] - async fn json_decoding_character_delimited() { - let in_addr = next_addr(); - - // validates the Content-Type is set correctly for the Json codec - let dummy_endpoint = warp::path!("endpoint") - .and(warp::header::exact("Accept", "application/json")) - .map(|| r#"{"data" : "foo"}"#); - - 
tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - run_test(HttpScrapeConfig { - endpoint: format!("http://{}/endpoint", in_addr), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: FramingConfig::CharacterDelimited { - character_delimited: CharacterDelimitedDecoderOptions { - delimiter: b',', - max_length: Some(usize::MAX), - }, - }, - headers: None, - auth: None, - tls: None, - }) - .await; - } - - #[tokio::test] - async fn request_query_applied() { - let in_addr = next_addr(); - - let dummy_endpoint = warp::path!("endpoint") - .and(warp::query::raw()) - .map(|query| format!(r#"{{"data" : "{}"}}"#, query)); - - tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - let events = run_test(HttpScrapeConfig { - endpoint: format!("http://{}/endpoint?key1=val1", in_addr), - scrape_interval_secs: 1, - query: Some(HashMap::from([ - ("key1".to_string(), vec!["val2".to_string()]), - ( - "key2".to_string(), - vec!["val1".to_string(), "val2".to_string()], - ), - ])), - decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - - let logs: Vec<_> = events.into_iter().map(|event| event.into_log()).collect(); - - let expected = HashMap::from([ - ( - "key1".to_string(), - vec!["val1".to_string(), "val2".to_string()], - ), - ( - "key2".to_string(), - vec!["val1".to_string(), "val2".to_string()], - ), - ]); - - for log in logs { - let query = log.get("data").expect("data must be available"); - let mut got: HashMap> = HashMap::new(); - for (k, v) in url::form_urlencoded::parse( - query.as_bytes().expect("byte conversion should succeed"), - ) { - got.entry(k.to_string()) - .or_insert_with(Vec::new) - .push(v.to_string()); - } - for v in got.values_mut() { - v.sort(); - } - assert_eq!(got, expected); - } - } - - #[tokio::test] - async fn headers_applied() { - let in_addr = next_addr(); - let header_key = "f00"; - let header_val = "bazz"; - - let dummy_endpoint = warp::path!("endpoint") - .and(warp::header::exact("Accept", "text/plain")) - .and(warp::header::exact(header_key, header_val)) - .map(|| r#"{"data" : "foo"}"#); - - tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - - run_test(HttpScrapeConfig { - endpoint: format!("http://{}/endpoint", in_addr), - scrape_interval_secs: 1, - query: None, - decoding: default_decoding(), - framing: default_framing_message_based(), - headers: Some(HashMap::from([( - header_key.to_string(), - header_val.to_string(), - )])), - auth: None, - tls: None, - }) - .await; - } -} - -#[cfg(all(test, feature = "http-scrape-integration-tests"))] -mod integration_tests { - use tokio::time::{Duration, Instant}; - - use super::*; - use crate::{ - config::ComponentKey, - test_util::components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - tls, SourceSender, - }; - - async fn run_test(config: HttpScrapeConfig) -> Vec { - let events = run_and_assert_source_compliance( - config, - Duration::from_secs(1), - &HTTP_PULL_SOURCE_TAGS, - ) - .await; - assert!(!events.is_empty()); - events - } - - #[tokio::test] - async fn scraped_logs_bytes() { - run_test(HttpScrapeConfig { - endpoint: "http://dufs:5000/logs/bytes".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Bytes, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - } - - #[tokio::test] - async fn scraped_logs_json() { - let events = run_test(HttpScrapeConfig { - endpoint: 
"http://dufs:5000/logs/json.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - let log = events[0].as_log(); - assert_eq!(log[log_schema().source_type_key()], NAME.into()); - } - - #[tokio::test] - async fn scraped_metrics_native_json() { - let events = run_test(HttpScrapeConfig { - endpoint: "http://dufs:5000/metrics/native.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::NativeJson, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - - let metric = events[0].as_metric(); - assert_eq!( - metric.tags().unwrap()[log_schema().source_type_key()], - NAME.to_string() - ); - } - - #[tokio::test] - async fn scraped_trace_native_json() { - let events = run_test(HttpScrapeConfig { - endpoint: "http://dufs:5000/traces/native.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::NativeJson, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }) - .await; - - let trace = events[0].as_trace(); - assert_eq!(trace.as_map()[log_schema().source_type_key()], NAME.into()); - } - - #[tokio::test] - async fn unauthorized() { - // TODO how to surface the failure for validation - - // let source = HttpScrapeConfig { - // endpoint: format!("http://dufs-auth:5000/logs/json.json"), - // scrape_interval_secs: 1, - // query: None, - // decoding: DeserializerConfig::Json, - // framing: default_framing_message_based(), - // headers: None, - // auth: None, - // tls: None, - // }; - // // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. 
- // let (tx, mut rx) = SourceSender::new_test(); - // let context = SourceContext::new_test(tx, None); - - // let source = source - // .build(context) - // .await - // .expect("source should not fail to build"); - - // sleep(Duration::from_secs(1)).await; - - // drop(source); - - // assert_eq!(poll!(rx.next()), Poll::Ready(None)); - } - - #[tokio::test] - async fn authorized() { - run_test(HttpScrapeConfig { - endpoint: "http://dufs-auth:5000/logs/json.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), - headers: None, - auth: Some(Auth::Basic { - user: "user".to_string(), - password: "pass".to_string(), - }), - tls: None, - }) - .await; - } - - #[tokio::test] - async fn tls() { - run_test(HttpScrapeConfig { - endpoint: "https://dufs-https:5000/logs/json.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: Some(TlsConfig { - ca_file: Some(tls::TEST_PEM_CA_PATH.into()), - ..Default::default() - }), - }) - .await; - } - - #[tokio::test] - async fn shutdown() { - let source_id = ComponentKey::from("http_scrape_shutdown"); - let source = HttpScrapeConfig { - endpoint: "http://dufs:5000/logs/json.json".to_string(), - scrape_interval_secs: 1, - query: None, - decoding: DeserializerConfig::Json, - framing: default_framing_message_based(), - headers: None, - auth: None, - tls: None, - }; - - // build the context for the source and get a SourceShutdownCoordinator to signal with - let (tx, _rx) = SourceSender::new_test(); - let (context, mut shutdown) = SourceContext::new_shutdown(&source_id, tx); - - // start source - let source = source - .build(context) - .await - .expect("source should not fail to build"); - let source_handle = tokio::spawn(source); - - // signal the source to shut down - let deadline = Instant::now() + Duration::from_secs(1); - let shutdown_complete = shutdown.shutdown_source(&source_id, deadline); - let shutdown_success = shutdown_complete.await; - assert!(shutdown_success); - - // Ensure source actually shut down successfully. - let _ = source_handle.await.unwrap(); - } -} diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs new file mode 100644 index 0000000000000..d03a21d603c2e --- /dev/null +++ b/src/sources/http_scrape/tests.rs @@ -0,0 +1,232 @@ +use std::collections::HashMap; +use tokio::time::Duration; +use warp::Filter; + +use crate::{serde::default_decoding, serde::default_framing_message_based}; +use codecs::decoding::{ + CharacterDelimitedDecoderOptions, DeserializerConfig, FramingConfig, + NewlineDelimitedDecoderOptions, +}; +use vector_core::event::Event; + +use super::HttpScrapeConfig; +use crate::test_util::{ + components::{ + run_and_assert_source_compliance, run_and_assert_source_error, HTTP_PULL_SOURCE_TAGS, + SOURCE_ERROR_TAGS, + }, + next_addr, test_generate_config, +}; + +pub(crate) const INTERVAL_SECS: u64 = 1; + +/// The happy path should yield at least one event and must emit the required internal events for sources. +pub(crate) async fn run_compliance(config: HttpScrapeConfig) -> Vec { + let events = + run_and_assert_source_compliance(config, Duration::from_secs(1), &HTTP_PULL_SOURCE_TAGS) + .await; + + assert!(!events.is_empty()); + + events +} + +/// The error path should not yield any events and must emit the required error internal events. 
+pub(crate) async fn run_error(config: HttpScrapeConfig) { + let events = + run_and_assert_source_error(config, Duration::from_secs(1), &SOURCE_ERROR_TAGS).await; + + assert!(events.is_empty()); +} + +#[test] +fn http_scrape_generate_config() { + test_generate_config::(); +} + +/// An endpoint in the config that is not reachable should generate errors. +#[tokio::test] +async fn invalid_endpoint() { + run_error(HttpScrapeConfig::new( + "http://nope".to_string(), + INTERVAL_SECS, + None, + default_decoding(), + default_framing_message_based(), + None, + None, + None, + )) + .await; +} + +/// Bytes should be decoded and HTTP header set to text/plain. +#[tokio::test] +async fn bytes_decoding() { + let in_addr = next_addr(); + + // validates the Accept header is set correctly for the Bytes codec + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "text/plain")) + .map(|| r#"A plain text event"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_compliance(HttpScrapeConfig::new( + format!("http://{}/endpoint", in_addr), + INTERVAL_SECS, + None, + default_decoding(), + default_framing_message_based(), + None, + None, + None, + )) + .await; +} + +/// JSON with newline delimiter should be decoded and HTTP header set to application/x-ndjson. +#[tokio::test] +async fn json_decoding_newline_delimited() { + let in_addr = next_addr(); + + // validates the Content-Type is set correctly for the Json codec + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "application/x-ndjson")) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_compliance(HttpScrapeConfig::new( + format!("http://{}/endpoint", in_addr), + INTERVAL_SECS, + None, + DeserializerConfig::Json, + FramingConfig::NewlineDelimited { + newline_delimited: NewlineDelimitedDecoderOptions::default(), + }, + None, + None, + None, + )) + .await; +} + +/// JSON with character delimiter should be decoded and HTTP header set to application/json. +#[tokio::test] +async fn json_decoding_character_delimited() { + let in_addr = next_addr(); + + // validates the Content-Type is set correctly for the Json codec + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "application/json")) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_compliance(HttpScrapeConfig::new( + format!("http://{}/endpoint", in_addr), + INTERVAL_SECS, + None, + DeserializerConfig::Json, + FramingConfig::CharacterDelimited { + character_delimited: CharacterDelimitedDecoderOptions { + delimiter: b',', + max_length: Some(usize::MAX), + }, + }, + None, + None, + None, + )) + .await; +} + +/// HTTP request queries configured by the user should be applied correctly. 
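Editor's note on the Accept headers asserted by the three decoding tests above: the header value is derived from the configured decoder/framing pair via the codecs content_type helper (its signature is touched later in this series). A minimal sketch restating one of those pairings follows; the test name is illustrative only, not part of the patch.

use codecs::decoding::{DeserializerConfig, FramingConfig, NewlineDelimitedDecoderOptions};

#[test]
fn json_newline_maps_to_ndjson() {
    // Json + newline-delimited framing advertises "application/x-ndjson",
    // matching the warp header assertion in json_decoding_newline_delimited above.
    let framing = FramingConfig::NewlineDelimited {
        newline_delimited: NewlineDelimitedDecoderOptions::default(),
    };
    assert_eq!(
        DeserializerConfig::Json.content_type(&framing),
        "application/x-ndjson"
    );
}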
+#[tokio::test] +async fn request_query_applied() { + let in_addr = next_addr(); + + let dummy_endpoint = warp::path!("endpoint") + .and(warp::query::raw()) + .map(|query| format!(r#"{{"data" : "{}"}}"#, query)); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + let events = run_compliance(HttpScrapeConfig::new( + format!("http://{}/endpoint?key1=val1", in_addr), + INTERVAL_SECS, + Some(HashMap::from([ + ("key1".to_string(), vec!["val2".to_string()]), + ( + "key2".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ])), + DeserializerConfig::Json, + default_framing_message_based(), + None, + None, + None, + )) + .await; + + let logs: Vec<_> = events.into_iter().map(|event| event.into_log()).collect(); + + let expected = HashMap::from([ + ( + "key1".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ( + "key2".to_string(), + vec!["val1".to_string(), "val2".to_string()], + ), + ]); + + for log in logs { + let query = log.get("data").expect("data must be available"); + let mut got: HashMap> = HashMap::new(); + for (k, v) in + url::form_urlencoded::parse(query.as_bytes().expect("byte conversion should succeed")) + { + got.entry(k.to_string()) + .or_insert_with(Vec::new) + .push(v.to_string()); + } + for v in got.values_mut() { + v.sort(); + } + assert_eq!(got, expected); + } +} + +/// HTTP request headers configured by the user should be applied correctly. +#[tokio::test] +async fn headers_applied() { + let in_addr = next_addr(); + let header_key = "f00"; + let header_val = "bazz"; + + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "text/plain")) + .and(warp::header::exact(header_key, header_val)) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + + run_compliance(HttpScrapeConfig::new( + format!("http://{}/endpoint", in_addr), + INTERVAL_SECS, + None, + default_decoding(), + default_framing_message_based(), + Some(HashMap::from([( + header_key.to_string(), + header_val.to_string(), + )])), + None, + None, + )) + .await; +} diff --git a/src/test_util/components.rs b/src/test_util/components.rs index 1c195a94e72f2..457f93c88f9cd 100644 --- a/src/test_util/components.rs +++ b/src/test_util/components.rs @@ -26,6 +26,9 @@ use crate::{ /// The most basic set of tags for sources, regardless of whether or not they pull data or have it pushed in. pub const SOURCE_TAGS: [&str; 1] = ["protocol"]; +/// The most basic set of error tags for sources, regardless of whether or not they pull data or have it pushed in. +pub const SOURCE_ERROR_TAGS: [&str; 1] = ["error_type"]; + /// The standard set of tags for sources that have their data pushed in from an external source. pub const PUSH_SOURCE_TAGS: [&str; 2] = ["endpoint", "protocol"]; @@ -86,6 +89,13 @@ pub static SOURCE_TESTS: Lazy = Lazy::new(|| ComponentTests { ], }); +/// The component error test specification for all sources. +pub static SOURCE_TESTS_ERROR: Lazy = Lazy::new(|| ComponentTests { + events: &["Error"], + tagged_counters: &["component_errors_total"], + untagged_counters: &[], +}); + /// The component test specification for all transforms. pub static TRANSFORM_TESTS: Lazy = Lazy::new(|| ComponentTests { events: &["EventsReceived", "EventsSent"], @@ -242,18 +252,29 @@ impl ComponentTester { } } -/// Convenience wrapper for running source tests +/// Runs and returns a future and asserts that the provided test specification passes. 
#[track_caller] -pub async fn assert_source_compliance(tags: &[&str], f: impl Future) -> T { +pub async fn assert_source( + tests: &Lazy, + tags: &[&str], + f: impl Future, +) -> T { init_test(); let result = f.await; - SOURCE_TESTS.assert(tags); + tests.assert(tags); result } +/// Convenience wrapper for running source tests. +#[track_caller] +pub async fn assert_source_compliance(tags: &[&str], f: impl Future) -> T { + assert_source(&SOURCE_TESTS, tags, f).await +} + +/// Runs source tests with timeout and asserts happy path compliance. #[track_caller] pub async fn run_and_assert_source_compliance( source: SC, @@ -263,9 +284,10 @@ pub async fn run_and_assert_source_compliance( where SC: SourceConfig, { - run_and_assert_source_compliance_advanced(source, |_| {}, Some(timeout), None, tags).await + run_and_assert_source_advanced(source, |_| {}, Some(timeout), None, &SOURCE_TESTS, tags).await } +/// Runs source tests with an event count limit and asserts happy path compliance. #[track_caller] pub async fn run_and_assert_source_compliance_n( source: SC, @@ -275,10 +297,32 @@ pub async fn run_and_assert_source_compliance_n( where SC: SourceConfig, { - run_and_assert_source_compliance_advanced(source, |_| {}, None, Some(event_count), tags).await + run_and_assert_source_advanced(source, |_| {}, None, Some(event_count), &SOURCE_TESTS, tags) + .await } +/// Runs source tests with timeout and asserts error path compliance. #[track_caller] +pub async fn run_and_assert_source_error( + source: SC, + timeout: Duration, + tags: &[&str], +) -> Vec +where + SC: SourceConfig, +{ + run_and_assert_source_advanced( + source, + |_| {}, + Some(timeout), + None, + &SOURCE_TESTS_ERROR, + tags, + ) + .await +} + +/// Runs source tests with setup, timeout, and event count limit and asserts happy path compliance. pub async fn run_and_assert_source_compliance_advanced( source: SC, setup: impl FnOnce(&mut SourceContext), @@ -289,7 +333,22 @@ pub async fn run_and_assert_source_compliance_advanced( where SC: SourceConfig, { - assert_source_compliance(tags, async move { + run_and_assert_source_advanced(source, setup, timeout, event_count, &SOURCE_TESTS, tags).await +} + +#[track_caller] +pub async fn run_and_assert_source_advanced( + source: SC, + setup: impl FnOnce(&mut SourceContext), + timeout: Option, + event_count: Option, + tests: &Lazy, + tags: &[&str], +) -> Vec +where + SC: SourceConfig, +{ + assert_source(tests, tags, async move { // Build the source and set ourselves up to both drive it to completion as well as collect all the events it sends out. 
let (tx, mut rx) = SourceSender::new_test(); let mut context = SourceContext::new_test(tx, None); diff --git a/src/tls/mod.rs b/src/tls/mod.rs index ea7490b3c4de1..b97bb29bebead 100644 --- a/src/tls/mod.rs +++ b/src/tls/mod.rs @@ -19,7 +19,13 @@ mod settings; #[cfg(all(feature = "sources-utils-tls", feature = "listenfd"))] pub(crate) use incoming::{CertificateMetadata, MaybeTlsIncomingStream, MaybeTlsListener}; pub(crate) use maybe_tls::MaybeTls; -#[cfg(all(test, feature = "kafka-integration-tests"))] +#[cfg(all( + test, + any( + feature = "kafka-integration-tests", + feature = "http-scrape-integration-tests" + ) +))] pub use settings::TEST_PEM_INTERMEDIATE_CA_PATH; pub use settings::{ MaybeTlsSettings, TlsConfig, TlsEnableableConfig, TlsSettings, TlsSourceConfig, diff --git a/src/tls/settings.rs b/src/tls/settings.rs index c8d4b2d43f536..e4bf603ca7076 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -26,7 +26,13 @@ const PEM_START_MARKER: &str = "-----BEGIN "; #[cfg(test)] pub const TEST_PEM_CA_PATH: &str = "tests/data/ca/certs/ca.cert.pem"; -#[cfg(all(test, feature = "kafka-integration-tests"))] +#[cfg(all( + test, + any( + feature = "kafka-integration-tests", + feature = "http-scrape-integration-tests" + ) +))] pub const TEST_PEM_INTERMEDIATE_CA_PATH: &str = "tests/data/ca/intermediate_server/certs/ca-chain.cert.pem"; #[cfg(test)] From 5f7c44231f801eb5ee0be2f2a3549efb6c908e8c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 5 Aug 2022 20:25:31 +0000 Subject: [PATCH 28/50] feedback from sg, fixed an issue with one of the int tests --- .../docker-compose.http-scrape.yml | 10 +++-- src/internal_events/http_scrape.rs | 18 ++++---- src/sources/http_scrape/integration_tests.rs | 41 +++++++++++++------ src/sources/http_scrape/mod.rs | 6 +-- src/sources/http_scrape/scrape.rs | 3 +- src/sources/http_scrape/tests.rs | 2 + src/tls/mod.rs | 8 +--- src/tls/settings.rs | 8 +--- .../http-scrape/certs/invalid-ca-cert.pem | 33 +++++++++++++++ tests/data/http-scrape/{ => serve}/logs/bytes | 0 .../http-scrape/{ => serve}/logs/json.json | 0 .../{ => serve}/metrics/native.json | 0 .../{ => serve}/traces/native.json | 0 .../configuration/sources/http_scrape.md | 2 +- .../components/sources/http_scrape.cue | 2 +- 15 files changed, 88 insertions(+), 45 deletions(-) create mode 100644 tests/data/http-scrape/certs/invalid-ca-cert.pem rename tests/data/http-scrape/{ => serve}/logs/bytes (100%) rename tests/data/http-scrape/{ => serve}/logs/json.json (100%) rename tests/data/http-scrape/{ => serve}/metrics/native.json (100%) rename tests/data/http-scrape/{ => serve}/traces/native.json (100%) diff --git a/scripts/integration/docker-compose.http-scrape.yml b/scripts/integration/docker-compose.http-scrape.yml index b62612d8abc06..87ce077fff6cb 100644 --- a/scripts/integration/docker-compose.http-scrape.yml +++ b/scripts/integration/docker-compose.http-scrape.yml @@ -10,7 +10,7 @@ services: command: - "/data" volumes: - - ${PWD}/tests/data/http-scrape/:/data + - ${PWD}/tests/data/http-scrape/serve:/data # To validate Basic HTTP authentication option dufs-auth: image: docker.io/sigoden/dufs:latest @@ -23,7 +23,7 @@ services: - "basic" - "/data" volumes: - - ${PWD}/tests/data/http-scrape/:/data + - ${PWD}/tests/data/http-scrape/serve:/data # To validate TLS options dufs-https: image: docker.io/sigoden/dufs:latest @@ -36,7 +36,7 @@ services: - "/certs/ca.key.pem" - "/data" volumes: - - ${PWD}/tests/data/http-scrape/:/data + - ${PWD}/tests/data/http-scrape/serve:/data - 
${PWD}/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem:/certs/ca.cert.pem - ${PWD}/tests/data/ca/intermediate_server/private/dufs-https.key.pem:/certs/ca.key.pem runner: @@ -62,6 +62,10 @@ services: - dufs - dufs-auth - dufs-https + environment: + - DUFS_ADDRESS=http://dufs:5000 + - DUFS_AUTH_ADDRESS=http://dufs-auth:5000 + - DUFS_HTTPS_ADDRESS=https://dufs-https:5000 volumes: - ${PWD}:/code - target:/code/target diff --git a/src/internal_events/http_scrape.rs b/src/internal_events/http_scrape.rs index 801e859d8f714..d1b7e793674c1 100644 --- a/src/internal_events/http_scrape.rs +++ b/src/internal_events/http_scrape.rs @@ -7,7 +7,7 @@ use super::prelude::{error_stage, error_type, http_error_code}; pub struct HttpScrapeEventsReceived { pub byte_size: usize, pub count: usize, - pub uri: http::Uri, + pub url: String, } impl InternalEvent for HttpScrapeEventsReceived { @@ -16,20 +16,20 @@ impl InternalEvent for HttpScrapeEventsReceived { message = "Events received.", count = %self.count, byte_size = %self.byte_size, - uri = %self.uri, + url = %self.url, ); counter!( "component_received_events_total", self.count as u64, - "uri" => self.uri.to_string(), + "uri" => self.url.clone(), ); counter!( "component_received_event_bytes_total", self.byte_size as u64, - "uri" => self.uri.to_string(), + "uri" => self.url.clone(), ); // deprecated counter!( "events_in_total", self.count as u64, - "uri" => self.uri.to_string(), + "uri" => self.url, ); } } @@ -37,7 +37,7 @@ impl InternalEvent for HttpScrapeEventsReceived { #[derive(Debug)] pub struct HttpScrapeHttpResponseError { pub code: hyper::StatusCode, - pub url: http::Uri, + pub url: String, } impl InternalEvent for HttpScrapeHttpResponseError { @@ -52,7 +52,7 @@ impl InternalEvent for HttpScrapeHttpResponseError { ); counter!( "component_errors_total", 1, - "url" => self.url.to_string(), + "url" => self.url, "stage" => error_stage::RECEIVING, "error_type" => error_type::REQUEST_FAILED, "error_code" => http_error_code(self.code.as_u16()), @@ -65,7 +65,7 @@ impl InternalEvent for HttpScrapeHttpResponseError { #[derive(Debug)] pub struct HttpScrapeHttpError { pub error: crate::Error, - pub url: http::Uri, + pub url: String, } impl InternalEvent for HttpScrapeHttpError { @@ -80,7 +80,7 @@ impl InternalEvent for HttpScrapeHttpError { ); counter!( "component_errors_total", 1, - "url" => self.url.to_string(), + "url" => self.url, "error_type" => error_type::REQUEST_FAILED, "stage" => error_stage::RECEIVING, ); diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index 211153ae4829c..bf4948256701b 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -21,11 +21,23 @@ use super::{ HttpScrapeConfig, }; +fn dufs_address() -> String { + std::env::var("DUFS_ADDRESS").unwrap_or_else(|_| "http://localhost:5000".into()) +} + +fn dufs_auth_address() -> String { + std::env::var("DUFS_AUTH_ADDRESS").unwrap_or_else(|_| "http://localhost:5000".into()) +} + +fn dufs_https_address() -> String { + std::env::var("DUFS_HTTPS_ADDRESS").unwrap_or_else(|_| "https://localhost:5000".into()) +} + /// Logs (raw bytes) should be scraped and decoded successfully. 
#[tokio::test] async fn scraped_logs_bytes() { let events = run_compliance(HttpScrapeConfig::new( - "http://dufs:5000/logs/bytes".to_string(), + format!("{}/logs/bytes", dufs_address()), INTERVAL_SECS, None, DeserializerConfig::Bytes, @@ -35,6 +47,7 @@ async fn scraped_logs_bytes() { None, )) .await; + // panics if not log event let log = events[0].as_log(); assert_eq!(log[log_schema().source_type_key()], NAME.into()); } @@ -43,7 +56,7 @@ async fn scraped_logs_bytes() { #[tokio::test] async fn scraped_logs_json() { let events = run_compliance(HttpScrapeConfig::new( - "http://dufs:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_address()), INTERVAL_SECS, None, DeserializerConfig::Json, @@ -53,6 +66,7 @@ async fn scraped_logs_json() { None, )) .await; + // panics if not log event let log = events[0].as_log(); assert_eq!(log[log_schema().source_type_key()], NAME.into()); } @@ -61,7 +75,7 @@ async fn scraped_logs_json() { #[tokio::test] async fn scraped_metrics_native_json() { let events = run_compliance(HttpScrapeConfig::new( - "http://dufs:5000/metrics/native.json".to_string(), + format!("{}/metrics/native.json", dufs_address()), INTERVAL_SECS, None, DeserializerConfig::NativeJson, @@ -72,6 +86,7 @@ async fn scraped_metrics_native_json() { )) .await; + // panics if not metric event let metric = events[0].as_metric(); assert_eq!( metric.tags().unwrap()[log_schema().source_type_key()], @@ -83,7 +98,7 @@ async fn scraped_metrics_native_json() { #[tokio::test] async fn scraped_trace_native_json() { let events = run_compliance(HttpScrapeConfig::new( - "http://dufs:5000/traces/native.json".to_string(), + format!("{}/traces/native.json", dufs_address()), INTERVAL_SECS, None, DeserializerConfig::NativeJson, @@ -102,7 +117,7 @@ async fn scraped_trace_native_json() { #[tokio::test] async fn unauthorized_no_auth() { run_error(HttpScrapeConfig::new( - "http://dufs-auth:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, None, DeserializerConfig::Json, @@ -118,7 +133,7 @@ async fn unauthorized_no_auth() { #[tokio::test] async fn unauthorized_wrong_auth() { run_error(HttpScrapeConfig::new( - "http://dufs-auth:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, None, DeserializerConfig::Json, @@ -137,7 +152,7 @@ async fn unauthorized_wrong_auth() { #[tokio::test] async fn authorized() { run_compliance(HttpScrapeConfig::new( - "http://dufs-auth:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, None, DeserializerConfig::Json, @@ -152,18 +167,18 @@ async fn authorized() { .await; } -/// Passing the CA file for TLS should yield errors. +/// Passing an incorrect CA file for TLS should yield errors. 
#[tokio::test] async fn tls_invalid_ca() { - run_compliance(HttpScrapeConfig::new( - "https://dufs-https:5000/logs/json.json".to_string(), + run_error(HttpScrapeConfig::new( + format!("{}/logs/json.json", dufs_https_address()), INTERVAL_SECS, None, DeserializerConfig::Json, default_framing_message_based(), None, Some(TlsConfig { - ca_file: Some(tls::TEST_PEM_INTERMEDIATE_CA_PATH.into()), + ca_file: Some("tests/data/http-scrape/certs/invalid-ca-cert.pem".into()), ..Default::default() }), None, @@ -175,7 +190,7 @@ async fn tls_invalid_ca() { #[tokio::test] async fn tls_valid() { run_compliance(HttpScrapeConfig::new( - "https://dufs-https:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_https_address()), INTERVAL_SECS, None, DeserializerConfig::Json, @@ -197,7 +212,7 @@ async fn tls_valid() { async fn shutdown() { let source_id = ComponentKey::from("http_scrape_shutdown"); let source = HttpScrapeConfig::new( - "http://dufs:5000/logs/json.json".to_string(), + format!("{}/logs/json.json", dufs_address()), INTERVAL_SECS, None, DeserializerConfig::Json, diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index ca264a786bbb1..26bd5aa1ceb28 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -167,7 +167,7 @@ pub(crate) async fn http_scrape( emit!(HttpScrapeEventsReceived { byte_size: events.size_of(), count: events.len(), - uri: url.clone() + url: url.to_string() }); Some(stream::iter(events)) } @@ -178,14 +178,14 @@ pub(crate) async fn http_scrape( context.on_http_response_error(&url, &header); emit!(HttpScrapeHttpResponseError { code: header.status, - url: url.clone(), + url: url.to_string(), }); None } Err(error) => { emit!(HttpScrapeHttpError { error, - url: url.clone(), + url: url.to_string() }); None } diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 1b9310a8b5b71..22d11c19431a1 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -36,7 +36,8 @@ pub(crate) const NAME: &str = "http_scrape"; #[configurable_component(source)] #[derive(Clone, Debug)] pub struct HttpScrapeConfig { - /// Endpoint to scrape events from. + /// Endpoint to scrape events from. The full path must be specified. + /// Example: "http://127.0.0.1:9898/logs" endpoint: String, /// The interval between scrapes, in seconds. diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index d03a21d603c2e..07b06615f7d16 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -32,6 +32,8 @@ pub(crate) async fn run_compliance(config: HttpScrapeConfig) -> Vec { } /// The error path should not yield any events and must emit the required error internal events. +/// Consider extracting this function into test_util , if it is always true that if the error +/// internal event metric is fired that no events would be outputed by the source. 
pub(crate) async fn run_error(config: HttpScrapeConfig) { let events = run_and_assert_source_error(config, Duration::from_secs(1), &SOURCE_ERROR_TAGS).await; diff --git a/src/tls/mod.rs b/src/tls/mod.rs index b97bb29bebead..ea7490b3c4de1 100644 --- a/src/tls/mod.rs +++ b/src/tls/mod.rs @@ -19,13 +19,7 @@ mod settings; #[cfg(all(feature = "sources-utils-tls", feature = "listenfd"))] pub(crate) use incoming::{CertificateMetadata, MaybeTlsIncomingStream, MaybeTlsListener}; pub(crate) use maybe_tls::MaybeTls; -#[cfg(all( - test, - any( - feature = "kafka-integration-tests", - feature = "http-scrape-integration-tests" - ) -))] +#[cfg(all(test, feature = "kafka-integration-tests"))] pub use settings::TEST_PEM_INTERMEDIATE_CA_PATH; pub use settings::{ MaybeTlsSettings, TlsConfig, TlsEnableableConfig, TlsSettings, TlsSourceConfig, diff --git a/src/tls/settings.rs b/src/tls/settings.rs index e4bf603ca7076..c8d4b2d43f536 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -26,13 +26,7 @@ const PEM_START_MARKER: &str = "-----BEGIN "; #[cfg(test)] pub const TEST_PEM_CA_PATH: &str = "tests/data/ca/certs/ca.cert.pem"; -#[cfg(all( - test, - any( - feature = "kafka-integration-tests", - feature = "http-scrape-integration-tests" - ) -))] +#[cfg(all(test, feature = "kafka-integration-tests"))] pub const TEST_PEM_INTERMEDIATE_CA_PATH: &str = "tests/data/ca/intermediate_server/certs/ca-chain.cert.pem"; #[cfg(test)] diff --git a/tests/data/http-scrape/certs/invalid-ca-cert.pem b/tests/data/http-scrape/certs/invalid-ca-cert.pem new file mode 100644 index 0000000000000..60beeeee5684f --- /dev/null +++ b/tests/data/http-scrape/certs/invalid-ca-cert.pem @@ -0,0 +1,33 @@ +-----BEGIN CERTIFICATE----- +MIIFozCCA4ugAwIBAgIULTV36TwzYVtphJw9BVnuSYmZkkcwDQYJKoZIhvcNAQEL +BQAwYTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAklEMQ4wDAYDVQQHDAVCb2lzZTEQ +MA4GA1UECgwHZGF0YWRvZzEPMA0GA1UECwwGdmVjdG9yMRIwEAYDVQQDDAluZXVy +b251bGwwHhcNMjIwODAxMTczMzU2WhcNMzIwNzI5MTczMzU2WjBhMQswCQYDVQQG +EwJVUzELMAkGA1UECAwCSUQxDjAMBgNVBAcMBUJvaXNlMRAwDgYDVQQKDAdkYXRh +ZG9nMQ8wDQYDVQQLDAZ2ZWN0b3IxEjAQBgNVBAMMCW5ldXJvbnVsbDCCAiIwDQYJ +KoZIhvcNAQEBBQADggIPADCCAgoCggIBANXGzY9+ALiZSUTHcDZdrnSi2GQV/DiX +MGkUDWJZL2AVbVWw9AtOWDFybaBRXhkgfZKpaZOiH4fTAI6PX6XHOA9shbEZiAYE +AhDHZGdc/JnxHk3jyhXKhfgKaGN7cx5yj8Hw7JoYV114S3z/00xPgwI0mrb89E9p +UzqsO+MNfE7NVt4u0Ned3DF0YipH8ErzCgsXMeysoFqWFRPbtSUS0lyFUV1gP3He +KVBQ7A+Pxmyia5lchKLftzyTHGryOYI4cgrxxFAV0v7xlxs5rATYDdyC3o8bmbhh +CVdAHarBpYoOY6bmACHyo+rEOge241Kp6pZnrFFM/r6uSmQ+b87dZ6Cs750soJz0 +QB61VXxFbflBvuP8Oe1r5I5SpdIhzuiTvQ7uviwDsBj98QjeLXhh7OcSmL9UinMb +WEI1MymHHNs02R1S1NLYV78Dp/GplRWG3D89JZCXNEJCZuPnYnaeo0GgfbfdLb7b +X6UIVuN96Xkvxprd2fJy2cJrMG9DjGpEsSO7SZu0/CPiB4cOGEZkr9jOPUX89iT1 +wtipyM0y7tIVa0GLc7z5syFscOifrq/f3Kn3cy66Vueox9EtPsJ/oUD8we6xXIuW +NvQmuKVE4K5u+loibaKwOP5CkkG56W83ddYryvN2uy6uMPT7kgCh8vWy5pjxGBrk +nd8YPgdR21GbAgMBAAGjUzBRMB0GA1UdDgQWBBRYOflW8kYbmflQBJ2iDGS6buhz +zTAfBgNVHSMEGDAWgBRYOflW8kYbmflQBJ2iDGS6buhzzTAPBgNVHRMBAf8EBTAD +AQH/MA0GCSqGSIb3DQEBCwUAA4ICAQBytRbTq11KlDLsFBZIpp6BhY7oMyAZRJ9W +WaS4qmcSGCXM+e+xrkkxe+yxKikYCD02V3VRo4YhiX4JgTTVeZkXF0fKoGLvyOZE +fg3vguq6SBBKFpCPJ0zs9/78w6EFcEAzMy/JTInAXvZmctAIw3WCnlVZlZ1QgrD0 +inpyI8QPmNdexGQrM8v+jhwrtUh0+FVMUd1v4rBTaaKsHy3o4t7XzQyAa+0N/w+K +M4KWAycBTx1/m8hCcmBTrxYd1Xdgq2r91+5xETGz2QUaLwhywG6kJW9Hsu5P5FyH +0C43sh6vRR6073k93AWvGVGqnEgK5cfnduPUJGy3ahqjvTEnL1FcSnFGlOsk3Pk0 +W7DEK220caQ+dLCixT4RmLLYrd3FbbF+4HZbutm969svVHv0l/c4pn3PbE/N3Tdn +k543acYgodSswYdn+UZs3vzan2beBjr9JcUsC0S80WirNaEAsF9mafbhEMerud7j 
+p3ikV7a2Q+8NjlsQBsjImypE2vIpWjJUyJiM20rR4xq0STHPegahwvMjLMKVmZ/O +liaiAu/L1SS6WHYY87mPo/zqG9MXhb0ou5kJng5ybTjTk9DcbCKLt7OutcKPthOa +vwZioJeFvRHiedH+2wdf3XkOKBrcc5KAVvHww2AHKnIXwijvTTGggnUXj+iPHgAF +fwsSqO6c5g== +-----END CERTIFICATE----- diff --git a/tests/data/http-scrape/logs/bytes b/tests/data/http-scrape/serve/logs/bytes similarity index 100% rename from tests/data/http-scrape/logs/bytes rename to tests/data/http-scrape/serve/logs/bytes diff --git a/tests/data/http-scrape/logs/json.json b/tests/data/http-scrape/serve/logs/json.json similarity index 100% rename from tests/data/http-scrape/logs/json.json rename to tests/data/http-scrape/serve/logs/json.json diff --git a/tests/data/http-scrape/metrics/native.json b/tests/data/http-scrape/serve/metrics/native.json similarity index 100% rename from tests/data/http-scrape/metrics/native.json rename to tests/data/http-scrape/serve/metrics/native.json diff --git a/tests/data/http-scrape/traces/native.json b/tests/data/http-scrape/serve/traces/native.json similarity index 100% rename from tests/data/http-scrape/traces/native.json rename to tests/data/http-scrape/serve/traces/native.json diff --git a/website/content/en/docs/reference/configuration/sources/http_scrape.md b/website/content/en/docs/reference/configuration/sources/http_scrape.md index 2cea00dc99f0d..822079736a14b 100644 --- a/website/content/en/docs/reference/configuration/sources/http_scrape.md +++ b/website/content/en/docs/reference/configuration/sources/http_scrape.md @@ -1,6 +1,6 @@ --- title: HTTP -description: Collect observability data emitted by an [HTTP](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Client_request) server +description: Pull observability data emitted by an [HTTP](https://en.wikipedia.org/wiki/Hypertext_Transfer_Protocol#Client_request) server at a configured interval. 
kind: source layout: component tags: ["http", "scrape", "component", "source", "logs", "metrics", traces"] diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index bf748f8dfff04..56d8183bdd889 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -145,7 +145,7 @@ components: sources: http_scrape: { "*": { common: false description: "Any field contained in your JSON payload" - relevant_when: "encoding != \"text\"" + relevant_when: "encoding == \"json\"" required: false type: "*": {} } From bb43aa7ea4ce7ee0f1edb07c64682b646dcaa384 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 5 Aug 2022 21:06:32 +0000 Subject: [PATCH 29/50] improve query cue examples --- website/cue/reference/components/sources/http_scrape.cue | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index 56d8183bdd889..f0c33ddcbf3d8 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -81,7 +81,7 @@ components: sources: http_scrape: { """ required: false type: object: { - examples: [{"match[]": [#"{job="somejob"}"#, #"{__name__=~"job:.*"}"#]}] + examples: [{"key1": ["value1", "value2"]}] options: { "*": { common: false @@ -90,13 +90,11 @@ components: sources: http_scrape: { type: array: { default: null examples: [[ - #"{job="somejob"}"#, - #"{__name__=~"job:.*"}"#, + "value1", "value2", ]] items: type: string: { examples: [ - #"{job="somejob"}"#, - #"{__name__=~"job:.*"}"#, + "key1", "key2", ] syntax: "literal" } From d6e8ed9ee67ea74bcc9ab550dd8a7e8e3f568e41 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 5 Aug 2022 21:27:00 +0000 Subject: [PATCH 30/50] move invalid_endpoint to int --- src/sources/http_scrape/integration_tests.rs | 17 +++++++++++++++++ src/sources/http_scrape/tests.rs | 16 ---------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index bf4948256701b..63a332e19a3ad 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -7,6 +7,7 @@ use tokio::time::{Duration, Instant}; use crate::{ config::{ComponentKey, SourceConfig, SourceContext}, http::Auth, + serde::default_decoding, serde::default_framing_message_based, sources::http_scrape::scrape::NAME, tls, @@ -33,6 +34,22 @@ fn dufs_https_address() -> String { std::env::var("DUFS_HTTPS_ADDRESS").unwrap_or_else(|_| "https://localhost:5000".into()) } +/// An endpoint in the config that is not reachable should generate errors. +#[tokio::test] +async fn invalid_endpoint() { + run_error(HttpScrapeConfig::new( + "http://nope".to_string(), + INTERVAL_SECS, + None, + default_decoding(), + default_framing_message_based(), + None, + None, + None, + )) + .await; +} + /// Logs (raw bytes) should be scraped and decoded successfully. 
#[tokio::test] async fn scraped_logs_bytes() { let events = run_compliance(HttpScrapeConfig::new( diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index 07b06615f7d16..98d4e4d056507 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -46,22 +46,6 @@ fn http_scrape_generate_config() { test_generate_config::<HttpScrapeConfig>(); } -/// An endpoint in the config that is not reachable should generate errors. -#[tokio::test] -async fn invalid_endpoint() { - run_error(HttpScrapeConfig::new( - "http://nope".to_string(), - INTERVAL_SECS, - None, - default_decoding(), - default_framing_message_based(), - None, - None, - None, - )) - .await; -} - /// Bytes should be decoded and HTTP header set to text/plain. #[tokio::test] async fn bytes_decoding() { From 56e939d727948f8749c629a97f321a7b53b6ac7e Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Fri, 5 Aug 2022 21:50:34 +0000 Subject: [PATCH 31/50] relocate run_error --- src/sources/http_scrape/integration_tests.rs | 14 +++++++++++++- src/sources/http_scrape/tests.rs | 15 +-------------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index 63a332e19a3ad..c56e36fa823f2 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -18,10 +18,12 @@ use codecs::decoding::DeserializerConfig; use vector_core::config::log_schema; use super::{ - tests::{run_compliance, run_error, INTERVAL_SECS}, + tests::{run_compliance, INTERVAL_SECS}, HttpScrapeConfig, }; +use crate::test_util::components::{run_and_assert_source_error, SOURCE_ERROR_TAGS}; + fn dufs_address() -> String { std::env::var("DUFS_ADDRESS").unwrap_or_else(|_| "http://localhost:5000".into()) } @@ -34,6 +36,16 @@ fn dufs_https_address() -> String { std::env::var("DUFS_HTTPS_ADDRESS").unwrap_or_else(|_| "https://localhost:5000".into()) } +/// The error path should not yield any events and must emit the required error internal events. +/// Consider extracting this function into test_util, if it is always true that if the error +/// internal event metric is fired that no events would be output by the source.
-pub(crate) async fn run_error(config: HttpScrapeConfig) { - let events = - run_and_assert_source_error(config, Duration::from_secs(1), &SOURCE_ERROR_TAGS).await; - - assert!(events.is_empty()); -} - #[test] fn http_scrape_generate_config() { test_generate_config::(); From b9ccd001362ee2f1b13a16425e3fe966950f67eb Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 8 Aug 2022 15:35:13 +0000 Subject: [PATCH 32/50] multi header val support, increase test interval --- src/sources/http_scrape/mod.rs | 8 +++++--- src/sources/http_scrape/scrape.rs | 5 +++-- src/sources/http_scrape/tests.rs | 19 +++++++++++-------- src/sources/prometheus/scrape.rs | 26 +++++++++++++------------- 4 files changed, 32 insertions(+), 26 deletions(-) diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 26bd5aa1ceb28..3f1dc2b9553fb 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -46,7 +46,7 @@ pub(crate) struct GenericHttpScrapeInputs { /// Interval to scrape on in seconds pub interval_secs: u64, /// Map of Header+Value to apply to HTTP request - pub headers: Option>, + pub headers: Option>>, /// Content type of the HTTP request, determined by the source pub content_type: String, pub auth: Option, @@ -128,8 +128,10 @@ pub(crate) async fn http_scrape( // add user supplied headers if let Some(headers) = &inputs.headers { - for header in headers { - builder = builder.header(header.0, header.1); + for (header, values) in headers { + for value in values { + builder = builder.header(header, value); + } } } diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 22d11c19431a1..f5192143139de 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -61,8 +61,9 @@ pub struct HttpScrapeConfig { framing: FramingConfig, /// Headers to apply to the HTTP requests. + /// One or more values for the same header can be provided. #[serde(default)] - headers: Option>, + headers: Option>>, /// TLS configuration. #[configurable(derived)] @@ -96,7 +97,7 @@ impl HttpScrapeConfig { query: Option>>, decoding: DeserializerConfig, framing: FramingConfig, - headers: Option>, + headers: Option>>, tls: Option, auth: Option, ) -> Self { diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index 4be4d077cc94b..c926bc6345325 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -1,6 +1,6 @@ use std::collections::HashMap; use tokio::time::Duration; -use warp::Filter; +use warp::{http::HeaderMap, Filter}; use crate::{serde::default_decoding, serde::default_framing_message_based}; use codecs::decoding::{ @@ -15,7 +15,7 @@ use crate::test_util::{ next_addr, test_generate_config, }; -pub(crate) const INTERVAL_SECS: u64 = 1; +pub(crate) const INTERVAL_SECS: u64 = 3; /// The happy path should yield at least one event and must emit the required internal events for sources. 
pub(crate) async fn run_compliance(config: HttpScrapeConfig) -> Vec { @@ -178,13 +178,16 @@ async fn request_query_applied() { #[tokio::test] async fn headers_applied() { let in_addr = next_addr(); - let header_key = "f00"; - let header_val = "bazz"; let dummy_endpoint = warp::path!("endpoint") .and(warp::header::exact("Accept", "text/plain")) - .and(warp::header::exact(header_key, header_val)) - .map(|| r#"{"data" : "foo"}"#); + .and(warp::header::headers_cloned().map(|headers: HeaderMap| { + let view = headers.get_all("f00"); + let mut iter = view.iter(); + assert_eq!(&"bazz", iter.next().unwrap()); + assert_eq!(&"bizz", iter.next().unwrap()); + })) + .map(|_| r#"{"data" : "foo"}"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); @@ -195,8 +198,8 @@ async fn headers_applied() { default_decoding(), default_framing_message_based(), Some(HashMap::from([( - header_key.to_string(), - header_val.to_string(), + "f00".to_string(), + vec!["bazz".to_string(), "bizz".to_string()], )])), None, None, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index e41581ab8532d..8c06a3c193a1c 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -381,7 +381,7 @@ mod test { let config = PrometheusScrapeConfig { endpoints: vec![format!("http://{}/metrics", in_addr)], - scrape_interval_secs: 1, + scrape_interval_secs: 3, instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: true, @@ -392,7 +392,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(1), + Duration::from_secs(3), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -413,7 +413,7 @@ mod test { let config = PrometheusScrapeConfig { endpoints: vec![format!("http://{}/metrics", in_addr)], - scrape_interval_secs: 1, + scrape_interval_secs: 3, instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: true, @@ -424,7 +424,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(1), + Duration::from_secs(3), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -463,7 +463,7 @@ mod test { let config = PrometheusScrapeConfig { endpoints: vec![format!("http://{}/metrics", in_addr)], - scrape_interval_secs: 1, + scrape_interval_secs: 3, instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, @@ -474,7 +474,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(1), + Duration::from_secs(3), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -526,7 +526,7 @@ mod test { let config = PrometheusScrapeConfig { endpoints: vec![format!("http://{}/metrics?key1=val1", in_addr)], - scrape_interval_secs: 1, + scrape_interval_secs: 3, instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, @@ -543,7 +543,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(1), + Duration::from_secs(3), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -635,7 +635,7 @@ mod test { endpoint_tag: None, honor_labels: false, query: None, - scrape_interval_secs: 1, + scrape_interval_secs: 3, tls: None, auth: None, }, @@ -650,7 +650,7 @@ mod test { buckets: vec![1.0, 2.0, 4.0], quantiles: vec![], distributions_as_summaries: false, - flush_period_secs: Duration::from_secs(1), + flush_period_secs: Duration::from_secs(3), suppress_timestamp: false, acknowledgements: Default::default(), }, @@ -658,7 +658,7 @@ mod test { 
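An aside on the multi-value header support introduced in this patch: the request builder in mod.rs now loops over every value configured for a header name, so repeated names become repeated header lines on the outgoing request. A small self-contained sketch of that behavior using the http crate directly; the endpoint and the f00/bazz/bizz values mirror the headers_applied test, and this is not the source's actual builder code.

use http::Request;

#[test]
fn repeated_header_values_are_all_applied() {
    // One .header() call per configured value, as in the nested loop added to
    // the scrape request construction above.
    let mut builder = Request::get("http://localhost:9898/endpoint")
        .header(http::header::ACCEPT, "text/plain");
    for value in ["bazz", "bizz"] {
        builder = builder.header("f00", value);
    }
    let request = builder.body(()).expect("request should build");

    // Both values are present under the same header name.
    assert_eq!(request.headers().get_all("f00").iter().count(), 2);
}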
assert_source_compliance(&HTTP_PULL_SOURCE_TAGS, async move { let (topology, _crash) = start_topology(config.build().unwrap(), false).await; - sleep(Duration::from_secs(1)).await; + sleep(Duration::from_secs(3)).await; let response = Client::new() .get(format!("http://{}/metrics", out_addr).parse().unwrap()) @@ -721,7 +721,7 @@ mod integration_tests { async fn scrapes_metrics() { let config = PrometheusScrapeConfig { endpoints: vec!["http://localhost:9090/metrics".into()], - scrape_interval_secs: 1, + scrape_interval_secs: 3, instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, @@ -732,7 +732,7 @@ mod integration_tests { let events = run_and_assert_source_compliance( config, - Duration::from_secs(1), + Duration::from_secs(3), &HTTP_PULL_SOURCE_TAGS, ) .await; From 6772cd0f75f4a7606afb18484fd66a0e4281519f Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 8 Aug 2022 16:48:35 +0000 Subject: [PATCH 33/50] int test timing tweaks --- src/sources/http_scrape/integration_tests.rs | 2 +- src/sources/http_scrape/tests.rs | 9 ++++++--- src/sources/prometheus/scrape.rs | 12 ++++++------ 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index c56e36fa823f2..c92f6f99b2c38 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -1,5 +1,5 @@ //! Integration tests for http_scrape source. -//! The container configuration file is `docker-compose-.http_scrape.yml` +//! The container configuration file is `docker-compose.http_scrape.yml` //! It leverages a static file server which serves the files in tests/data/http-scrape use tokio::time::{Duration, Instant}; diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index c926bc6345325..d2a0f5f1b03b6 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -19,9 +19,12 @@ pub(crate) const INTERVAL_SECS: u64 = 3; /// The happy path should yield at least one event and must emit the required internal events for sources. 
pub(crate) async fn run_compliance(config: HttpScrapeConfig) -> Vec { - let events = - run_and_assert_source_compliance(config, Duration::from_secs(1), &HTTP_PULL_SOURCE_TAGS) - .await; + let events = run_and_assert_source_compliance( + config, + Duration::from_secs(INTERVAL_SECS + 1), + &HTTP_PULL_SOURCE_TAGS, + ) + .await; assert!(!events.is_empty()); diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 8c06a3c193a1c..f6d0093680bfc 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -392,7 +392,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(3), + Duration::from_secs(4), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -424,7 +424,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(3), + Duration::from_secs(4), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -474,7 +474,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(3), + Duration::from_secs(4), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -543,7 +543,7 @@ mod test { let events = run_and_assert_source_compliance( config, - Duration::from_secs(3), + Duration::from_secs(4), &HTTP_PULL_SOURCE_TAGS, ) .await; @@ -658,7 +658,7 @@ mod test { assert_source_compliance(&HTTP_PULL_SOURCE_TAGS, async move { let (topology, _crash) = start_topology(config.build().unwrap(), false).await; - sleep(Duration::from_secs(3)).await; + sleep(Duration::from_secs(4)).await; let response = Client::new() .get(format!("http://{}/metrics", out_addr).parse().unwrap()) @@ -732,7 +732,7 @@ mod integration_tests { let events = run_and_assert_source_compliance( config, - Duration::from_secs(3), + Duration::from_secs(4), &HTTP_PULL_SOURCE_TAGS, ) .await; From b9f512deb628393088ff16e9942221f83f3ea98d Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 8 Aug 2022 19:04:36 +0000 Subject: [PATCH 34/50] add wait_for_tcp --- src/sources/http_scrape/tests.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index d2a0f5f1b03b6..0576a33615191 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -12,7 +12,7 @@ use vector_core::event::Event; use super::HttpScrapeConfig; use crate::test_util::{ components::{run_and_assert_source_compliance, HTTP_PULL_SOURCE_TAGS}, - next_addr, test_generate_config, + next_addr, test_generate_config, wait_for_tcp, }; pub(crate) const INTERVAL_SECS: u64 = 3; @@ -72,6 +72,7 @@ async fn json_decoding_newline_delimited() { .map(|| r#"{"data" : "foo"}"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + wait_for_tcp(in_addr).await; run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), @@ -99,6 +100,7 @@ async fn json_decoding_character_delimited() { .map(|| r#"{"data" : "foo"}"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + wait_for_tcp(in_addr).await; run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), @@ -128,6 +130,7 @@ async fn request_query_applied() { .map(|query| format!(r#"{{"data" : "{}"}}"#, query)); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + wait_for_tcp(in_addr).await; let events = run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint?key1=val1", in_addr), @@ -193,6 +196,7 @@ async fn headers_applied() { .map(|_| r#"{"data" : "foo"}"#); tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + wait_for_tcp(in_addr).await; 
run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), From 9818da2f0dc9558f2ac3fb96c45de3c8029c5618 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 17:27:04 +0000 Subject: [PATCH 35/50] feedback from js and bg --- lib/codecs/src/decoding/mod.rs | 2 +- src/codecs/encoding/encoder.rs | 2 +- src/sources/http_scrape/integration_tests.rs | 49 +++++++++---------- src/sources/http_scrape/mod.rs | 26 +++++----- src/sources/http_scrape/scrape.rs | 42 ++++++++-------- src/sources/http_scrape/tests.rs | 24 ++++----- src/sources/prometheus/scrape.rs | 22 ++++----- .../cue/reference/services/http_scrape.cue | 2 +- 8 files changed, 83 insertions(+), 86 deletions(-) diff --git a/lib/codecs/src/decoding/mod.rs b/lib/codecs/src/decoding/mod.rs index 5c4e9aac5a860..3ebed3cd2daa8 100644 --- a/lib/codecs/src/decoding/mod.rs +++ b/lib/codecs/src/decoding/mod.rs @@ -326,7 +326,7 @@ impl DeserializerConfig { } /// Get the HTTP content type. - pub const fn content_type(&self, framer: &FramingConfig) -> &str { + pub const fn content_type(&self, framer: &FramingConfig) -> &'static str { match (&self, framer) { ( DeserializerConfig::Json | DeserializerConfig::NativeJson, diff --git a/src/codecs/encoding/encoder.rs b/src/codecs/encoding/encoder.rs index 4747178f8c0f3..784a600aa0e83 100644 --- a/src/codecs/encoding/encoder.rs +++ b/src/codecs/encoding/encoder.rs @@ -104,7 +104,7 @@ impl Encoder { } /// Get the HTTP content type. - pub const fn content_type(&self) -> &str { + pub const fn content_type(&self) -> &'static str { match (&self.serializer, &self.framer) { (Serializer::Json(_) | Serializer::NativeJson(_), Framer::NewlineDelimited(_)) => { "application/x-ndjson" diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index e7edbb52319ab..851c350793861 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -1,7 +1,8 @@ //! Integration tests for http_scrape source. //! The container configuration file is `docker-compose.http_scrape.yml` -//! It leverages a static file server which serves the files in tests/data/http-scrape +//! 
It leverages a static file server ("dufs"), which serves the files in tests/data/http-scrape +use std::collections::HashMap; use tokio::time::{Duration, Instant}; use crate::{ @@ -52,10 +53,10 @@ async fn invalid_endpoint() { run_error(HttpScrapeConfig::new( "http://nope".to_string(), INTERVAL_SECS, - None, + HashMap::new(), default_decoding(), default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -68,10 +69,10 @@ async fn scraped_logs_bytes() { let events = run_compliance(HttpScrapeConfig::new( format!("{}/logs/bytes", dufs_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Bytes, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -87,10 +88,10 @@ async fn scraped_logs_json() { let events = run_compliance(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -106,10 +107,10 @@ async fn scraped_metrics_native_json() { let events = run_compliance(HttpScrapeConfig::new( format!("{}/metrics/native.json", dufs_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::NativeJson, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -129,10 +130,10 @@ async fn scraped_trace_native_json() { let events = run_compliance(HttpScrapeConfig::new( format!("{}/traces/native.json", dufs_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::NativeJson, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -148,10 +149,10 @@ async fn unauthorized_no_auth() { run_error(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -164,10 +165,10 @@ async fn unauthorized_wrong_auth() { run_error(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, Some(Auth::Basic { user: "white_rabbit".to_string(), @@ -183,10 +184,10 @@ async fn authorized() { run_compliance(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_auth_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, Some(Auth::Basic { user: "user".to_string(), @@ -202,10 +203,10 @@ async fn tls_invalid_ca() { run_error(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_https_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), Some(TlsConfig { ca_file: Some("tests/data/http-scrape/certs/invalid-ca-cert.pem".into()), ..Default::default() @@ -221,10 +222,10 @@ async fn tls_valid() { run_compliance(HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_https_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), Some(TlsConfig { ca_file: Some(tls::TEST_PEM_CA_PATH.into()), ..Default::default() @@ -235,18 +236,16 @@ async fn tls_valid() { } /// The source should shutdown cleanly when the shutdown signal is received. -/// TODO this can probably be extracted into the test_utils and generalized for other sources to -/// use. 
#[tokio::test] async fn shutdown() { let source_id = ComponentKey::from("http_scrape_shutdown"); let source = HttpScrapeConfig::new( format!("{}/logs/json.json", dufs_address()), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, None, ); diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index 3f1dc2b9553fb..f4abb00cb549d 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -46,7 +46,7 @@ pub(crate) struct GenericHttpScrapeInputs { /// Interval to scrape on in seconds pub interval_secs: u64, /// Map of Header+Value to apply to HTTP request - pub headers: Option>>, + pub headers: HashMap>, /// Content type of the HTTP request, determined by the source pub content_type: String, pub auth: Option, @@ -73,18 +73,16 @@ pub(crate) trait HttpScraper { } /// Builds a url for the HTTP requests. -pub(crate) fn get_url(uri: &Uri, query: &Option>>) -> Uri { +pub(crate) fn get_url(uri: &Uri, query: &HashMap>) -> Uri { let mut serializer = url::form_urlencoded::Serializer::new(String::new()); if let Some(query) = uri.query() { serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); }; - if let Some(query) = &query { - for (k, l) in query { - for v in l { - serializer.append_pair(k, v); - } + for (k, l) in query { + for v in l { + serializer.append_pair(k, v); } - }; + } let mut builder = Uri::builder(); if let Some(scheme) = uri.scheme() { builder = builder.scheme(scheme.clone()); @@ -96,7 +94,9 @@ pub(crate) fn get_url(uri: &Uri, query: &Option>>) - query if !query.is_empty() => format!("{}?{}", uri.path(), query), _ => uri.path().to_string(), }); - builder.build().expect("error building URI") + builder + .build() + .expect("Failed to build URI from parsed arguments") } /// Scrapes one or more urls at an interval. @@ -127,11 +127,9 @@ pub(crate) async fn http_scrape( let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); // add user supplied headers - if let Some(headers) = &inputs.headers { - for (header, values) in headers { - for value in values { - builder = builder.header(header, value); - } + for (header, values) in &inputs.headers { + for value in values { + builder = builder.header(header, value); } } diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index f5192143139de..cd7894ac675ee 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -48,7 +48,7 @@ pub struct HttpScrapeConfig { /// /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoint` option. - query: Option>>, + query: HashMap>, /// Decoder to use on the HTTP responses. #[configurable(derived)] @@ -63,7 +63,7 @@ pub struct HttpScrapeConfig { /// Headers to apply to the HTTP requests. /// One or more values for the same header can be provided. #[serde(default)] - headers: Option>>, + headers: HashMap>, /// TLS configuration. 
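Since query and headers are now plain maps rather than Option-wrapped maps, a fully populated constructor call (per the HttpScrapeConfig::new signature updated just below) looks roughly like the following sketch. The endpoint, header name, and values are placeholders, and the import paths assume the re-exports already used by the tests in this series.

use std::collections::HashMap;

use crate::serde::{default_decoding, default_framing_message_based};
use crate::sources::http_scrape::HttpScrapeConfig;

fn example_config() -> HttpScrapeConfig {
    // Argument order: endpoint, interval, query, decoding, framing, headers, tls, auth.
    HttpScrapeConfig::new(
        "http://localhost:9898/logs".to_string(),
        15,
        HashMap::from([("tag".to_string(), vec!["a".to_string(), "b".to_string()])]),
        default_decoding(),
        default_framing_message_based(),
        HashMap::from([(
            "X-Custom".to_string(),
            vec!["first".to_string(), "second".to_string()],
        )]),
        None,
        None,
    )
}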
#[configurable(derived)] @@ -78,11 +78,11 @@ impl Default for HttpScrapeConfig { fn default() -> Self { Self { endpoint: "http://localhost:9898/logs".to_string(), - query: None, + query: HashMap::new(), scrape_interval_secs: super::default_scrape_interval_secs(), decoding: default_decoding(), framing: default_framing_message_based(), - headers: None, + headers: HashMap::new(), tls: None, auth: None, } @@ -94,10 +94,10 @@ impl HttpScrapeConfig { pub const fn new( endpoint: String, scrape_interval_secs: u64, - query: Option>>, + query: HashMap>, decoding: DeserializerConfig, framing: FramingConfig, - headers: Option>>, + headers: HashMap>, tls: Option, auth: Option, ) -> Self { @@ -201,21 +201,21 @@ impl HttpScrapeContext { } events } +} - /// Enriches events with source_type, timestamp - fn enrich_events(&self, events: &mut Vec) { - for event in events { - match event { - Event::Log(ref mut log) => { - log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); - log.try_insert(log_schema().timestamp_key(), Utc::now()); - } - Event::Metric(ref mut metric) => { - metric.insert_tag(log_schema().source_type_key().to_string(), NAME.to_string()); - } - Event::Trace(ref mut trace) => { - trace.insert(log_schema().source_type_key(), Bytes::from(NAME)); - } +/// Enriches events with source_type, timestamp +fn enrich_events(events: &mut Vec) { + for event in events { + match event { + Event::Log(ref mut log) => { + log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); + log.try_insert(log_schema().timestamp_key(), Utc::now()); + } + Event::Metric(ref mut metric) => { + metric.insert_tag(log_schema().source_type_key().to_string(), NAME.to_string()); + } + Event::Trace(ref mut trace) => { + trace.insert(log_schema().source_type_key(), Bytes::from(NAME)); } } } @@ -236,7 +236,7 @@ impl super::HttpScraper for HttpScrapeContext { // decode and enrich let mut events = self.decode_events(&mut buf); - self.enrich_events(&mut events); + enrich_events(&mut events); Some(events) } diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index 53da11286d5f0..00b8ec4357e1f 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -48,10 +48,10 @@ async fn bytes_decoding() { run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), INTERVAL_SECS, - None, + HashMap::new(), default_decoding(), default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -74,12 +74,12 @@ async fn json_decoding_newline_delimited() { run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, FramingConfig::NewlineDelimited { newline_delimited: NewlineDelimitedDecoderOptions::default(), }, - None, + HashMap::new(), None, None, )) @@ -102,7 +102,7 @@ async fn json_decoding_character_delimited() { run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), INTERVAL_SECS, - None, + HashMap::new(), DeserializerConfig::Json, FramingConfig::CharacterDelimited { character_delimited: CharacterDelimitedDecoderOptions { @@ -110,7 +110,7 @@ async fn json_decoding_character_delimited() { max_length: Some(usize::MAX), }, }, - None, + HashMap::new(), None, None, )) @@ -132,16 +132,16 @@ async fn request_query_applied() { let events = run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint?key1=val1", in_addr), INTERVAL_SECS, - Some(HashMap::from([ + HashMap::from([ ("key1".to_string(), vec!["val2".to_string()]), ( 
"key2".to_string(), vec!["val1".to_string(), "val2".to_string()], ), - ])), + ]), DeserializerConfig::Json, default_framing_message_based(), - None, + HashMap::new(), None, None, )) @@ -198,13 +198,13 @@ async fn headers_applied() { run_compliance(HttpScrapeConfig::new( format!("http://{}/endpoint", in_addr), INTERVAL_SECS, - None, + HashMap::new(), default_decoding(), default_framing_message_based(), - Some(HashMap::from([( + HashMap::from([( "f00".to_string(), vec!["bazz".to_string(), "bizz".to_string()], - )])), + )]), None, None, )) diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 6c746c81630ce..bc0b8e2258a1e 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -78,7 +78,7 @@ pub struct PrometheusScrapeConfig { /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoints` option. This option is especially useful when /// scraping the `/federate` endpoint. - query: Option>>, + query: HashMap>, #[configurable(derived)] tls: Option, @@ -103,7 +103,7 @@ impl GenerateConfig for PrometheusScrapeConfig { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, - query: None, + query: HashMap::new(), tls: None, auth: None, }) @@ -134,7 +134,7 @@ impl SourceConfig for PrometheusScrapeConfig { let inputs = GenericHttpScrapeInputs { urls, interval_secs: self.scrape_interval_secs, - headers: None, + headers: HashMap::new(), content_type: "text/plain".to_string(), auth: self.auth.clone(), tls, @@ -170,7 +170,7 @@ struct PrometheusCompatConfig { endpoint_tag: Option, #[serde(default = "crate::serde::default_false")] honor_labels: bool, - query: Option>>, + query: HashMap>, #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, tls: Option, @@ -386,7 +386,7 @@ mod test { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: true, - query: None, + query: HashMap::new(), auth: None, tls: None, }; @@ -419,7 +419,7 @@ mod test { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: true, - query: None, + query: HashMap::new(), auth: None, tls: None, }; @@ -470,7 +470,7 @@ mod test { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, - query: None, + query: HashMap::new(), auth: None, tls: None, }; @@ -534,13 +534,13 @@ mod test { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, - query: Some(HashMap::from([ + query: HashMap::from([ ("key1".to_string(), vec!["val2".to_string()]), ( "key2".to_string(), vec!["val1".to_string(), "val2".to_string()], ), - ])), + ]), auth: None, tls: None, }; @@ -639,7 +639,7 @@ mod test { instance_tag: None, endpoint_tag: None, honor_labels: false, - query: None, + query: HashMap::new(), scrape_interval_secs: 1, tls: None, auth: None, @@ -730,7 +730,7 @@ mod integration_tests { instance_tag: Some("instance".to_string()), endpoint_tag: Some("endpoint".to_string()), honor_labels: false, - query: None, + query: HashMap::new(), auth: None, tls: None, }; diff --git a/website/cue/reference/services/http_scrape.cue b/website/cue/reference/services/http_scrape.cue index ed36e51be8a90..1e14c364c1521 100644 --- a/website/cue/reference/services/http_scrape.cue +++ b/website/cue/reference/services/http_scrape.cue @@ 
-2,7 +2,7 @@ package metadata services: http_scrape: { name: "HTTP scrape" - thing: "a \(name)" + thing: "an \(name)er" url: urls.http_scrape versions: null } From 45396a944621db1a035122e368b4fb4b07c69066 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 18:07:43 +0000 Subject: [PATCH 36/50] relocate common code to sources/util --- Cargo.toml | 5 +- src/sources/http_scrape/mod.rs | 200 ------------------------------ src/sources/http_scrape/scrape.rs | 15 ++- src/sources/prometheus/scrape.rs | 2 +- src/sources/util/http_scrape.rs | 199 +++++++++++++++++++++++++++++ src/sources/util/mod.rs | 2 + 6 files changed, 214 insertions(+), 209 deletions(-) create mode 100644 src/sources/util/http_scrape.rs diff --git a/Cargo.toml b/Cargo.toml index 37027e1a4186b..adf7d7bc80d7f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -502,7 +502,7 @@ sources-gcp_pubsub = ["gcp", "dep:h2", "dep:prost-types", "protobuf-build", "dep sources-heroku_logs = ["sources-utils-http", "sources-utils-http-query", "sources-http"] sources-host_metrics = ["dep:heim"] sources-http = ["sources-utils-http", "sources-utils-http-query"] -sources-http_scrape = ["sources-utils-http", "sources-http"] +sources-http_scrape = ["sources-utils-http-scrape"] sources-internal_logs = [] sources-internal_metrics = [] sources-journald = [] @@ -514,7 +514,7 @@ sources-nats = ["dep:nats", "dep:nkeys"] sources-nginx_metrics = ["dep:nom"] sources-opentelemetry = ["dep:prost-types", "sources-http", "sources-utils-http", "sources-vector", "opentelemetry"] sources-postgresql_metrics = ["dep:postgres-openssl", "dep:tokio-postgres"] -sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-http", "sources-utils-http", "sources-http_scrape"] +sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-utils-http-scrape"] sources-redis= ["dep:redis"] sources-socket = ["listenfd", "tokio-util/net", "sources-utils-udp", "sources-utils-tcp-keepalive", "sources-utils-tcp-socket", "sources-utils-tls", "sources-utils-unix"] sources-splunk_hec = ["sources-utils-tls", "dep:roaring"] @@ -527,6 +527,7 @@ sources-utils-http-encoding = ["dep:snap", "sources-utils-http-error"] sources-utils-http-error = [] sources-utils-http-prelude = ["sources-utils-http", "sources-utils-tls", "sources-utils-http-auth", "sources-utils-http-encoding", "sources-utils-http-error"] sources-utils-http-query = [] +sources-utils-http-scrape = ["sources-utils-http", "sources-http"] sources-utils-tcp-keepalive = [] sources-utils-tcp-socket = [] sources-utils-tls = [] diff --git a/src/sources/http_scrape/mod.rs b/src/sources/http_scrape/mod.rs index f4abb00cb549d..b2528796da72b 100644 --- a/src/sources/http_scrape/mod.rs +++ b/src/sources/http_scrape/mod.rs @@ -1,13 +1,3 @@ -//! Common logic for sources that are HTTP scrapers. -//! -//! Specific HTTP scraping sources will: -//! - Call get_url() to build the URL(s) to scrape. -//! - Implmement a specific context struct which: -//! - Contains the data that source needs in order to process the HTTP responses into internal_events -//! - Implements the HttpScraper trait -//! - Call http_scrape() supplying the generic inputs for scraping and the source-specific -//! context. 
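The module comment above (carried over verbatim into src/sources/util/http_scrape.rs later in this patch) names get_url() as the first step a scraping source performs: existing query-string parameters on the endpoint are kept and configured parameters are appended, repeated keys allowed. A minimal standalone sketch of that merging rule, using only the url and http crates and a made-up helper name, not Vector's own types:

use std::collections::HashMap;

fn merge_query(uri: &http::Uri, query: &HashMap<String, Vec<String>>) -> http::Uri {
    // Start from whatever query string the endpoint already carries.
    let mut serializer = url::form_urlencoded::Serializer::new(String::new());
    if let Some(existing) = uri.query() {
        serializer.extend_pairs(url::form_urlencoded::parse(existing.as_bytes()));
    }
    // Append every configured value, allowing repeated keys.
    for (key, values) in query {
        for value in values {
            serializer.append_pair(key, value);
        }
    }
    let merged = serializer.finish();
    let path_and_query = if merged.is_empty() {
        uri.path().to_string()
    } else {
        format!("{}?{}", uri.path(), merged)
    };
    let mut builder = http::Uri::builder();
    if let Some(scheme) = uri.scheme() {
        builder = builder.scheme(scheme.clone());
    }
    if let Some(authority) = uri.authority() {
        builder = builder.authority(authority.clone());
    }
    builder
        .path_and_query(path_and_query)
        .build()
        .expect("valid URI parts")
}

fn main() {
    let uri: http::Uri = "http://127.0.0.1:9898/logs?key1=val1".parse().unwrap();
    let query = HashMap::from([("key2".to_string(), vec!["a".to_string(), "b".to_string()])]);
    let merged = merge_query(&uri, &query);
    assert_eq!(
        merged.to_string(),
        "http://127.0.0.1:9898/logs?key1=val1&key2=a&key2=b"
    );
}

This is the same behavior the request_query_applied test exercises further down in the series.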
- #[cfg(feature = "sources-http_scrape")] pub mod scrape; @@ -18,193 +8,3 @@ mod tests; mod integration_tests; pub use scrape::HttpScrapeConfig; - -use bytes::Bytes; -use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; -use http::{response::Parts, Uri}; -use hyper::{Body, Request}; -use std::time::{Duration, Instant}; -use std::{collections::HashMap, future::ready}; -use tokio_stream::wrappers::IntervalStream; - -use crate::{ - http::{Auth, HttpClient}, - internal_events::{ - EndpointBytesReceived, HttpScrapeEventsReceived, HttpScrapeHttpError, - HttpScrapeHttpResponseError, RequestCompleted, StreamClosedError, - }, - tls::TlsSettings, - Error, SourceSender, -}; -use vector_common::shutdown::ShutdownSignal; -use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; - -/// Contains the inputs generic to any http scrape. -pub(crate) struct GenericHttpScrapeInputs { - /// Array of URLs to scrape - pub urls: Vec, - /// Interval to scrape on in seconds - pub interval_secs: u64, - /// Map of Header+Value to apply to HTTP request - pub headers: HashMap>, - /// Content type of the HTTP request, determined by the source - pub content_type: String, - pub auth: Option, - pub tls: TlsSettings, - pub proxy: ProxyConfig, - pub shutdown: ShutdownSignal, -} - -/// The default interval to scrape the http endpoint if none is configured. -pub(crate) const fn default_scrape_interval_secs() -> u64 { - 15 -} - -/// Methods that allow context-specific behavior during the scraping procedure. -pub(crate) trait HttpScraper { - /// (Optional) Called before the HTTP request is made, allows building context. - fn build(&mut self, _url: &Uri) {} - - /// Called after the HTTP request succeeds and returns the decoded/parsed Event array. - fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>; - - /// (Optional) Called if the HTTP response is not 200 ('OK'). - fn on_http_response_error(&self, _uri: &Uri, _header: &Parts) {} -} - -/// Builds a url for the HTTP requests. -pub(crate) fn get_url(uri: &Uri, query: &HashMap>) -> Uri { - let mut serializer = url::form_urlencoded::Serializer::new(String::new()); - if let Some(query) = uri.query() { - serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); - }; - for (k, l) in query { - for v in l { - serializer.append_pair(k, v); - } - } - let mut builder = Uri::builder(); - if let Some(scheme) = uri.scheme() { - builder = builder.scheme(scheme.clone()); - }; - if let Some(authority) = uri.authority() { - builder = builder.authority(authority.clone()); - }; - builder = builder.path_and_query(match serializer.finish() { - query if !query.is_empty() => format!("{}?{}", uri.path(), query), - _ => uri.path().to_string(), - }); - builder - .build() - .expect("Failed to build URI from parsed arguments") -} - -/// Scrapes one or more urls at an interval. -/// - The HTTP request is built per the options in provided generic inputs. -/// - The HTTP response is decoded/parsed into events by the specific context. -/// - The events are then sent to the output stream. 
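The doc comment just removed ("Scrapes one or more urls at an interval...") describes the polling loop that reappears unchanged in the new util module: interval ticks are flattened into per-URL requests and the whole stream stops on shutdown. A stripped-down sketch of that shape, assuming tokio, tokio-stream, and futures-util with their usual features, and a plain sleep standing in for ShutdownSignal:

use std::time::Duration;

use futures_util::{stream, StreamExt};
use tokio_stream::wrappers::IntervalStream;

#[tokio::main]
async fn main() {
    let urls = vec!["http://127.0.0.1:9898/logs".to_string()];
    // Stand-in for ShutdownSignal: stop scraping after a fixed delay.
    let shutdown = tokio::time::sleep(Duration::from_secs(5));

    let mut scrape_targets = IntervalStream::new(tokio::time::interval(Duration::from_secs(1)))
        .take_until(shutdown)
        .map(move |_| stream::iter(urls.clone()))
        .flatten()
        .boxed();

    while let Some(url) = scrape_targets.next().await {
        // A real implementation builds and sends the HTTP request here,
        // then decodes the response body into events.
        println!("scraping {}", url);
    }
}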
-pub(crate) async fn http_scrape( - inputs: GenericHttpScrapeInputs, - context: H, - mut out: SourceSender, -) -> Result<(), ()> { - let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( - inputs.interval_secs, - ))) - .take_until(inputs.shutdown) - .map(move |_| stream::iter(inputs.urls.clone())) - .flatten() - .map(move |url| { - // Building the HttpClient should not fail as it is just setting up the client with the - // proxy and tls settings. - let client = HttpClient::new(inputs.tls.clone(), &inputs.proxy) - .expect("Building HTTP client failed"); - let endpoint = url.to_string(); - - let mut context = context.clone(); - context.build(&url); - - let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); - - // add user supplied headers - for (header, values) in &inputs.headers { - for value in values { - builder = builder.header(header, value); - } - } - - // building an empty request should be infallible - let mut request = builder.body(Body::empty()).expect("error creating request"); - - if let Some(auth) = &inputs.auth { - auth.apply(&mut request); - } - - let start = Instant::now(); - client - .send(request) - .map_err(Error::from) - .and_then(|response| async move { - let (header, body) = response.into_parts(); - let body = hyper::body::to_bytes(body).await?; - emit!(EndpointBytesReceived { - byte_size: body.len(), - protocol: "http", - endpoint: endpoint.as_str(), - }); - Ok((header, body)) - }) - .into_stream() - .filter_map(move |response| { - ready(match response { - Ok((header, body)) if header.status == hyper::StatusCode::OK => { - emit!(RequestCompleted { - start, - end: Instant::now() - }); - match context.on_response(&url, &header, &body) { - Some(events) => { - emit!(HttpScrapeEventsReceived { - byte_size: events.size_of(), - count: events.len(), - url: url.to_string() - }); - Some(stream::iter(events)) - } - None => None, - } - } - Ok((header, _)) => { - context.on_http_response_error(&url, &header); - emit!(HttpScrapeHttpResponseError { - code: header.status, - url: url.to_string(), - }); - None - } - Err(error) => { - emit!(HttpScrapeHttpError { - error, - url: url.to_string() - }); - None - } - }) - }) - .flatten() - }) - .flatten() - .boxed(); - - match out.send_event_stream(&mut stream).await { - Ok(()) => { - info!("Finished sending."); - Ok(()) - } - Err(error) => { - let (count, _) = stream.size_hint(); - emit!(StreamClosedError { error, count }); - Err(()) - } - } -} diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index cd7894ac675ee..e1e06c4baab03 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -16,6 +16,9 @@ use crate::{ serde::default_decoding, serde::default_framing_message_based, sources, + sources::util::http_scrape::{ + default_scrape_interval_secs, get_url, http_scrape, GenericHttpScrapeInputs, HttpScraper, + }, tls::{TlsConfig, TlsSettings}, Result, }; @@ -41,7 +44,7 @@ pub struct HttpScrapeConfig { endpoint: String, /// The interval between scrapes, in seconds. - #[serde(default = "super::default_scrape_interval_secs")] + #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, /// Custom parameters for the scrape request query string. 
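Throughout these patches the query and headers options move from Option<HashMap<String, Vec<String>>> to a plain HashMap, with #[serde(default)] (added a couple of patches later in this series) letting the field be omitted from configuration entirely. A small self-contained sketch of that deserialization behavior, using serde and toml directly rather than Vector's config machinery; the struct and field names below are illustrative only:

use std::collections::HashMap;
use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct ScrapeOptions {
    endpoint: String,
    // A missing `query` table deserializes to an empty map, so call sites can
    // iterate directly instead of matching on an Option.
    #[serde(default)]
    query: HashMap<String, Vec<String>>,
}

fn main() {
    let with_query: ScrapeOptions = toml::from_str(
        r#"
            endpoint = "http://127.0.0.1:9898/logs"
            [query]
            key1 = ["val1", "val2"]
        "#,
    )
    .unwrap();
    assert_eq!(with_query.query["key1"], vec!["val1", "val2"]);

    let without_query: ScrapeOptions =
        toml::from_str(r#"endpoint = "http://127.0.0.1:9898/logs""#).unwrap();
    assert!(without_query.query.is_empty());
}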
@@ -79,7 +82,7 @@ impl Default for HttpScrapeConfig { Self { endpoint: "http://localhost:9898/logs".to_string(), query: HashMap::new(), - scrape_interval_secs: super::default_scrape_interval_secs(), + scrape_interval_secs: default_scrape_interval_secs(), decoding: default_decoding(), framing: default_framing_message_based(), headers: HashMap::new(), @@ -129,7 +132,7 @@ impl SourceConfig for HttpScrapeConfig { let urls = endpoints .iter() .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| r.map(|uri| super::get_url(&uri, &self.query))) + .map(|r| r.map(|uri| get_url(&uri, &self.query))) .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; @@ -147,7 +150,7 @@ impl SourceConfig for HttpScrapeConfig { // the only specific context needed is the codec decoding let context = HttpScrapeContext { decoder }; - let inputs = super::GenericHttpScrapeInputs { + let inputs = GenericHttpScrapeInputs { urls, interval_secs: self.scrape_interval_secs, headers: self.headers.clone(), @@ -158,7 +161,7 @@ impl SourceConfig for HttpScrapeConfig { shutdown: cx.shutdown, }; - Ok(super::http_scrape(inputs, context, cx.out).boxed()) + Ok(http_scrape(inputs, context, cx.out).boxed()) } fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { @@ -221,7 +224,7 @@ fn enrich_events(events: &mut Vec) { } } -impl super::HttpScraper for HttpScrapeContext { +impl HttpScraper for HttpScrapeContext { /// Decodes the HTTP response body into events per the decoder configured. fn on_response( &mut self, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index bc0b8e2258a1e..0b203973816b4 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -15,7 +15,7 @@ use crate::{ internal_events::PrometheusParseError, sources::{ self, - http_scrape::{ + util::http_scrape::{ default_scrape_interval_secs, get_url, http_scrape, GenericHttpScrapeInputs, HttpScraper, }, diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs new file mode 100644 index 0000000000000..ec9b438fb4967 --- /dev/null +++ b/src/sources/util/http_scrape.rs @@ -0,0 +1,199 @@ +//! Common logic for sources that are HTTP scrapers. +//! +//! Specific HTTP scraping sources will: +//! - Call get_url() to build the URL(s) to scrape. +//! - Implmement a specific context struct which: +//! - Contains the data that source needs in order to process the HTTP responses into internal_events +//! - Implements the HttpScraper trait +//! - Call http_scrape() supplying the generic inputs for scraping and the source-specific +//! context. + +use bytes::Bytes; +use futures_util::{stream, FutureExt, StreamExt, TryFutureExt}; +use http::{response::Parts, Uri}; +use hyper::{Body, Request}; +use std::time::{Duration, Instant}; +use std::{collections::HashMap, future::ready}; +use tokio_stream::wrappers::IntervalStream; + +use crate::{ + http::{Auth, HttpClient}, + internal_events::{ + EndpointBytesReceived, HttpScrapeEventsReceived, HttpScrapeHttpError, + HttpScrapeHttpResponseError, RequestCompleted, StreamClosedError, + }, + tls::TlsSettings, + Error, SourceSender, +}; +use vector_common::shutdown::ShutdownSignal; +use vector_core::{config::proxy::ProxyConfig, event::Event, ByteSizeOf}; + +/// Contains the inputs generic to any http scrape. 
+pub(crate) struct GenericHttpScrapeInputs { + /// Array of URLs to scrape + pub urls: Vec, + /// Interval to scrape on in seconds + pub interval_secs: u64, + /// Map of Header+Value to apply to HTTP request + pub headers: HashMap>, + /// Content type of the HTTP request, determined by the source + pub content_type: String, + pub auth: Option, + pub tls: TlsSettings, + pub proxy: ProxyConfig, + pub shutdown: ShutdownSignal, +} + +/// The default interval to scrape the http endpoint if none is configured. +pub(crate) const fn default_scrape_interval_secs() -> u64 { + 15 +} + +/// Methods that allow context-specific behavior during the scraping procedure. +pub(crate) trait HttpScraper { + /// (Optional) Called before the HTTP request is made, allows building context. + fn build(&mut self, _url: &Uri) {} + + /// Called after the HTTP request succeeds and returns the decoded/parsed Event array. + fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>; + + /// (Optional) Called if the HTTP response is not 200 ('OK'). + fn on_http_response_error(&self, _uri: &Uri, _header: &Parts) {} +} + +/// Builds a url for the HTTP requests. +pub(crate) fn get_url(uri: &Uri, query: &HashMap>) -> Uri { + let mut serializer = url::form_urlencoded::Serializer::new(String::new()); + if let Some(query) = uri.query() { + serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); + }; + for (k, l) in query { + for v in l { + serializer.append_pair(k, v); + } + } + let mut builder = Uri::builder(); + if let Some(scheme) = uri.scheme() { + builder = builder.scheme(scheme.clone()); + }; + if let Some(authority) = uri.authority() { + builder = builder.authority(authority.clone()); + }; + builder = builder.path_and_query(match serializer.finish() { + query if !query.is_empty() => format!("{}?{}", uri.path(), query), + _ => uri.path().to_string(), + }); + builder + .build() + .expect("Failed to build URI from parsed arguments") +} + +/// Scrapes one or more urls at an interval. +/// - The HTTP request is built per the options in provided generic inputs. +/// - The HTTP response is decoded/parsed into events by the specific context. +/// - The events are then sent to the output stream. +pub(crate) async fn http_scrape( + inputs: GenericHttpScrapeInputs, + context: H, + mut out: SourceSender, +) -> Result<(), ()> { + let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs( + inputs.interval_secs, + ))) + .take_until(inputs.shutdown) + .map(move |_| stream::iter(inputs.urls.clone())) + .flatten() + .map(move |url| { + // Building the HttpClient should not fail as it is just setting up the client with the + // proxy and tls settings. 
+ let client = HttpClient::new(inputs.tls.clone(), &inputs.proxy) + .expect("Building HTTP client failed"); + let endpoint = url.to_string(); + + let mut context = context.clone(); + context.build(&url); + + let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); + + // add user supplied headers + for (header, values) in &inputs.headers { + for value in values { + builder = builder.header(header, value); + } + } + + // building an empty request should be infallible + let mut request = builder.body(Body::empty()).expect("error creating request"); + + if let Some(auth) = &inputs.auth { + auth.apply(&mut request); + } + + let start = Instant::now(); + client + .send(request) + .map_err(Error::from) + .and_then(|response| async move { + let (header, body) = response.into_parts(); + let body = hyper::body::to_bytes(body).await?; + emit!(EndpointBytesReceived { + byte_size: body.len(), + protocol: "http", + endpoint: endpoint.as_str(), + }); + Ok((header, body)) + }) + .into_stream() + .filter_map(move |response| { + ready(match response { + Ok((header, body)) if header.status == hyper::StatusCode::OK => { + emit!(RequestCompleted { + start, + end: Instant::now() + }); + match context.on_response(&url, &header, &body) { + Some(events) => { + emit!(HttpScrapeEventsReceived { + byte_size: events.size_of(), + count: events.len(), + url: url.to_string() + }); + Some(stream::iter(events)) + } + None => None, + } + } + Ok((header, _)) => { + context.on_http_response_error(&url, &header); + emit!(HttpScrapeHttpResponseError { + code: header.status, + url: url.to_string(), + }); + None + } + Err(error) => { + emit!(HttpScrapeHttpError { + error, + url: url.to_string() + }); + None + } + }) + }) + .flatten() + }) + .flatten() + .boxed(); + + match out.send_event_stream(&mut stream).await { + Ok(()) => { + info!("Finished sending."); + Ok(()) + } + Err(error) => { + let (count, _) = stream.size_hint(); + emit!(StreamClosedError { error, count }); + Err(()) + } + } +} diff --git a/src/sources/util/mod.rs b/src/sources/util/mod.rs index 7c0455b75ed50..c796bc49708be 100644 --- a/src/sources/util/mod.rs +++ b/src/sources/util/mod.rs @@ -13,6 +13,8 @@ pub mod grpc; feature = "sources-utils-http-query" ))] mod http; +#[cfg(any(feature = "sources-http_scrape", feature = "sources-prometheus"))] +pub mod http_scrape; #[cfg(any(feature = "sources-aws_sqs", feature = "sources-gcp_pubsub"))] mod message_decoding; pub mod multiline_config; From a70ca337ad89e848d6da21de42de9cac6f855a65 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 18:11:21 +0000 Subject: [PATCH 37/50] update cue file --- website/cue/reference/services/http_scrape.cue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/cue/reference/services/http_scrape.cue b/website/cue/reference/services/http_scrape.cue index 1e14c364c1521..bc7333f28a06c 100644 --- a/website/cue/reference/services/http_scrape.cue +++ b/website/cue/reference/services/http_scrape.cue @@ -2,7 +2,7 @@ package metadata services: http_scrape: { name: "HTTP scrape" - thing: "an \(name)er" + thing: "an \(name)r" url: urls.http_scrape versions: null } From 951f8d2d3a67b9ef32d14e7f91be5ade9ed8f9f9 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 18:31:49 +0000 Subject: [PATCH 38/50] fix flags --- src/internal_events/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index 55e98d4768307..70fd824aa1e5c 100644 --- 
a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -53,7 +53,7 @@ mod geoip; mod heartbeat; mod http; pub mod http_client; -#[cfg(feature = "sources-http_scrape")] +#[cfg(feature = "sources-utils-http-scrape")] mod http_scrape; #[cfg(feature = "sources-internal_logs")] mod internal_logs; @@ -189,7 +189,7 @@ pub(crate) use self::geoip::*; feature = "sources-splunk_hec", ))] pub(crate) use self::http::*; -#[cfg(feature = "sources-http_scrape")] +#[cfg(feature = "sources-utils-http-scrape")] pub(crate) use self::http_scrape::*; #[cfg(feature = "sources-internal_logs")] pub(crate) use self::internal_logs::*; From f0b0a26677b7c8f2092a08db420bd8e8ba42dcf9 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 19:12:45 +0000 Subject: [PATCH 39/50] default --- src/sources/http_scrape/scrape.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index e1e06c4baab03..e68339f7ec513 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -51,6 +51,7 @@ pub struct HttpScrapeConfig { /// /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoint` option. + #[serde(default)] query: HashMap>, /// Decoder to use on the HTTP responses. From 1e806aee634d15dc77a5b58d9336e4ccc0ea99e1 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 19:35:18 +0000 Subject: [PATCH 40/50] default2 --- src/sources/prometheus/scrape.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 0b203973816b4..01f4081768ad5 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -78,6 +78,7 @@ pub struct PrometheusScrapeConfig { /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoints` option. This option is especially useful when /// scraping the `/federate` endpoint. 
+ #[serde(default)] query: HashMap>, #[configurable(derived)] From 074489d35bdad5ace2847d2a724d3073b4838b6c Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 9 Aug 2022 21:18:07 +0000 Subject: [PATCH 41/50] default3 --- src/sources/prometheus/scrape.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 01f4081768ad5..5016c265e70ba 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -171,6 +171,7 @@ struct PrometheusCompatConfig { endpoint_tag: Option, #[serde(default = "crate::serde::default_false")] honor_labels: bool, + #[serde(default)] query: HashMap>, #[serde(default = "default_scrape_interval_secs")] scrape_interval_secs: u64, From 6d1b5e2c904768159289d9ef9ec5ea134e7d9d37 Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 15 Aug 2022 13:26:06 +0000 Subject: [PATCH 42/50] change to build_url --- src/sources/http_scrape/scrape.rs | 4 ++-- src/sources/prometheus/scrape.rs | 4 ++-- src/sources/util/http_scrape.rs | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index e68339f7ec513..a26e16557b738 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -17,7 +17,7 @@ use crate::{ serde::default_framing_message_based, sources, sources::util::http_scrape::{ - default_scrape_interval_secs, get_url, http_scrape, GenericHttpScrapeInputs, HttpScraper, + build_url, default_scrape_interval_secs, http_scrape, GenericHttpScrapeInputs, HttpScraper, }, tls::{TlsConfig, TlsSettings}, Result, @@ -133,7 +133,7 @@ impl SourceConfig for HttpScrapeConfig { let urls = endpoints .iter() .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| r.map(|uri| get_url(&uri, &self.query))) + .map(|r| r.map(|uri| build_url(&uri, &self.query))) .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 5016c265e70ba..84753fcd92897 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -16,7 +16,7 @@ use crate::{ sources::{ self, util::http_scrape::{ - default_scrape_interval_secs, get_url, http_scrape, GenericHttpScrapeInputs, + build_url, default_scrape_interval_secs, http_scrape, GenericHttpScrapeInputs, HttpScraper, }, }, @@ -120,7 +120,7 @@ impl SourceConfig for PrometheusScrapeConfig { .endpoints .iter() .map(|s| s.parse::().context(sources::UriParseSnafu)) - .map(|r| r.map(|uri| get_url(&uri, &self.query))) + .map(|r| r.map(|uri| build_url(&uri, &self.query))) .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index ec9b438fb4967..9866958562db5 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -1,7 +1,7 @@ //! Common logic for sources that are HTTP scrapers. //! //! Specific HTTP scraping sources will: -//! - Call get_url() to build the URL(s) to scrape. +//! - Call build_url() to build the URL(s) to scrape. //! - Implmement a specific context struct which: //! - Contains the data that source needs in order to process the HTTP responses into internal_events //! - Implements the HttpScraper trait @@ -62,7 +62,7 @@ pub(crate) trait HttpScraper { } /// Builds a url for the HTTP requests. 
-pub(crate) fn get_url(uri: &Uri, query: &HashMap>) -> Uri { +pub(crate) fn build_url(uri: &Uri, query: &HashMap>) -> Uri { let mut serializer = url::form_urlencoded::Serializer::new(String::new()); if let Some(query) = uri.query() { serializer.extend_pairs(url::form_urlencoded::parse(query.as_bytes())); From 0ba62769673531a4ee649875e9710a0e32c47b3b Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 15 Aug 2022 13:32:13 +0000 Subject: [PATCH 43/50] match -> map --- src/sources/util/http_scrape.rs | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index 9866958562db5..055375f4e14e5 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -151,17 +151,25 @@ pub(crate) async fn http_scrape( start, end: Instant::now() }); - match context.on_response(&url, &header, &body) { - Some(events) => { - emit!(HttpScrapeEventsReceived { - byte_size: events.size_of(), - count: events.len(), - url: url.to_string() - }); - Some(stream::iter(events)) - } - None => None, - } + // match context.on_response(&url, &header, &body) { + // Some(events) => { + // emit!(HttpScrapeEventsReceived { + // byte_size: events.size_of(), + // count: events.len(), + // url: url.to_string() + // }); + // Some(stream::iter(events)) + // } + // None => None, + // } + context.on_response(&url, &header, &body).map(|events| { + emit!(HttpScrapeEventsReceived { + byte_size: events.size_of(), + count: events.len(), + url: url.to_string() + }); + stream::iter(events) + }) } Ok((header, _)) => { context.on_http_response_error(&url, &header); From b3c33f972dae070178ef12095bdb67ff2c94b9ac Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 15 Aug 2022 13:54:03 +0000 Subject: [PATCH 44/50] remove new() --- src/sources/http_scrape/integration_tests.rs | 220 +++++++++---------- src/sources/http_scrape/scrape.rs | 41 +--- src/sources/http_scrape/tests.rs | 100 ++++----- src/sources/util/http_scrape.rs | 11 - 4 files changed, 168 insertions(+), 204 deletions(-) diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index 851c350793861..1f47955ee2070 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -50,32 +50,32 @@ pub(crate) async fn run_error(config: HttpScrapeConfig) { /// An endpoint in the config that is not reachable should generate errors. #[tokio::test] async fn invalid_endpoint() { - run_error(HttpScrapeConfig::new( - "http://nope".to_string(), - INTERVAL_SECS, - HashMap::new(), - default_decoding(), - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + run_error(HttpScrapeConfig { + endpoint: "http://nope".to_string(), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: default_decoding(), + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; } /// Logs (raw bytes) should be scraped and decoded successfully. 
#[tokio::test] async fn scraped_logs_bytes() { - let events = run_compliance(HttpScrapeConfig::new( - format!("{}/logs/bytes", dufs_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Bytes, - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + let events = run_compliance(HttpScrapeConfig { + endpoint: format!("{}/logs/bytes", dufs_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Bytes, + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; // panics if not log event let log = events[0].as_log(); @@ -85,16 +85,16 @@ async fn scraped_logs_bytes() { /// Logs (json) should be scraped and decoded successfully. #[tokio::test] async fn scraped_logs_json() { - let events = run_compliance(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + let events = run_compliance(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; // panics if not log event let log = events[0].as_log(); @@ -104,16 +104,16 @@ async fn scraped_logs_json() { /// Metrics should be scraped and decoded successfully. #[tokio::test] async fn scraped_metrics_native_json() { - let events = run_compliance(HttpScrapeConfig::new( - format!("{}/metrics/native.json", dufs_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::NativeJson, - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + let events = run_compliance(HttpScrapeConfig { + endpoint: format!("{}/metrics/native.json", dufs_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::NativeJson, + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; // panics if not metric event @@ -127,16 +127,16 @@ async fn scraped_metrics_native_json() { /// Traces should be scraped and decoded successfully. #[tokio::test] async fn scraped_trace_native_json() { - let events = run_compliance(HttpScrapeConfig::new( - format!("{}/traces/native.json", dufs_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::NativeJson, - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + let events = run_compliance(HttpScrapeConfig { + endpoint: format!("{}/traces/native.json", dufs_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::NativeJson, + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; let trace = events[0].as_trace(); @@ -146,92 +146,92 @@ async fn scraped_trace_native_json() { /// Passing no authentication for the auth-gated endpoint should yield errors. 
#[tokio::test] async fn unauthorized_no_auth() { - run_error(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_auth_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + run_error(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_auth_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + auth: None, + tls: None, + }) .await; } /// Passing the incorrect credentials for the auth-gated endpoint should yield errors. #[tokio::test] async fn unauthorized_wrong_auth() { - run_error(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_auth_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - None, - Some(Auth::Basic { + run_error(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_auth_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: None, + auth: Some(Auth::Basic { user: "white_rabbit".to_string(), password: "morpheus".to_string(), }), - )) + }) .await; } /// Passing the correct credentials for the auth-gated endpoint should succeed. #[tokio::test] async fn authorized() { - run_compliance(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_auth_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - None, - Some(Auth::Basic { + run_compliance(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_auth_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: None, + auth: Some(Auth::Basic { user: "user".to_string(), password: "pass".to_string(), }), - )) + }) .await; } /// Passing an incorrect CA file for TLS should yield errors. #[tokio::test] async fn tls_invalid_ca() { - run_error(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_https_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - Some(TlsConfig { + run_error(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_https_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: Some(TlsConfig { ca_file: Some("tests/data/http-scrape/certs/invalid-ca-cert.pem".into()), ..Default::default() }), - None, - )) + auth: None, + }) .await; } /// Passing the correct CA file for TLS should succeed. 
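Since HttpScrapeConfig keeps its Default impl and the fields are now public, test setup like the blocks above could also lean on struct-update syntax and spell out only the fields under test. A hypothetical fragment, assuming the same helpers (dufs_address, INTERVAL_SECS) as the surrounding test module:

let config = HttpScrapeConfig {
    endpoint: format!("{}/logs/json.json", dufs_address()),
    scrape_interval_secs: INTERVAL_SECS,
    decoding: DeserializerConfig::Json,
    ..Default::default()
};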
#[tokio::test] async fn tls_valid() { - run_compliance(HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_https_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - Some(TlsConfig { + run_compliance(HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_https_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: Some(TlsConfig { ca_file: Some(tls::TEST_PEM_CA_PATH.into()), ..Default::default() }), - None, - )) + auth: None, + }) .await; } @@ -239,16 +239,16 @@ async fn tls_valid() { #[tokio::test] async fn shutdown() { let source_id = ComponentKey::from("http_scrape_shutdown"); - let source = HttpScrapeConfig::new( - format!("{}/logs/json.json", dufs_address()), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - default_framing_message_based(), - HashMap::new(), - None, - None, - ); + let source = HttpScrapeConfig { + endpoint: format!("{}/logs/json.json", dufs_address()), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: None, + auth: None, + }; // build the context for the source and get a SourceShutdownCoordinator to signal with let (tx, _rx) = SourceSender::new_test(); diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index a26e16557b738..24d9192af7886 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -41,41 +41,41 @@ pub(crate) const NAME: &str = "http_scrape"; pub struct HttpScrapeConfig { /// Endpoint to scrape events from. The full path must be specified. /// Example: "http://127.0.0.1:9898/logs" - endpoint: String, + pub endpoint: String, /// The interval between scrapes, in seconds. #[serde(default = "default_scrape_interval_secs")] - scrape_interval_secs: u64, + pub scrape_interval_secs: u64, /// Custom parameters for the scrape request query string. /// /// One or more values for the same parameter key can be provided. The parameters provided in this option are /// appended to any parameters manually provided in the `endpoint` option. #[serde(default)] - query: HashMap>, + pub query: HashMap>, /// Decoder to use on the HTTP responses. #[configurable(derived)] #[serde(default = "default_decoding")] - decoding: DeserializerConfig, + pub decoding: DeserializerConfig, /// Framing to use in the decoding. #[configurable(derived)] #[serde(default = "default_framing_message_based")] - framing: FramingConfig, + pub framing: FramingConfig, /// Headers to apply to the HTTP requests. /// One or more values for the same header can be provided. #[serde(default)] - headers: HashMap>, + pub headers: HashMap>, /// TLS configuration. #[configurable(derived)] - tls: Option, + pub tls: Option, /// HTTP Authentication. 
#[configurable(derived)] - auth: Option, + pub auth: Option, } impl Default for HttpScrapeConfig { @@ -93,31 +93,6 @@ impl Default for HttpScrapeConfig { } } -#[allow(clippy::too_many_arguments)] -impl HttpScrapeConfig { - pub const fn new( - endpoint: String, - scrape_interval_secs: u64, - query: HashMap>, - decoding: DeserializerConfig, - framing: FramingConfig, - headers: HashMap>, - tls: Option, - auth: Option, - ) -> Self { - Self { - endpoint, - scrape_interval_secs, - query, - decoding, - framing, - headers, - tls, - auth, - } - } -} - inventory::submit! { SourceDescription::new::(NAME) } diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index 00b8ec4357e1f..f0333f83af665 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -45,16 +45,16 @@ async fn bytes_decoding() { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); - run_compliance(HttpScrapeConfig::new( - format!("http://{}/endpoint", in_addr), - INTERVAL_SECS, - HashMap::new(), - default_decoding(), - default_framing_message_based(), - HashMap::new(), - None, - None, - )) + run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: default_decoding(), + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: None, + auth: None, + }) .await; } @@ -71,18 +71,18 @@ async fn json_decoding_newline_delimited() { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); wait_for_tcp(in_addr).await; - run_compliance(HttpScrapeConfig::new( - format!("http://{}/endpoint", in_addr), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - FramingConfig::NewlineDelimited { + run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: FramingConfig::NewlineDelimited { newline_delimited: NewlineDelimitedDecoderOptions::default(), }, - HashMap::new(), - None, - None, - )) + headers: HashMap::new(), + tls: None, + auth: None, + }) .await; } @@ -99,21 +99,21 @@ async fn json_decoding_character_delimited() { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); wait_for_tcp(in_addr).await; - run_compliance(HttpScrapeConfig::new( - format!("http://{}/endpoint", in_addr), - INTERVAL_SECS, - HashMap::new(), - DeserializerConfig::Json, - FramingConfig::CharacterDelimited { + run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Json, + framing: FramingConfig::CharacterDelimited { character_delimited: CharacterDelimitedDecoderOptions { delimiter: b',', max_length: Some(usize::MAX), }, }, - HashMap::new(), - None, - None, - )) + headers: HashMap::new(), + tls: None, + auth: None, + }) .await; } @@ -129,22 +129,22 @@ async fn request_query_applied() { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); wait_for_tcp(in_addr).await; - let events = run_compliance(HttpScrapeConfig::new( - format!("http://{}/endpoint?key1=val1", in_addr), - INTERVAL_SECS, - HashMap::from([ + let events = run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint?key1=val1", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::from([ ("key1".to_string(), vec!["val2".to_string()]), ( "key2".to_string(), vec!["val1".to_string(), "val2".to_string()], ), ]), - DeserializerConfig::Json, 
- default_framing_message_based(), - HashMap::new(), - None, - None, - )) + decoding: DeserializerConfig::Json, + framing: default_framing_message_based(), + headers: HashMap::new(), + tls: None, + auth: None, + }) .await; let logs: Vec<_> = events.into_iter().map(|event| event.into_log()).collect(); @@ -195,18 +195,18 @@ async fn headers_applied() { tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); wait_for_tcp(in_addr).await; - run_compliance(HttpScrapeConfig::new( - format!("http://{}/endpoint", in_addr), - INTERVAL_SECS, - HashMap::new(), - default_decoding(), - default_framing_message_based(), - HashMap::from([( + run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: default_decoding(), + framing: default_framing_message_based(), + headers: HashMap::from([( "f00".to_string(), vec!["bazz".to_string(), "bizz".to_string()], )]), - None, - None, - )) + auth: None, + tls: None, + }) .await; } diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index 055375f4e14e5..39fd669c826cc 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -151,17 +151,6 @@ pub(crate) async fn http_scrape( start, end: Instant::now() }); - // match context.on_response(&url, &header, &body) { - // Some(events) => { - // emit!(HttpScrapeEventsReceived { - // byte_size: events.size_of(), - // count: events.len(), - // url: url.to_string() - // }); - // Some(stream::iter(events)) - // } - // None => None, - // } context.on_response(&url, &header, &body).map(|events| { emit!(HttpScrapeEventsReceived { byte_size: events.size_of(), From be778424be3165dc4303f88d0dabf271feda04cf Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 15 Aug 2022 15:22:49 +0000 Subject: [PATCH 45/50] remove mutable state from build() --- src/sources/http_scrape/scrape.rs | 4 ++++ src/sources/prometheus/scrape.rs | 16 +++++++++++----- src/sources/util/http_scrape.rs | 6 +++--- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 24d9192af7886..3f53da559cb83 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -201,6 +201,10 @@ fn enrich_events(events: &mut Vec) { } impl HttpScraper for HttpScrapeContext { + fn build(self, _uri: &Uri) -> HttpScrapeContext { + self + } + /// Decodes the HTTP response body into events per the decoder configured. 
fn on_response( &mut self, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index 84753fcd92897..e6ee074004e27 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -241,9 +241,9 @@ struct PrometheusScrapeContext { } impl HttpScraper for PrometheusScrapeContext { - /// Builds the instance info and endpoint info for the current request - fn build(&mut self, url: &Uri) { - self.instance_info = self.instance_tag.as_ref().map(|tag| { + /// Expands the context with the instance info and endpoint info for the current request + fn build(self, url: &Uri) -> PrometheusScrapeContext { + let instance_info = self.instance_tag.as_ref().map(|tag| { let instance = format!( "{}:{}", url.host().unwrap_or_default(), @@ -259,12 +259,18 @@ impl HttpScraper for PrometheusScrapeContext { honor_label: self.honor_labels, } }); - - self.endpoint_info = self.endpoint_tag.as_ref().map(|tag| EndpointInfo { + let endpoint_info = self.endpoint_tag.as_ref().map(|tag| EndpointInfo { tag: tag.to_string(), endpoint: url.to_string(), honor_label: self.honor_labels, }); + PrometheusScrapeContext { + honor_labels: self.honor_labels, + instance_tag: self.instance_tag, + endpoint_tag: self.endpoint_tag, + instance_info, + endpoint_info, + } } /// Parses the Prometheus HTTP response into metric events diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index 39fd669c826cc..10d101ce95013 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -51,8 +51,8 @@ pub(crate) const fn default_scrape_interval_secs() -> u64 { /// Methods that allow context-specific behavior during the scraping procedure. pub(crate) trait HttpScraper { - /// (Optional) Called before the HTTP request is made, allows building context. - fn build(&mut self, _url: &Uri) {} + /// Called before the HTTP request is made, allows expanding context. + fn build(self, url: &Uri) -> Self; /// Called after the HTTP request succeeds and returns the decoded/parsed Event array. 
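The trait change above turns the hook from a &mut self mutation into a by-value step, fn build(self, url: &Uri) -> Self, so per-request state is produced by returning a new context rather than mutating shared state after the clone. A toy sketch of that shape, with made-up types unrelated to the real trait:

// Illustration only: a consuming hook that returns the expanded context.
trait Expand {
    fn build(self, url: &str) -> Self;
}

#[derive(Clone, Debug)]
struct Ctx {
    endpoint: Option<String>,
}

impl Expand for Ctx {
    fn build(self, url: &str) -> Self {
        Ctx {
            endpoint: Some(url.to_string()),
        }
    }
}

fn main() {
    let base = Ctx { endpoint: None };
    // Each request clones the shared context and expands its own copy.
    let per_request = base.clone().build("http://127.0.0.1:9898/logs");
    assert_eq!(
        per_request.endpoint.as_deref(),
        Some("http://127.0.0.1:9898/logs")
    );
}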
fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>; @@ -111,7 +111,7 @@ pub(crate) async fn http_scrape( let endpoint = url.to_string(); let mut context = context.clone(); - context.build(&url); + context = context.build(&url); let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); From 080ae439115f790ee83bda1c2d9dc55870033f0a Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Mon, 15 Aug 2022 16:55:18 +0000 Subject: [PATCH 46/50] allow user to set ACCEPT header --- src/sources/http_scrape/tests.rs | 26 ++++++++++++++++++++++++++ src/sources/util/http_scrape.rs | 9 +++++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index f0333f83af665..0e321c5a94aaf 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs @@ -210,3 +210,29 @@ async fn headers_applied() { }) .await; } + +/// ACCEPT HTTP request headers configured by the user should take precedence +#[tokio::test] +async fn accept_header_override() { + let in_addr = next_addr(); + + // (The Bytes decoder will default to text/plain encoding) + let dummy_endpoint = warp::path!("endpoint") + .and(warp::header::exact("Accept", "application/json")) + .map(|| r#"{"data" : "foo"}"#); + + tokio::spawn(warp::serve(dummy_endpoint).run(in_addr)); + wait_for_tcp(in_addr).await; + + run_compliance(HttpScrapeConfig { + endpoint: format!("http://{}/endpoint", in_addr), + scrape_interval_secs: INTERVAL_SECS, + query: HashMap::new(), + decoding: DeserializerConfig::Bytes, + framing: default_framing_message_based(), + headers: HashMap::from([("ACCEPT".to_string(), vec!["application/json".to_string()])]), + auth: None, + tls: None, + }) + .await; +} diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index 10d101ce95013..4714345f41e9f 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -113,15 +113,20 @@ pub(crate) async fn http_scrape( let mut context = context.clone(); context = context.build(&url); - let mut builder = Request::get(&url).header(http::header::ACCEPT, &inputs.content_type); + let mut builder = Request::get(&url); - // add user supplied headers + // add user specified headers for (header, values) in &inputs.headers { for value in values { builder = builder.header(header, value); } } + // set ACCEPT header if not user specified + if !inputs.headers.contains_key(http::header::ACCEPT.as_str()) { + builder = builder.header(http::header::ACCEPT, &inputs.content_type); + } + // building an empty request should be infallible let mut request = builder.body(Body::empty()).expect("error creating request"); From 29f763a1014582a08bcd65540475f5eff3a7893f Mon Sep 17 00:00:00 2001 From: Kyle Criddle Date: Tue, 16 Aug 2022 19:31:20 +0000 Subject: [PATCH 47/50] add enabled_by_scheme --- website/cue/reference/components/sources/http_scrape.cue | 1 + 1 file changed, 1 insertion(+) diff --git a/website/cue/reference/components/sources/http_scrape.cue b/website/cue/reference/components/sources/http_scrape.cue index f0c33ddcbf3d8..715dc21f2478c 100644 --- a/website/cue/reference/components/sources/http_scrape.cue +++ b/website/cue/reference/components/sources/http_scrape.cue @@ -36,6 +36,7 @@ components: sources: http_scrape: { can_verify_certificate: true can_verify_hostname: true enabled_default: false + enabled_by_scheme: false } } multiline: enabled: false From 1193263ece1ceae83a62d613a07856568f2b6518 Mon Sep 17 00:00:00 2001 
From: kyle criddle Date: Mon, 22 Aug 2022 10:20:59 -0600 Subject: [PATCH 48/50] add log_namespace support --- src/sources/http_scrape/integration_tests.rs | 11 +++ src/sources/http_scrape/scrape.rs | 74 +++++++++++++------- src/sources/http_scrape/tests.rs | 6 ++ 3 files changed, 67 insertions(+), 24 deletions(-) diff --git a/src/sources/http_scrape/integration_tests.rs b/src/sources/http_scrape/integration_tests.rs index 1f47955ee2070..8e6a7ab18d84d 100644 --- a/src/sources/http_scrape/integration_tests.rs +++ b/src/sources/http_scrape/integration_tests.rs @@ -59,6 +59,7 @@ async fn invalid_endpoint() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; } @@ -75,6 +76,7 @@ async fn scraped_logs_bytes() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; // panics if not log event @@ -94,6 +96,7 @@ async fn scraped_logs_json() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; // panics if not log event @@ -113,6 +116,7 @@ async fn scraped_metrics_native_json() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; @@ -136,6 +140,7 @@ async fn scraped_trace_native_json() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; @@ -155,6 +160,7 @@ async fn unauthorized_no_auth() { headers: HashMap::new(), auth: None, tls: None, + log_namespace: None, }) .await; } @@ -174,6 +180,7 @@ async fn unauthorized_wrong_auth() { user: "white_rabbit".to_string(), password: "morpheus".to_string(), }), + log_namespace: None, }) .await; } @@ -193,6 +200,7 @@ async fn authorized() { user: "user".to_string(), password: "pass".to_string(), }), + log_namespace: None, }) .await; } @@ -212,6 +220,7 @@ async fn tls_invalid_ca() { ..Default::default() }), auth: None, + log_namespace: None, }) .await; } @@ -231,6 +240,7 @@ async fn tls_valid() { ..Default::default() }), auth: None, + log_namespace: None, }) .await; } @@ -248,6 +258,7 @@ async fn shutdown() { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, }; // build the context for the source and get a SourceShutdownCoordinator to signal with diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 3f53da559cb83..42416869773a5 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -76,6 +76,10 @@ pub struct HttpScrapeConfig { /// HTTP Authentication. #[configurable(derived)] pub auth: Option, + + /// The namespace to use for logs. 
This overrides the global setting + #[serde(default)] + pub log_namespace: Option, } impl Default for HttpScrapeConfig { @@ -89,6 +93,7 @@ impl Default for HttpScrapeConfig { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, } } } @@ -113,18 +118,19 @@ impl SourceConfig for HttpScrapeConfig { let tls = TlsSettings::from_options(&self.tls)?; + let log_namespace = cx.log_namespace(self.log_namespace); + // build the decoder - let decoder = DecodingConfig::new( - self.framing.clone(), - self.decoding.clone(), - LogNamespace::Vector, - ) - .build(); + let decoder = + DecodingConfig::new(self.framing.clone(), self.decoding.clone(), log_namespace).build(); let content_type = self.decoding.content_type(&self.framing).to_string(); // the only specific context needed is the codec decoding - let context = HttpScrapeContext { decoder }; + let context = HttpScrapeContext { + decoder, + log_namespace, + }; let inputs = GenericHttpScrapeInputs { urls, @@ -140,8 +146,17 @@ impl SourceConfig for HttpScrapeConfig { Ok(http_scrape(inputs, context, cx.out).boxed()) } - fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { - vec![Output::default(self.decoding.output_type())] + fn outputs(&self, global_log_namespace: LogNamespace) -> Vec { + // There is a global and per-source `log_namespace` config. The source config overrides the global setting, + // and is merged here. + let log_namespace = global_log_namespace.merge(self.log_namespace); + + let schema_definition = self + .decoding + .schema_definition(log_namespace) + .with_standard_vector_source_metadata(); + + vec![Output::default(self.decoding.output_type()).with_schema_definition(schema_definition)] } fn source_type(&self) -> &'static str { @@ -156,6 +171,7 @@ impl SourceConfig for HttpScrapeConfig { #[derive(Clone)] struct HttpScrapeContext { decoder: Decoder, + log_namespace: LogNamespace, } impl HttpScrapeContext { @@ -180,21 +196,31 @@ impl HttpScrapeContext { } events } -} -/// Enriches events with source_type, timestamp -fn enrich_events(events: &mut Vec) { - for event in events { - match event { - Event::Log(ref mut log) => { - log.try_insert(log_schema().source_type_key(), Bytes::from(NAME)); - log.try_insert(log_schema().timestamp_key(), Utc::now()); - } - Event::Metric(ref mut metric) => { - metric.insert_tag(log_schema().source_type_key().to_string(), NAME.to_string()); - } - Event::Trace(ref mut trace) => { - trace.insert(log_schema().source_type_key(), Bytes::from(NAME)); + /// Enriches events with source_type, timestamp + fn enrich_events(&self, events: &mut Vec) { + for event in events { + match event { + Event::Log(ref mut log) => { + self.log_namespace.insert_vector_metadata( + log, + log_schema().source_type_key(), + "source_type", + NAME, + ); + self.log_namespace.insert_vector_metadata( + log, + log_schema().timestamp_key(), + "ingest_timestamp", + Utc::now(), + ); + } + Event::Metric(ref mut metric) => { + metric.insert_tag(log_schema().source_type_key().to_string(), NAME.to_string()); + } + Event::Trace(ref mut trace) => { + trace.insert(log_schema().source_type_key(), Bytes::from(NAME)); + } } } } @@ -219,7 +245,7 @@ impl HttpScraper for HttpScrapeContext { // decode and enrich let mut events = self.decode_events(&mut buf); - enrich_events(&mut events); + self.enrich_events(&mut events); Some(events) } diff --git a/src/sources/http_scrape/tests.rs b/src/sources/http_scrape/tests.rs index 0e321c5a94aaf..e2bfd162f45b8 100644 --- a/src/sources/http_scrape/tests.rs +++ b/src/sources/http_scrape/tests.rs 
@@ -54,6 +54,7 @@ async fn bytes_decoding() { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, }) .await; } @@ -82,6 +83,7 @@ async fn json_decoding_newline_delimited() { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, }) .await; } @@ -113,6 +115,7 @@ async fn json_decoding_character_delimited() { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, }) .await; } @@ -144,6 +147,7 @@ async fn request_query_applied() { headers: HashMap::new(), tls: None, auth: None, + log_namespace: None, }) .await; @@ -207,6 +211,7 @@ async fn headers_applied() { )]), auth: None, tls: None, + log_namespace: None, }) .await; } @@ -233,6 +238,7 @@ async fn accept_header_override() { headers: HashMap::from([("ACCEPT".to_string(), vec!["application/json".to_string()])]), auth: None, tls: None, + log_namespace: None, }) .await; } From f76a54c53e8c314540cbcac8068d9ff3747d7f90 Mon Sep 17 00:00:00 2001 From: kyle criddle Date: Wed, 24 Aug 2022 13:58:09 -0600 Subject: [PATCH 49/50] mutable state (take 2) --- src/sources/http_scrape/scrape.rs | 13 ++++++++++--- src/sources/prometheus/scrape.rs | 32 +++++++++++++++++-------------- src/sources/util/http_scrape.rs | 25 ++++++++++++++++-------- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs index 42416869773a5..ee06038055290 100644 --- a/src/sources/http_scrape/scrape.rs +++ b/src/sources/http_scrape/scrape.rs @@ -17,7 +17,8 @@ use crate::{ serde::default_framing_message_based, sources, sources::util::http_scrape::{ - build_url, default_scrape_interval_secs, http_scrape, GenericHttpScrapeInputs, HttpScraper, + build_url, default_scrape_interval_secs, http_scrape, GenericHttpScrapeInputs, + HttpScraperBuilder, HttpScraperContext, }, tls::{TlsConfig, TlsSettings}, Result, @@ -168,6 +169,7 @@ impl SourceConfig for HttpScrapeConfig { } } +/// Captures the configuration options required to decode the incoming requests into events. #[derive(Clone)] struct HttpScrapeContext { decoder: Decoder, @@ -226,11 +228,16 @@ impl HttpScrapeContext { } } -impl HttpScraper for HttpScrapeContext { - fn build(self, _uri: &Uri) -> HttpScrapeContext { +impl HttpScraperBuilder for HttpScrapeContext { + type Context = HttpScrapeContext; + + /// No additional context from request data is needed from this particular scraper. + fn build(self, _uri: &Uri) -> Self::Context { self } +} +impl HttpScraperContext for HttpScrapeContext { /// Decodes the HTTP response body into events per the decoder configured. 
fn on_response( &mut self, diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs index e6ee074004e27..53580a7c33c8e 100644 --- a/src/sources/prometheus/scrape.rs +++ b/src/sources/prometheus/scrape.rs @@ -17,7 +17,7 @@ use crate::{ self, util::http_scrape::{ build_url, default_scrape_interval_secs, http_scrape, GenericHttpScrapeInputs, - HttpScraper, + HttpScraperBuilder, HttpScraperContext, }, }, tls::{TlsConfig, TlsSettings}, @@ -124,12 +124,10 @@ impl SourceConfig for PrometheusScrapeConfig { .collect::, sources::BuildError>>()?; let tls = TlsSettings::from_options(&self.tls)?; - let context = PrometheusScrapeContext { + let builder = PrometheusScrapeBuilder { honor_labels: self.honor_labels, instance_tag: self.instance_tag.clone(), endpoint_tag: self.endpoint_tag.clone(), - instance_info: None, - endpoint_info: None, }; let inputs = GenericHttpScrapeInputs { @@ -143,7 +141,7 @@ impl SourceConfig for PrometheusScrapeConfig { shutdown: cx.shutdown, }; - Ok(http_scrape(inputs, context, cx.out).boxed()) + Ok(http_scrape(inputs, builder, cx.out).boxed()) } fn outputs(&self, _global_log_namespace: LogNamespace) -> Vec { @@ -231,18 +229,19 @@ struct EndpointInfo { honor_label: bool, } +/// Captures the configuration options required to build request-specific context. #[derive(Clone)] -struct PrometheusScrapeContext { +struct PrometheusScrapeBuilder { honor_labels: bool, instance_tag: Option, endpoint_tag: Option, - instance_info: Option, - endpoint_info: Option, } -impl HttpScraper for PrometheusScrapeContext { - /// Expands the context with the instance info and endpoint info for the current request - fn build(self, url: &Uri) -> PrometheusScrapeContext { +impl HttpScraperBuilder for PrometheusScrapeBuilder { + type Context = PrometheusScrapeContext; + + /// Expands the context with the instance info and endpoint info for the current request. + fn build(self, url: &Uri) -> Self::Context { let instance_info = self.instance_tag.as_ref().map(|tag| { let instance = format!( "{}:{}", @@ -265,14 +264,19 @@ impl HttpScraper for PrometheusScrapeContext { honor_label: self.honor_labels, }); PrometheusScrapeContext { - honor_labels: self.honor_labels, - instance_tag: self.instance_tag, - endpoint_tag: self.endpoint_tag, instance_info, endpoint_info, } } +} + +/// Request-specific context required for decoding into events. +struct PrometheusScrapeContext { + instance_info: Option, + endpoint_info: Option, +} +impl HttpScraperContext for PrometheusScrapeContext { /// Parses the Prometheus HTTP response into metric events fn on_response(&mut self, url: &Uri, _header: &Parts, body: &Bytes) -> Option> { let body = String::from_utf8_lossy(body); diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs index 4714345f41e9f..87415d2f48cae 100644 --- a/src/sources/util/http_scrape.rs +++ b/src/sources/util/http_scrape.rs @@ -49,11 +49,17 @@ pub(crate) const fn default_scrape_interval_secs() -> u64 { 15 } -/// Methods that allow context-specific behavior during the scraping procedure. -pub(crate) trait HttpScraper { - /// Called before the HTTP request is made, allows expanding context. - fn build(self, url: &Uri) -> Self; +/// Builds the context, allowing the source-specific implementation to leverage data from the +/// config and the current HTTP request. +pub(crate) trait HttpScraperBuilder { + type Context: HttpScraperContext; + + /// Called before the HTTP request is made to build out the context. 
+    fn build(self, url: &Uri) -> Self::Context;
+}
 
 /// Methods that allow context-specific behavior during the scraping procedure.
 pub(crate) trait HttpScraperContext {
     /// Called after the HTTP request succeeds and returns the decoded/parsed Event array.
     fn on_response(&mut self, url: &Uri, header: &Parts, body: &Bytes) -> Option>;
 
@@ -92,9 +98,12 @@ pub(crate) fn build_url(uri: &Uri, query: &HashMap>) -> Uri
 /// - The HTTP request is built per the options in provided generic inputs.
 /// - The HTTP response is decoded/parsed into events by the specific context.
 /// - The events are then sent to the output stream.
-pub(crate) async fn http_scrape(
+pub(crate) async fn http_scrape<
+    B: HttpScraperBuilder + std::marker::Send + Clone,
+    C: HttpScraperContext + std::marker::Send,
+>(
     inputs: GenericHttpScrapeInputs,
-    context: H,
+    context_builder: B,
     mut out: SourceSender,
 ) -> Result<(), ()> {
     let mut stream = IntervalStream::new(tokio::time::interval(Duration::from_secs(
@@ -110,8 +119,8 @@ pub(crate) async fn http_scrape(
             .expect("Building HTTP client failed");
         let endpoint = url.to_string();
 
-        let mut context = context.clone();
-        context = context.build(&url);
+        let context_builder = context_builder.clone();
+        let mut context = context_builder.build(&url);
 
         let mut builder = Request::get(&url);

From 61792211b1253b8737018395e4559f962230e7ac Mon Sep 17 00:00:00 2001
From: kyle criddle
Date: Thu, 25 Aug 2022 13:40:39 -0600
Subject: [PATCH 50/50] optimize

---
 src/sources/http_scrape/scrape.rs | 4 ++--
 src/sources/prometheus/scrape.rs  | 2 +-
 src/sources/util/http_scrape.rs   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/sources/http_scrape/scrape.rs b/src/sources/http_scrape/scrape.rs
index 71966a1295cfc..08f00b32cbf10 100644
--- a/src/sources/http_scrape/scrape.rs
+++ b/src/sources/http_scrape/scrape.rs
@@ -223,8 +223,8 @@ impl HttpScraperBuilder for HttpScrapeContext {
     type Context = HttpScrapeContext;
 
     /// No additional context from request data is needed from this particular scraper.
-    fn build(self, _uri: &Uri) -> Self::Context {
-        self
+    fn build(&self, _uri: &Uri) -> Self::Context {
+        self.clone()
     }
 }
 
diff --git a/src/sources/prometheus/scrape.rs b/src/sources/prometheus/scrape.rs
index b6d874f7ad1fe..b1de6760b69d4 100644
--- a/src/sources/prometheus/scrape.rs
+++ b/src/sources/prometheus/scrape.rs
@@ -176,7 +176,7 @@ impl HttpScraperBuilder for PrometheusScrapeBuilder {
     type Context = PrometheusScrapeContext;
 
     /// Expands the context with the instance info and endpoint info for the current request.
-    fn build(self, url: &Uri) -> Self::Context {
+    fn build(&self, url: &Uri) -> Self::Context {
         let instance_info = self.instance_tag.as_ref().map(|tag| {
             let instance = format!(
                 "{}:{}",
 
diff --git a/src/sources/util/http_scrape.rs b/src/sources/util/http_scrape.rs
index 87415d2f48cae..6a59e698f0517 100644
--- a/src/sources/util/http_scrape.rs
+++ b/src/sources/util/http_scrape.rs
@@ -55,7 +55,7 @@ pub(crate) trait HttpScraperBuilder {
     type Context: HttpScraperContext;
 
     /// Called before the HTTP request is made to build out the context.
-    fn build(self, url: &Uri) -> Self::Context;
+    fn build(&self, url: &Uri) -> Self::Context;
 }
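
The last two patches settle on a builder/context split: a cloneable builder carries only the config-level options and, before each request, hands out a context that owns the request-specific state, with build taking &self so the shared builder is never consumed. Below is a minimal, self-contained sketch of that shape; the trait and method names follow the diffs above, but the String and byte-slice types (and the TagScrapeBuilder/TagScrapeContext example implementors) are simplified stand-ins for Vector's Uri, Parts, Bytes, and Event types, not the crate's real API.

// A minimal sketch of the builder/context split, with simplified types.

/// Built once from the source config and shared across scrape ticks.
trait HttpScraperBuilder {
    /// The per-request type produced by `build`.
    type Context: HttpScraperContext;

    /// Called before each HTTP request; takes `&self` (as in the final
    /// "optimize" commit) so the shared builder is never consumed.
    fn build(&self, url: &str) -> Self::Context;
}

/// Owns only the state needed to turn one response into events.
trait HttpScraperContext {
    fn on_response(&mut self, url: &str, body: &[u8]) -> Option<Vec<String>>;
}

/// Stands in for a Prometheus-style builder: config-level options only.
#[derive(Clone)]
struct TagScrapeBuilder {
    instance_tag: Option<String>,
}

/// Stands in for the matching context: request-specific info only.
struct TagScrapeContext {
    instance: Option<String>,
}

impl HttpScraperBuilder for TagScrapeBuilder {
    type Context = TagScrapeContext;

    fn build(&self, url: &str) -> Self::Context {
        // Request-specific data is computed here and moved into the context,
        // rather than being stored as resettable fields on the shared builder.
        TagScrapeContext {
            instance: self.instance_tag.as_ref().map(|tag| format!("{tag}={url}")),
        }
    }
}

impl HttpScraperContext for TagScrapeContext {
    fn on_response(&mut self, url: &str, body: &[u8]) -> Option<Vec<String>> {
        let text = String::from_utf8_lossy(body);
        let instance = self.instance.as_deref().unwrap_or("");
        // A real implementation would decode the body into Vector events here.
        Some(vec![format!("{url} {instance} {text}")])
    }
}

fn main() {
    let builder = TagScrapeBuilder {
        instance_tag: Some("instance".to_string()),
    };
    // One fresh context per request, built from the shared, cloneable builder.
    let mut context = builder.build("http://localhost:9090/metrics");
    let events = context.on_response("http://localhost:9090/metrics", b"up 1");
    println!("{events:?}");
}

Keeping per-request fields (such as the Prometheus instance and endpoint info) out of the shared builder is what removes the mutable state that the "mutable state (take 2)" commit refers to: every scrape tick gets a fresh context, and nothing stale survives between requests.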