Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(new source): A generic http_scrape source #13793

Merged
merged 58 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 55 commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
9f4f41a
take 1 - lifetime issue
neuronull Jul 26, 2022
12d9ea4
in a working state, refactored prom scrape
neuronull Jul 27, 2022
44db382
cleanup
neuronull Jul 27, 2022
6e2971d
more tests, cleanup, clippy
neuronull Jul 27, 2022
32e9cf4
extract http_scrape internal events
neuronull Jul 27, 2022
c4d42ad
documenting
neuronull Jul 27, 2022
702f1e2
todo
neuronull Jul 27, 2022
fff0cee
added request headers
neuronull Jul 28, 2022
f557d0e
starting integration test
neuronull Jul 28, 2022
a5366ec
fixed integration test configuration
neuronull Jul 29, 2022
c30256d
more int tests
neuronull Jul 29, 2022
d55aee1
remove unused file
neuronull Jul 29, 2022
d58abaa
todo
neuronull Jul 29, 2022
2e0ed24
incomplete int tests.. add external docs
neuronull Aug 1, 2022
f4cee38
remove redundant events
neuronull Aug 1, 2022
e6d6d1b
clippy / cleanup
neuronull Aug 1, 2022
36ca313
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 1, 2022
c1ad50f
fix cue fmt, enable on windows
neuronull Aug 1, 2022
09dbf43
fix one component bug
neuronull Aug 1, 2022
32acc20
feedback from sg, add more testing
neuronull Aug 2, 2022
36761c6
partial feedback from js & sg
neuronull Aug 2, 2022
3d6906e
move the get_content fn
neuronull Aug 3, 2022
815224b
trying more with tls
neuronull Aug 3, 2022
3f4dca3
use ca cert
neuronull Aug 3, 2022
8b2fdd5
tls test working
neuronull Aug 3, 2022
4f1184c
headers and shutdown int tests
neuronull Aug 3, 2022
18f7448
add missing target volumes to int tests
neuronull Aug 3, 2022
aaeac74
added error path integration tests
neuronull Aug 4, 2022
5f7c442
feedback from sg, fixed an issue with one of the int tests
neuronull Aug 5, 2022
bb43aa7
improve query cue examples
neuronull Aug 5, 2022
d6e8ed9
move invalid_endpoint to int
neuronull Aug 5, 2022
56e939d
relocate run_error
neuronull Aug 5, 2022
fe4dbd0
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 5, 2022
b9ccd00
multi header val support, increase test interval
neuronull Aug 8, 2022
6772cd0
int test timing tweaks
neuronull Aug 8, 2022
b9f512d
add wait_for_tcp
neuronull Aug 8, 2022
ae11fa3
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 8, 2022
a2a1ddb
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 8, 2022
9818da2
feedback from js and bg
neuronull Aug 9, 2022
45396a9
relocate common code to sources/util
neuronull Aug 9, 2022
a70ca33
update cue file
neuronull Aug 9, 2022
951f8d2
fix flags
neuronull Aug 9, 2022
f0b0a26
default
neuronull Aug 9, 2022
1e806ae
default2
neuronull Aug 9, 2022
074489d
default3
neuronull Aug 9, 2022
6d1b5e2
change to build_url
neuronull Aug 15, 2022
0ba6276
match -> map
neuronull Aug 15, 2022
b3c33f9
remove new()
neuronull Aug 15, 2022
be77842
remove mutable state from build()
neuronull Aug 15, 2022
080ae43
allow user to set ACCEPT header
neuronull Aug 15, 2022
3989c55
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 16, 2022
29f763a
add enabled_by_scheme
neuronull Aug 16, 2022
1193263
add log_namespace support
neuronull Aug 22, 2022
f76a54c
mutable state (take 2)
neuronull Aug 24, 2022
75bec10
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 24, 2022
6179221
optimize
neuronull Aug 25, 2022
db122fd
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 25, 2022
33c2364
Merge branch 'master' into neuronull/source_http_scrape
neuronull Aug 26, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ jobs:
- test: 'fluent'
- test: 'gcp'
- test: 'humio'
- test: 'http-scrape'
- test: 'influxdb'
- test: 'kafka'
- test: 'logstash'
Expand Down
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ sources-logs = [
"sources-gcp_pubsub",
"sources-heroku_logs",
"sources-http",
"sources-http_scrape",
"sources-internal_logs",
"sources-journald",
"sources-kafka",
Expand Down Expand Up @@ -503,6 +504,7 @@ sources-gcp_pubsub = ["gcp", "dep:h2", "dep:prost-types", "protobuf-build", "dep
sources-heroku_logs = ["sources-utils-http", "sources-utils-http-query", "sources-http"]
sources-host_metrics = ["dep:heim"]
sources-http = ["sources-utils-http", "sources-utils-http-query"]
sources-http_scrape = ["sources-utils-http-scrape"]
sources-internal_logs = []
sources-internal_metrics = []
sources-journald = []
Expand All @@ -514,7 +516,7 @@ sources-nats = ["dep:nats", "dep:nkeys"]
sources-nginx_metrics = ["dep:nom"]
sources-opentelemetry = ["dep:hex", "dep:opentelemetry-proto", "dep:prost-types", "sources-http", "sources-utils-http", "sources-vector"]
sources-postgresql_metrics = ["dep:postgres-openssl", "dep:tokio-postgres"]
sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-http", "sources-utils-http"]
sources-prometheus = ["dep:prometheus-parser", "sinks-prometheus", "sources-utils-http-scrape"]
sources-redis= ["dep:redis"]
sources-socket = ["listenfd", "tokio-util/net", "sources-utils-udp", "sources-utils-tcp-keepalive", "sources-utils-tcp-socket", "sources-utils-tls", "sources-utils-unix"]
sources-splunk_hec = ["sources-utils-tls", "dep:roaring"]
Expand All @@ -527,6 +529,7 @@ sources-utils-http-encoding = ["dep:snap", "sources-utils-http-error"]
sources-utils-http-error = []
sources-utils-http-prelude = ["sources-utils-http", "sources-utils-tls", "sources-utils-http-auth", "sources-utils-http-encoding", "sources-utils-http-error"]
sources-utils-http-query = []
sources-utils-http-scrape = ["sources-utils-http", "sources-http"]
sources-utils-tcp-keepalive = []
sources-utils-tcp-socket = []
sources-utils-tls = []
Expand Down Expand Up @@ -715,6 +718,7 @@ all-integration-tests = [
"gcp-cloud-storage-integration-tests",
"gcp-integration-tests",
"gcp-pubsub-integration-tests",
"http-scrape-integration-tests",
"humio-integration-tests",
"influxdb-integration-tests",
"kafka-integration-tests",
Expand Down Expand Up @@ -772,6 +776,7 @@ gcp-cloud-storage-integration-tests = ["sinks-gcp"]
gcp-integration-tests = ["sinks-gcp"]
gcp-pubsub-integration-tests = ["sinks-gcp", "sources-gcp_pubsub"]
humio-integration-tests = ["sinks-humio"]
http-scrape-integration-tests = ["sources-http_scrape"]
influxdb-integration-tests = ["sinks-influxdb"]
kafka-integration-tests = ["sinks-kafka", "sources-kafka"]
logstash-integration-tests = ["docker", "sources-logstash"]
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,7 @@ test-enterprise: ## Runs enterprise related behavioral tests
test-integration: ## Runs all integration tests
test-integration: test-integration-apex test-integration-aws test-integration-axiom test-integration-azure test-integration-clickhouse test-integration-docker-logs test-integration-elasticsearch
test-integration: test-integration-azure test-integration-clickhouse test-integration-docker-logs test-integration-elasticsearch
test-integration: test-integration-eventstoredb test-integration-fluent test-integration-gcp test-integration-humio test-integration-influxdb
test-integration: test-integration-eventstoredb test-integration-fluent test-integration-gcp test-integration-humio test-integration-http-scrape test-integration-influxdb
test-integration: test-integration-kafka test-integration-logstash test-integration-loki test-integration-mongodb test-integration-nats
test-integration: test-integration-nginx test-integration-opentelemetry test-integration-postgres test-integration-prometheus test-integration-pulsar
test-integration: test-integration-redis test-integration-splunk test-integration-dnstap test-integration-datadog-agent test-integration-datadog-logs
Expand Down
8 changes: 4 additions & 4 deletions lib/codecs/src/decoding/framing/character_delimited.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ impl CharacterDelimitedDecoderConfig {
pub struct CharacterDelimitedDecoderOptions {
/// The character that delimits byte sequences.
#[serde(with = "vector_core::serde::ascii_char")]
delimiter: u8,
pub delimiter: u8,
/// The maximum length of the byte buffer.
///
/// This length does *not* include the trailing delimiter.
#[serde(skip_serializing_if = "vector_core::serde::skip_serializing_if_default")]
max_length: Option<usize>,
pub max_length: Option<usize>,
}

impl CharacterDelimitedDecoderOptions {
Expand All @@ -56,9 +56,9 @@ impl CharacterDelimitedDecoderOptions {
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct CharacterDelimitedDecoder {
/// The delimiter used to separate byte sequences.
delimiter: u8,
pub delimiter: u8,
/// The maximum length of the byte buffer.
max_length: usize,
pub max_length: usize,
}

impl CharacterDelimitedDecoder {
Expand Down
32 changes: 32 additions & 0 deletions lib/codecs/src/decoding/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,38 @@ impl DeserializerConfig {
DeserializerConfig::Gelf => GelfDeserializerConfig.schema_definition(log_namespace),
}
}

/// Get the HTTP content type.
///
/// Maps the (deserializer, framer) pair to the `Content-Type` a scraping
/// source should send in its `Accept` header / expect from a server.
pub const fn content_type(&self, framer: &FramingConfig) -> &'static str {
match (&self, framer) {
// Newline-delimited JSON-ish payloads.
(
DeserializerConfig::Json | DeserializerConfig::NativeJson,
FramingConfig::NewlineDelimited { .. },
) => "application/x-ndjson",
// Comma-delimited framing is treated as plain JSON, but ONLY for the
// exact options shape `delimiter: b',', max_length: Some(usize::MAX)`.
// NOTE(review): any other `max_length` (or delimiter) falls through to
// the `"text/plain"` arm below — confirm this exact-match is intended
// and matches how the comma-delimited default is constructed.
(
DeserializerConfig::Gelf
| DeserializerConfig::Json
| DeserializerConfig::NativeJson,
FramingConfig::CharacterDelimited {
character_delimited:
CharacterDelimitedDecoderOptions {
delimiter: b',',
max_length: Some(usize::MAX),
},
},
) => "application/json",
// Vector's native protobuf encoding is an opaque byte stream.
(DeserializerConfig::Native, _) => "application/octet-stream",
// Fallback for all remaining framings of text-capable deserializers.
// Arm order matters: Json/NativeJson/Gelf only reach here when the
// more specific arms above did not match.
(
DeserializerConfig::Json
| DeserializerConfig::NativeJson
| DeserializerConfig::Bytes
| DeserializerConfig::Gelf,
_,
) => "text/plain",
#[cfg(feature = "syslog")]
(DeserializerConfig::Syslog, _) => "text/plain",
}
}
}

/// Parse structured events from bytes.
Expand Down
2 changes: 2 additions & 0 deletions scripts/integration/docker-compose.axiom.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,12 @@ services:
- axiom-db
volumes:
- ${PWD}:/code
- target:/code/target
jszwedko marked this conversation as resolved.
Show resolved Hide resolved
- cargogit:/usr/local/cargo/git
- cargoregistry:/usr/local/cargo/registry

volumes:
target: {}
cargogit: {}
cargoregistry: {}
postgres_data: {}
2 changes: 2 additions & 0 deletions scripts/integration/docker-compose.chronicle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ services:
- CHRONICLE_ADDRESS=http://chronicle-emulator:3000
volumes:
- ${PWD}:/code
- target:/code/target
jszwedko marked this conversation as resolved.
Show resolved Hide resolved
- cargogit:/usr/local/cargo/git
- cargoregistry:/usr/local/cargo/registry
- ${PWD}/scripts/integration/chronicleauth.json:/chronicleauth.json
- ${PWD}/scripts/integration/invalidchronicleauth.json:/invalidchronicleauth.json

volumes:
target: {}
cargogit: {}
cargoregistry: {}
81 changes: 81 additions & 0 deletions scripts/integration/docker-compose.http-scrape.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
version: "3"

services:
# https://github.com/sigoden/dufs
# serves static files at an HTTP endpoint
dufs:
image: docker.io/sigoden/dufs:latest
networks:
- backend
command:
- "/data"
volumes:
- ${PWD}/tests/data/http-scrape/serve:/data
# To validate Basic HTTP authentication option
dufs-auth:
image: docker.io/sigoden/dufs:latest
networks:
- backend
command:
- "-a"
- "/@user:pass"
- "--auth-method"
- "basic"
- "/data"
volumes:
- ${PWD}/tests/data/http-scrape/serve:/data
# To validate TLS options
dufs-https:
image: docker.io/sigoden/dufs:latest
networks:
- backend
command:
- "--tls-cert"
- "/certs/ca.cert.pem"
- "--tls-key"
- "/certs/ca.key.pem"
- "/data"
volumes:
- ${PWD}/tests/data/http-scrape/serve:/data
- ${PWD}/tests/data/ca/intermediate_server/certs/dufs-https-chain.cert.pem:/certs/ca.cert.pem
- ${PWD}/tests/data/ca/intermediate_server/private/dufs-https.key.pem:/certs/ca.key.pem
runner:
build:
context: ${PWD}
dockerfile: scripts/integration/Dockerfile
args:
- RUST_VERSION=${RUST_VERSION}
working_dir: /code
networks:
- backend
command:
- "cargo"
- "nextest"
- "run"
- "--no-fail-fast"
- "--no-default-features"
- "--features"
- "http-scrape-integration-tests"
- "--lib"
- "sources::http_scrape::"
depends_on:
- dufs
- dufs-auth
- dufs-https
environment:
- DUFS_ADDRESS=http://dufs:5000
- DUFS_AUTH_ADDRESS=http://dufs-auth:5000
- DUFS_HTTPS_ADDRESS=https://dufs-https:5000
volumes:
- ${PWD}:/code
neuronull marked this conversation as resolved.
Show resolved Hide resolved
- target:/code/target
jszwedko marked this conversation as resolved.
Show resolved Hide resolved
- cargogit:/usr/local/cargo/git
- cargoregistry:/usr/local/cargo/registry

networks:
backend: {}

volumes:
target: {}
cargogit: {}
cargoregistry: {}
2 changes: 1 addition & 1 deletion src/codecs/encoding/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ impl Encoder<Framer> {
}

/// Get the HTTP content type.
pub const fn content_type(&self) -> &str {
pub const fn content_type(&self) -> &'static str {
match (&self.serializer, &self.framer) {
(Serializer::Json(_) | Serializer::NativeJson(_), Framer::NewlineDelimited(_)) => {
"application/x-ndjson"
Expand Down
90 changes: 90 additions & 0 deletions src/internal_events/http_scrape.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
use metrics::counter;
use vector_core::internal_event::InternalEvent;

use super::prelude::{error_stage, error_type, http_error_code};

/// Emitted when a scrape response has been decoded into events.
///
/// Defect fixed: the struct body contained stray review-UI text lines
/// ("neuronull marked this conversation as resolved." / "Show resolved
/// Hide resolved") left over from a page scrape, which is not valid Rust.
#[derive(Debug)]
pub struct HttpScrapeEventsReceived {
    /// Total byte size of the decoded events.
    pub byte_size: usize,
    /// Number of events decoded from the response.
    pub count: usize,
    /// The URL that was scraped; used as the `uri` metric tag.
    pub url: String,
}

impl InternalEvent for HttpScrapeEventsReceived {
fn emit(self) {
trace!(
message = "Events received.",
count = %self.count,
byte_size = %self.byte_size,
url = %self.url,
);
counter!(
"component_received_events_total", self.count as u64,
"uri" => self.url.clone(),
);
counter!(
"component_received_event_bytes_total", self.byte_size as u64,
"uri" => self.url.clone(),
);
// deprecated
counter!(
"events_in_total", self.count as u64,
"uri" => self.url,
);
}
}

/// Emitted when the scraped endpoint returns a non-success HTTP status.
#[derive(Debug)]
pub struct HttpScrapeHttpResponseError {
// The HTTP status code returned by the endpoint.
pub code: hyper::StatusCode,
// The URL that was scraped; used as the `url` metric tag.
pub url: String,
}

impl InternalEvent for HttpScrapeHttpResponseError {
fn emit(self) {
error!(
message = "HTTP error response.",
url = %self.url,
stage = error_stage::RECEIVING,
error_type = error_type::REQUEST_FAILED,
error_code = %http_error_code(self.code.as_u16()),
internal_log_rate_secs = 10,
);
counter!(
"component_errors_total", 1,
"url" => self.url,
"stage" => error_stage::RECEIVING,
"error_type" => error_type::REQUEST_FAILED,
"error_code" => http_error_code(self.code.as_u16()),
);
// deprecated
counter!("http_error_response_total", 1);
}
}

/// Emitted when the HTTP request itself fails (connection, TLS, etc.),
/// as opposed to an error status returned by the server.
#[derive(Debug)]
pub struct HttpScrapeHttpError {
// The underlying request error.
pub error: crate::Error,
// The URL that was being scraped; used as the `url` metric tag.
pub url: String,
}

impl InternalEvent for HttpScrapeHttpError {
    /// Logs the request failure (rate limited) and bumps the error counters.
    fn emit(self) {
        let Self { error, url } = self;

        error!(
            message = "HTTP request processing error.",
            url = %url,
            error = ?error,
            error_type = error_type::REQUEST_FAILED,
            stage = error_stage::RECEIVING,
            internal_log_rate_secs = 10,
        );
        counter!(
            "component_errors_total", 1,
            "url" => url,
            "error_type" => error_type::REQUEST_FAILED,
            "stage" => error_stage::RECEIVING,
        );
        // deprecated
        counter!("http_request_errors_total", 1);
    }
}
4 changes: 4 additions & 0 deletions src/internal_events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ mod geoip;
mod heartbeat;
mod http;
pub mod http_client;
#[cfg(feature = "sources-utils-http-scrape")]
mod http_scrape;
#[cfg(feature = "sources-internal_logs")]
mod internal_logs;
#[cfg(all(unix, feature = "sources-journald"))]
Expand Down Expand Up @@ -187,6 +189,8 @@ pub(crate) use self::geoip::*;
feature = "sources-splunk_hec",
))]
pub(crate) use self::http::*;
#[cfg(feature = "sources-utils-http-scrape")]
pub(crate) use self::http_scrape::*;
#[cfg(feature = "sources-internal_logs")]
pub(crate) use self::internal_logs::*;
#[cfg(all(unix, feature = "sources-journald"))]
Expand Down
3 changes: 1 addition & 2 deletions src/internal_events/prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,10 @@ pub mod error_type {
#[cfg(any(
feature = "sinks-azure_blob",
feature = "sinks-elasticsearch",
feature = "sinks-prometheus",
spencergilbert marked this conversation as resolved.
Show resolved Hide resolved
feature = "sources-apache_metrics",
feature = "sources-aws_ecs_metrics",
feature = "sources-aws_kinesis_firehose",
feature = "sources-prometheus",
feature = "sources-http-scrape",
feature = "sources-utils-http",
))]
pub(crate) fn http_error_code(code: u16) -> String {
Expand Down
Loading