From 0b6a8f2ff1e4819d2b5b5f1578fa9631197d1e58 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 00:17:34 -0700 Subject: [PATCH 01/17] S3 Output plugin changes for pen test Signed-off-by: Wesley Pettit --- CMakeLists.txt | 2 + include/fluent-bit/flb_aws_credentials.h | 2 - include/fluent-bit/flb_aws_util.h | 27 +- include/fluent-bit/flb_s3_local_buffer.h | 76 ++ include/fluent-bit/flb_signv4.h | 7 + plugins/CMakeLists.txt | 1 + plugins/out_es/es.c | 1 + plugins/out_s3/CMakeLists.txt | 5 + plugins/out_s3/s3.c | 1141 ++++++++++++++++++ plugins/out_s3/s3.h | 118 ++ plugins/out_s3/s3_multipart.c | 356 ++++++ src/CMakeLists.txt | 1 + src/aws/flb_aws_credentials.c | 49 - src/aws/flb_aws_util.c | 365 +++++- src/aws/flb_s3_local_buffer.c | 417 +++++++ src/flb_signv4.c | 92 +- tests/internal/aws_s3_local_buffer.c | 130 ++ tests/internal/aws_util.c | 113 ++ tests/internal/data/s3_local_buffer/.gitkeep | 1 + tests/internal/signv4.c | 1 + 20 files changed, 2829 insertions(+), 76 deletions(-) create mode 100644 include/fluent-bit/flb_s3_local_buffer.h create mode 100644 plugins/out_s3/CMakeLists.txt create mode 100644 plugins/out_s3/s3.c create mode 100644 plugins/out_s3/s3.h create mode 100644 plugins/out_s3/s3_multipart.c create mode 100644 src/aws/flb_s3_local_buffer.c create mode 100644 tests/internal/aws_s3_local_buffer.c create mode 100644 tests/internal/data/s3_local_buffer/.gitkeep diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ffc5a161fa..30a413105fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,6 +174,7 @@ option(FLB_OUT_LOGDNA "Enable LogDNA output plugin" Yes) option(FLB_OUT_KAFKA "Enable Kafka output plugin" No) option(FLB_OUT_KAFKA_REST "Enable Kafka Rest output plugin" Yes) option(FLB_OUT_CLOUDWATCH_LOGS "Enable AWS CloudWatch output plugin" Yes) +option(FLB_OUT_S3 "Enable AWS S3 output plugin" Yes) option(FLB_FILTER_ALTER_SIZE "Enable alter_size filter" Yes) option(FLB_FILTER_AWS "Enable aws filter" Yes) option(FLB_FILTER_EXPECT "Enable expect filter" Yes) @@ -224,6 +225,7 @@ if(FLB_ALL) set(FLB_OUT_RETRY 1) set(FLB_OUT_TD 1) set(FLB_OUT_STDOUT 1) + set(FLB_OUT_S3 1) set(FLB_OUT_SYSLOG 1) set(FLB_OUT_LIB 1) set(FLB_OUT_FLOWCOUNTER 1) diff --git a/include/fluent-bit/flb_aws_credentials.h b/include/fluent-bit/flb_aws_credentials.h index fc576ea37db..909c1dd1b00 100644 --- a/include/fluent-bit/flb_aws_credentials.h +++ b/include/fluent-bit/flb_aws_credentials.h @@ -229,8 +229,6 @@ struct flb_aws_provider *flb_profile_provider_create(); time_t flb_aws_cred_expiration(const char* timestamp); -int flb_read_file(const char *path, char **out_buf, size_t *out_size); - struct flb_aws_credentials *flb_parse_sts_resp(char *response, time_t *expiration); flb_sds_t flb_sts_uri(char *action, char *role_arn, char *session_name, diff --git a/include/fluent-bit/flb_aws_util.h b/include/fluent-bit/flb_aws_util.h index a4a2ddbf2b0..bddf01f034b 100644 --- a/include/fluent-bit/flb_aws_util.h +++ b/include/fluent-bit/flb_aws_util.h @@ -25,9 +25,6 @@ #define FLB_AWS_UTIL_H -#define AWS_SERVICE_ENDPOINT_FORMAT "%s.%s.amazonaws.com" -#define AWS_SERVICE_ENDPOINT_BASE_LEN 15 - #define FLB_AWS_CREDENTIAL_REFRESH_LIMIT 60 /* @@ -71,6 +68,7 @@ struct flb_aws_client { /* Sigv4 */ int has_auth; + int s3_mode; struct flb_aws_provider *provider; char *region; char *service; @@ -129,24 +127,39 @@ struct flb_aws_client_generator *flb_aws_client_generator(); */ char *flb_aws_endpoint(char* service, char* region); +char *flb_s3_endpoint(char* bucket, char* region); + +/* Parses AWS XML API 
Error responses and returns the value of the tag */ +flb_sds_t flb_aws_xml_error(char *response, size_t response_len); + /* - * Parses an AWS API error type returned by a request. + * Parses an AWS JSON API error type returned by a request. */ flb_sds_t flb_aws_error(char *response, size_t response_len); /* - * Similar to 'flb_aws_error', except it prints the error type and message + * Similar to 'flb_aws_error', except it prints the JSON error type and message * to the user in a error log. * 'api' is the name of the API that was called; this is used in the error log. */ void flb_aws_print_error(char *response, size_t response_len, char *api, struct flb_output_instance *ins); +/* Similar to 'flb_aws_print_error', but for APIs that return XML */ +void flb_aws_print_xml_error(char *response, size_t response_len, + char *api, struct flb_output_instance *ins); + /* * Parses the JSON and gets the value for 'key' */ flb_sds_t flb_json_get_val(char *response, size_t response_len, char *key); +/* + * Parses an XML document and returns the value of the given tag + * Param `tag` should include angle brackets; ex "" + */ +flb_sds_t flb_xml_get_val(char *response, size_t response_len, char *tag); + /* * Request data from an IMDS path. */ @@ -158,6 +171,10 @@ int flb_imds_request(struct flb_aws_client *client, char *metadata_path, */ int flb_aws_is_auth_error(char *payload, size_t payload_size); +int flb_read_file(const char *path, char **out_buf, size_t *out_size); + +//* Constructs S3 object key as per the format. */ +flb_sds_t flb_get_s3_key(const char *format, time_t time, const char *tag, char *tag_delimiter); #endif #endif /* FLB_HAVE_AWS */ diff --git a/include/fluent-bit/flb_s3_local_buffer.h b/include/fluent-bit/flb_s3_local_buffer.h new file mode 100644 index 00000000000..10da91b0ed3 --- /dev/null +++ b/include/fluent-bit/flb_s3_local_buffer.h @@ -0,0 +1,76 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifdef FLB_HAVE_AWS + +#ifndef flb_local_buffer_H +#define flb_local_buffer_H + +struct flb_local_chunk { + /* identifies this chunk in the buffer dir; created with simple_hash fn */ + flb_sds_t key; + /* the original fluent tag for this data */ + flb_sds_t tag; + flb_sds_t file_path; + size_t size; + time_t create_time; + + struct mk_list _head; +}; + +struct flb_local_buffer { + char *dir; + struct flb_output_instance *ins; + + struct mk_list chunks; +}; + +/* + * "Initializes" the local buffer from the file system + * Reads buffer directory and finds any existing files + * This ensures the plugin will still send buffered data even if FB is restarted + */ +int flb_init_local_buffer(struct flb_local_buffer *store); + +/* + * Stores data in the local file system + * 'c' should be NULL if no local chunk suitable for this data has been created yet + */ +int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, + const char *tag, char *data, size_t bytes); + +/* + * Returns the chunk associated with the given tag + */ +struct flb_local_chunk *flb_chunk_get(struct flb_local_buffer *store, const char *tag); + +/* + * Recursively creates directories + */ +int flb_mkdir_all(const char *dir); + +/* Removes all files associated with a chunk once it has been removed */ +int flb_remove_chunk_files(struct flb_local_chunk *c); + +void flb_chunk_destroy(struct flb_local_chunk *c); + +void flb_local_buffer_destroy_chunks(struct flb_local_buffer *store); + +#endif +#endif /* FLB_HAVE_AWS */ diff --git a/include/fluent-bit/flb_signv4.h b/include/fluent-bit/flb_signv4.h index 1e442ab1650..78bbb3964a5 100644 --- a/include/fluent-bit/flb_signv4.h +++ b/include/fluent-bit/flb_signv4.h @@ -27,6 +27,12 @@ #ifndef FLB_SIGNV4_H #define FLB_SIGNV4_H +/* Request is not Amazon S3 PutObject */ +#define S3_MODE_NONE 0 +/* Set the x-amz-content-sha256 header with the sha value */ +#define S3_MODE_SIGNED_PAYLOAD 1 +/* Set the x-amz-content-sha256 header with the value UNSIGNED-PAYLOAD */ +#define S3_MODE_UNSIGNED_PAYLOAD 2 flb_sds_t flb_signv4_uri_normalize_path(char *uri, size_t len); @@ -34,6 +40,7 @@ flb_sds_t flb_signv4_do(struct flb_http_client *c, int normalize_uri, int amz_date, time_t t_now, char *region, char *service, + int s3_mode, struct flb_aws_provider *provider); #endif diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 70e598ca44b..60ba86b58e5 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -199,6 +199,7 @@ REGISTER_OUT_PLUGIN("out_lib") REGISTER_OUT_PLUGIN("out_flowcounter") REGISTER_OUT_PLUGIN("out_gelf") REGISTER_OUT_PLUGIN("out_cloudwatch_logs") +REGISTER_OUT_PLUGIN("out_s3") # FILTERS # ======= diff --git a/plugins/out_es/es.c b/plugins/out_es/es.c index f0bf77d1a1c..2becc355e68 100644 --- a/plugins/out_es/es.c +++ b/plugins/out_es/es.c @@ -62,6 +62,7 @@ static flb_sds_t add_aws_auth(struct flb_http_client *c, signature = flb_signv4_do(c, FLB_TRUE, FLB_TRUE, time(NULL), ctx->aws_region, "es", + 0, ctx->aws_provider); if (!signature) { flb_plg_error(ctx->ins, "could not sign request with sigv4"); diff --git a/plugins/out_s3/CMakeLists.txt b/plugins/out_s3/CMakeLists.txt new file mode 100644 index 00000000000..21e0109d176 --- /dev/null +++ b/plugins/out_s3/CMakeLists.txt @@ -0,0 +1,5 @@ +set(src + s3.c + s3_multipart.c) + +FLB_PLUGIN(out_s3 "${src}" "") diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c new file mode 100644 index 00000000000..efe66fb0a1b --- /dev/null +++ b/plugins/out_s3/s3.c @@ -0,0 +1,1141 @@ +/* -*- Mode: C; 
tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s3.h" + +static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, + struct flb_local_chunk *chunk, + char **out_buf, size_t *out_size); + +static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time, + char *body, size_t body_size); + +static int put_all_chunks(struct flb_s3 *ctx); + +static struct multipart_upload *get_upload(struct flb_s3 *ctx, + const char *tag, int tag_len); + +static struct multipart_upload *create_upload(struct flb_s3 *ctx, + const char *tag, int tag_len); + +static void multipart_upload_destroy(struct multipart_upload *m_upload) +{ + int i; + flb_sds_t etag; + + if (!m_upload) { + return; + } + + if (m_upload->s3_key) { + flb_sds_destroy(m_upload->s3_key); + } + if (m_upload->tag) { + flb_sds_destroy(m_upload->tag); + } + if (m_upload->upload_id) { + flb_sds_destroy(m_upload->upload_id); + } + + for (i = 0; i < m_upload->part_number; i++) { + etag = m_upload->etags[i]; + if (etag) { + flb_sds_destroy(etag); + } + } + + flb_free(m_upload); +} + +static void s3_context_destroy(struct flb_s3 *ctx) +{ + struct mk_list *tmp; + struct mk_list *head; + struct flb_local_chunk *chunk; + struct multipart_upload *m_upload = NULL; + + if (!ctx) { + return; + } + + if (mk_list_is_set(&ctx->store.chunks) == 0) { + mk_list_foreach_safe(head, tmp, &ctx->store.chunks) { + chunk = mk_list_entry(head, struct flb_local_chunk, _head); + flb_chunk_destroy(chunk); + } + } + + if (mk_list_is_set(&ctx->uploads) == 0) { + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + m_upload = mk_list_entry(head, struct multipart_upload, _head); + multipart_upload_destroy(m_upload); + } + } + + if (ctx->base_provider) { + flb_aws_provider_destroy(ctx->base_provider); + } + + if (ctx->provider) { + flb_aws_provider_destroy(ctx->provider); + } + + if (ctx->provider_tls.context) { + flb_tls_context_destroy(ctx->provider_tls.context); + } + + if (ctx->sts_provider_tls.context) { + flb_tls_context_destroy(ctx->sts_provider_tls.context); + } + + if (ctx->client_tls.context) { + flb_tls_context_destroy(ctx->client_tls.context); + } + + if (ctx->s3_client) { + flb_aws_client_destroy(ctx->s3_client); + } + + if (ctx->free_endpoint == FLB_TRUE) { + flb_free(ctx->endpoint); + } + + flb_free(ctx); +} + +static int cb_s3_init(struct flb_output_instance *ins, + struct flb_config *config, void *data) +{ + int ret; + const char *tmp; + int i; + int len; + struct flb_s3 *ctx = NULL; + (void) ins; + (void) config; + (void) data; + + ctx = flb_calloc(1, sizeof(struct flb_s3)); + if (!ctx) { + flb_errno(); + return -1; + } + ctx->ins = ins; + + mk_list_init(&ctx->uploads); + + ret = flb_output_config_map_set(ins, (void *) 
ctx); + if (ret == -1) { + flb_free(ctx); + return -1; + } + + /* Date format for JSON output */ + ctx->json_date_format = FLB_PACK_JSON_DATE_DOUBLE; + tmp = flb_output_get_property("json_date_format", ins); + if (tmp) { + ret = flb_pack_to_json_date_type(tmp); + if (ret == -1) { + flb_plg_error(ctx->ins, "invalid json_date_format '%s'. " + "Using 'double' type", tmp); + } + else { + ctx->json_date_format = ret; + } + } + + char *role_arn = NULL; + char *external_id = NULL; + struct flb_aws_client_generator *generator; + char *session_name; + + tmp = flb_output_get_property("bucket", ins); + if (tmp) { + ctx->bucket = (char *) tmp; + } + else { + flb_plg_error(ctx->ins, "'bucket' is a required parameter"); + goto error; + } + + tmp = flb_output_get_property("buffer_dir", ins); + if (tmp) { + len = strlen(tmp); + if (tmp[len - 1] == '/' || tmp[len - 1] == '\\') { + flb_plg_error(ctx->ins, "'buffer_dir' can not end in a / of \\"); + goto error; + } + } + + tmp = flb_output_get_property("prefix", ins); + if (tmp) { + ctx->prefix = (char *) tmp; + } + else { + ctx->prefix = "fluent-bit"; + } + + tmp = flb_output_get_property("total_file_size", ins); + if (tmp) { + ctx->file_size = (size_t) flb_utils_size_to_bytes(tmp); + if (ctx->file_size <= 0) { + flb_plg_error(ctx->ins, "Failed to parse total_file_size %s", tmp); + goto error; + } + if (ctx->file_size < 1000000) { + flb_plg_error(ctx->ins, "total_file_size must be at least 1MB"); + goto error; + } + if (ctx->file_size > MAX_FILE_SIZE) { + flb_plg_error(ctx->ins, "Max total_file_size is %s bytes", MAX_FILE_SIZE_STR); + goto error; + } + } + else { + ctx->file_size = DEFAULT_FILE_SIZE; + flb_plg_info(ctx->ins, "Using default file size 100MB"); + } + + tmp = flb_output_get_property("upload_chunk_size", ins); + if (tmp) { + ctx->upload_chunk_size = (size_t) flb_utils_size_to_bytes(tmp); + if (ctx->upload_chunk_size <= 0) { + flb_plg_error(ctx->ins, "Failed to parse upload_chunk_size %s", tmp); + goto error; + } + if (ctx->upload_chunk_size > ctx->file_size) { + flb_plg_error(ctx->ins, "upload_chunk_size can not be larger than total_file_size"); + goto error; + } + if (ctx->upload_chunk_size < MIN_CHUNKED_UPLOAD_SIZE) { + flb_plg_error(ctx->ins, "upload_chunk_size must be at least 5M"); + goto error; + } + if (ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_SIZE) { + flb_plg_error(ctx->ins, "Max upload_chunk_size is 50M"); + goto error; + } + } + else { + ctx->upload_chunk_size = MIN_CHUNKED_UPLOAD_SIZE; + } + + if (ctx->file_size < MIN_CHUNKED_UPLOAD_SIZE) { + flb_plg_info(ctx->ins, "total_file_size is less than 5 MB, will use PutObject API"); + ctx->use_put_object = FLB_TRUE; + } + + if ((ctx->upload_chunk_size * 2) > ctx->file_size) { + flb_plg_info(ctx->ins, "total_file_size is less than 2x upload_chunk_size, will use PutObject API"); + ctx->use_put_object = FLB_TRUE; + } + + tmp = flb_output_get_property("use_put_object", ins); + if (tmp && (strncasecmp(tmp, "On", 2) == 0 || strncasecmp(tmp, "true", 4) == 0)) { + ctx->use_put_object = FLB_TRUE; + tmp = flb_output_get_property("upload_chunk_size", ins); + if (tmp) { + flb_plg_error(ctx->ins, "upload_chunk_size is not compatible with use_put_object"); + goto error; + } + } + + if (ctx->use_put_object == FLB_TRUE) { + /* + * code internally uses 'upload_chunk_size' as the unit for each Put, + * regardless of which API is used to send data + */ + ctx->upload_chunk_size = ctx->file_size; + if (ctx->file_size > MAX_FILE_SIZE_PUT_OBJECT) { + flb_plg_error(ctx->ins, "Max total_file_size is 50M when 
use_put_object is enabled"); + goto error; + } + } + + tmp = flb_output_get_property("upload_timeout", ins); + if (tmp) { + i = atoi(tmp); + if (i <= 0) { + flb_plg_error(ctx->ins, "upload_timeout %s is negative or could not be parsed", + tmp); + goto error; + } + ctx->upload_timeout = (time_t) 60 * i; + } + else { + ctx->upload_timeout = DEFAULT_UPLOAD_TIMEOUT; + } + + tmp = flb_output_get_property("region", ins); + if (tmp) { + ctx->region = (char *) tmp; + } + else { + flb_plg_error(ctx->ins, "'region' is a required parameter"); + goto error; + } + + tmp = flb_output_get_property("time_key", ins); + if (tmp) { + ctx->time_key = (char *) tmp; + } + else { + ctx->time_key = "time"; + } + + tmp = flb_output_get_property("endpoint", ins); + if (tmp) { + ctx->endpoint = (char *) tmp; + ctx->free_endpoint = FLB_FALSE; + } + else { + /* default endpoint for the given region */ + ctx->endpoint = flb_s3_endpoint(ctx->bucket, ctx->region); + ctx->free_endpoint = FLB_TRUE; + if (!ctx->endpoint) { + flb_plg_error(ctx->ins, "Could not construct S3 endpoint"); + goto error; + } + } + + ctx->client_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->client_tls.context) { + flb_plg_error(ctx->ins, "Failed to create tls context"); + goto error; + } + + /* AWS provider needs a separate TLS instance */ + ctx->provider_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->provider_tls.context) { + flb_errno(); + goto error; + } + + ctx->provider = flb_standard_chain_provider_create(config, + &ctx->provider_tls, + ctx->region, + NULL, + flb_aws_client_generator()); + + if (!ctx->provider) { + flb_plg_error(ctx->ins, "Failed to create AWS Credential Provider"); + goto error; + } + + tmp = flb_output_get_property("role_arn", ins); + if (tmp) { + /* Use the STS Provider */ + ctx->base_provider = ctx->provider; + role_arn = (char *) tmp; + tmp = flb_output_get_property("external_id", ins); + if (tmp) { + external_id = (char *) tmp; + } + + /* STS provider needs yet another separate TLS instance */ + ctx->sts_provider_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + + if (!ctx->sts_provider_tls.context) { + flb_errno(); + goto error; + } + + session_name = flb_sts_session_name(); + if (!session_name) { + flb_plg_error(ctx->ins, "Failed to create aws iam role " + "session name"); + flb_errno(); + goto error; + } + + ctx->provider = flb_sts_provider_create(config, + &ctx->sts_provider_tls, + ctx->base_provider, + external_id, + role_arn, + session_name, + ctx->region, + NULL, + flb_aws_client_generator()); + if (!ctx->provider) { + flb_plg_error(ctx->ins, "Failed to create AWS STS Credential " + "Provider"); + goto error; + } + + } + + ctx->store.ins = ctx->ins; + ctx->store.dir = ctx->buffer_dir; + mk_list_init(&ctx->store.chunks); + ret = flb_mkdir_all(ctx->store.dir); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to create directories for local buffer: %s", + ctx->store.dir); + goto error; + } + + /* read any remaining buffers from previous (failed) executions */ + ctx->has_old_buffers = FLB_FALSE; + ret = flb_init_local_buffer(&ctx->store); + if (ret < 0) { + flb_plg_error(ctx->ins, 
"Failed to read existing local buffers at %s", + ctx->store.dir); + /* just ignore the existing local buffers and continue */ + flb_local_buffer_destroy_chunks(&ctx->store); + } + + if (mk_list_size(&ctx->store.chunks) > 0) { + /* note that these should be sent on first flush */ + ctx->has_old_buffers = FLB_TRUE; + } + + /* create S3 client */ + generator = flb_aws_client_generator(); + ctx->s3_client = generator->create(); + if (!ctx->s3_client) { + goto error; + } + ctx->s3_client->name = "s3_client"; + ctx->s3_client->has_auth = FLB_TRUE; + ctx->s3_client->provider = ctx->provider; + ctx->s3_client->region = ctx->region; + ctx->s3_client->service = "s3"; + ctx->s3_client->port = 443; + ctx->s3_client->flags = 0; + ctx->s3_client->proxy = NULL; + ctx->s3_client->s3_mode = S3_MODE_SIGNED_PAYLOAD; + + ctx->s3_client->upstream = flb_upstream_create(config, ctx->endpoint, 443, + FLB_IO_TLS, &ctx->client_tls); + if (!ctx->s3_client->upstream) { + flb_plg_error(ctx->ins, "Connection initialization error"); + goto error; + } + + ctx->s3_client->host = ctx->endpoint; + + /* initialize credentials in sync mode */ + ctx->provider->provider_vtable->sync(ctx->provider); + ctx->provider->provider_vtable->init(ctx->provider); + /* set back to async */ + ctx->provider->provider_vtable->async(ctx->provider); + + + + /* Export context */ + flb_output_set_context(ins, ctx); + + return 0; + +error: + s3_context_destroy(ctx); + return -1; +} + +/* + * return value is one of FLB_OK, FLB_RETRY, FLB_ERROR + * + * Chunk is allowed to be NULL + */ +static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, + char *body, size_t body_size, + const char *tag, int tag_len) +{ + struct multipart_upload *m_upload = NULL; + int init_upload = FLB_FALSE; + int complete_upload = FLB_FALSE; + int size_check = FLB_FALSE; + int part_num_check = FLB_FALSE; + int timeout_check = FLB_FALSE; + time_t create_time; + int ret; + + if (ctx->use_put_object == FLB_TRUE) { + goto put_object; + } + + m_upload = get_upload(ctx, tag, tag_len); + if (m_upload == NULL) { + if (chunk != NULL && time(NULL) > (chunk->create_time + ctx->upload_timeout)) { + /* timeout already reached, just PutObject */ + goto put_object; + } else if (body_size >= ctx->file_size) { + /* already big enough, just use PutObject API */ + goto put_object; + } + else if(body_size > MIN_CHUNKED_UPLOAD_SIZE) { + init_upload = FLB_TRUE; + goto multipart; + } + else { + goto put_object; + } + } + else { + /* existing upload */ + if (body_size < MIN_CHUNKED_UPLOAD_SIZE) { + complete_upload = FLB_TRUE; + } + + goto multipart; + } + +put_object: + + /* + * remove chunk from buffer list- needed for async http so that the + * same chunk won't be sent more than once + */ + if (chunk) { + mk_list_del(&chunk->_head); + create_time = chunk->create_time; + } + else { + create_time = time(NULL); + } + + ret = s3_put_object(ctx, tag, create_time, body, body_size); + if (ret < 0) { + /* re-add chunk to list */ + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } + return FLB_RETRY; + } + + /* data was sent successfully- delete the local buffer */ + if (chunk) { + ret = flb_remove_chunk_files(chunk); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not delete local buffer file %s", + chunk->file_path); + } + flb_chunk_destroy(chunk); + } + return FLB_OK; + +multipart: + + if (init_upload == FLB_TRUE) { + m_upload = create_upload(ctx, tag, tag_len); + if (!m_upload) { + flb_plg_error(ctx->ins, "Could not find or create upload for tag %s", tag); + return 
FLB_RETRY; + } + } + + if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_NOT_CREATED) { + ret = create_multipart_upload(ctx, m_upload); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not initiate multipart upload"); + return FLB_RETRY; + } + m_upload->upload_state = MULTIPART_UPLOAD_STATE_CREATED; + } + + /* + * remove chunk from buffer list- needed for async http so that the + * same chunk won't be sent more than once + */ + if (chunk) { + mk_list_del(&chunk->_head); + } + + ret = upload_part(ctx, m_upload, body, body_size); + if (ret < 0) { + /* re-add chunk to list */ + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } + return FLB_RETRY; + } + m_upload->part_number += 1; + + + /* data was sent successfully- delete the local buffer */ + if (chunk) { + ret = flb_remove_chunk_files(chunk); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not delete local buffer file %s", + chunk->file_path); + } + flb_chunk_destroy(chunk); + } + + if (m_upload->bytes >= ctx->file_size) { + size_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Completing upload for %s because uploaded data is greater" + " than size set by total_file_size", m_upload->s3_key); + } + if (m_upload->part_number >= 10000) { + part_num_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Completing upload for %s because 10,000 chunks " + "(the API limit) have been uploaded", m_upload->s3_key); + } + if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) { + timeout_check = FLB_TRUE; + flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout" + " has elapsed", m_upload->s3_key); + } + if (size_check || part_num_check || timeout_check) { + complete_upload = FLB_TRUE; + } + + if (complete_upload == FLB_TRUE) { + m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + ret = complete_multipart_upload(ctx, m_upload); + if (ret == 0) { + mk_list_del(&m_upload->_head); + multipart_upload_destroy(m_upload); + } else { + /* we return FLB_OK in this case, since data was persisted */ + flb_plg_error(ctx->ins, "Could not complete upload, will retry on next flush..", + m_upload->s3_key); + } + } + + return FLB_OK; +} + + +/* + * Attempts to send all chunks to S3 using PutObject + * Used on shut down to try to send all buffered data + * Used on start up to try to send any leftover buffers from previous executions + */ +static int put_all_chunks(struct flb_s3 *ctx) +{ + struct flb_local_chunk *chunk; + struct mk_list *tmp; + struct mk_list *head; + char *buffer = NULL; + size_t buffer_size; + int ret; + + mk_list_foreach_safe(head, tmp, &ctx->store.chunks) { + chunk = mk_list_entry(head, struct flb_local_chunk, _head); + + ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not construct request buffer for %s", + chunk->file_path); + return -1; + } + + /* + * remove chunk from buffer list- needed for async http so that the + * same chunk won't be sent more than once + */ + mk_list_del(&chunk->_head); + + ret = s3_put_object(ctx, chunk->tag, chunk->create_time, buffer, buffer_size); + flb_free(buffer); + if (ret < 0) { + /* re-add chunk to list */ + mk_list_add(&chunk->_head, &ctx->store.chunks); + return -1; + } + + /* data was sent successfully- delete the local buffer */ + ret = flb_remove_chunk_files(chunk); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not delete local buffer file %s", + chunk->file_path); + } + flb_chunk_destroy(chunk); + } + + return 0; +} + +/* + * Either new_data or chunk can be NULL, but not both + 
*/ +static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, + struct flb_local_chunk *chunk, + char **out_buf, size_t *out_size) +{ + char *body; + char *tmp; + size_t body_size; + char *buffered_data = NULL; + size_t buffer_size = 0; + int ret; + + if (new_data == NULL && chunk == NULL) { + flb_plg_error(ctx->ins, "[construct_request_buffer] Something went wrong" + " both chunk and new_data are NULL"); + return -1; + } + if (chunk) { + ret = flb_read_file(chunk->file_path, &buffered_data, &buffer_size); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not read locally buffered data %s", + chunk->file_path); + return -1; + } + body_size = buffer_size; + } + + if (new_data) { + body_size += flb_sds_len(new_data); + } + + body = flb_malloc(body_size + 1); + if (!body) { + flb_errno(); + flb_free(buffered_data); + return -1; + } + tmp = memcpy(body, buffered_data, buffer_size); + if (!tmp) { + flb_errno(); + flb_free(body); + flb_free(buffered_data); + return -1; + } + flb_free(buffered_data); + if (new_data) { + tmp = memcpy(body + buffer_size, new_data, flb_sds_len(new_data)); + if (!tmp) { + flb_errno(); + flb_free(body); + return -1; + } + } + body[body_size] = '\0'; + + *out_buf = body; + *out_size = body_size; + return 0; +} + +static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time, + char *body, size_t body_size) +{ + flb_sds_t uri = NULL; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + + uri = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); + if (!uri) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + return -1; + } + + s3_client = ctx->s3_client; + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, + uri, body, body_size, + NULL, 0); + if (c) { + flb_plg_debug(ctx->ins, "PutObject http status=%d", c->resp.status); + if (c->resp.status == 200) { + flb_plg_info(ctx->ins, "Successfully uploaded object %s", uri); + flb_sds_destroy(uri); + flb_http_client_destroy(c); + return 0; + } + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "PutObject", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_error(ctx->ins, "Raw PutObject response: %s", c->resp.data); + } + flb_http_client_destroy(c); + } + + flb_plg_error(ctx->ins, "PutObject request failed"); + flb_sds_destroy(uri); + return -1; +} + +static struct multipart_upload *get_upload(struct flb_s3 *ctx, + const char *tag, int tag_len) +{ + struct multipart_upload *m_upload = NULL; + struct multipart_upload *tmp_upload = NULL; + struct mk_list *tmp; + struct mk_list *head; + + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + tmp_upload = mk_list_entry(head, struct multipart_upload, _head); + if (tmp_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { + continue; + } + if (strcmp(tmp_upload->tag, tag) == 0) { + m_upload = tmp_upload; + break; + } + } + + return m_upload; +} + +static struct multipart_upload *create_upload(struct flb_s3 *ctx, + const char *tag, int tag_len) +{ + struct multipart_upload *m_upload = NULL; + flb_sds_t s3_key = NULL; + flb_sds_t tmp_sds = NULL; + + /* create new upload for this key */ + m_upload = flb_calloc(1, sizeof(struct multipart_upload)); + if (!m_upload) { + flb_errno(); + return NULL; + } + s3_key = flb_get_s3_key(ctx->s3_key_format, time(NULL), tag, ctx->tag_delimiters); + if (!s3_key) { + flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); + flb_free(m_upload); + return NULL; + } + m_upload->s3_key = 
s3_key; + tmp_sds = flb_sds_create_len(tag, tag_len); + if (!tmp_sds) { + flb_errno(); + flb_free(m_upload); + return NULL; + } + m_upload->tag = tmp_sds; + m_upload->upload_state = MULTIPART_UPLOAD_STATE_NOT_CREATED; + m_upload->part_number = 1; + m_upload->init_time = time(NULL); + mk_list_add(&m_upload->_head, &ctx->uploads); + + return m_upload; +} + +static void cb_s3_flush(const void *data, size_t bytes, + const char *tag, int tag_len, + struct flb_input_instance *i_ins, + void *out_context, + struct flb_config *config) +{ + struct flb_s3 *ctx = out_context; + flb_sds_t json = NULL; + struct flb_local_chunk *chunk = NULL; + struct multipart_upload *m_upload = NULL; + char *buffer = NULL; + size_t buffer_size; + int timeout_check = FLB_FALSE; + struct mk_list *tmp; + struct mk_list *head; + size_t chunk_size = 0; + int complete; + int ret; + int len; + (void) i_ins; + (void) config; + + /* first, clean up any old buffers found on startup */ + if (ctx->has_old_buffers == FLB_TRUE) { + flb_plg_info(ctx->ins, "Sending locally buffered data from previous " + "executions to S3; buffer=%s", ctx->store.dir); + ret = put_all_chunks(ctx); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to send locally buffered data left over" + " from previous executions; will retry. Buffer=%s", ctx->store.dir); + } else { + ctx->has_old_buffers = FLB_FALSE; + } + } + + json = flb_pack_msgpack_to_json_format(data, bytes, + FLB_PACK_JSON_FORMAT_LINES, + ctx->json_date_format, + ctx->json_date_key); + + if (json == NULL) { + flb_plg_error(ctx->ins, "Could not marshal msgpack to JSON"); + FLB_OUTPUT_RETURN(FLB_ERROR); + } + + len = flb_sds_len(json); + chunk = flb_chunk_get(&ctx->store, tag); + + /* if timeout has elapsed, we must put whatever data we have */ + if (chunk != NULL && time(NULL) > (chunk->create_time + ctx->upload_timeout)) { + timeout_check = FLB_TRUE; + flb_plg_info(ctx->ins, "upload_timeout reached for %s", tag); + } + + chunk_size = len; + if (chunk) { + chunk_size += chunk->size; + } + + if (chunk_size < ctx->upload_chunk_size) { + if (timeout_check == FLB_FALSE) { + /* add data to local buffer */ + ret = flb_buffer_put(&ctx->store, chunk, tag, json, (size_t) len); + flb_sds_destroy(json); + if (ret < 0) { + FLB_OUTPUT_RETURN(FLB_RETRY); + } + /* send any chunks/uploads which have timed out */ + goto cleanup_existing; + } + } + + ret = construct_request_buffer(ctx, json, chunk, &buffer, &buffer_size); + flb_sds_destroy(json); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not construct request buffer for %s", + chunk->file_path); + FLB_OUTPUT_RETURN(FLB_RETRY); + } + + ret = upload_data(ctx, chunk, buffer, buffer_size, tag, tag_len); + flb_free(buffer); + if (ret != FLB_OK) { + FLB_OUTPUT_RETURN(ret); + } + +cleanup_existing: + + /* Check all chunks and see if any have timed out */ + mk_list_foreach_safe(head, tmp, &ctx->store.chunks) { + chunk = mk_list_entry(head, struct flb_local_chunk, _head); + + if (time(NULL) < (chunk->create_time + ctx->upload_timeout)) { + continue; /* Only send chunks which have timed out */ + } + + ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not construct request buffer for %s", + chunk->file_path); + continue; + } + + ret = upload_data(ctx, chunk, buffer, buffer_size, tag, tag_len); + flb_free(buffer); + if (ret != FLB_OK) { + /* + * exit- can try again on next flush + * we return OK since the actual data sent in this flush was persisted + */ + FLB_OUTPUT_RETURN(FLB_OK); + } + } + + 
/* Check all uploads and see if any need completion */ + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + m_upload = mk_list_entry(head, struct multipart_upload, _head); + complete = FLB_FALSE; + if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { + complete = FLB_TRUE; + } + if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) { + flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout" + " has passed", m_upload->s3_key); + complete = FLB_TRUE; + } + if (complete == FLB_TRUE) { + m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + ret = complete_multipart_upload(ctx, m_upload); + if (ret == 0) { + mk_list_del(&m_upload->_head); + multipart_upload_destroy(m_upload); + } else { + /* we return FLB_OK in this case, since data was persisted */ + flb_plg_error(ctx->ins, "Could not complete upload %s, will retry on next flush..", + m_upload->s3_key); + } + } + } + + FLB_OUTPUT_RETURN(FLB_OK); +} + +static int cb_s3_exit(void *data, struct flb_config *config) +{ + int ret; + struct flb_s3 *ctx = data; + struct multipart_upload *m_upload = NULL; + struct mk_list *tmp; + struct mk_list *head; + + if (!ctx) { + return 0; + } + + if (mk_list_size(&ctx->store.chunks) > 0) { + /* exit must run in sync mode */ + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); + flb_plg_info(ctx->ins, "Sending all locally buffered data to S3"); + ret = put_all_chunks(ctx); + if (ret < 0) { + return -1; + } + } + + if (mk_list_size(&ctx->uploads) > 0) { + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + m_upload = mk_list_entry(head, struct multipart_upload, _head); + + if (m_upload->bytes > 0) { + m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + ret = complete_multipart_upload(ctx, m_upload); + if (ret == 0) { + mk_list_del(&m_upload->_head); + multipart_upload_destroy(m_upload); + } else { + flb_plg_error(ctx->ins, "Could not complete upload %s", + m_upload->s3_key); + } + } + } + } + + s3_context_destroy(ctx); + + return 0; +} + +/* Configuration properties map */ +static struct flb_config_map config_map[] = { + { + FLB_CONFIG_MAP_STR, "json_date_format", NULL, + 0, FLB_FALSE, 0, + "Specifies the format of the date. Supported formats are double, iso8601 and epoch." + }, + { + FLB_CONFIG_MAP_STR, "total_file_size", NULL, + 0, FLB_FALSE, 0, + "Specifies the size of files in S3. Maximum size is 50GB, minimim is 1MB" + }, + { + FLB_CONFIG_MAP_STR, "upload_chunk_size", NULL, + 0, FLB_FALSE, 0, + "This plugin uses the S3 Multipart Upload API to stream data to S3, " + "ensuring your data gets-off-the-box as quickly as possible. " + "This parameter configures the size of each “part” in the upload. " + "The total_file_size option configures the size of the file you will see " + "in S3; this option determines the size of chunks uploaded until that " + "size is reached. These chunks are temporarily stored in chunk_buffer_path " + "until their size reaches upload_chunk_size, which point the chunk is " + "uploaded to S3. Default: 5M, Max: 50M, Min: 5M." + }, + { + FLB_CONFIG_MAP_INT, "upload_timeout", "60", + 0, FLB_FALSE, 0, + "Optionally specify a timeout for uploads using an integer number of minutes. " + "Whenever this amount of time has elapsed, Fluent Bit will complete an " + "upload and create a new file in S3. For example, set this value to 60 " + "and you will get a new file in S3 every hour. Default is 60." 
+ }, + { + FLB_CONFIG_MAP_STR, "json_date_key", "date", + 0, FLB_TRUE, offsetof(struct flb_s3, json_date_key), + "Specifies the name of the date field in output." + }, + { + FLB_CONFIG_MAP_STR, "bucket", NULL, + 0, FLB_TRUE, offsetof(struct flb_s3, bucket), + "S3 bucket name." + }, + { + FLB_CONFIG_MAP_STR, "region", "us-east-1", + 0, FLB_TRUE, offsetof(struct flb_s3, region), + "AWS region." + }, + + { + FLB_CONFIG_MAP_STR, "buffer_dir", "/fluent-bit/buffer/s3", + 0, FLB_TRUE, offsetof(struct flb_s3, buffer_dir), + "Directory to locally buffer data before sending. Plugin uses the S3 Multipart " + "upload API to send data in chunks of 5 MB at a time- only a small amount of" + " data will be locally buffered at any given point in time." + }, + + { + FLB_CONFIG_MAP_STR, "s3_key_format", "/fluent-bit-logs/$TAG/%Y/%m/%d/%H/%M/%S", + 0, FLB_TRUE, offsetof(struct flb_s3, s3_key_format), + "Format string for keys in S3. This option supports strftime time formatters " + "and a syntax for selecting parts of the Fluent log tag using a syntax inspired " + "by the rewrite_tag filter. Add $TAG in the format string to insert the full " + "log tag; add $TAG[0] to insert the first part of the tag in the s3 key. " + "The tag is split into “parts” using the characters specified with the " + "s3_key_format_tag_delimiters option. See the in depth examples and tutorial" + " in the documentation." + }, + + { + FLB_CONFIG_MAP_STR, "s3_key_format_tag_delimiters", ".", + 0, FLB_TRUE, offsetof(struct flb_s3, tag_delimiters), + "A series of characters which will be used to split the tag into “parts” for " + "use with the s3_key_format option. See the in depth examples and tutorial in " + "the documentation." + }, + + { + FLB_CONFIG_MAP_BOOL, "use_put_object", "false", + 0, FLB_TRUE, offsetof(struct flb_s3, use_put_object), + "Use the S3 PutObject API, instead of the multipart upload API" + }, + + /* EOF */ + {0} +}; + +/* Plugin registration */ +struct flb_output_plugin out_s3_plugin = { + .name = "s3", + .description = "Send to S3", + .cb_init = cb_s3_init, + .cb_flush = cb_s3_flush, + .cb_exit = cb_s3_exit, + .flags = 0, + .config_map = config_map +}; diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h new file mode 100644 index 00000000000..6d60fda2a83 --- /dev/null +++ b/plugins/out_s3/s3.h @@ -0,0 +1,118 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLB_OUT_S3 +#define FLB_OUT_S3 + +#include +#include +#include +#include +#include +#include + +/* Upload data to S3 in 5MB chunks */ +#define MIN_CHUNKED_UPLOAD_SIZE 5000000 +#define MAX_CHUNKED_UPLOAD_SIZE 50000000 + + +#define MULTIPART_UPLOAD_STATE_NOT_CREATED 0 +#define MULTIPART_UPLOAD_STATE_CREATED 1 +#define MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS 2 + +#define DEFAULT_FILE_SIZE 100000000 +#define MAX_FILE_SIZE 50000000000 +#define MAX_FILE_SIZE_STR "50,000,000,000" + +#define MAX_FILE_SIZE_PUT_OBJECT 50000000 + +#define DEFAULT_UPLOAD_TIMEOUT 3600 + +struct multipart_upload { + flb_sds_t s3_key; + flb_sds_t tag; + flb_sds_t upload_id; + int upload_state; + time_t init_time; + + /* + * maximum of 10,000 parts in an upload, for each we need to store mapping + * of Part Number to ETag + */ + flb_sds_t etags[10000]; + int part_number; + + /* ongoing tracker of how much data has been sent for this upload */ + size_t bytes; + + struct mk_list _head; +}; + +struct flb_s3 { + char *bucket; + char *region; + char *prefix; + char *time_key; + char *s3_key_format; + char *tag_delimiters; + char *endpoint; + int free_endpoint; + int use_put_object; + + struct flb_aws_provider *provider; + struct flb_aws_provider *base_provider; + /* tls instances can't be re-used; aws provider requires a separate one */ + struct flb_tls provider_tls; + /* one for the standard chain provider, one for sts assume role */ + struct flb_tls sts_provider_tls; + struct flb_tls client_tls; + + struct flb_aws_client *s3_client; + int json_date_format; + flb_sds_t json_date_key; + + struct flb_local_buffer store; + char *buffer_dir; + + /* + * used to track that unset buffers were found on startup that have not + * been sent + */ + int has_old_buffers; + + struct mk_list uploads; + + size_t file_size; + size_t upload_chunk_size; + time_t upload_timeout; + + struct flb_output_instance *ins; +}; + +int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, + char *body, size_t body_size); + +int create_multipart_upload(struct flb_s3 *ctx, + struct multipart_upload *m_upload); + +int complete_multipart_upload(struct flb_s3 *ctx, + struct multipart_upload *m_upload); + +#endif diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c new file mode 100644 index 00000000000..73d9d4d17f8 --- /dev/null +++ b/plugins/out_s3/s3_multipart.c @@ -0,0 +1,356 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "s3.h" + +#define COMPLETE_MULTIPART_UPLOAD_BASE_LEN 100 +#define COMPLETE_MULTIPART_UPLOAD_PART_LEN 124 + +flb_sds_t get_etag(char *response, size_t size); + +static inline int try_to_write(char *buf, int *off, size_t left, + const char *str, size_t str_len) +{ + if (str_len <= 0){ + str_len = strlen(str); + } + if (left <= *off+str_len) { + return FLB_FALSE; + } + memcpy(buf+*off, str, str_len); + *off += str_len; + return FLB_TRUE; +} + +/* + * https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompleteMultipartUpload.html + */ +static int complete_multipart_upload_payload(struct flb_s3 *ctx, + struct multipart_upload *m_upload, + char **out_buf, size_t *out_size) +{ + char *buf; + int i; + int offset = 0; + flb_sds_t etag; + size_t size = COMPLETE_MULTIPART_UPLOAD_BASE_LEN; + char part_num[7]; + int last_part_num; + + /* part_number on the upload will be set to next expected part number */ + last_part_num = m_upload->part_number - 1; + + size = size + (COMPLETE_MULTIPART_UPLOAD_PART_LEN * last_part_num); + + buf = flb_malloc(size + 1); + if (!buf) { + flb_errno(); + return -1; + } + + if (!try_to_write(buf, &offset, size, + "", 73)) { + goto error; + } + + for (i = 0; i < last_part_num; i++) { + etag = m_upload->etags[i]; + if (!try_to_write(buf, &offset, size, + "", 12)) { + goto error; + } + + if (!try_to_write(buf, &offset, size, + etag, 0)) { + goto error; + } + + if (!try_to_write(buf, &offset, size, + "", 19)) { + goto error; + } + + if (!sprintf(part_num, "%d", i + 1)) { + goto error; + } + + if (!try_to_write(buf, &offset, size, + part_num, 0)) { + goto error; + } + + if (!try_to_write(buf, &offset, size, + "", 20)) { + goto error; + } + } + + if (!try_to_write(buf, &offset, size, + "", 26)) { + goto error; + } + + buf[offset] = '\0'; + + *out_buf = buf; + *out_size = offset; + return 0; + +error: + flb_free(buf); + flb_plg_error(ctx->ins, "Failed to construct CompleteMultipartUpload " + "request body"); + return -1; +} + +int complete_multipart_upload(struct flb_s3 *ctx, + struct multipart_upload *m_upload) +{ + char *body; + size_t size; + flb_sds_t uri = NULL; + flb_sds_t tmp; + int ret; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 + + flb_sds_len(m_upload->upload_id)); + if (!uri) { + flb_errno(); + return -1; + } + + tmp = flb_sds_printf(&uri, "%s?uploadId=%s", m_upload->s3_key, + m_upload->upload_id); + if (!tmp) { + flb_sds_destroy(uri); + return -1; + } + uri = tmp; + + ret = complete_multipart_upload_payload(ctx, m_upload, &body, &size); + if (ret < 0) { + flb_sds_destroy(uri); + return -1; + } + + s3_client = ctx->s3_client; + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_POST, + uri, body, size, + NULL, 0); + flb_sds_destroy(uri); + flb_free(body); + if (c) { + flb_plg_debug(ctx->ins, "CompleteMultipartUpload http status=%d", + c->resp.status); + if (c->resp.status == 200) { + flb_plg_info(ctx->ins, "Successfully completed multipart upload " + "for %s, UploadId=%s", m_upload->s3_key, + m_upload->upload_id); + flb_http_client_destroy(c); + return 0; + } + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "CompleteMultipartUpload", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_debug(ctx->ins, "Raw CompleteMultipartUpload response: %s", + c->resp.data); + } + flb_http_client_destroy(c); + } + + flb_plg_error(ctx->ins, 
"CompleteMultipartUpload request failed"); + return -1; +} + + +int create_multipart_upload(struct flb_s3 *ctx, + struct multipart_upload *m_upload) +{ + flb_sds_t uri = NULL; + flb_sds_t tmp; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); + if (!uri) { + flb_errno(); + return -1; + } + + tmp = flb_sds_printf(&uri, "%s?uploads=", m_upload->s3_key); + if (!tmp) { + flb_sds_destroy(uri); + return -1; + } + uri = tmp; + + s3_client = ctx->s3_client; + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_POST, + uri, NULL, 0, NULL, 0); + flb_sds_destroy(uri); + if (c) { + flb_plg_debug(ctx->ins, "CreateMultipartUpload http status=%d", + c->resp.status); + if (c->resp.status == 200) { + tmp = flb_xml_get_val(c->resp.payload, c->resp.payload_size, + ""); + if (!tmp) { + flb_plg_error(ctx->ins, "Could not find upload ID in " + "CreateMultipartUpload response"); + flb_plg_debug(ctx->ins, "Raw CreateMultipartUpload response: %s", + c->resp.data); + flb_http_client_destroy(c); + return -1; + } + m_upload->upload_id = tmp; + flb_plg_info(ctx->ins, "Successfully initiated multipart upload " + "for %s, UploadId=%s", m_upload->s3_key, + m_upload->upload_id); + flb_http_client_destroy(c); + return 0; + } + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "CreateMultipartUpload", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_debug(ctx->ins, "Raw CreateMultipartUpload response: %s", + c->resp.data); + } + flb_http_client_destroy(c); + } + + flb_plg_error(ctx->ins, "CreateMultipartUpload request failed"); + return -1; +} + +/* gets the ETag value from response headers */ +flb_sds_t get_etag(char *response, size_t size) +{ + char *tmp; + int start; + int end; + int len; + int i = 0; + flb_sds_t etag; + tmp = strstr(response, "ETag:"); + if (!tmp) { + return NULL; + } + i = tmp - response; + + /* advance to end of ETag key */ + i += 5; + + /* advance across any whitespace and the opening quote */ + while (i < size && (response[i] == '\"' || isspace(response[i]) != 0)) { + i++; + } + start = i; + /* advance until we hit whitespace or the end quote */ + while (i < size && (response[i] != '\"' && isspace(response[i]) == 0)) { + i++; + } + end = i; + len = end - start; + + etag = flb_sds_create_len(response + start, len); + if (!etag) { + flb_errno(); + return NULL; + } + + return etag; +} + +int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, + char *body, size_t body_size) +{ + flb_sds_t uri = NULL; + flb_sds_t tmp; + struct flb_http_client *c = NULL; + struct flb_aws_client *s3_client; + + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); + if (!uri) { + flb_errno(); + return -1; + } + + tmp = flb_sds_printf(&uri, "%s?partNumber=%d&uploadId=%s", + m_upload->s3_key, m_upload->part_number, + m_upload->upload_id); + if (!tmp) { + flb_errno(); + flb_sds_destroy(uri); + return -1; + } + uri = tmp; + + s3_client = ctx->s3_client; + c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, + uri, body, body_size, + NULL, 0); + flb_sds_destroy(uri); + if (c) { + flb_plg_debug(ctx->ins, "UploadPart http status=%d", + c->resp.status); + if (c->resp.status == 200) { + tmp = get_etag(c->resp.data, c->resp.data_size); + if (!tmp) { + flb_plg_error(ctx->ins, "Could not find ETag in " + "UploadPart response"); + flb_plg_debug(ctx->ins, "Raw UploadPart response: %s", + c->resp.data); + flb_http_client_destroy(c); + return -1; + } + m_upload->etags[m_upload->part_number - 1] = 
tmp; + flb_plg_info(ctx->ins, "Successfully uploaded part #%d " + "for %s, UploadId=%s, ETag=%s", m_upload->part_number, + m_upload->s3_key, m_upload->upload_id, tmp); + flb_http_client_destroy(c); + /* track how many bytes are have gone toward this upload */ + m_upload->bytes += body_size; + return 0; + } + flb_aws_print_xml_error(c->resp.payload, c->resp.payload_size, + "UploadPart", ctx->ins); + if (c->resp.data != NULL) { + flb_plg_debug(ctx->ins, "Raw UploadPart response: %s", + c->resp.data); + } + flb_http_client_destroy(c); + } + + flb_plg_error(ctx->ins, "UploadPart request failed"); + return -1; +} diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 40eb4ef6197..278423be55e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -122,6 +122,7 @@ if(FLB_AWS) "aws/flb_aws_credentials_ec2.c" "aws/flb_aws_credentials_http.c" "aws/flb_aws_credentials_profile.c" + "aws/flb_s3_local_buffer.c" ) endif() diff --git a/src/aws/flb_aws_credentials.c b/src/aws/flb_aws_credentials.c index f0060d03466..e50af6dc806 100644 --- a/src/aws/flb_aws_credentials.c +++ b/src/aws/flb_aws_credentials.c @@ -26,8 +26,6 @@ #include #include #include -#include -#include #define TEN_MINUTES 600 #define TWELVE_HOURS 43200 @@ -535,53 +533,6 @@ time_t flb_aws_cred_expiration(const char *timestamp) return expiration; } -int flb_read_file(const char *path, char **out_buf, size_t *out_size) -{ - int ret; - long bytes; - char *buf = NULL; - FILE *fp = NULL; - struct stat st; - int fd; - - fp = fopen(path, "r"); - if (!fp) { - return -1; - } - - fd = fileno(fp); - ret = fstat(fd, &st); - if (ret == -1) { - flb_errno(); - fclose(fp); - return -1; - } - - buf = flb_malloc(st.st_size + sizeof(char)); - if (!buf) { - flb_errno(); - fclose(fp); - return -1; - } - - bytes = fread(buf, st.st_size, 1, fp); - if (bytes != 1) { - flb_errno(); - flb_free(buf); - fclose(fp); - return -1; - } - - /* fread does not add null byte */ - buf[st.st_size] = '\0'; - - fclose(fp); - *out_buf = buf; - *out_size = st.st_size; - - return 0; -} - /* * Fluent Bit is single-threaded but asynchonous. Only one co-routine will * be running at a time, and they only pause/resume for IO. 
diff --git a/src/aws/flb_aws_util.c b/src/aws/flb_aws_util.c index b2c20dc99e4..96971b9f4d3 100644 --- a/src/aws/flb_aws_util.c +++ b/src/aws/flb_aws_util.c @@ -27,6 +27,19 @@ #include #include +#include +#include + +#define AWS_SERVICE_ENDPOINT_FORMAT "%s.%s.amazonaws.com" +#define AWS_SERVICE_ENDPOINT_BASE_LEN 15 + +#define S3_SERVICE_ENDPOINT_FORMAT "%s.s3.amazonaws.com" +#define S3_SERVICE_ENDPOINT_BASE_LEN 17 + +#define TAG_PART_DESCRIPTOR "$TAG[%d]" +#define TAG_DESCRIPTOR "$TAG" +#define MAX_TAG_PARTS 10 +#define S3_KEY_SIZE 1024 struct flb_http_client *request_do(struct flb_aws_client *aws_client, int method, const char *uri, @@ -81,6 +94,99 @@ char *flb_aws_endpoint(char* service, char* region) } +int flb_read_file(const char *path, char **out_buf, size_t *out_size) +{ + int ret; + long bytes; + char *buf = NULL; + FILE *fp = NULL; + struct stat st; + int fd; + + fp = fopen(path, "r"); + if (!fp) { + return -1; + } + + fd = fileno(fp); + ret = fstat(fd, &st); + if (ret == -1) { + flb_errno(); + fclose(fp); + return -1; + } + + buf = flb_malloc(st.st_size + sizeof(char)); + if (!buf) { + flb_errno(); + fclose(fp); + return -1; + } + + bytes = fread(buf, st.st_size, 1, fp); + if (bytes != 1) { + flb_errno(); + flb_free(buf); + fclose(fp); + return -1; + } + + /* fread does not add null byte */ + buf[st.st_size] = '\0'; + + fclose(fp); + *out_buf = buf; + *out_size = st.st_size; + + return 0; +} + +/* + * https://bucket.s3.amazonaws.com(.cn) + */ +char *flb_s3_endpoint(char* bucket, char* region) +{ + char *endpoint = NULL; + size_t len = S3_SERVICE_ENDPOINT_BASE_LEN; + int is_cn = FLB_FALSE; + int bytes; + + + /* In the China regions, ".cn" is appended to the URL */ + if (strcmp("cn-north-1", region) == 0) { + len += 3; + is_cn = FLB_TRUE; + } + if (strcmp("cn-northwest-1", region) == 0) { + len += 3; + is_cn = FLB_TRUE; + } + + len += strlen(bucket); + len++; /* null byte */ + + endpoint = flb_malloc(len); + if (!endpoint) { + flb_errno(); + return NULL; + } + + bytes = snprintf(endpoint, len, S3_SERVICE_ENDPOINT_FORMAT, bucket); + if (bytes < 0) { + flb_errno(); + flb_free(endpoint); + return NULL; + } + + if (is_cn) { + memcpy(endpoint + bytes, ".cn", 3); + endpoint[bytes + 3] = '\0'; + } + + return endpoint; + +} + struct flb_http_client *flb_aws_client_request(struct flb_aws_client *aws_client, int method, const char *uri, const char *body, size_t body_len, @@ -274,6 +380,7 @@ struct flb_http_client *request_do(struct flb_aws_client *aws_client, if (aws_client->has_auth) { signature = flb_signv4_do(c, FLB_TRUE, FLB_TRUE, time(NULL), aws_client->region, aws_client->service, + aws_client->s3_mode, aws_client->provider); if (!signature) { if (aws_client->debug_only == FLB_TRUE) { @@ -294,6 +401,11 @@ struct flb_http_client *request_do(struct flb_aws_client *aws_client, aws_client->host, ret, c->resp.status); } + if (ret != 0 && c != NULL) { + flb_http_client_destroy(c); + c = NULL; + } + flb_upstream_conn_release(u_conn); flb_sds_destroy(signature); return c; @@ -311,6 +423,76 @@ struct flb_http_client *request_do(struct flb_aws_client *aws_client, return NULL; } +void flb_aws_print_xml_error(char *response, size_t response_len, + char *api, struct flb_output_instance *ins) +{ + flb_sds_t error; + flb_sds_t message; + + error = flb_xml_get_val(response, response_len, ""); + if (!error) { + flb_plg_error(ins, "%s: Could not parse response", api); + return; + } + + message = flb_xml_get_val(response, response_len, ""); + if (!message) { + /* just print the error */ + 
+        flb_plg_error(ins, "%s API responded with error='%s'", api, error);
+    }
+    else {
+        flb_plg_error(ins, "%s API responded with error='%s', message='%s'",
+                      api, error, message);
+        flb_sds_destroy(message);
+    }
+
+    flb_sds_destroy(error);
+}
+
+/* Parses AWS XML API Error responses and returns the value of the <Code> tag */
+flb_sds_t flb_aws_xml_error(char *response, size_t response_len)
+{
+    return flb_xml_get_val(response, response_len, "<Code>");
+}
+
+/*
+ * Parses an XML document and returns the value of the given tag
+ * Param `tag` should include angle brackets; ex "<tag>"
+ */
+flb_sds_t flb_xml_get_val(char *response, size_t response_len, char *tag)
+{
+    flb_sds_t val = NULL;
+    char *node = NULL;
+    char *end;
+    int len;
+
+    if (response_len == 0) {
+        return NULL;
+    }
+    node = strstr(response, tag);
+    if (!node) {
+        flb_debug("[aws] Could not find '%s' tag in API response", tag);
+        return NULL;
+    }
+
+    /* advance to end of tag */
+    node += strlen(tag);
+
+    end = strchr(node, '<');
+    if (!end) {
+        flb_error("[aws] Could not find end of '%s' node in xml", tag);
+        return NULL;
+    }
+    len = end - node;
+    val = flb_sds_create_len(node, len);
+    if (!val) {
+        flb_errno();
+        return NULL;
+    }
+
+    return val;
+}
+
 void flb_aws_print_error(char *response, size_t response_len,
                          char *api, struct flb_output_instance *ins)
 {
@@ -336,7 +518,7 @@ void flb_aws_print_error(char *response, size_t response_len,
     flb_sds_destroy(error);
 }
 
-/* parses AWS API error responses and returns the value of the __type field */
+/* parses AWS JSON API error responses and returns the value of the __type field */
 flb_sds_t flb_aws_error(char *response, size_t response_len)
 {
     return flb_json_get_val(response, response_len, "__type");
@@ -459,3 +641,184 @@ int flb_imds_request(struct flb_aws_client *client, char *metadata_path,
 
     return -1;
 }
+
+/* Generic replace function for strings. */
+static char* replace_uri_tokens(const char* original_string, const char* current_word,
+                                const char* new_word)
+{
+    char *result;
+    int i = 0;
+    int count = 0;
+    int new_word_len = strlen(new_word);
+    int old_word_len = strlen(current_word);
+
+    for (i = 0; original_string[i] != '\0'; i++) {
+        if (strstr(&original_string[i], current_word) == &original_string[i]) {
+            count++;
+            i += old_word_len - 1;
+        }
+    }
+
+    result = flb_sds_create_size(i + count * (new_word_len - old_word_len) + 1);
+    if (!result) {
+        flb_errno();
+        return NULL;
+    }
+
+    i = 0;
+    while (*original_string) {
+        if (strstr(original_string, current_word) == original_string) {
+            strcpy(&result[i], new_word);
+            i += new_word_len;
+            original_string += old_word_len;
+        }
+        else
+            result[i++] = *original_string++;
+    }
+
+    result[i] = '\0';
+    return result;
+}
+
+/* Constructs S3 object key as per the format. */
+flb_sds_t flb_get_s3_key(const char *format, time_t time, const char *tag, char *tag_delimiter)
+{
+    int i = 0;
+    int ret = 0;
+    char *tag_token = NULL;
+    flb_sds_t tmp = NULL;
+    flb_sds_t buf = NULL;
+    flb_sds_t s3_key = NULL;
+    flb_sds_t tmp_key = NULL;
+    flb_sds_t tmp_tag = NULL;
+    struct tm *gmt = NULL;
+
+    if (strlen(format) > S3_KEY_SIZE){
+        flb_warn("[s3_key] Object key length is longer than the 1024 character limit.");
+    }
+
+    tmp_tag = flb_sds_create_len(tag, strlen(tag));
+    if(!tmp_tag){
+        goto error;
+    }
+
+    s3_key = flb_sds_create_len(format, strlen(format));
+    if (!s3_key) {
+        goto error;
+    }
+
+    /* Check if the delimiter(s) specified exist in the tag. 
*/ + for (i = 0; i < strlen(tag_delimiter); i++){ + if (strchr(tag, tag_delimiter[i])){ + ret = 1; + break; + } + } + + if (strstr(format, "$TAG[") != NULL) { + if(ret == 0){ + flb_warn("[s3_key] Invalid Tag delimiter: does not exist in tag. " + "tag=%s, format=%s", tag, format); + } + } + + /* Split the string on the delimiters */ + tag_token = strtok(tmp_tag, tag_delimiter); + + /* Find all occurences of $TAG[*] and + * replaces it with the right token from tag. + */ + while(tag_token != NULL && i < MAX_TAG_PARTS) { + buf = flb_sds_create_size(10); + if (!buf) { + goto error; + } + tmp = flb_sds_printf(&buf, TAG_PART_DESCRIPTOR, i); + if (!tmp) { + goto error; + } + + tmp_key = replace_uri_tokens(s3_key, tmp, tag_token); + if (!tmp_key) { + goto error; + } + + if(strlen(tmp_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + flb_sds_destroy(tmp); + flb_sds_destroy(s3_key); + s3_key = tmp_key; + + tag_token = strtok(NULL, tag_delimiter); + i++; + } + + tmp = flb_sds_create_len(TAG_PART_DESCRIPTOR, 5); + if (!tmp) { + goto error; + } + + /* A match against "$TAG[" indicates an invalid or out of bounds tag part. */ + if (strstr(s3_key, tmp)){ + flb_warn("[s3_key] Invalid / Out of bounds tag part: At most 10 tag parts " + "($TAG[0] - $TAG[9]) can be processed. tag=%s, format=%s", tag, format); + } + + /* Find all occurences of $TAG and replace with the entire tag. */ + tmp_key = replace_uri_tokens(s3_key, TAG_DESCRIPTOR, tag); + if (!tmp_key) { + goto error; + } + + if(strlen(tmp_key) > S3_KEY_SIZE){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + flb_sds_destroy(s3_key); + s3_key = tmp_key; + + gmt = gmtime(&time); + + flb_sds_destroy(tmp); + + /* A string longer than S3_KEY_SIZE is created to store the formatted timestamp. */ + tmp = flb_sds_create_size(S3_KEY_SIZE); + if (!tmp) { + goto error; + } + + ret = strftime(tmp, S3_KEY_SIZE, s3_key, gmt); + if(ret == 0){ + flb_warn("[s3_key] Object key length is longer than the 1024 character limit."); + } + + flb_sds_destroy(s3_key); + s3_key = tmp; + + flb_sds_destroy(tmp_tag); + return s3_key; + + error: + flb_errno(); + if (tmp_tag){ + flb_sds_destroy(tmp_tag); + } + if (s3_key){ + flb_sds_destroy(s3_key); + } + if (buf){ + flb_sds_destroy(buf); + } + if (tmp){ + flb_sds_destroy(tmp); + } + if (tmp_key){ + flb_sds_destroy(tmp_key); + } + if (tag_token){ + flb_free(tag_token); + } + return NULL; +} diff --git a/src/aws/flb_s3_local_buffer.c b/src/aws/flb_s3_local_buffer.c new file mode 100644 index 00000000000..d22b1da38d4 --- /dev/null +++ b/src/aws/flb_s3_local_buffer.c @@ -0,0 +1,417 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Simple and fast hashing algorithm to create keys in the local buffer + */ +flb_sds_t simple_hash(const char *str); + +static char *read_tag(char *buffer_path); + +void flb_chunk_destroy(struct flb_local_chunk *c) +{ + if (!c) { + return; + } + if (c->key) { + flb_sds_destroy(c->key); + } + if (c->file_path) { + flb_sds_destroy(c->file_path); + } + if (c->tag) { + flb_sds_destroy(c->tag); + } + flb_free(c); +} + +void flb_local_buffer_destroy_chunks(struct flb_local_buffer *store) +{ + struct mk_list *tmp; + struct mk_list *head; + struct flb_local_chunk *chunk; + + if (!store) { + return; + } + if (mk_list_is_set(&store->chunks) == 0) { + mk_list_foreach_safe(head, tmp, &store->chunks) { + chunk = mk_list_entry(head, struct flb_local_chunk, _head); + flb_chunk_destroy(chunk); + } + } +} + +static int is_tag_file(char *string) +{ + string = strrchr(string, '.'); + + if (string != NULL) { + return (strcmp(string, ".tag")); + } + + return -1; +} + +/* + * "Initializes" the local buffer from the file system + * Reads buffer directory and finds any existing files + * This ensures the plugin will still send buffered data even if FB is restarted + */ +int flb_init_local_buffer(struct flb_local_buffer *store) +{ + DIR *d; + struct dirent *dir; + struct flb_local_chunk *c; + char *tag; + flb_sds_t path; + flb_sds_t tmp_sds; + + d = opendir(store->dir); + if (d) { + while ((dir = readdir(d)) != NULL) { + if (dir->d_type == DT_REG) { + if (strlen(dir->d_name) > 0 && dir->d_name[0] == '.') { + /* ignore hidden files */ + continue; + } + if (is_tag_file(dir->d_name) == 0) { + continue; + } + /* create a new chunk */ + flb_plg_debug(store->ins, "Found existing local buffer file %s", + dir->d_name); + c = flb_calloc(1, sizeof(struct flb_local_chunk)); + if (!c) { + flb_errno(); + return -1; + } + c->create_time = time(NULL); + c->key = flb_sds_create(dir->d_name); + if (!c->key) { + flb_errno(); + flb_chunk_destroy(c); + return -1; + } + path = flb_sds_create_size(strlen(store->dir) + strlen(dir->d_name)); + if (!path) { + flb_errno(); + flb_chunk_destroy(c); + flb_errno(); + return -1; + } + tmp_sds = flb_sds_printf(&path, "%s/%s", store->dir, dir->d_name); + if (!tmp_sds) { + flb_errno(); + flb_chunk_destroy(c); + flb_sds_destroy(path); + return -1; + } + path = tmp_sds; + c->file_path = path; + /* get the fluent tag */ + tag = read_tag(path); + if (!tag) { + flb_plg_error(store->ins, "Could not read Fluent tag from file system; file path=%s.tag", + path); + flb_errno(); + flb_chunk_destroy(c); + return -1; + } + c->tag = flb_sds_create(tag); + flb_free(tag); + if (!c->tag) { + flb_errno(); + flb_chunk_destroy(c); + return -1; + } + flb_plg_info(store->ins, "Found existing local buffer %s", + path); + mk_list_add(&c->_head, &store->chunks); + } + } + closedir(d); + } + else { + flb_errno(); + flb_plg_error(store->ins, "Could not open buffer dir %s", store->dir); + } + return 0; +} + +/* + * Recursively creates directories + */ +int flb_mkdir_all(const char *dir) { + char tmp[PATH_MAX]; + char *p = NULL; + int ret; + size_t len; + + snprintf(tmp, sizeof(tmp), "%s", dir); + len = strlen(tmp); + if(tmp[len - 1] == '/') { + tmp[len - 1] = 0; + } + for(p = tmp + 1; *p; p++) { + if(*p == '/') { + *p = 0; + ret = mkdir(tmp, S_IRWXU); + if (ret < 0 && errno != EEXIST) { + flb_errno(); + return -1; + } + *p = '/'; + } + } + ret = mkdir(tmp, S_IRWXU); + if (ret < 0 && errno != EEXIST) { + 
flb_errno(); + return -1; + } + + return 0; +} + +static size_t append_data(char *path, char *data, size_t bytes) +{ + FILE *f; + size_t written; + f = fopen(path , "a" ); + if (!f) { + return -1; + } + written = fwrite(data, 1, bytes, f); + fclose(f); + return written; +} + +/* we store the Fluent tag in a file ".tag" */ +static int write_tag(char *buffer_path, const char *tag) +{ + char tmp[PATH_MAX]; + size_t ret; + + snprintf(tmp, sizeof(tmp), "%s.tag", buffer_path); + ret = append_data(tmp, (char *) tag, strlen(tag)); + if (ret <= 0) { + return -1; + } + return 0; +} + +/* we store the Fluent tag in a file ".tag" */ +static char *read_tag(char *buffer_path) +{ + char tmp[PATH_MAX]; + size_t ret; + char *data; + size_t data_size; + + snprintf(tmp, sizeof(tmp), "%s.tag", buffer_path); + ret = flb_read_file(tmp, &data, &data_size); + if (ret < 0) { + return NULL; + } + return data; +} + +/* + * Stores data in the local file system + * 'c' should be NULL if no local chunk suitable for this data has been created yet + */ +int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, + const char *tag, char *data, size_t bytes) +{ + size_t written; + flb_sds_t path; + flb_sds_t tmp_sds; + flb_sds_t hash_key; + int ret; + + hash_key = simple_hash(tag); + if (!hash_key) { + flb_plg_error(store->ins, "Could not create local buffer hash key for %s", + tag); + return -1; + } + + if (c == NULL) { + /* create a new chunk */ + flb_plg_debug(store->ins, "Creating new local buffer for %s", tag); + c = flb_calloc(1, sizeof(struct flb_local_chunk)); + if (!c) { + flb_sds_destroy(hash_key); + flb_errno(); + return -1; + } + c->create_time = time(NULL); + c->key = flb_sds_create(hash_key); + if (!c->key) { + flb_errno(); + flb_sds_destroy(hash_key); + flb_chunk_destroy(c); + return -1; + } + c->tag = flb_sds_create(tag); + if (!c->tag) { + flb_errno(); + flb_sds_destroy(hash_key); + flb_chunk_destroy(c); + return -1; + } + path = flb_sds_create_size(strlen(store->dir) + strlen(hash_key)); + if (!path) { + flb_errno(); + flb_sds_destroy(hash_key); + flb_chunk_destroy(c); + flb_errno(); + return -1; + } + tmp_sds = flb_sds_printf(&path, "%s/%s", store->dir, hash_key); + if (!tmp_sds) { + flb_errno(); + flb_sds_destroy(hash_key); + flb_chunk_destroy(c); + flb_sds_destroy(path); + return -1; + } + path = tmp_sds; + c->file_path = path; + /* save the fluent tag */ + ret = write_tag(path, tag); + if (ret < 0) { + flb_plg_error(store->ins, "Could not save Fluent tag to file system; buffer dir=%s", + store->dir); + } + mk_list_add(&c->_head, &store->chunks); + } + + flb_sds_destroy(hash_key); + + written = append_data(c->file_path, data, bytes); + if (written > 0) { + c->size += written; + } + if (written < bytes) { + flb_plg_error(store->ins, "Failed to write %d bytes to local buffer %s", + bytes - written, path); + flb_errno(); + return -1; + } + + flb_plg_debug(store->ins, "Buffered %d bytes", bytes); + return 0; +} + + + +/* + * Returns the chunk associated with the given key + */ +struct flb_local_chunk *flb_chunk_get(struct flb_local_buffer *store, const char *tag) +{ + struct mk_list *tmp; + struct mk_list *head; + struct flb_local_chunk *c = NULL; + struct flb_local_chunk *tmp_chunk; + flb_sds_t hash_key; + + hash_key = simple_hash(tag); + if (!hash_key) { + flb_plg_error(store->ins, "Could not create local buffer hash key for tag %s", + tag); + return NULL; + } + + mk_list_foreach_safe(head, tmp, &store->chunks) { + tmp_chunk = mk_list_entry(head, struct flb_local_chunk, _head); + if 
(strcmp(tmp_chunk->key, hash_key) == 0) { + c = tmp_chunk; + break; + } + } + + flb_sds_destroy(hash_key); + return c; +} + +/* + * Simple and fast hashing algorithm to create keys in the local buffer + */ +flb_sds_t simple_hash(const char *str) +{ + unsigned long hash = 5381; + unsigned long hash2 = 5381; + int c; + flb_sds_t hash_str; + flb_sds_t tmp; + + while ((c = *str++)) { + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + hash2 = ((hash2 << 5) + hash) - c; + } + + /* flb_sds_printf allocs if the incoming sds is not at least 64 bytes */ + hash_str = flb_sds_create_size(64); + if (!hash_str) { + flb_errno(); + return NULL; + } + tmp = flb_sds_printf(&hash_str, "%lu%lu", hash, hash2); + if (!tmp) { + flb_errno(); + flb_sds_destroy(hash_str); + return NULL; + } + hash_str = tmp; + + return hash_str; +} + +/* Removes all files associated with a chunk once it has been removed */ +int flb_remove_chunk_files(struct flb_local_chunk *c) +{ + int ret; + char tmp[PATH_MAX]; + + ret = remove(c->file_path); + if (ret < 0) { + flb_errno(); + return ret; + } + + snprintf(tmp, sizeof(tmp), "%s.tag", c->file_path); + ret = remove(tmp); + if (ret < 0) { + flb_errno(); + } + return ret; +} \ No newline at end of file diff --git a/src/flb_signv4.c b/src/flb_signv4.c index 3edb20674e9..c387234a1f6 100644 --- a/src/flb_signv4.c +++ b/src/flb_signv4.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -372,6 +373,14 @@ static flb_sds_t url_params_format(char *params) tmp = flb_sds_printf(&buf, "%s=%s&", kv->key, kv->val); } + else if (kv->val == NULL) { + /* + * special/edge case- last query param has a null value + * This happens in the S3 CreateMultipartUpload request + */ + tmp = flb_sds_printf(&buf, "%s=", + kv->key, kv->val); + } else { tmp = flb_sds_printf(&buf, "%s=%s", kv->key, kv->val); @@ -502,6 +511,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, int amz_date_header, char *amzdate, char *security_token, + int s3_mode, flb_sds_t *signed_headers) { int i; @@ -514,7 +524,8 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_sds_t cr; flb_sds_t uri; flb_sds_t tmp = NULL; - flb_sds_t params; + flb_sds_t params = NULL; + flb_sds_t payload_hash = NULL; struct flb_kv *kv; struct mk_list list_tmp; struct mk_list *head; @@ -664,6 +675,43 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, } cr = tmp; + /* + * Calculate payload hash. + * This is added at the end of all canonical requests, unless + * S3_MODE_UNSIGNED_PAYLOAD is set. + * If we're using S3_MODE_SIGNED_PAYLOAD, then the hash is added to the + * canonical headers. 
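+     * With S3_MODE_SIGNED_PAYLOAD the same hex digest is also sent to the
+     * server in an x-amz-content-sha256 header; with S3_MODE_UNSIGNED_PAYLOAD
+     * the literal string "UNSIGNED-PAYLOAD" takes the hash's place in the
+     * canonical request.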
+ */ + if (s3_mode == S3_MODE_UNSIGNED_PAYLOAD) { + payload_hash = flb_sds_create("UNSIGNED-PAYLOAD"); + } else { + mbedtls_sha256_init(&sha256_ctx); + mbedtls_sha256_starts(&sha256_ctx, 0); + if (c->body_len > 0 && post_params == FLB_FALSE) { + mbedtls_sha256_update(&sha256_ctx, (const unsigned char *) c->body_buf, + c->body_len); + } + mbedtls_sha256_finish(&sha256_ctx, sha256_buf); + + payload_hash = flb_sds_create_size(64); + if (!payload_hash) { + flb_error("[signv4] error formatting hashed payload"); + flb_sds_destroy(cr); + return NULL; + } + for (i = 0; i < 32; i++) { + tmp = flb_sds_printf(&payload_hash, "%02x", + (unsigned char) sha256_buf[i]); + if (!tmp) { + flb_error("[signv4] error formatting hashed payload"); + flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); + return NULL; + } + payload_hash = tmp; + } + } + /* * Canonical Headers * @@ -671,6 +719,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, * * - x-amz-date * - x-amz-security-token (if set) + * - x-amz-content-sha256 (if S3_MODE_SIGNED_PAYLOAD) */ mk_list_init(&list_tmp); @@ -686,6 +735,10 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_http_add_header(c, "x-amz-security-token", 20, security_token, len); } + if (s3_mode == S3_MODE_SIGNED_PAYLOAD) { + flb_http_add_header(c, "x-amz-content-sha256", 20, payload_hash, 64); + } + headers_sanitize(&c->headers, &list_tmp); /* @@ -699,6 +752,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_errno(); flb_kv_release(&list_tmp); flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); return NULL; } @@ -722,6 +776,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_free(arr); flb_kv_release(&list_tmp); flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); return NULL; } cr = tmp; @@ -734,6 +789,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_free(arr); flb_kv_release(&list_tmp); flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); return NULL; } cr = tmp; @@ -754,6 +810,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_free(arr); flb_kv_release(&list_tmp); flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); return NULL; } cr = tmp; @@ -775,6 +832,7 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_free(arr); flb_kv_release(&list_tmp); flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); return NULL; } *signed_headers = tmp; @@ -783,24 +841,16 @@ static flb_sds_t flb_signv4_canonical_request(struct flb_http_client *c, flb_free(arr); flb_kv_release(&list_tmp); - /* Hashed Payload */ - mbedtls_sha256_init(&sha256_ctx); - mbedtls_sha256_starts(&sha256_ctx, 0); - if (c->body_len > 0 && post_params == FLB_FALSE) { - mbedtls_sha256_update(&sha256_ctx, (const unsigned char *) c->body_buf, - c->body_len); - } - mbedtls_sha256_finish(&sha256_ctx, sha256_buf); - - for (i = 0; i < 32; i++) { - tmp = flb_sds_printf(&cr, "%02x", (unsigned char) sha256_buf[i]); - if (!tmp) { - flb_error("[signedv4] error formatting hashed payload"); - flb_sds_destroy(cr); - return NULL; - } - cr = tmp; + /* Add Payload Hash */ + tmp = flb_sds_printf(&cr, "%s", payload_hash); + if (!tmp) { + flb_error("[signv4] error adding payload hash"); + flb_sds_destroy(cr); + flb_sds_destroy(payload_hash); + return NULL; } + cr = tmp; + flb_sds_destroy(payload_hash); return cr; } @@ -878,6 +928,8 @@ static flb_sds_t flb_signv4_string_to_sign(struct flb_http_client *c, * ====== * * 
https://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html + * + * TODO: The signing key could be cached to improve performance */ static flb_sds_t flb_signv4_calculate_signature(flb_sds_t string_to_sign, char *datestamp, char *service, @@ -986,6 +1038,7 @@ flb_sds_t flb_signv4_do(struct flb_http_client *c, int normalize_uri, int amz_date_header, time_t t_now, char *region, char *service, + int s3_mode, struct flb_aws_provider *provider) { char amzdate[32]; @@ -1033,7 +1086,8 @@ flb_sds_t flb_signv4_do(struct flb_http_client *c, int normalize_uri, cr = flb_signv4_canonical_request(c, normalize_uri, amz_date_header, amzdate, - creds->session_token, &signed_headers); + creds->session_token, s3_mode, + &signed_headers); if (!cr) { flb_error("[signv4] failed canonical request"); flb_sds_destroy(signed_headers); diff --git a/tests/internal/aws_s3_local_buffer.c b/tests/internal/aws_s3_local_buffer.c new file mode 100644 index 00000000000..fe76b137572 --- /dev/null +++ b/tests/internal/aws_s3_local_buffer.c @@ -0,0 +1,130 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +#include +#include +#include +#include +#include +#include + +#include "flb_tests_internal.h" + +#define BUFFER_DIRECTORY FLB_TESTS_DATA_PATH "data/s3_local_buffer/" +#define PLUGIN_NAME "s3_plugin" +#define TEST_DATA "I love Fluent Bit" +#define KEY_1 "key1" +#define KEY_2 "key2" + +static void check_chunk(struct flb_local_chunk *chunk, char *tag, char *data) +{ + int ret; + size_t buffer_size; + char *buffered_data = NULL; + + /* Ensure data retreived is same as that which was stored. */ + TEST_CHECK(strcmp(chunk->tag, tag) == 0); + ret = flb_read_file(chunk->file_path, &buffered_data, &buffer_size); + TEST_CHECK(ret == 0); + TEST_CHECK(strcmp(buffered_data, data) == 0); + + flb_free(buffered_data); +} + +static void test_flb_buffer_put_valid_chunk() +{ + int ret; + struct flb_local_chunk *chunk = NULL; + struct flb_local_buffer *store = NULL; + struct flb_output_instance *out = NULL; + + store = flb_calloc(1, sizeof(struct flb_local_buffer)); + TEST_CHECK(store != NULL); + out = flb_calloc(1, sizeof(struct flb_output_instance)); + TEST_CHECK(out != NULL); + + store->dir = BUFFER_DIRECTORY; + strcpy(out->name, PLUGIN_NAME); + store->ins = out; + mk_list_init(&store->chunks); + TEST_CHECK(mk_list_size(&store->chunks) == 0); + + /* No local chunk suitable for this data has been created yet, + * hence chunk should be NULL. + */ + chunk = flb_chunk_get(store, KEY_1); + TEST_CHECK(chunk == NULL); + + ret = flb_buffer_put(store, chunk, KEY_1, TEST_DATA, strlen(TEST_DATA)); + TEST_CHECK(ret == 0); + TEST_CHECK(mk_list_size(&store->chunks) == 1); + + /* A new chunk associated with key2 was created in the above statement, + * hence this time, chunk should not be NULL. 
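+     * (chunks are keyed by a hash of the tag, so the flb_buffer_put() call
+     * above is what created the entry this lookup returns)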
+ */ + chunk = flb_chunk_get(store, KEY_1); + TEST_CHECK(chunk != NULL); + TEST_CHECK(mk_list_size(&store->chunks) == 1); + + chunk = flb_chunk_get(store, KEY_1); + check_chunk(chunk, KEY_1, TEST_DATA); + + ret = flb_remove_chunk_files(chunk); + TEST_CHECK(ret == 0); + flb_chunk_destroy(chunk); + flb_free(out); + flb_free(store); +} + +static void test_flb_init_local_buffer() +{ + int ret; + struct flb_local_chunk *chunk; + struct flb_local_buffer *store = NULL; + struct flb_local_buffer *new_store = NULL; + struct flb_output_instance *out = NULL; + + store = flb_calloc(1, sizeof(struct flb_local_buffer)); + TEST_CHECK(store != NULL); + new_store = flb_calloc(1, sizeof(struct flb_local_buffer)); + TEST_CHECK(new_store != NULL); + out = flb_calloc(1, sizeof(struct flb_output_instance)); + TEST_CHECK(out != NULL); + + store->dir = BUFFER_DIRECTORY; + strcpy(out->name, PLUGIN_NAME); + store->ins = out; + mk_list_init(&store->chunks); + TEST_CHECK(mk_list_size(&store->chunks) == 0); + + new_store->dir = BUFFER_DIRECTORY; + strcpy(out->name, PLUGIN_NAME); + new_store->ins = out; + mk_list_init(&new_store->chunks); + TEST_CHECK(mk_list_size(&new_store->chunks) == 0); + + chunk = flb_chunk_get(store, KEY_2); + TEST_CHECK(chunk == NULL); + ret = flb_buffer_put(store, chunk, KEY_2, TEST_DATA, strlen(TEST_DATA)); + TEST_CHECK(ret == 0); + TEST_CHECK(mk_list_size(&store->chunks) == 1); + + ret = flb_init_local_buffer(new_store); + TEST_CHECK(ret == 0); + + chunk = flb_chunk_get(new_store, KEY_2); + check_chunk(chunk, KEY_2, TEST_DATA); + + ret = flb_remove_chunk_files(chunk); + TEST_CHECK(ret == 0); + flb_chunk_destroy(chunk); + flb_free(out); + flb_free(store); + flb_free(new_store); +} + + +TEST_LIST = { + { "flb_buffer_put_valid_chunk" , test_flb_buffer_put_valid_chunk}, + {"flb_buffer_init_local_buffer", test_flb_init_local_buffer}, + { 0 } +}; \ No newline at end of file diff --git a/tests/internal/aws_util.c b/tests/internal/aws_util.c index df06725d098..24676fd498b 100644 --- a/tests/internal/aws_util.c +++ b/tests/internal/aws_util.c @@ -6,6 +6,29 @@ #include "flb_tests_internal.h" +#define S3_KEY_FORMAT_TAG_PART "logs/$TAG[2]/$TAG[0]/%Y/%m/%d" +#define S3_OBJECT_KEY_TAG_PART "logs/ccc/aa/2020/08/15" + +#define S3_KEY_FORMAT_FULL_TAG "logs/$TAG/%Y/%m/%d" +#define S3_OBJECT_KEY_FULL_TAG "logs/aa.bb.ccc/2020/08/15" + +#define S3_KEY_FORMAT_SPECIAL_CHARCATERS_TAG "logs/my.great_photos-2020:jan/$TAG/%Y/%m/%d" +#define S3_OBJECT_KEY_SPECIAL_CHARCATERS_TAG "logs/my.great_photos-2020:jan/aa.bb.ccc/2020/08/15" + +#define S3_OBJECT_KEY_INVALID_DELIMITER "logs/aa.bb-ccc/aa.bb-ccc[0]/2020/08/15" + +#define S3_KEY_FORMAT_INVALID_TAG "logs/$TAG[2]/$TAG[-1]/%Y/%m/%d" +#define S3_OBJECY_KEY_INVALID_TAG "logs/ccc/aa.bb.ccc[-1]/2020/08/15" + +#define S3_KEY_FORMAT_OUT_OF_BOUNDS_TAG "logs/$TAG[2]/$TAG[]/%Y/%m/%d" + +#define TAG "aa.bb.ccc" +#define MULTI_DELIMITER_TAG "aa.bb-ccc" +#define TAG_DELIMITER "." 
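+
+/*
+ * For example, with TAG "aa.bb.ccc" and TAG_DELIMITER ".", the format
+ * "logs/$TAG[2]/$TAG[0]/%Y/%m/%d" expands $TAG[0] to "aa" and $TAG[2] to
+ * "ccc", and strftime() fills in the date, so an August 15 2020 timestamp
+ * yields S3_OBJECT_KEY_TAG_PART ("logs/ccc/aa/2020/08/15").
+ */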
+#define TAG_DELIMITERS ".-" +#define INVALID_TAG_DELIMITERS ",/" + + static void test_flb_aws_error() { flb_sds_t error_type; @@ -47,8 +70,98 @@ static void test_flb_aws_endpoint() } +static void test_flb_get_s3_key_multi_tag_exists() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_TAG_PART, t, TAG, TAG_DELIMITER); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECT_KEY_TAG_PART) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_full_tag() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_FULL_TAG, t, TAG, TAG_DELIMITER); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECT_KEY_FULL_TAG) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_tag_special_characters() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_SPECIAL_CHARCATERS_TAG, t, TAG, TAG_DELIMITER); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECT_KEY_SPECIAL_CHARCATERS_TAG) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_multi_tag_delimiter() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_TAG_PART, t, MULTI_DELIMITER_TAG, TAG_DELIMITERS); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECT_KEY_TAG_PART) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_invalid_tag_delimiter() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_TAG_PART, t, MULTI_DELIMITER_TAG, INVALID_TAG_DELIMITERS); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECT_KEY_INVALID_DELIMITER) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_invalid_tag_index() +{ + flb_sds_t s3_key_format = NULL; + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(S3_KEY_FORMAT_INVALID_TAG, t, TAG, TAG_DELIMITER); + TEST_CHECK(strcmp(s3_key_format, S3_OBJECY_KEY_INVALID_TAG) == 0); + + flb_sds_destroy(s3_key_format); +} + +static void test_flb_get_s3_key_invalid_key_length() +{ + flb_sds_t s3_key_format = NULL; + char buf[1100] = ""; + char tmp[1024] = ""; + for (int i=0 ; i <= 975; i++){ + tmp[i] = 'a'; + } + snprintf(buf, sizeof(buf), "%s%s", S3_KEY_FORMAT_SPECIAL_CHARCATERS_TAG, tmp); + struct tm day = { 0, 0, 0, 15, 7, 120}; + time_t t = mktime(&day); + s3_key_format = flb_get_s3_key(buf, t, TAG, TAG_DELIMITER); + TEST_CHECK(strlen(s3_key_format) <= 1024); + + flb_sds_destroy(s3_key_format); +} + TEST_LIST = { { "parse_api_error" , test_flb_aws_error}, { "flb_aws_endpoint" , test_flb_aws_endpoint}, + {"flb_get_s3_key_multi_tag_exists", test_flb_get_s3_key_multi_tag_exists}, + {"flb_get_s3_key_full_tag", test_flb_get_s3_key_full_tag}, + {"flb_get_s3_key_tag_special_characters", test_flb_get_s3_key_tag_special_characters}, + {"flb_get_s3_key_multi_tag_delimiter", test_flb_get_s3_key_multi_tag_delimiter}, + {"flb_get_s3_key_invalid_tag_delimiter", test_flb_get_s3_key_invalid_tag_delimiter}, + {"flb_get_s3_key_invalid_tag_index", test_flb_get_s3_key_invalid_tag_index}, + {"flb_get_s3_key_invalid_key_length", test_flb_get_s3_key_invalid_key_length}, { 0 } }; diff --git 
a/tests/internal/data/s3_local_buffer/.gitkeep b/tests/internal/data/s3_local_buffer/.gitkeep new file mode 100644 index 00000000000..786a4bd9d5b --- /dev/null +++ b/tests/internal/data/s3_local_buffer/.gitkeep @@ -0,0 +1 @@ +# git won't keep an empty dir, hence this file \ No newline at end of file diff --git a/tests/internal/signv4.c b/tests/internal/signv4.c index 8fd84c42183..946943ad94a 100644 --- a/tests/internal/signv4.c +++ b/tests/internal/signv4.c @@ -604,6 +604,7 @@ static void aws_test_suite() FLB_TRUE, /* normalize URI ? */ FLB_FALSE, /* add x-amz-date header ? */ t, region, service, + 0, provider); TEST_CHECK(signature != NULL); if (signature) { From 0a46c3fa7fa75043894e27821ef3cb8779763f7c Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 00:30:12 -0700 Subject: [PATCH 02/17] out_kinesis_firehose: new high performance core plugin for Kinesis Firehose Signed-off-by: Wesley Pettit --- CMakeLists.txt | 1 + plugins/CMakeLists.txt | 1 + plugins/out_kinesis_firehose/CMakeLists.txt | 5 + plugins/out_kinesis_firehose/firehose.c | 417 +++++++++ plugins/out_kinesis_firehose/firehose.h | 100 +++ plugins/out_kinesis_firehose/firehose_api.c | 902 ++++++++++++++++++++ plugins/out_kinesis_firehose/firehose_api.h | 45 + tests/runtime/CMakeLists.txt | 1 + tests/runtime/out_firehose.c | 200 +++++ 9 files changed, 1672 insertions(+) create mode 100644 plugins/out_kinesis_firehose/CMakeLists.txt create mode 100644 plugins/out_kinesis_firehose/firehose.c create mode 100644 plugins/out_kinesis_firehose/firehose.h create mode 100644 plugins/out_kinesis_firehose/firehose_api.c create mode 100644 plugins/out_kinesis_firehose/firehose_api.h create mode 100644 tests/runtime/out_firehose.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 30a413105fd..ef6959ab917 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -174,6 +174,7 @@ option(FLB_OUT_LOGDNA "Enable LogDNA output plugin" Yes) option(FLB_OUT_KAFKA "Enable Kafka output plugin" No) option(FLB_OUT_KAFKA_REST "Enable Kafka Rest output plugin" Yes) option(FLB_OUT_CLOUDWATCH_LOGS "Enable AWS CloudWatch output plugin" Yes) +option(FLB_OUT_KINESIS_FIREHOSE "Enable AWS Firehose output plugin" Yes) option(FLB_OUT_S3 "Enable AWS S3 output plugin" Yes) option(FLB_FILTER_ALTER_SIZE "Enable alter_size filter" Yes) option(FLB_FILTER_AWS "Enable aws filter" Yes) diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt index 60ba86b58e5..936c727de54 100644 --- a/plugins/CMakeLists.txt +++ b/plugins/CMakeLists.txt @@ -200,6 +200,7 @@ REGISTER_OUT_PLUGIN("out_flowcounter") REGISTER_OUT_PLUGIN("out_gelf") REGISTER_OUT_PLUGIN("out_cloudwatch_logs") REGISTER_OUT_PLUGIN("out_s3") +REGISTER_OUT_PLUGIN("out_kinesis_firehose") # FILTERS # ======= diff --git a/plugins/out_kinesis_firehose/CMakeLists.txt b/plugins/out_kinesis_firehose/CMakeLists.txt new file mode 100644 index 00000000000..9cbf05d365e --- /dev/null +++ b/plugins/out_kinesis_firehose/CMakeLists.txt @@ -0,0 +1,5 @@ +set(src + firehose.c + firehose_api.c) + +FLB_PLUGIN(out_kinesis_firehose "${src}" "") diff --git a/plugins/out_kinesis_firehose/firehose.c b/plugins/out_kinesis_firehose/firehose.c new file mode 100644 index 00000000000..b98afb5d429 --- /dev/null +++ b/plugins/out_kinesis_firehose/firehose.c @@ -0,0 +1,417 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "firehose.h" +#include "firehose_api.h" + +static struct flb_aws_header content_type_header = { + .key = "Content-Type", + .key_len = 12, + .val = "application/x-amz-json-1.1", + .val_len = 26, +}; + +static int cb_firehose_init(struct flb_output_instance *ins, + struct flb_config *config, void *data) +{ + const char *tmp; + char *session_name = NULL; + struct flb_firehose *ctx = NULL; + int ret; + (void) config; + (void) data; + + ctx = flb_calloc(1, sizeof(struct flb_firehose)); + if (!ctx) { + flb_errno(); + return -1; + } + + ctx->ins = ins; + + /* Populate context with config map defaults and incoming properties */ + ret = flb_output_config_map_set(ins, (void *) ctx); + if (ret == -1) { + flb_plg_error(ctx->ins, "configuration error"); + goto error; + } + + tmp = flb_output_get_property("delivery_stream", ins); + if (tmp) { + ctx->delivery_stream = tmp; + } else { + flb_plg_error(ctx->ins, "'delivery_stream' is a required field"); + goto error; + } + + tmp = flb_output_get_property("time_key", ins); + if (tmp) { + ctx->time_key = tmp; + } + + tmp = flb_output_get_property("time_key_format", ins); + if (tmp) { + ctx->time_key_format = tmp; + } else { + ctx->time_key_format = DEFAULT_TIME_KEY_FORMAT; + } + + tmp = flb_output_get_property("endpoint", ins); + if (tmp) { + ctx->custom_endpoint = FLB_TRUE; + ctx->endpoint = (char *) tmp; + } + else { + ctx->custom_endpoint = FLB_FALSE; + } + + tmp = flb_output_get_property("log_key", ins); + if (tmp) { + ctx->log_key = tmp; + } + + tmp = flb_output_get_property("region", ins); + if (tmp) { + ctx->region = tmp; + } else { + flb_plg_error(ctx->ins, "'region' is a required field"); + goto error; + } + + tmp = flb_output_get_property("role_arn", ins); + if (tmp) { + ctx->role_arn = tmp; + } + + /* one tls instance for provider, one for cw client */ + ctx->cred_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + + if (!ctx->cred_tls.context) { + flb_plg_error(ctx->ins, "Failed to create tls context"); + goto error; + } + + ctx->client_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->client_tls.context) { + flb_plg_error(ctx->ins, "Failed to create tls context"); + goto error; + } + + ctx->aws_provider = flb_standard_chain_provider_create(config, + &ctx->cred_tls, + (char *) ctx->region, + NULL, + flb_aws_client_generator()); + if (!ctx->aws_provider) { + flb_plg_error(ctx->ins, "Failed to create AWS Credential Provider"); + goto error; + } + + if(ctx->role_arn) { + /* set up sts assume role provider */ 
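+        /*
+         * When role_arn is set, the standard chain provider created above
+         * becomes the base provider and is wrapped by an STS AssumeRole
+         * provider, which supplies temporary credentials for signing. It
+         * needs its own randomly generated session name and a separate TLS
+         * context, since TLS instances cannot be shared between clients.
+         */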
+ session_name = flb_sts_session_name(); + if (!session_name) { + flb_plg_error(ctx->ins, + "Failed to generate random STS session name"); + goto error; + } + + /* STS provider needs yet another separate TLS instance */ + ctx->sts_tls.context = flb_tls_context_new(FLB_TRUE, + ins->tls_debug, + ins->tls_vhost, + ins->tls_ca_path, + ins->tls_ca_file, + ins->tls_crt_file, + ins->tls_key_file, + ins->tls_key_passwd); + if (!ctx->sts_tls.context) { + flb_errno(); + goto error; + } + + ctx->base_aws_provider = ctx->aws_provider; + + ctx->aws_provider = flb_sts_provider_create(config, + &ctx->sts_tls, + ctx->base_aws_provider, + NULL, + (char *) ctx->role_arn, + session_name, + (char *) ctx->region, + NULL, + flb_aws_client_generator()); + if (!ctx->aws_provider) { + flb_plg_error(ctx->ins, + "Failed to create AWS STS Credential Provider"); + goto error; + } + /* session name can freed after provider is created */ + flb_free(session_name); + session_name = NULL; + } + + /* initialize credentials and set to sync mode */ + ctx->aws_provider->provider_vtable->sync(ctx->aws_provider); + ctx->aws_provider->provider_vtable->init(ctx->aws_provider); + + if (ctx->endpoint == NULL) { + ctx->endpoint = flb_aws_endpoint("firehose", (char *) ctx->region); + if (!ctx->endpoint) { + goto error; + } + } + + struct flb_aws_client_generator *generator = flb_aws_client_generator(); + ctx->firehose_client = generator->create(); + if (!ctx->firehose_client) { + goto error; + } + ctx->firehose_client->name = "firehose_client"; + ctx->firehose_client->has_auth = FLB_TRUE; + ctx->firehose_client->provider = ctx->aws_provider; + ctx->firehose_client->region = (char *) ctx->region; + ctx->firehose_client->service = "firehose"; + ctx->firehose_client->port = 443; + ctx->firehose_client->flags = 0; + ctx->firehose_client->proxy = NULL; + ctx->firehose_client->static_headers = &content_type_header; + ctx->firehose_client->static_headers_len = 1; + + struct flb_upstream *upstream = flb_upstream_create(config, ctx->endpoint, + 443, FLB_IO_TLS, + &ctx->client_tls); + if (!upstream) { + flb_plg_error(ctx->ins, "Connection initialization error"); + goto error; + } + + ctx->firehose_client->upstream = upstream; + ctx->firehose_client->host = ctx->endpoint; + + /* Export context */ + flb_output_set_context(ins, ctx); + + return 0; + +error: + flb_free(session_name); + flb_plg_error(ctx->ins, "Initialization failed"); + flb_firehose_ctx_destroy(ctx); + return -1; +} + +struct flush *new_flush_buffer() +{ + struct flush *buf; + + + buf = flb_calloc(1, sizeof(struct flush)); + if (!buf) { + flb_errno(); + return NULL; + } + + buf->tmp_buf = flb_malloc(sizeof(char) * PUT_RECORD_BATCH_PAYLOAD_SIZE); + if (!buf->tmp_buf) { + flb_errno(); + flush_destroy(buf); + return NULL; + } + buf->tmp_buf_size = PUT_RECORD_BATCH_PAYLOAD_SIZE; + + buf->events = flb_malloc(sizeof(struct event) * MAX_EVENTS_PER_PUT); + if (!buf->events) { + flb_errno(); + flush_destroy(buf); + return NULL; + } + buf->events_capacity = MAX_EVENTS_PER_PUT; + + return buf; +} + +static void cb_firehose_flush(const void *data, size_t bytes, + const char *tag, int tag_len, + struct flb_input_instance *i_ins, + void *out_context, + struct flb_config *config) +{ + struct flb_firehose *ctx = out_context; + int ret; + struct flush *buf; + (void) i_ins; + (void) config; + + buf = new_flush_buffer(); + if (!buf) { + flb_plg_error(ctx->ins, "Failed to construct flush buffer"); + FLB_OUTPUT_RETURN(FLB_RETRY); + } + + ret = process_and_send_records(ctx, buf, data, bytes); + if (ret 
< 0) { + flb_plg_error(ctx->ins, "Failed to send records"); + flush_destroy(buf); + FLB_OUTPUT_RETURN(FLB_RETRY); + } + + flb_plg_info(ctx->ins, "Processed %d records, sent %d to %s", + buf->records_processed, buf->records_sent, ctx->delivery_stream); + flush_destroy(buf); + + FLB_OUTPUT_RETURN(FLB_OK); +} + +void flb_firehose_ctx_destroy(struct flb_firehose *ctx) +{ + if (ctx != NULL) { + if (ctx->base_aws_provider) { + flb_aws_provider_destroy(ctx->base_aws_provider); + } + + if (ctx->aws_provider) { + flb_aws_provider_destroy(ctx->aws_provider); + } + + if (ctx->cred_tls.context) { + flb_tls_context_destroy(ctx->cred_tls.context); + } + + if (ctx->sts_tls.context) { + flb_tls_context_destroy(ctx->sts_tls.context); + } + + if (ctx->client_tls.context) { + flb_tls_context_destroy(ctx->client_tls.context); + } + + if (ctx->firehose_client) { + flb_aws_client_destroy(ctx->firehose_client); + } + + if (ctx->custom_endpoint == FLB_FALSE) { + flb_free(ctx->endpoint); + } + + flb_free(ctx); + } +} + +static int cb_firehose_exit(void *data, struct flb_config *config) +{ + struct flb_firehose *ctx = data; + + flb_firehose_ctx_destroy(ctx); + return 0; +} + +/* Configuration properties map */ +static struct flb_config_map config_map[] = { + { + FLB_CONFIG_MAP_STR, "region", NULL, + 0, FLB_FALSE, 0, + "The AWS region of your delivery stream" + }, + + { + FLB_CONFIG_MAP_STR, "delivery_stream", NULL, + 0, FLB_FALSE, 0, + "Firehose delivery stream name" + }, + + { + FLB_CONFIG_MAP_STR, "time_key", NULL, + 0, FLB_FALSE, 0, + "Add the timestamp to the record under this key. By default the timestamp " + "from Fluent Bit will not be added to records sent to Kinesis." + }, + + { + FLB_CONFIG_MAP_STR, "time_key_format", NULL, + 0, FLB_FALSE, 0, + "strftime compliant format string for the timestamp; for example, " + "the default is '%Y-%m-%dT%H:%M:%S'. This option is used with time_key. " + }, + + { + FLB_CONFIG_MAP_STR, "role_arn", NULL, + 0, FLB_FALSE, 0, + "ARN of an IAM role to assume (ex. for cross account access)." + }, + + { + FLB_CONFIG_MAP_STR, "endpoint", NULL, + 0, FLB_FALSE, 0, + "Specify a custom endpoint for the Firehose API" + }, + + /* EOF */ + {0} +}; + +/* Plugin registration */ +struct flb_output_plugin out_kinesis_firehose_plugin = { + .name = "kinesis_firehose", + .description = "Send logs to Amazon Kinesis Firehose", + .cb_init = cb_firehose_init, + .cb_flush = cb_firehose_flush, + .cb_exit = cb_firehose_exit, + .flags = 0, + + /* Configuration */ + .config_map = config_map, +}; diff --git a/plugins/out_kinesis_firehose/firehose.h b/plugins/out_kinesis_firehose/firehose.h new file mode 100644 index 00000000000..7bb3a4c2264 --- /dev/null +++ b/plugins/out_kinesis_firehose/firehose.h @@ -0,0 +1,100 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef FLB_OUT_FIREHOSE_H +#define FLB_OUT_FIREHOSE_H + +#include +#include +#include +#include +#include +#include + +#define DEFAULT_TIME_KEY_FORMAT "%Y-%m-%dT%H:%M:%S" + +/* buffers used for each flush */ +struct flush { + /* temporary buffer for storing the serialized event messages */ + char *tmp_buf; + size_t tmp_buf_size; + /* current index of tmp_buf */ + size_t tmp_buf_offset; + + /* projected final size of the payload for this flush */ + size_t data_size; + + /* log records- each of these has a pointer to their message in tmp_buf */ + struct event *events; + int events_capacity; + /* current event */ + int event_index; + + /* the payload of the API request */ + char *out_buf; + size_t out_buf_size; + + /* buffer used to temporarily hold an event during processing */ + char *event_buf; + size_t event_buf_size; + + int records_sent; + int records_processed; +}; + +struct event { + char *json; + size_t len; + struct timespec timestamp; +}; + +struct flb_firehose { + /* + * TLS instances can not be re-used. So we have one for: + * - Base cred provider (needed for EKS provider) + * - STS Assume role provider + * - The CloudWatch Logs client for this plugin + */ + struct flb_tls cred_tls; + struct flb_tls sts_tls; + struct flb_tls client_tls; + struct flb_aws_provider *aws_provider; + struct flb_aws_provider *base_aws_provider; + struct flb_aws_client *firehose_client; + + /* configuration options */ + const char *delivery_stream; + const char *time_key; + const char *time_key_format; + const char *region; + const char *role_arn; + const char *log_key; + int custom_endpoint; + + /* must be freed on shutdown if custom_endpoint is not set */ + char *endpoint; + + /* Plugin output instance reference */ + struct flb_output_instance *ins; +}; + +void flb_firehose_ctx_destroy(struct flb_firehose *ctx); + +#endif diff --git a/plugins/out_kinesis_firehose/firehose_api.c b/plugins/out_kinesis_firehose/firehose_api.c new file mode 100644 index 00000000000..6fa351c5461 --- /dev/null +++ b/plugins/out_kinesis_firehose/firehose_api.c @@ -0,0 +1,902 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "firehose_api.h" + +#define ERR_CODE_SERVICE_UNAVAILABLE "ServiceUnavailableException" + +static struct flb_aws_header put_record_batch_header = { + .key = "X-Amz-Target", + .key_len = 12, + .val = "Firehose_20150804.PutRecordBatch", + .val_len = 32, +}; + +static inline int try_to_write(char *buf, int *off, size_t left, + const char *str, size_t str_len) +{ + if (str_len <= 0){ + str_len = strlen(str); + } + if (left <= *off+str_len) { + return FLB_FALSE; + } + memcpy(buf+*off, str, str_len); + *off += str_len; + return FLB_TRUE; +} + +/* + * Writes the "header" for a put_record_batch payload + */ +static int init_put_payload(struct flb_firehose *ctx, struct flush *buf, + int *offset) +{ + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + "{\"DeliveryStreamName\":\"", 23)) { + goto error; + } + + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + ctx->delivery_stream, 0)) { + goto error; + } + + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + "\",\"Records\":[", 13)) { + goto error; + } + return 0; + +error: + return -1; +} + +/* + * Writes a log event to the output buffer + */ +static int write_event(struct flb_firehose *ctx, struct flush *buf, + struct event *event, int *offset) +{ + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + "{\"Data\":\"", 9)) { + goto error; + } + + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + event->json, event->len)) { + goto error; + } + + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + "\"}", 2)) { + goto error; + } + + return 0; + +error: + return -1; +} + +/* Terminates a PutRecordBatch payload */ +static int end_put_payload(struct flb_firehose *ctx, struct flush *buf, + int *offset) +{ + if (!try_to_write(buf->out_buf, offset, buf->out_buf_size, + "]}", 2)) { + return -1; + } + buf->out_buf[*offset] = '\0'; + + return 0; +} + + +/* + * Processes the msgpack object + * -1 = failure, record not added + * 0 = success, record added + * 1 = we ran out of space, send and retry + * 2 = record could not be processed, discard it + * Returns 0 on success, -1 on general errors, + * and 1 if we ran out of space to write the event + * which means a send must occur + */ +static int process_event(struct flb_firehose *ctx, struct flush *buf, + const msgpack_object *obj, struct flb_time *tms) +{ + size_t written = 0; + int ret; + size_t size; + size_t b64_len; + struct event *event; + char *tmp_buf_ptr; + char *time_key_ptr; + struct tm time_stamp; + struct tm *tmp; + size_t len; + size_t tmp_size; + + tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset; + ret = flb_msgpack_to_json(tmp_buf_ptr, + buf->tmp_buf_size - buf->tmp_buf_offset, + obj); + if (ret <= 0) { + /* + * negative value means failure to write to buffer, + * which means we ran out of space, and must send the logs + * + * TODO: This could also incorrectly be triggered if the record + * is larger than MAX_EVENT_SIZE + */ + return 1; + } + written = (size_t) ret; + + /* Discard empty messages (written == 2 means '""') */ + if (written <= 2) { + flb_plg_debug(ctx->ins, "Found empty log message, %s", ctx->delivery_stream); + return 2; + } + + /* is (written + 1) because we still have to append newline */ + if ((written + 1) >= MAX_EVENT_SIZE) { + flb_plg_warn(ctx->ins, "[size=%zu] Discarding 
record which is larger than " + "max size allowed by Firehose, %s", written + 1, + ctx->delivery_stream); + return 2; + } + + if (ctx->time_key) { + /* append time_key to end of json string */ + tmp = gmtime_r(&tms->tm.tv_sec, &time_stamp); + if (!tmp) { + flb_plg_error(ctx->ins, "Could not create time stamp for %d unix " + "seconds, discarding record, %s", tms->tm.tv_sec, + ctx->delivery_stream); + return 2; + } + /* guess space needed to write time_key */ + len = 6 + strlen(ctx->time_key) + 6 * strlen(ctx->time_key_format); + /* how much space do we have left */ + tmp_size = (buf->tmp_buf_size - buf->tmp_buf_offset) - written; + if (len > tmp_size) { + /* not enough space- tell caller to retry */ + return 1; + } + time_key_ptr = tmp_buf_ptr + written - 1; + memcpy(time_key_ptr, ",", 1); + time_key_ptr++; + memcpy(time_key_ptr, "\"", 1); + time_key_ptr++; + memcpy(time_key_ptr, ctx->time_key, strlen(ctx->time_key)); + time_key_ptr += strlen(ctx->time_key); + memcpy(time_key_ptr, "\":\"", 3); + time_key_ptr += 3; + tmp_size = buf->tmp_buf_size - buf->tmp_buf_offset; + tmp_size -= (time_key_ptr - tmp_buf_ptr); + len = strftime(time_key_ptr, tmp_size, ctx->time_key_format, &time_stamp); + if (len <= 0) { + /* ran out of space - should not happen because of check above */ + return 1; + } + time_key_ptr += len; + memcpy(time_key_ptr, "\"}", 2); + time_key_ptr += 2; + written = (time_key_ptr - tmp_buf_ptr); + } + + /* is (written + 1) because we still have to append newline */ + if ((written + 1) >= MAX_EVENT_SIZE) { + flb_plg_warn(ctx->ins, "[size=%zu] Discarding record which is larger than " + "max size allowed by Firehose, %s", written + 1, + ctx->delivery_stream); + return 2; + } + + /* append newline to record */ + + tmp_size = (buf->tmp_buf_size - buf->tmp_buf_offset) - written; + if (tmp_size <= 1) { + /* no space left- tell caller to retry */ + return 1; + } + + memcpy(tmp_buf_ptr + written, "\n", 1); + written++; + + /* + * check if event_buf is initialized and big enough + * Base64 encoding will increase size by ~4/3 + */ + size = (written * 1.5) + 4; + if (buf->event_buf == NULL || buf->event_buf_size < size) { + flb_free(buf->event_buf); + buf->event_buf = flb_malloc(size); + buf->event_buf_size = size; + if (buf->event_buf == NULL) { + flb_errno(); + return -1; + } + } + + tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset; + ret = mbedtls_base64_encode((unsigned char *) buf->event_buf, size, &b64_len, + (unsigned char *) tmp_buf_ptr, written); + if (ret != 0) { + flb_errno(); + return -1; + } + written = b64_len; + + tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset; + if ((buf->tmp_buf_size - buf->tmp_buf_offset) < written) { + /* not enough space, send logs */ + return 1; + } + + /* copy serialized json to tmp_buf */ + memcpy(tmp_buf_ptr, buf->event_buf, written); + + buf->tmp_buf_offset += written; + event = &buf->events[buf->event_index]; + event->json = tmp_buf_ptr; + event->len = written; + event->timestamp.tv_sec = tms->tm.tv_sec; + event->timestamp.tv_nsec = tms->tm.tv_nsec; + + return 0; +} + +/* Resets or inits a flush struct */ +static void reset_flush_buf(struct flb_firehose *ctx, struct flush *buf) { + buf->event_index = 0; + buf->tmp_buf_offset = 0; + buf->data_size = PUT_RECORD_BATCH_HEADER_LEN + PUT_RECORD_BATCH_FOOTER_LEN; + buf->data_size += strlen(ctx->delivery_stream); +} + +/* constructs a put payload, and then sends */ +static int send_log_events(struct flb_firehose *ctx, struct flush *buf) { + int ret; + int offset; + int i; + struct event *event; + + if 
(buf->event_index <= 0) { + /* + * event_index should always be 1 more than the actual last event index + * when this function is called. + * Except in the case where send_log_events() is called at the end of + * process_and_send. If all records were already sent, event_index + * will be 0. Hence this check. + */ + return 0; + } + + /* alloc out_buf if needed */ + if (buf->out_buf == NULL || buf->out_buf_size < buf->data_size) { + if (buf->out_buf != NULL) { + flb_free(buf->out_buf); + } + buf->out_buf = flb_malloc(buf->data_size + 1); + if (!buf->out_buf) { + flb_errno(); + return -1; + } + buf->out_buf_size = buf->data_size; + } + + offset = 0; + ret = init_put_payload(ctx, buf, &offset); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to initialize PutRecordBatch payload, %s", + ctx->delivery_stream); + return -1; + } + + for (i = 0; i < buf->event_index; i++) { + event = &buf->events[i]; + ret = write_event(ctx, buf, event, &offset); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to write log record %d to " + "payload buffer, %s", i, ctx->delivery_stream); + return -1; + } + if (i != (buf->event_index -1)) { + if (!try_to_write(buf->out_buf, &offset, buf->out_buf_size, + ",", 1)) { + flb_plg_error(ctx->ins, "Could not terminate record with ','"); + return -1; + } + } + } + + ret = end_put_payload(ctx, buf, &offset); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not complete PutRecordBatch payload"); + return -1; + } + flb_plg_debug(ctx->ins, "Sending %d records", i); + ret = put_record_batch(ctx, buf, (size_t) offset, i); + if (ret < 0) { + flb_plg_error(ctx->ins, "Failed to send log records"); + return -1; + } + buf->records_sent += i; + + return 0; +} + +/* + * Processes the msgpack object, sends the current batch if needed + */ +static int add_event(struct flb_firehose *ctx, struct flush *buf, + const msgpack_object *obj, struct flb_time *tms) +{ + int ret; + struct event *event; + int retry_add = FLB_FALSE; + int event_bytes = 0; + + if (buf->event_index == 0) { + /* init */ + reset_flush_buf(ctx, buf); + } + +retry_add_event: + retry_add = FLB_FALSE; + ret = process_event(ctx, buf, obj, tms); + if (ret < 0) { + return -1; + } + else if (ret == 1) { + if (buf->event_index <= 0) { + /* somehow the record was larger than our entire request buffer */ + flb_plg_warn(ctx->ins, "Discarding massive log record, %s", + ctx->delivery_stream); + return 0; /* discard this record and return to caller */ + } + /* send logs and then retry the add */ + retry_add = FLB_TRUE; + goto send; + } else if (ret == 2) { + /* discard this record and return to caller */ + flb_plg_warn(ctx->ins, "Discarding large or unprocessable record, %s", + ctx->delivery_stream); + return 0; + } + + event = &buf->events[buf->event_index]; + event_bytes = event->len + PUT_RECORD_BATCH_PER_RECORD_LEN; + + if ((buf->data_size + event_bytes) > PUT_RECORD_BATCH_PAYLOAD_SIZE) { + if (buf->event_index <= 0) { + /* somehow the record was larger than our entire request buffer */ + flb_plg_warn(ctx->ins, "[size=%zu] Discarding massive log record, %s", + event_bytes, ctx->delivery_stream); + return 0; /* discard this record and return to caller */ + } + /* do not send this event */ + retry_add = FLB_TRUE; + goto send; + } + + /* send is not needed yet, return to caller */ + buf->data_size += event_bytes; + buf->event_index++; + + if (buf->event_index == MAX_EVENTS_PER_PUT) { + goto send; + } + + return 0; + +send: + ret = send_log_events(ctx, buf); + reset_flush_buf(ctx, buf); + if (ret < 0) { + return -1; + } + + if 
(retry_add == FLB_TRUE) { + goto retry_add_event; + } + + return 0; +} + +/* + * Main routine- processes msgpack and sends in batches + * return value is the number of events processed (number sent is stored in buf) + */ +int process_and_send_records(struct flb_firehose *ctx, struct flush *buf, + const char *data, size_t bytes) +{ + size_t off = 0; + int i = 0; + size_t map_size; + msgpack_unpacked result; + msgpack_object *obj; + msgpack_object map; + msgpack_object root; + msgpack_object_kv *kv; + msgpack_object key; + msgpack_object val; + char *key_str = NULL; + size_t key_str_size = 0; + int j; + int ret; + int check = FLB_FALSE; + int found = FLB_FALSE; + struct flb_time tms; + + /* unpack msgpack */ + msgpack_unpacked_init(&result); + while (msgpack_unpack_next(&result, data, bytes, &off) == MSGPACK_UNPACK_SUCCESS) { + /* + * Each record is a msgpack array [timestamp, map] of the + * timestamp and record map. + */ + root = result.data; + if (root.via.array.size != 2) { + continue; + } + + /* unpack the array of [timestamp, map] */ + flb_time_pop_from_msgpack(&tms, &result, &obj); + + /* Get the record/map */ + map = root.via.array.ptr[1]; + map_size = map.via.map.size; + + if (ctx->log_key) { + key_str = NULL; + key_str_size = 0; + check = FLB_FALSE; + found = FLB_FALSE; + + kv = map.via.map.ptr; + + for(j=0; j < map_size; j++) { + key = (kv+j)->key; + if (key.type == MSGPACK_OBJECT_BIN) { + key_str = (char *) key.via.bin.ptr; + key_str_size = key.via.bin.size; + check = FLB_TRUE; + } + if (key.type == MSGPACK_OBJECT_STR) { + key_str = (char *) key.via.str.ptr; + key_str_size = key.via.str.size; + check = FLB_TRUE; + } + + if (check == FLB_TRUE) { + if (strncmp(ctx->log_key, key_str, key_str_size) == 0) { + found = FLB_TRUE; + val = (kv+j)->val; + ret = add_event(ctx, buf, &val, &tms); + if (ret < 0 ) { + goto error; + } + } + } + + } + if (found == FLB_FALSE) { + flb_plg_error(ctx->ins, "Could not find log_key '%s' in record, %s", + ctx->log_key, ctx->delivery_stream); + } + else { + i++; + } + continue; + } + + ret = add_event(ctx, buf, &map, &tms); + if (ret < 0 ) { + goto error; + } + i++; + } + msgpack_unpacked_destroy(&result); + + /* send any remaining events */ + ret = send_log_events(ctx, buf); + reset_flush_buf(ctx, buf); + if (ret < 0) { + return -1; + } + + /* return number of events processed */ + buf->records_processed = i; + return i; + +error: + msgpack_unpacked_destroy(&result); + return -1; +} + +/* + * Returns number of failed records on success, -1 on failure + */ +static int process_api_response(struct flb_firehose *ctx, + struct flb_http_client *c) +{ + int i; + int k; + int w; + int ret; + int failed_records = -1; + int root_type; + char *out_buf; + int throughput_exceeded = FLB_FALSE; + size_t off = 0; + size_t out_size; + msgpack_unpacked result; + msgpack_object root; + msgpack_object key; + msgpack_object val; + msgpack_object response; + msgpack_object response_key; + msgpack_object response_val; + + if (strstr(c->resp.payload, "\"FailedPutCount\":0")) { + return 0; + } + + /* Convert JSON payload to msgpack */ + ret = flb_pack_json(c->resp.payload, c->resp.payload_size, + &out_buf, &out_size, &root_type); + if (ret == -1) { + flb_plg_error(ctx->ins, "could not pack/validate JSON API response\n%s", + c->resp.payload); + return -1; + } + + /* Lookup error field */ + msgpack_unpacked_init(&result); + ret = msgpack_unpack_next(&result, out_buf, out_size, &off); + if (ret != MSGPACK_UNPACK_SUCCESS) { + flb_plg_error(ctx->ins, "Cannot unpack response to find 
error\n%s", + c->resp.payload); + failed_records = -1; + goto done; + } + + root = result.data; + if (root.type != MSGPACK_OBJECT_MAP) { + flb_plg_error(ctx->ins, "unexpected payload type=%i", + root.type); + failed_records = -1; + goto done; + } + + for (i = 0; i < root.via.map.size; i++) { + key = root.via.map.ptr[i].key; + if (key.type != MSGPACK_OBJECT_STR) { + flb_plg_error(ctx->ins, "unexpected key type=%i", + key.type); + failed_records = -1; + goto done; + } + + if (key.via.str.size >= 14 && + strncmp(key.via.str.ptr, "FailedPutCount", 14) == 0) { + val = root.via.map.ptr[i].val; + if (val.type != MSGPACK_OBJECT_POSITIVE_INTEGER) { + flb_plg_error(ctx->ins, "unexpected 'FailedPutCount' value type=%i", + val.type); + failed_records = -1; + goto done; + } + + failed_records = val.via.u64; + if (failed_records == 0) { + /* no need to check RequestResponses field */ + goto done; + } + } + + if (key.via.str.size >= 14 && + strncmp(key.via.str.ptr, "RequestResponses", 16) == 0) { + val = root.via.map.ptr[i].val; + if (val.type != MSGPACK_OBJECT_ARRAY) { + flb_plg_error(ctx->ins, "unexpected 'RequestResponses' value type=%i", + val.type); + failed_records = -1; + goto done; + } + + if (val.via.array.size == 0) { + flb_plg_error(ctx->ins, "'RequestResponses' field in response is empty"); + failed_records = -1; + goto done; + } + + for (k = 0; k < val.via.array.size; k++) { + /* iterate through the responses */ + response = val.via.array.ptr[k]; + if (response.type != MSGPACK_OBJECT_MAP) { + flb_plg_error(ctx->ins, "unexpected 'RequestResponses[%d]' value type=%i", + k, response.type); + failed_records = -1; + goto done; + } + for (w = 0; w < response.via.map.size; w++) { + /* iterate through the response's keys */ + response_key = response.via.map.ptr[w].key; + if (response_key.type != MSGPACK_OBJECT_STR) { + flb_plg_error(ctx->ins, "unexpected key type=%i", + response_key.type); + failed_records = -1; + goto done; + } + if (response_key.via.str.size >= 9 && + strncmp(response_key.via.str.ptr, "ErrorCode", 9) == 0) { + response_val = response.via.map.ptr[w].val; + if (!throughput_exceeded && + response_val.via.str.size >= 27 && + (strncmp(response_val.via.str.ptr, + ERR_CODE_SERVICE_UNAVAILABLE, 27) == 0)) { + throughput_exceeded = FLB_TRUE; + flb_plg_error(ctx->ins, "Thoughput limits may have been exceeded, %s", + ctx->delivery_stream); + } + flb_plg_debug(ctx->ins, "Record %i failed with err_code=%.*s", + k, response_val.via.str.size, + response_val.via.str.ptr); + } + if (response_key.via.str.size >= 12 && + strncmp(response_key.via.str.ptr, "ErrorMessage", 12) == 0) { + response_val = response.via.map.ptr[w].val; + flb_plg_debug(ctx->ins, "Record %i failed with err_msg=%.*s", + k, response_val.via.str.size, + response_val.via.str.ptr); + } + } + } + } + } + + done: + flb_free(out_buf); + msgpack_unpacked_destroy(&result); + return failed_records; +} + +static int plugin_under_test() +{ + if (getenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST") != NULL) { + return FLB_TRUE; + } + + return FLB_FALSE; +} + +static char *mock_error_response(char *error_env_var) +{ + char *err_val = NULL; + char *error = NULL; + int len = 0; + + err_val = getenv(error_env_var); + if (err_val != NULL && strlen(err_val) > 0) { + error = flb_malloc(strlen(err_val) + sizeof(char)); + if (error == NULL) { + flb_errno(); + return NULL; + } + + len = strlen(err_val); + memcpy(error, err_val, len); + error[len] = '\0'; + return error; + } + + return NULL; +} + +int partial_success() +{ + char *err_val = NULL; + + err_val = 
getenv("PARTIAL_SUCCESS_CASE"); + if (err_val != NULL && strlen(err_val) > 0) { + return FLB_TRUE; + } + + return FLB_FALSE; +} + +static struct flb_http_client *mock_http_call(char *error_env_var) +{ + /* create an http client so that we can set the response */ + struct flb_http_client *c = NULL; + char *error = mock_error_response(error_env_var); + + c = flb_calloc(1, sizeof(struct flb_http_client)); + if (!c) { + flb_errno(); + flb_free(error); + return NULL; + } + mk_list_init(&c->headers); + + if (error != NULL) { + c->resp.status = 400; + /* resp.data is freed on destroy, payload is supposed to reference it */ + c->resp.data = error; + c->resp.payload = c->resp.data; + c->resp.payload_size = strlen(error); + } + else { + c->resp.status = 200; + c->resp.payload = ""; + c->resp.payload_size = 0; + if (partial_success() == FLB_TRUE) { + /* mocked partial failure response */ + c->resp.payload = "{\"Encrypted\": false,\"FailedPutCount\": 1,\"RequestResponses\":[{\"RecordId\": \"Me0CqhxK3BK3MiBWgy/AydQrVUg7vbc40Z4zNds3jiiJDscqGtWFz9bJugbrAoN70YCaxpXgmyR9R+LFxS2rleDepqFljYArBtXnRmVzSMOAzTJZlwsO84+757kBvA5RUycF3wC3XZjFtUFP0Q4QTdhuD8HMJBvKGiBY9Yy5jBUmZuKhXxCLQ/YTwKQaQKn4fnc5iISxaErPXsWMI7OApHZ1eFGvcHVZ\"},{\"RecordId\": \"NRAZVkblYgWWDSvTAF/9jBR4MlciEUFV+QIjb1D8uar7YbC3wqeLQuSZ0GEopGlE/8JAK9h9aAyTub5lH5V+bZuR3SeKKABWoJ788/tI455Kup9oRzmXTKWiXeklxmAe9MtsSz0y4t3oIrSLq8e3QVH9DJKWdhDkIXd8lXK1wuJi8tKmnNgxFob/Cz398kQFXPc4JwKj3Dv3Ou0qibZiusko6f7yBUve\",\"ErrorCode\":\"ServiceUnavailableException\",\"ErrorMessage\": \"Catsssss\"},{\"RecordId\": \"InFGTFvML/MGCLtnC3moI/zCISrKSScu/D8oCGmeIIeVaYUfywHpr2NmsQiZsxUL9+4ThOm2ypxqFGudZvgXQ45gUWMG+R4Y5xzS03N+vQ71+UaL392jY6HUs2SxYkZQe6vpdK+xHaJJ1b8uE++Laxg9rmsXtNt193WjmH3FhU1veu9pnSiGZgqC7czpyVgvZBNeWc+hTjEVicj3VAHBg/9yRN0sC30C\",\"ErrorCode\":\"ServiceUnavailableException\",\"ErrorMessage\": \"Catsssss 2\"},{\"RecordId\":\"KufmrRJ2z8zAgYAYGz6rm4BQC8SA7g87lQJQl2DQ+Be5EiEpr5bG33ilnQVvo1Q05BJuQBnjbw2cm919Ya72awapxfOBdZcPPKJN7KDZV/n1DFCDDrJ2vgyNK4qhKdo3Mr7nyrBpkLIs93PdxOdrTh11Y9HHEaFtim0cHJYpKCSZBjNObfWjfjHx5TuB7L3PHQqMKMu0MT5L9gPgVXHElGalqKZGTcfB\"}]}"; + c->resp.payload_size = strlen(c->resp.payload); + } + else { + /* mocked success response */ + c->resp.payload = "{\"Encrypted\": false,\"FailedPutCount\": 0,\"RequestResponses\":[{\"RecordId\": \"Me0CqhxK3BK3MiBWgy/AydQrVUg7vbc40Z4zNds3jiiJDscqGtWFz9bJugbrAoN70YCaxpXgmyR9R+LFxS2rleDepqFljYArBtXnRmVzSMOAzTJZlwsO84+757kBvA5RUycF3wC3XZjFtUFP0Q4QTdhuD8HMJBvKGiBY9Yy5jBUmZuKhXxCLQ/YTwKQaQKn4fnc5iISxaErPXsWMI7OApHZ1eFGvcHVZ\"},{\"RecordId\": \"NRAZVkblYgWWDSvTAF/9jBR4MlciEUFV+QIjb1D8uar7YbC3wqeLQuSZ0GEopGlE/8JAK9h9aAyTub5lH5V+bZuR3SeKKABWoJ788/tI455Kup9oRzmXTKWiXeklxmAe9MtsSz0y4t3oIrSLq8e3QVH9DJKWdhDkIXd8lXK1wuJi8tKmnNgxFob/Cz398kQFXPc4JwKj3Dv3Ou0qibZiusko6f7yBUve\"},{\"RecordId\": \"InFGTFvML/MGCLtnC3moI/zCISrKSScu/D8oCGmeIIeVaYUfywHpr2NmsQiZsxUL9+4ThOm2ypxqFGudZvgXQ45gUWMG+R4Y5xzS03N+vQ71+UaL392jY6HUs2SxYkZQe6vpdK+xHaJJ1b8uE++Laxg9rmsXtNt193WjmH3FhU1veu9pnSiGZgqC7czpyVgvZBNeWc+hTjEVicj3VAHBg/9yRN0sC30C\"},{\"RecordId\": \"KufmrRJ2z8zAgYAYGz6rm4BQC8SA7g87lQJQl2DQ+Be5EiEpr5bG33ilnQVvo1Q05BJuQBnjbw2cm919Ya72awapxfOBdZcPPKJN7KDZV/n1DFCDDrJ2vgyNK4qhKdo3Mr7nyrBpkLIs93PdxOdrTh11Y9HHEaFtim0cHJYpKCSZBjNObfWjfjHx5TuB7L3PHQqMKMu0MT5L9gPgVXHElGalqKZGTcfB\"}]}"; + c->resp.payload_size = strlen(c->resp.payload); + } + } + + return c; +} + + +/* + * Returns -1 on failure, 0 on success + */ +int put_record_batch(struct flb_firehose *ctx, struct flush *buf, + size_t payload_size, int num_records) +{ + + struct flb_http_client *c = 
NULL; + struct flb_aws_client *firehose_client; + flb_sds_t error; + int failed_records = 0; + + flb_plg_debug(ctx->ins, "Sending log records to delivery stream %s", + ctx->delivery_stream); + + if (plugin_under_test() == FLB_TRUE) { + c = mock_http_call("TEST_PUT_RECORD_BATCH_ERROR"); + } + else { + firehose_client = ctx->firehose_client; + c = firehose_client->client_vtable->request(firehose_client, FLB_HTTP_POST, + "/", buf->out_buf, payload_size, + &put_record_batch_header, 1); + } + + if (c) { + flb_plg_debug(ctx->ins, "PutRecordBatch http status=%d", c->resp.status); + + if (c->resp.status == 200) { + /* Firehose API can return partial success- check response */ + if (c->resp.payload_size > 0) { + failed_records = process_api_response(ctx, c); + if (failed_records < 0) { + flb_plg_error(ctx->ins, "PutRecordBatch response " + "could not be parsed, %s", + c->resp.payload); + flb_http_client_destroy(c); + return -1; + } + if (failed_records == num_records) { + flb_plg_error(ctx->ins, "PutRecordBatch request returned " + "with no records successfully recieved, %s", + ctx->delivery_stream); + flb_http_client_destroy(c); + return -1; + } + if (failed_records > 0) { + flb_plg_error(ctx->ins, "%d out of %d records failed to be " + "delivered, will retry this batch, %s", + failed_records, num_records, + ctx->delivery_stream); + flb_http_client_destroy(c); + return -1; + } + } + flb_plg_debug(ctx->ins, "Sent events to %s", ctx->delivery_stream); + flb_http_client_destroy(c); + return 0; + } + + /* Check error */ + if (c->resp.payload_size > 0) { + error = flb_aws_error(c->resp.payload, c->resp.payload_size); + if (error != NULL) { + if (strcmp(error, ERR_CODE_SERVICE_UNAVAILABLE) == 0) { + flb_plg_error(ctx->ins, "Throughput limits for %s " + "may have been exceeded.", + ctx->delivery_stream); + } + if (strncmp(error, "SerializationException", 22) == 0) { + /* + * If this happens, we habe a bug in the code + * User should send us the output to debug + */ + flb_plg_error(ctx->ins, "<<------Bug in Code------>>"); + printf("Malformed request: %s", buf->out_buf); + } + flb_aws_print_error(c->resp.payload, c->resp.payload_size, + "PutRecordBatch", ctx->ins); + flb_sds_destroy(error); + } + else { + /* error could not be parsed, print raw response to debug */ + flb_plg_debug(ctx->ins, "Raw response: %s", c->resp.payload); + } + } + } + + flb_plg_error(ctx->ins, "Failed to send log records to %s", ctx->delivery_stream); + if (c) { + flb_http_client_destroy(c); + } + return -1; +} + + +void flush_destroy(struct flush *buf) +{ + if (buf) { + flb_free(buf->tmp_buf); + flb_free(buf->out_buf); + flb_free(buf->events); + flb_free(buf->event_buf); + flb_free(buf); + } +} diff --git a/plugins/out_kinesis_firehose/firehose_api.h b/plugins/out_kinesis_firehose/firehose_api.h new file mode 100644 index 00000000000..2f0cb787e28 --- /dev/null +++ b/plugins/out_kinesis_firehose/firehose_api.h @@ -0,0 +1,45 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2020 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
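The mock layer above lets the runtime tests exercise put_record_batch without credentials or network access: one environment variable marks test mode, and a second one optionally injects a canned error payload that flips the mocked HTTP status to 400. A minimal, self-contained sketch of the same env-var-driven pattern; the names under_test, make_mock, and mock_response are illustrative and not part of the plugin's code, while the two environment variable names are the ones the tests actually set.

#include <stdlib.h>
#include <string.h>

struct mock_response {
    int   status;    /* HTTP status to simulate */
    char *payload;   /* canned body, NULL means empty */
};

/* Return 1 when the plugin should use mocked HTTP calls. */
static int under_test(void)
{
    return getenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST") != NULL;
}

/* Build a response from an error env var: error set -> 400, otherwise 200. */
static struct mock_response make_mock(const char *error_env_var)
{
    struct mock_response r = { 200, NULL };
    char *err = getenv(error_env_var);

    if (err != NULL && strlen(err) > 0) {
        r.status = 400;
        r.payload = err;
    }
    return r;
}

int main(void)
{
    if (under_test()) {
        struct mock_response r = make_mock("TEST_PUT_RECORD_BATCH_ERROR");
        /* feed r.status / r.payload into the response parser under test */
        (void) r;
    }
    return 0;
}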
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLB_OUT_CLOUDWATCH_API +#define FLB_OUT_CLOUDWATCH_API + +#define PUT_RECORD_BATCH_PAYLOAD_SIZE 4194304 +#define MAX_EVENTS_PER_PUT 500 +#define MAX_EVENT_SIZE 1024000 + +/* number of characters needed to 'start' a PutRecordBatch payload */ +#define PUT_RECORD_BATCH_HEADER_LEN 42 +/* number of characters needed per record in a PutRecordBatch payload */ +#define PUT_RECORD_BATCH_PER_RECORD_LEN 12 +/* number of characters needed to 'end' a PutRecordBatch payload */ +#define PUT_RECORD_BATCH_FOOTER_LEN 4 + +#include "firehose.h" + +void flush_destroy(struct flush *buf); + +int process_and_send_records(struct flb_firehose *ctx, struct flush *buf, + const char *data, size_t bytes); + +int put_record_batch(struct flb_firehose *ctx, struct flush *buf, + size_t payload_size, int num_records); + +#endif diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt index fd88f0382dc..b5b5e133249 100644 --- a/tests/runtime/CMakeLists.txt +++ b/tests/runtime/CMakeLists.txt @@ -64,6 +64,7 @@ if(FLB_IN_LIB) endif() FLB_RT_TEST(FLB_OUT_CLOUDWATCH_LOGS "out_cloudwatch.c") + FLB_RT_TEST(FLB_OUT_KINESIS_FIREHOSE "out_firehose.c") FLB_RT_TEST(FLB_OUT_TD "out_td.c") endif() diff --git a/tests/runtime/out_firehose.c b/tests/runtime/out_firehose.c new file mode 100644 index 00000000000..cead6b3cac1 --- /dev/null +++ b/tests/runtime/out_firehose.c @@ -0,0 +1,200 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +#include +#include "flb_tests_runtime.h" + +/* Test data */ +#include "data/td/json_td.h" /* JSON_TD */ + +#define ERROR_THROUGHPUT "{\"__type\":\"ServiceUnavailableException\"}" +/* not a real error code, but tests that the code can respond to any error */ +#define ERROR_UNKNOWN "{\"__type\":\"UNKNOWN\"}" + +/* It writes a big JSON message (copied from TD test) */ +void flb_test_firehose_success(void) +{ + int ret; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + + /* mocks calls- signals that we are in test mode */ + setenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST", "true", 1); + + ctx = flb_create(); + + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd >= 0); + flb_input_set(ctx,in_ffd, "tag", "test", NULL); + + out_ffd = flb_output(ctx, (char *) "kinesis_firehose", NULL); + TEST_CHECK(out_ffd >= 0); + flb_output_set(ctx, out_ffd,"match", "*", NULL); + flb_output_set(ctx, out_ffd,"region", "us-west-2", NULL); + flb_output_set(ctx, out_ffd,"delivery_stream", "fluent", NULL); + flb_output_set(ctx, out_ffd,"time_key", "time", NULL); + flb_output_set(ctx, out_ffd,"Retry_Limit", "1", NULL); + + ret = flb_start(ctx); + TEST_CHECK(ret == 0); + + flb_lib_push(ctx, in_ffd, (char *) JSON_TD , (int) sizeof(JSON_TD) - 1); + + sleep(2); + flb_stop(ctx); + flb_destroy(ctx); +} + +void flb_test_firehose_partial_success(void) +{ + int ret; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + + /* mocks calls- signals that we are in test mode */ + setenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST", "true", 1); + setenv("PARTIAL_SUCCESS_CASE", "true", 1); + + ctx = flb_create(); + + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd 
>= 0); + flb_input_set(ctx,in_ffd, "tag", "test", NULL); + + out_ffd = flb_output(ctx, (char *) "kinesis_firehose", NULL); + TEST_CHECK(out_ffd >= 0); + flb_output_set(ctx, out_ffd,"match", "*", NULL); + flb_output_set(ctx, out_ffd,"region", "us-west-2", NULL); + flb_output_set(ctx, out_ffd,"delivery_stream", "fluent", NULL); + flb_output_set(ctx, out_ffd,"time_key", "time", NULL); + flb_output_set(ctx, out_ffd,"Retry_Limit", "1", NULL); + + ret = flb_start(ctx); + TEST_CHECK(ret == 0); + + flb_lib_push(ctx, in_ffd, (char *) JSON_TD , (int) sizeof(JSON_TD) - 1); + + sleep(2); + flb_stop(ctx); + flb_destroy(ctx); + unsetenv("PARTIAL_SUCCESS_CASE"); +} + +void flb_test_firehose_throughput_error(void) +{ + int ret; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + + /* mocks calls- signals that we are in test mode */ + setenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST", "true", 1); + setenv("TEST_PUT_RECORD_BATCH_ERROR", ERROR_THROUGHPUT, 1); + + ctx = flb_create(); + + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd >= 0); + flb_input_set(ctx,in_ffd, "tag", "test", NULL); + + out_ffd = flb_output(ctx, (char *) "kinesis_firehose", NULL); + TEST_CHECK(out_ffd >= 0); + flb_output_set(ctx, out_ffd,"match", "*", NULL); + flb_output_set(ctx, out_ffd,"region", "us-west-2", NULL); + flb_output_set(ctx, out_ffd,"delivery_stream", "fluent", NULL); + flb_output_set(ctx, out_ffd,"time_key", "time", NULL); + flb_output_set(ctx, out_ffd,"Retry_Limit", "1", NULL); + + ret = flb_start(ctx); + TEST_CHECK(ret == 0); + + flb_lib_push(ctx, in_ffd, (char *) JSON_TD , (int) sizeof(JSON_TD) - 1); + + sleep(2); + flb_stop(ctx); + flb_destroy(ctx); + unsetenv("TEST_PUT_RECORD_BATCH_ERROR"); +} + +void flb_test_firehose_error_unknown(void) +{ + int ret; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + + /* mocks calls- signals that we are in test mode */ + setenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST", "true", 1); + setenv("TEST_PUT_RECORD_BATCH_ERROR", ERROR_UNKNOWN, 1); + + ctx = flb_create(); + + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd >= 0); + flb_input_set(ctx,in_ffd, "tag", "test", NULL); + + out_ffd = flb_output(ctx, (char *) "kinesis_firehose", NULL); + TEST_CHECK(out_ffd >= 0); + flb_output_set(ctx, out_ffd,"match", "*", NULL); + flb_output_set(ctx, out_ffd,"region", "us-west-2", NULL); + flb_output_set(ctx, out_ffd,"delivery_stream", "fluent", NULL); + flb_output_set(ctx, out_ffd,"time_key", "time", NULL); + flb_output_set(ctx, out_ffd,"Retry_Limit", "1", NULL); + + ret = flb_start(ctx); + TEST_CHECK(ret == 0); + + flb_lib_push(ctx, in_ffd, (char *) JSON_TD , (int) sizeof(JSON_TD) - 1); + + sleep(2); + flb_stop(ctx); + flb_destroy(ctx); + unsetenv("TEST_PUT_RECORD_BATCH_ERROR"); +} + +void flb_test_firehose_nonsense_error(void) +{ + int ret; + flb_ctx_t *ctx; + int in_ffd; + int out_ffd; + + /* mocks calls- signals that we are in test mode */ + setenv("FLB_FIREHOSE_PLUGIN_UNDER_TEST", "true", 1); + setenv("TEST_PUT_RECORD_BATCH_ERROR", "\tbadresponse\nnotparsable{}", 1); + + ctx = flb_create(); + + in_ffd = flb_input(ctx, (char *) "lib", NULL); + TEST_CHECK(in_ffd >= 0); + flb_input_set(ctx,in_ffd, "tag", "test", NULL); + + out_ffd = flb_output(ctx, (char *) "kinesis_firehose", NULL); + TEST_CHECK(out_ffd >= 0); + flb_output_set(ctx, out_ffd,"match", "*", NULL); + flb_output_set(ctx, out_ffd,"region", "us-west-2", NULL); + flb_output_set(ctx, out_ffd,"delivery_stream", "fluent", NULL); + flb_output_set(ctx, out_ffd,"time_key", "time", NULL); + flb_output_set(ctx, 
out_ffd,"Retry_Limit", "1", NULL); + + ret = flb_start(ctx); + TEST_CHECK(ret == 0); + + flb_lib_push(ctx, in_ffd, (char *) JSON_TD , (int) sizeof(JSON_TD) - 1); + + sleep(2); + flb_stop(ctx); + flb_destroy(ctx); + unsetenv("TEST_PUT_RECORD_BATCH_ERROR"); +} + + +/* Test list */ +TEST_LIST = { + {"success", flb_test_firehose_success }, + {"partial_success", flb_test_firehose_partial_success }, + {"throughput_error", flb_test_firehose_throughput_error }, + {"unknown_error", flb_test_firehose_error_unknown }, + {"nonsense_error", flb_test_firehose_nonsense_error }, + {NULL, NULL} +}; From 90866f76184d0d558967ba0eb4a3ca856a6958ca Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 17:31:21 -0700 Subject: [PATCH 03/17] Fetch credentials in sync mode Signed-off-by: Wesley Pettit --- plugins/out_kinesis_firehose/firehose.c | 2 +- plugins/out_s3/s3.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/plugins/out_kinesis_firehose/firehose.c b/plugins/out_kinesis_firehose/firehose.c index b98afb5d429..c4300fc2902 100644 --- a/plugins/out_kinesis_firehose/firehose.c +++ b/plugins/out_kinesis_firehose/firehose.c @@ -204,7 +204,7 @@ static int cb_firehose_init(struct flb_output_instance *ins, session_name = NULL; } - /* initialize credentials and set to sync mode */ + /* set to sync mode and initialize credentials */ ctx->aws_provider->provider_vtable->sync(ctx->aws_provider); ctx->aws_provider->provider_vtable->init(ctx->aws_provider); diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index efe66fb0a1b..d61b466c639 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -464,11 +464,9 @@ static int cb_s3_init(struct flb_output_instance *ins, ctx->s3_client->host = ctx->endpoint; - /* initialize credentials in sync mode */ + /* set to sync mode and initialize credentials */ ctx->provider->provider_vtable->sync(ctx->provider); ctx->provider->provider_vtable->init(ctx->provider); - /* set back to async */ - ctx->provider->provider_vtable->async(ctx->provider); From c082c49747ad5edc01c2389653e74bc3e288bc3f Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 18:06:55 -0700 Subject: [PATCH 04/17] Remove chunk from list in construct_request_buffer() to prevent race condition --- plugins/out_s3/s3.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index d61b466c639..bd3e358b615 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -535,7 +535,6 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, * same chunk won't be sent more than once */ if (chunk) { - mk_list_del(&chunk->_head); create_time = chunk->create_time; } else { @@ -568,6 +567,9 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, m_upload = create_upload(ctx, tag, tag_len); if (!m_upload) { flb_plg_error(ctx->ins, "Could not find or create upload for tag %s", tag); + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } return FLB_RETRY; } } @@ -576,19 +578,14 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, ret = create_multipart_upload(ctx, m_upload); if (ret < 0) { flb_plg_error(ctx->ins, "Could not initiate multipart upload"); + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } return FLB_RETRY; } m_upload->upload_state = MULTIPART_UPLOAD_STATE_CREATED; } - /* - * remove chunk from buffer list- needed for async http so that the - * same chunk won't be sent more than once - 
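The race-condition fix in this patch moves the mk_list_del into construct_request_buffer, so a chunk leaves the pending list exactly once before its data is handed to the HTTP client, and every failure path puts it back for a later retry. A minimal sketch of that check-out/check-in pattern; the mk_list API is the one bundled with Fluent Bit's monkey core (the include path here is indicative), while pending_chunk, do_send, and send_chunk are illustrative stand-ins.

#include <monkey/mk_core.h>   /* mk_list, bundled with Fluent Bit */

struct pending_chunk {
    struct mk_list _head;     /* linkage into the pending-chunks list */
    /* ... buffered data ... */
};

/* stand-in for the real upload; returns 0 on success, -1 on error */
static int do_send(struct pending_chunk *chunk)
{
    (void) chunk;
    return 0;
}

/*
 * Take the chunk off the pending list before sending, so a later flush
 * cannot pick up the same chunk a second time; re-queue it on any
 * failure path so the data is retried rather than lost.
 */
static int send_chunk(struct mk_list *pending, struct pending_chunk *chunk)
{
    mk_list_del(&chunk->_head);                 /* check out */

    if (do_send(chunk) < 0) {
        mk_list_add(&chunk->_head, pending);    /* check back in for retry */
        return -1;
    }
    return 0;
}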
*/ - if (chunk) { - mk_list_del(&chunk->_head); - } - ret = upload_part(ctx, m_upload, body, body_size); if (ret < 0) { /* re-add chunk to list */ @@ -670,12 +667,6 @@ static int put_all_chunks(struct flb_s3 *ctx) return -1; } - /* - * remove chunk from buffer list- needed for async http so that the - * same chunk won't be sent more than once - */ - mk_list_del(&chunk->_head); - ret = s3_put_object(ctx, chunk->tag, chunk->create_time, buffer, buffer_size); flb_free(buffer); if (ret < 0) { @@ -722,6 +713,11 @@ static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, chunk->file_path); return -1; } + /* + * remove chunk from buffer list- needed for async http so that the + * same chunk won't be sent more than once + */ + mk_list_del(&chunk->_head); body_size = buffer_size; } @@ -733,6 +729,9 @@ static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, if (!body) { flb_errno(); flb_free(buffered_data); + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } return -1; } tmp = memcpy(body, buffered_data, buffer_size); @@ -740,6 +739,9 @@ static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, flb_errno(); flb_free(body); flb_free(buffered_data); + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } return -1; } flb_free(buffered_data); @@ -748,6 +750,9 @@ static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, if (!tmp) { flb_errno(); flb_free(body); + if (chunk) { + mk_list_add(&chunk->_head, &ctx->store.chunks); + } return -1; } } From a0122e47f39ca4017af35d4bc7ae6b6006da64c4 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:23:14 -0700 Subject: [PATCH 05/17] Make the buffer chunk filename based on the tag and the timestamp to allow multiple chunks per tag --- include/fluent-bit/flb_s3_local_buffer.h | 1 + src/aws/flb_s3_local_buffer.c | 47 ++++++++---------------- 2 files changed, 16 insertions(+), 32 deletions(-) diff --git a/include/fluent-bit/flb_s3_local_buffer.h b/include/fluent-bit/flb_s3_local_buffer.h index 10da91b0ed3..cfdeacc93e3 100644 --- a/include/fluent-bit/flb_s3_local_buffer.h +++ b/include/fluent-bit/flb_s3_local_buffer.h @@ -29,6 +29,7 @@ struct flb_local_chunk { flb_sds_t tag; flb_sds_t file_path; size_t size; + struct timespec ts; time_t create_time; struct mk_list _head; diff --git a/src/aws/flb_s3_local_buffer.c b/src/aws/flb_s3_local_buffer.c index d22b1da38d4..4fbd36b2b55 100644 --- a/src/aws/flb_s3_local_buffer.c +++ b/src/aws/flb_s3_local_buffer.c @@ -31,7 +31,7 @@ /* * Simple and fast hashing algorithm to create keys in the local buffer */ -flb_sds_t simple_hash(const char *str); +flb_sds_t simple_hash(struct timespec *ts, const char *tag); static char *read_tag(char *buffer_path); @@ -256,13 +256,6 @@ int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, flb_sds_t hash_key; int ret; - hash_key = simple_hash(tag); - if (!hash_key) { - flb_plg_error(store->ins, "Could not create local buffer hash key for %s", - tag); - return -1; - } - if (c == NULL) { /* create a new chunk */ flb_plg_debug(store->ins, "Creating new local buffer for %s", tag); @@ -272,25 +265,25 @@ int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, flb_errno(); return -1; } - c->create_time = time(NULL); - c->key = flb_sds_create(hash_key); - if (!c->key) { - flb_errno(); - flb_sds_destroy(hash_key); + timespec_get(&c->ts, TIME_UTC); + c->create_time = c->ts.tv_sec; + hash_key = simple_hash(&c->ts, tag); + if (!hash_key) { + 
flb_plg_error(store->ins, "Could not create local buffer hash key for %s", + tag); flb_chunk_destroy(c); return -1; } + c->key = hash_key; c->tag = flb_sds_create(tag); if (!c->tag) { flb_errno(); - flb_sds_destroy(hash_key); flb_chunk_destroy(c); return -1; } path = flb_sds_create_size(strlen(store->dir) + strlen(hash_key)); if (!path) { flb_errno(); - flb_sds_destroy(hash_key); flb_chunk_destroy(c); flb_errno(); return -1; @@ -298,7 +291,6 @@ int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, tmp_sds = flb_sds_printf(&path, "%s/%s", store->dir, hash_key); if (!tmp_sds) { flb_errno(); - flb_sds_destroy(hash_key); flb_chunk_destroy(c); flb_sds_destroy(path); return -1; @@ -314,8 +306,6 @@ int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, mk_list_add(&c->_head, &store->chunks); } - flb_sds_destroy(hash_key); - written = append_data(c->file_path, data, bytes); if (written > 0) { c->size += written; @@ -344,29 +334,21 @@ struct flb_local_chunk *flb_chunk_get(struct flb_local_buffer *store, const char struct flb_local_chunk *tmp_chunk; flb_sds_t hash_key; - hash_key = simple_hash(tag); - if (!hash_key) { - flb_plg_error(store->ins, "Could not create local buffer hash key for tag %s", - tag); - return NULL; - } - mk_list_foreach_safe(head, tmp, &store->chunks) { tmp_chunk = mk_list_entry(head, struct flb_local_chunk, _head); - if (strcmp(tmp_chunk->key, hash_key) == 0) { + if (strcmp(tmp_chunk->tag, tag) == 0) { c = tmp_chunk; break; } } - flb_sds_destroy(hash_key); return c; } /* * Simple and fast hashing algorithm to create keys in the local buffer */ -flb_sds_t simple_hash(const char *str) +flb_sds_t simple_hash(struct timespec *ts, const char *tag) { unsigned long hash = 5381; unsigned long hash2 = 5381; @@ -374,18 +356,19 @@ flb_sds_t simple_hash(const char *str) flb_sds_t hash_str; flb_sds_t tmp; - while ((c = *str++)) { + while ((c = *tag++)) { hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - hash2 = ((hash2 << 5) + hash) - c; } + hash2 = (unsigned long) hash2 * ts->tv_sec * ts->tv_nsec; + /* flb_sds_printf allocs if the incoming sds is not at least 64 bytes */ hash_str = flb_sds_create_size(64); if (!hash_str) { flb_errno(); return NULL; } - tmp = flb_sds_printf(&hash_str, "%lu%lu", hash, hash2); + tmp = flb_sds_printf(&hash_str, "%lu-%lu", hash, hash2); if (!tmp) { flb_errno(); flb_sds_destroy(hash_str); @@ -414,4 +397,4 @@ int flb_remove_chunk_files(struct flb_local_chunk *c) flb_errno(); } return ret; -} \ No newline at end of file +} From d40f77479a5b6772814ff68e1a3419c542e82764 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:24:48 -0700 Subject: [PATCH 06/17] wip --- src/aws/flb_s3_local_buffer.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/aws/flb_s3_local_buffer.c b/src/aws/flb_s3_local_buffer.c index 4fbd36b2b55..12005afc47d 100644 --- a/src/aws/flb_s3_local_buffer.c +++ b/src/aws/flb_s3_local_buffer.c @@ -261,7 +261,6 @@ int flb_buffer_put(struct flb_local_buffer *store, struct flb_local_chunk *c, flb_plg_debug(store->ins, "Creating new local buffer for %s", tag); c = flb_calloc(1, sizeof(struct flb_local_chunk)); if (!c) { - flb_sds_destroy(hash_key); flb_errno(); return -1; } @@ -332,7 +331,6 @@ struct flb_local_chunk *flb_chunk_get(struct flb_local_buffer *store, const char struct mk_list *head; struct flb_local_chunk *c = NULL; struct flb_local_chunk *tmp_chunk; - flb_sds_t hash_key; mk_list_foreach_safe(head, tmp, &store->chunks) { tmp_chunk = mk_list_entry(head, struct 
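With this change the buffer file name is derived from both the tag and the chunk's creation timestamp, so two chunks created for the same tag no longer collide on disk. A small self-contained sketch of the same scheme, a djb2-style hash over the tag mixed with the timestamp; the chunk_key helper, its buffer size, and the exact mixing step are illustrative rather than a copy of the plugin's simple_hash.

#include <stdio.h>
#include <time.h>

/* Derive a buffer-file key from the tag (djb2 hash) and creation time,
 * so multiple chunks per tag map to distinct file names. */
static int chunk_key(const char *tag, const struct timespec *ts,
                     char *out, size_t out_size)
{
    unsigned long hash = 5381;
    unsigned long mix;
    unsigned char c;

    while ((c = (unsigned char) *tag++) != 0) {
        hash = ((hash << 5) + hash) + c;   /* hash * 33 + c */
    }
    mix = (unsigned long) ts->tv_sec * (unsigned long) ts->tv_nsec;

    return snprintf(out, out_size, "%lu-%lu", hash, mix);
}

/* Example: key for tag "app.logs" created now */
int main(void)
{
    struct timespec ts;
    char key[64];

    timespec_get(&ts, TIME_UTC);
    chunk_key("app.logs", &ts, key, sizeof(key));
    printf("%s\n", key);
    return 0;
}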
flb_local_chunk, _head); From c398b288a88b9233b7f0127c8f6a1ff7b60d964c Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:38:25 -0700 Subject: [PATCH 07/17] Fix logic for handling simultaneous uploads --- plugins/out_s3/s3.c | 29 +++++++++++++++++++++++------ plugins/out_s3/s3_multipart.c | 2 ++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index bd3e358b615..1a33511dca3 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -486,6 +486,7 @@ static int cb_s3_init(struct flb_output_instance *ins, * Chunk is allowed to be NULL */ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, + struct multipart_upload *m_upload, char *body, size_t body_size, const char *tag, int tag_len) { @@ -502,7 +503,6 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, goto put_object; } - m_upload = get_upload(ctx, tag, tag_len); if (m_upload == NULL) { if (chunk != NULL && time(NULL) > (chunk->create_time + ctx->upload_timeout)) { /* timeout already reached, just PutObject */ @@ -628,11 +628,12 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, if (complete_upload == FLB_TRUE) { m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + mk_list_del(&m_upload->_head); ret = complete_multipart_upload(ctx, m_upload); if (ret == 0) { - mk_list_del(&m_upload->_head); multipart_upload_destroy(m_upload); } else { + mk_list_add(&m_upload->_head, &ctx->uploads); /* we return FLB_OK in this case, since data was persisted */ flb_plg_error(ctx->ins, "Could not complete upload, will retry on next flush..", m_upload->s3_key); @@ -874,6 +875,7 @@ static void cb_s3_flush(const void *data, size_t bytes, struct mk_list *tmp; struct mk_list *head; size_t chunk_size = 0; + size_t upload_size = 0; int complete; int ret; int len; @@ -912,12 +914,24 @@ static void cb_s3_flush(const void *data, size_t bytes, flb_plg_info(ctx->ins, "upload_timeout reached for %s", tag); } + m_upload = get_upload(ctx, tag, tag_len); + + if (m_upload != NULL && time(NULL) > (m_upload->init_time + ctx->upload_timeout)) { + timeout_check = FLB_TRUE; + flb_plg_info(ctx->ins, "upload_timeout reached for %s", tag); + } + chunk_size = len; if (chunk) { chunk_size += chunk->size; } - if (chunk_size < ctx->upload_chunk_size) { + upload_size = len; + if (m_upload) { + upload_size += m_upload->bytes; + } + + if (chunk_size < ctx->upload_chunk_size && upload_size < ctx->file_size) { if (timeout_check == FLB_FALSE) { /* add data to local buffer */ ret = flb_buffer_put(&ctx->store, chunk, tag, json, (size_t) len); @@ -938,13 +952,14 @@ static void cb_s3_flush(const void *data, size_t bytes, FLB_OUTPUT_RETURN(FLB_RETRY); } - ret = upload_data(ctx, chunk, buffer, buffer_size, tag, tag_len); + ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, tag, tag_len); flb_free(buffer); if (ret != FLB_OK) { FLB_OUTPUT_RETURN(ret); } cleanup_existing: + m_upload = NULL; /* Check all chunks and see if any have timed out */ mk_list_foreach_safe(head, tmp, &ctx->store.chunks) { @@ -986,11 +1001,12 @@ static void cb_s3_flush(const void *data, size_t bytes, } if (complete == FLB_TRUE) { m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + mk_list_del(&m_upload->_head); ret = complete_multipart_upload(ctx, m_upload); if (ret == 0) { - mk_list_del(&m_upload->_head); multipart_upload_destroy(m_upload); } else { + mk_list_add(&m_upload->_head, &ctx->uploads); /* we return FLB_OK in this case, 
since data was persisted */ flb_plg_error(ctx->ins, "Could not complete upload %s, will retry on next flush..", m_upload->s3_key); @@ -1029,11 +1045,12 @@ static int cb_s3_exit(void *data, struct flb_config *config) if (m_upload->bytes > 0) { m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + mk_list_del(&m_upload->_head); ret = complete_multipart_upload(ctx, m_upload); if (ret == 0) { - mk_list_del(&m_upload->_head); multipart_upload_destroy(m_upload); } else { + mk_list_add(&m_upload->_head, &ctx->uploads); flb_plg_error(ctx->ins, "Could not complete upload %s", m_upload->s3_key); } diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index 73d9d4d17f8..f89259c5154 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -163,6 +163,8 @@ int complete_multipart_upload(struct flb_s3 *ctx, return -1; } + flb_info("Raw request: %s", body); + s3_client = ctx->s3_client; c = s3_client->client_vtable->request(s3_client, FLB_HTTP_POST, uri, body, size, From 778d106f19dc4a94adf54abe92dbda9302e7b818 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:40:48 -0700 Subject: [PATCH 08/17] wip --- plugins/out_s3/s3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 1a33511dca3..8f65b18b7cc 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -490,7 +490,6 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, char *body, size_t body_size, const char *tag, int tag_len) { - struct multipart_upload *m_upload = NULL; int init_upload = FLB_FALSE; int complete_upload = FLB_FALSE; int size_check = FLB_FALSE; @@ -969,7 +968,9 @@ static void cb_s3_flush(const void *data, size_t bytes, continue; /* Only send chunks which have timed out */ } - ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size); + m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag)); + + ret = construct_request_buffer(ctx, chunk, m_upload, &buffer, &buffer_size); if (ret < 0) { flb_plg_error(ctx->ins, "Could not construct request buffer for %s", chunk->file_path); From 2eec43bbb74437c574f6c2c4eb03f9441f0e757f Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:42:47 -0700 Subject: [PATCH 09/17] Fix build errors --- plugins/out_s3/s3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 8f65b18b7cc..df2852ad1bf 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -970,14 +970,14 @@ static void cb_s3_flush(const void *data, size_t bytes, m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag)); - ret = construct_request_buffer(ctx, chunk, m_upload, &buffer, &buffer_size); + ret = construct_request_buffer(ctx, chunk, &buffer, &buffer_size); if (ret < 0) { flb_plg_error(ctx->ins, "Could not construct request buffer for %s", chunk->file_path); continue; } - ret = upload_data(ctx, chunk, buffer, buffer_size, tag, tag_len); + ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, chunk->tag, strlen(chunk->tag)); flb_free(buffer); if (ret != FLB_OK) { /* From e8a6d8524907d5e879e80974079f87bdd2375c89 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:43:39 -0700 Subject: [PATCH 10/17] wip --- plugins/out_s3/s3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index df2852ad1bf..e3cd9ab598a 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -970,7 +970,7 @@ 
static void cb_s3_flush(const void *data, size_t bytes, m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag)); - ret = construct_request_buffer(ctx, chunk, &buffer, &buffer_size); + ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size); if (ret < 0) { flb_plg_error(ctx->ins, "Could not construct request buffer for %s", chunk->file_path); From 14f955ab9fb61dc1082eaf3bad677da034b6194d Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 22:47:17 -0700 Subject: [PATCH 11/17] Correct min chunked upload size --- plugins/out_s3/s3.c | 2 +- plugins/out_s3/s3.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index e3cd9ab598a..8ca9aa9199a 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -236,7 +236,7 @@ static int cb_s3_init(struct flb_output_instance *ins, goto error; } if (ctx->upload_chunk_size < MIN_CHUNKED_UPLOAD_SIZE) { - flb_plg_error(ctx->ins, "upload_chunk_size must be at least 5M"); + flb_plg_error(ctx->ins, "upload_chunk_size must be at least 5,242,880 bytes"); goto error; } if (ctx->upload_chunk_size > MAX_CHUNKED_UPLOAD_SIZE) { diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h index 6d60fda2a83..04ed7265176 100644 --- a/plugins/out_s3/s3.h +++ b/plugins/out_s3/s3.h @@ -29,7 +29,7 @@ #include /* Upload data to S3 in 5MB chunks */ -#define MIN_CHUNKED_UPLOAD_SIZE 5000000 +#define MIN_CHUNKED_UPLOAD_SIZE 5242880 #define MAX_CHUNKED_UPLOAD_SIZE 50000000 From 1b8f54d7b5eb8871306f18b1a030325229328301 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 23:24:05 -0700 Subject: [PATCH 12/17] Do multipart operations in sync mode to ensure data ordering --- plugins/out_s3/s3.c | 3 +++ plugins/out_s3/s3.h | 6 ++++++ plugins/out_s3/s3_multipart.c | 9 +++++++++ 3 files changed, 18 insertions(+) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 8ca9aa9199a..b29dcf06b14 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -770,6 +770,9 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; + /* can always run PutObject in async mode */ + ctx->s3_client->flags |= ~(FLB_IO_ASYNC); + uri = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); if (!uri) { flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h index 04ed7265176..779e2c35962 100644 --- a/plugins/out_s3/s3.h +++ b/plugins/out_s3/s3.h @@ -59,6 +59,12 @@ struct multipart_upload { flb_sds_t etags[10000]; int part_number; + /* + * we use async http, so we need to check that all part requests have + * completed before we complete the upload + */ + int parts_uploaded; + /* ongoing tracker of how much data has been sent for this upload */ size_t bytes; diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index f89259c5154..ccec23fa632 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -142,6 +142,9 @@ int complete_multipart_upload(struct flb_s3 *ctx, struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; + /* run in sync mode */ + ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 + flb_sds_len(m_upload->upload_id)); if (!uri) { @@ -203,6 +206,9 @@ int create_multipart_upload(struct flb_s3 *ctx, struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; + /* run in sync mode */ + 
ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { flb_errno(); @@ -301,6 +307,9 @@ int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; + /* run in sync mode */ + ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { flb_errno(); From 149fb86d1ce6ef0bf042842670ba804a2683aafe Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 23:40:51 -0700 Subject: [PATCH 13/17] wip --- plugins/out_s3/s3_multipart.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index ccec23fa632..528701aa20b 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -144,6 +144,7 @@ int complete_multipart_upload(struct flb_s3 *ctx, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + flb_info("[complete_multipart_upload] ID=%s", m_upload->upload_id) uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 + flb_sds_len(m_upload->upload_id)); @@ -208,6 +209,7 @@ int create_multipart_upload(struct flb_s3 *ctx, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + flb_info("[create_multipart_upload] ID=%s", m_upload->upload_id) uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { @@ -309,6 +311,7 @@ int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + flb_info("[upload_part] ID=%s", m_upload->upload_id) uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { From 01f339b6740c7865aa81af27d5c9e34b8fdfe5e9 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 23:41:57 -0700 Subject: [PATCH 14/17] wip --- plugins/out_s3/s3_multipart.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index 528701aa20b..87b9ac994b7 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -144,7 +144,7 @@ int complete_multipart_upload(struct flb_s3 *ctx, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); - flb_info("[complete_multipart_upload] ID=%s", m_upload->upload_id) + flb_info("[complete_multipart_upload] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 + flb_sds_len(m_upload->upload_id)); @@ -209,7 +209,7 @@ int create_multipart_upload(struct flb_s3 *ctx, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); - flb_info("[create_multipart_upload] ID=%s", m_upload->upload_id) + flb_info("[create_multipart_upload] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { @@ -311,7 +311,7 @@ int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, /* run in sync mode */ ctx->s3_client->flags &= ~(FLB_IO_ASYNC); - flb_info("[upload_part] ID=%s", m_upload->upload_id) + flb_info("[upload_part] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); if (!uri) { From 19ec14b58524db2f932447d981fc1ea943372916 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 23:44:29 -0700 Subject: [PATCH 15/17] wip --- plugins/out_s3/s3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index b29dcf06b14..21396db4250 100644 --- 
a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -770,8 +770,8 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; - /* can always run PutObject in async mode */ - ctx->s3_client->flags |= ~(FLB_IO_ASYNC); + /* run in sync mode */ + ctx->s3_client->flags &= ~(FLB_IO_ASYNC); uri = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); if (!uri) { From 302cdc86427b1d3d1530030fbddb11a7aa2d66e7 Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Wed, 9 Sep 2020 23:53:32 -0700 Subject: [PATCH 16/17] Actually disable async --- plugins/out_s3/s3.c | 2 +- plugins/out_s3/s3_multipart.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index 21396db4250..ccfef6613c3 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -771,7 +771,7 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time struct flb_aws_client *s3_client; /* run in sync mode */ - ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); uri = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); if (!uri) { diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c index 87b9ac994b7..1466f6952c6 100644 --- a/plugins/out_s3/s3_multipart.c +++ b/plugins/out_s3/s3_multipart.c @@ -143,7 +143,7 @@ int complete_multipart_upload(struct flb_s3 *ctx, struct flb_aws_client *s3_client; /* run in sync mode */ - ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); flb_info("[complete_multipart_upload] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 + @@ -208,7 +208,7 @@ int create_multipart_upload(struct flb_s3 *ctx, struct flb_aws_client *s3_client; /* run in sync mode */ - ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); flb_info("[create_multipart_upload] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); @@ -310,7 +310,7 @@ int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload, struct flb_aws_client *s3_client; /* run in sync mode */ - ctx->s3_client->flags &= ~(FLB_IO_ASYNC); + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); flb_info("[upload_part] ID=%s", m_upload->upload_id); uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8); From eaba78c719024e92e2cf4fcb04039988be71b1ca Mon Sep 17 00:00:00 2001 From: Wesley Pettit Date: Tue, 15 Sep 2020 23:35:28 -0700 Subject: [PATCH 17/17] Set all file perms to 0700, misc bug fixes Signed-off-by: Wesley Pettit --- plugins/out_s3/s3.c | 281 ++++++++++++++++++++++------------ plugins/out_s3/s3.h | 16 ++ plugins/out_s3/s3_multipart.c | 14 -- src/aws/flb_aws_util.c | 44 ++++-- src/aws/flb_s3_local_buffer.c | 20 ++- 5 files changed, 246 insertions(+), 129 deletions(-) diff --git a/plugins/out_s3/s3.c b/plugins/out_s3/s3.c index ccfef6613c3..64e51cd736d 100644 --- a/plugins/out_s3/s3.c +++ b/plugins/out_s3/s3.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,8 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time static int put_all_chunks(struct flb_s3 *ctx); +static void cb_s3_upload(struct flb_config *ctx, void *data); + static struct multipart_upload *get_upload(struct flb_s3 *ctx, const char *tag, int tag_len); @@ -132,7 +135,7 @@ static void 
s3_context_destroy(struct flb_s3 *ctx) } static int cb_s3_init(struct flb_output_instance *ins, - struct flb_config *config, void *data) + struct flb_config *config, void *data) { int ret; const char *tmp; @@ -248,14 +251,14 @@ static int cb_s3_init(struct flb_output_instance *ins, ctx->upload_chunk_size = MIN_CHUNKED_UPLOAD_SIZE; } - if (ctx->file_size < MIN_CHUNKED_UPLOAD_SIZE) { - flb_plg_info(ctx->ins, "total_file_size is less than 5 MB, will use PutObject API"); + if (ctx->file_size < 2 * MIN_CHUNKED_UPLOAD_SIZE) { + flb_plg_info(ctx->ins, "total_file_size is less than 10 MB, will use PutObject API"); ctx->use_put_object = FLB_TRUE; } - if ((ctx->upload_chunk_size * 2) > ctx->file_size) { - flb_plg_info(ctx->ins, "total_file_size is less than 2x upload_chunk_size, will use PutObject API"); - ctx->use_put_object = FLB_TRUE; + if (ctx->upload_chunk_size != MIN_CHUNKED_UPLOAD_SIZE && (ctx->upload_chunk_size * 2) > ctx->file_size) { + flb_plg_error(ctx->ins, "total_file_size is less than 2x upload_chunk_size"); + goto error; } tmp = flb_output_get_property("use_put_object", ins); @@ -468,7 +471,22 @@ static int cb_s3_init(struct flb_output_instance *ins, ctx->provider->provider_vtable->sync(ctx->provider); ctx->provider->provider_vtable->init(ctx->provider); + ctx->timer_created = FLB_FALSE; + ctx->timer_ms = (int) (ctx->upload_timeout / 6) * 1000; + if (ctx->timer_ms > UPLOAD_TIMER_MAX_WAIT) { + ctx->timer_ms = UPLOAD_TIMER_MAX_WAIT; + } + if (ctx->use_put_object == FLB_FALSE) { + /* + * Run S3 in sync mode. + * Multipart uploads don't work with async mode right now in high throughput + * cases. Its not clear why. Realistically, the performance of sync mode + * will be sufficient for most users, and long term we can do the work + * to enable async if needed. 
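The sequence of async-related fixes above comes down to two details: clearing a flag requires flags &= ~FLAG (the earlier flags |= ~FLAG turns on every other bit instead of clearing the intended one), and the commit titled "Actually disable async" moves the clear onto the upstream connection's flags, which is where the patch indicates async behavior is really controlled. A minimal, self-contained illustration of the bitwise part; the macro values here are illustrative, not Fluent Bit's definitions.

#include <assert.h>

#define FLB_IO_TCP    1   /* illustrative values */
#define FLB_IO_ASYNC  8

int main(void)
{
    int flags = FLB_IO_TCP | FLB_IO_ASYNC;

    /* wrong: OR with the complement sets every bit except FLB_IO_ASYNC */
    int wrong = flags;
    wrong |= ~(FLB_IO_ASYNC);
    assert((wrong & FLB_IO_ASYNC) != 0);   /* async would still be enabled */

    /* right: AND with the complement clears only FLB_IO_ASYNC */
    flags &= ~(FLB_IO_ASYNC);
    assert((flags & FLB_IO_ASYNC) == 0);
    assert((flags & FLB_IO_TCP) != 0);     /* other bits stay untouched */

    return 0;
}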
+ */ + ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); + } /* Export context */ flb_output_set_context(ins, ctx); @@ -587,6 +605,7 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, ret = upload_part(ctx, m_upload, body, body_size); if (ret < 0) { + m_upload->upload_errors += 1; /* re-add chunk to list */ if (chunk) { mk_list_add(&chunk->_head, &ctx->store.chunks); @@ -608,17 +627,17 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, if (m_upload->bytes >= ctx->file_size) { size_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Completing upload for %s because uploaded data is greater" + flb_plg_info(ctx->ins, "Will complete upload for %s because uploaded data is greater" " than size set by total_file_size", m_upload->s3_key); } if (m_upload->part_number >= 10000) { part_num_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Completing upload for %s because 10,000 chunks " + flb_plg_info(ctx->ins, "Will complete upload for %s because 10,000 chunks " "(the API limit) have been uploaded", m_upload->s3_key); } if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) { timeout_check = FLB_TRUE; - flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout" + flb_plg_info(ctx->ins, "Will complete upload for %s because upload_timeout" " has elapsed", m_upload->s3_key); } if (size_check || part_num_check || timeout_check) { @@ -626,18 +645,27 @@ static int upload_data(struct flb_s3 *ctx, struct flb_local_chunk *chunk, } if (complete_upload == FLB_TRUE) { + /* mark for completion- the upload timer will handle actual completion */ m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; - mk_list_del(&m_upload->_head); - ret = complete_multipart_upload(ctx, m_upload); - if (ret == 0) { - multipart_upload_destroy(m_upload); - } else { - mk_list_add(&m_upload->_head, &ctx->uploads); - /* we return FLB_OK in this case, since data was persisted */ - flb_plg_error(ctx->ins, "Could not complete upload, will retry on next flush..", - m_upload->s3_key); - } } + // mk_list_del(&m_upload->_head); + // ret = complete_multipart_upload(ctx, m_upload); + // if (ret == 0) { + // multipart_upload_destroy(m_upload); + // } else { + // m_upload->complete_errors += 1; + // if (m_upload->complete_errors < MAX_UPLOAD_ERRORS) { + // mk_list_add(&m_upload->_head, &ctx->uploads); + // /* we return FLB_OK in this case, since data was persisted */ + // flb_plg_error(ctx->ins, "Could not complete upload, will retry on next flush..", + // m_upload->s3_key); + // } + // else { + // flb_plg_error(ctx->ins, "Upload for %s has reached max completion errors, plugin will give up", + // m_upload->s3_key); + // } + // } + // } return FLB_OK; } @@ -766,19 +794,38 @@ static int construct_request_buffer(struct flb_s3 *ctx, flb_sds_t new_data, static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time, char *body, size_t body_size) { - flb_sds_t uri = NULL; + flb_sds_t s3_key = NULL; struct flb_http_client *c = NULL; struct flb_aws_client *s3_client; + char *random_alphanumeric; + int len; + char uri[1024]; /* max S3 key length */ - /* run in sync mode */ - ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC); - - uri = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); - if (!uri) { + s3_key = flb_get_s3_key(ctx->s3_key_format, create_time, tag, ctx->tag_delimiters); + if (!s3_key) { flb_plg_error(ctx->ins, "Failed to construct S3 Object Key for %s", tag); return -1; } + len = strlen(s3_key); + memcpy(uri, s3_key, len); + 
if ((len + 16) <= 1024) { + random_alphanumeric = flb_sts_session_name(); + if (!random_alphanumeric) { + flb_sds_destroy(s3_key); + flb_plg_error(ctx->ins, "Failed to create randomness for S3 key %s", tag); + return -1; + } + + memcpy(&uri[len], "-object", 7); + memcpy(&uri[len + 7], random_alphanumeric, 8); + uri[len + 15] = '\0'; + flb_free(random_alphanumeric); + } + else { + uri[len] = '\0'; + } + s3_client = ctx->s3_client; c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT, uri, body, body_size, @@ -787,7 +834,7 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time flb_plg_debug(ctx->ins, "PutObject http status=%d", c->resp.status); if (c->resp.status == 200) { flb_plg_info(ctx->ins, "Successfully uploaded object %s", uri); - flb_sds_destroy(uri); + flb_sds_destroy(s3_key); flb_http_client_destroy(c); return 0; } @@ -800,7 +847,7 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time } flb_plg_error(ctx->ins, "PutObject request failed"); - flb_sds_destroy(uri); + flb_sds_destroy(s3_key); return -1; } @@ -817,6 +864,12 @@ static struct multipart_upload *get_upload(struct flb_s3 *ctx, if (tmp_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { continue; } + if (tmp_upload->upload_errors >= MAX_UPLOAD_ERRORS) { + tmp_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS; + flb_plg_error(ctx->ins, "Upload for %s has reached max upload errors", + tmp_upload->s3_key); + continue; + } if (strcmp(tmp_upload->tag, tag) == 0) { m_upload = tmp_upload; break; @@ -861,6 +914,82 @@ static struct multipart_upload *create_upload(struct flb_s3 *ctx, return m_upload; } +static void cb_s3_upload(struct flb_config *config, void *data) +{ + struct flb_s3 *ctx = data; + struct flb_local_chunk *chunk = NULL; + struct multipart_upload *m_upload = NULL; + char *buffer = NULL; + size_t buffer_size; + struct mk_list *tmp; + struct mk_list *head; + int complete; + int ret; + + flb_plg_debug(ctx->ins, "Running upload timer callback.."); + + /* Check all chunks and see if any have timed out */ + mk_list_foreach_safe(head, tmp, &ctx->store.chunks) { + chunk = mk_list_entry(head, struct flb_local_chunk, _head); + + if (time(NULL) < (chunk->create_time + ctx->upload_timeout)) { + continue; /* Only send chunks which have timed out */ + } + + m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag)); + + ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size); + if (ret < 0) { + flb_plg_error(ctx->ins, "Could not construct request buffer for %s", + chunk->file_path); + continue; + } + + ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, chunk->tag, strlen(chunk->tag)); + flb_free(buffer); + if (ret != FLB_OK) { + flb_plg_error(ctx->ins, "Could not send chunk with tag %s", + chunk->tag); + } + } + + /* Check all uploads and see if any need completion */ + mk_list_foreach_safe(head, tmp, &ctx->uploads) { + m_upload = mk_list_entry(head, struct multipart_upload, _head); + complete = FLB_FALSE; + + if (m_upload->complete_errors >= MAX_UPLOAD_ERRORS) { + flb_plg_error(ctx->ins, "Upload for %s has reached max completion errors, plugin will give up", + m_upload->s3_key); + mk_list_del(&m_upload->_head); + continue; + } + + if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) { + complete = FLB_TRUE; + } + if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) { + flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout" + " has passed", 
@@ -861,6 +914,82 @@ static struct multipart_upload *create_upload(struct flb_s3 *ctx,

     return m_upload;
 }

+static void cb_s3_upload(struct flb_config *config, void *data)
+{
+    struct flb_s3 *ctx = data;
+    struct flb_local_chunk *chunk = NULL;
+    struct multipart_upload *m_upload = NULL;
+    char *buffer = NULL;
+    size_t buffer_size;
+    struct mk_list *tmp;
+    struct mk_list *head;
+    int complete;
+    int ret;
+
+    flb_plg_debug(ctx->ins, "Running upload timer callback..");
+
+    /* Check all chunks and see if any have timed out */
+    mk_list_foreach_safe(head, tmp, &ctx->store.chunks) {
+        chunk = mk_list_entry(head, struct flb_local_chunk, _head);
+
+        if (time(NULL) < (chunk->create_time + ctx->upload_timeout)) {
+            continue; /* Only send chunks which have timed out */
+        }
+
+        m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag));
+
+        ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size);
+        if (ret < 0) {
+            flb_plg_error(ctx->ins, "Could not construct request buffer for %s",
+                          chunk->file_path);
+            continue;
+        }
+
+        ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, chunk->tag, strlen(chunk->tag));
+        flb_free(buffer);
+        if (ret != FLB_OK) {
+            flb_plg_error(ctx->ins, "Could not send chunk with tag %s",
+                          chunk->tag);
+        }
+    }
+
+    /* Check all uploads and see if any need completion */
+    mk_list_foreach_safe(head, tmp, &ctx->uploads) {
+        m_upload = mk_list_entry(head, struct multipart_upload, _head);
+        complete = FLB_FALSE;
+
+        if (m_upload->complete_errors >= MAX_UPLOAD_ERRORS) {
+            flb_plg_error(ctx->ins, "Upload for %s has reached max completion errors, plugin will give up",
+                          m_upload->s3_key);
+            mk_list_del(&m_upload->_head);
+            continue;
+        }
+
+        if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) {
+            complete = FLB_TRUE;
+        }
+        if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) {
+            flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout"
+                         " has passed", m_upload->s3_key);
+            complete = FLB_TRUE;
+        }
+        if (complete == FLB_TRUE) {
+            m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS;
+            mk_list_del(&m_upload->_head);
+            ret = complete_multipart_upload(ctx, m_upload);
+            if (ret == 0) {
+                multipart_upload_destroy(m_upload);
+            } else {
+                mk_list_add(&m_upload->_head, &ctx->uploads);
+                /* data was persisted, this can be retried */
+                m_upload->complete_errors += 1;
+                flb_plg_error(ctx->ins, "Could not complete upload %s, will retry..",
+                              m_upload->s3_key);
+            }
+        }
+    }
+}
+
 static void cb_s3_flush(const void *data, size_t bytes,
                         const char *tag, int tag_len,
                         struct flb_input_instance *i_ins,
@@ -874,26 +1003,40 @@ static void cb_s3_flush(const void *data, size_t bytes,
     char *buffer = NULL;
     size_t buffer_size;
     int timeout_check = FLB_FALSE;
-    struct mk_list *tmp;
-    struct mk_list *head;
     size_t chunk_size = 0;
     size_t upload_size = 0;
-    int complete;
     int ret;
     int len;

     (void) i_ins;
     (void) config;

-    /* first, clean up any old buffers found on startup */
+    /*
+     * create a timer that will run periodically and check if uploads
+     * are ready for completion
+     * this is created once, on the first flush
+     */
+    if (ctx->timer_created == FLB_FALSE) {
+        flb_plg_debug(ctx->ins, "Creating upload timer with frequency %ds", ctx->timer_ms / 1000);
+        ret = flb_sched_timer_cb_create(config, FLB_SCHED_TIMER_CB_PERM, ctx->timer_ms,
+                                        cb_s3_upload,
+                                        ctx);
+        if (ret == -1) {
+            flb_plg_error(ctx->ins, "Failed to create upload timer");
+            FLB_OUTPUT_RETURN(FLB_RETRY);
+        }
+        ctx->timer_created = FLB_TRUE;
+    }
+
+    /* clean up any old buffers found on startup */
     if (ctx->has_old_buffers == FLB_TRUE) {
         flb_plg_info(ctx->ins, "Sending locally buffered data from previous "
                      "executions to S3; buffer=%s", ctx->store.dir);
+        ctx->has_old_buffers = FLB_FALSE;
         ret = put_all_chunks(ctx);
         if (ret < 0) {
+            ctx->has_old_buffers = FLB_TRUE;
             flb_plg_error(ctx->ins, "Failed to send locally buffered data left over"
                          " from previous executions; will retry. Buffer=%s", ctx->store.dir);
-        } else {
-            ctx->has_old_buffers = FLB_FALSE;
         }
     }
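cb_s3_upload() above walks both lists with the _safe iterator because it may unlink entries while iterating. A minimal sketch of that pattern; illustrative only, reusing the plugin's own types and the destroy helper referenced above, and assuming the plugin headers are included:

/* mk_list_foreach_safe caches the next node in 'tmp' before the body runs,
 * so the current entry can be unlinked and freed without breaking the walk. */
static void drop_abandoned_uploads(struct flb_s3 *ctx)
{
    struct mk_list *head;
    struct mk_list *tmp;
    struct multipart_upload *m_upload;

    mk_list_foreach_safe(head, tmp, &ctx->uploads) {
        m_upload = mk_list_entry(head, struct multipart_upload, _head);
        if (m_upload->complete_errors >= MAX_UPLOAD_ERRORS) {
            mk_list_del(&m_upload->_head);
            multipart_upload_destroy(m_upload);
        }
    }
}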
@@ -941,8 +1084,7 @@ static void cb_s3_flush(const void *data, size_t bytes,
         if (ret < 0) {
             FLB_OUTPUT_RETURN(FLB_RETRY);
         }
-        /* send any chunks/uploads which have timed out */
-        goto cleanup_existing;
+        FLB_OUTPUT_RETURN(FLB_OK);
     }
 }

@@ -956,69 +1098,8 @@ static void cb_s3_flush(const void *data, size_t bytes,
     ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, tag, tag_len);
     flb_free(buffer);
-    if (ret != FLB_OK) {
-        FLB_OUTPUT_RETURN(ret);
-    }
-
-cleanup_existing:
-    m_upload = NULL;
-
-    /* Check all chunks and see if any have timed out */
-    mk_list_foreach_safe(head, tmp, &ctx->store.chunks) {
-        chunk = mk_list_entry(head, struct flb_local_chunk, _head);
-
-        if (time(NULL) < (chunk->create_time + ctx->upload_timeout)) {
-            continue; /* Only send chunks which have timed out */
-        }
-
-        m_upload = get_upload(ctx, chunk->tag, strlen(chunk->tag));
-
-        ret = construct_request_buffer(ctx, NULL, chunk, &buffer, &buffer_size);
-        if (ret < 0) {
-            flb_plg_error(ctx->ins, "Could not construct request buffer for %s",
-                          chunk->file_path);
-            continue;
-        }
-
-        ret = upload_data(ctx, chunk, m_upload, buffer, buffer_size, chunk->tag, strlen(chunk->tag));
-        flb_free(buffer);
-        if (ret != FLB_OK) {
-            /*
-             * exit- can try again on next flush
-             * we return OK since the actual data sent in this flush was persisted
-             */
-            FLB_OUTPUT_RETURN(FLB_OK);
-        }
-    }
-
-    /* Check all uploads and see if any need completion */
-    mk_list_foreach_safe(head, tmp, &ctx->uploads) {
-        m_upload = mk_list_entry(head, struct multipart_upload, _head);
-        complete = FLB_FALSE;
-        if (m_upload->upload_state == MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS) {
-            complete = FLB_TRUE;
-        }
-        if (time(NULL) > (m_upload->init_time + ctx->upload_timeout)) {
-            flb_plg_info(ctx->ins, "Completing upload for %s because upload_timeout"
-                         " has passed", m_upload->s3_key);
-            complete = FLB_TRUE;
-        }
-        if (complete == FLB_TRUE) {
-            m_upload->upload_state = MULTIPART_UPLOAD_STATE_COMPLETE_IN_PROGRESS;
-            mk_list_del(&m_upload->_head);
-            ret = complete_multipart_upload(ctx, m_upload);
-            if (ret == 0) {
-                multipart_upload_destroy(m_upload);
-            } else {
-                mk_list_add(&m_upload->_head, &ctx->uploads);
-                /* we return FLB_OK in this case, since data was persisted */
-                flb_plg_error(ctx->ins, "Could not complete upload %s, will retry on next flush..",
-                              m_upload->s3_key);
-            }
-        }
-    }
-    FLB_OUTPUT_RETURN(FLB_OK);
+    FLB_OUTPUT_RETURN(ret);
 }

 static int cb_s3_exit(void *data, struct flb_config *config)
@@ -1034,8 +1115,10 @@ static int cb_s3_exit(void *data, struct flb_config *config)
     }

     if (mk_list_size(&ctx->store.chunks) > 0) {
-        /* exit must run in sync mode */
-        ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC);
+        if (ctx->use_put_object == FLB_TRUE) {
+            /* exit must run in sync mode */
+            ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC);
+        }
         flb_plg_info(ctx->ins, "Sending all locally buffered data to S3");
         ret = put_all_chunks(ctx);
         if (ret < 0) {
diff --git a/plugins/out_s3/s3.h b/plugins/out_s3/s3.h
index 779e2c35962..b5dcf8ae71f 100644
--- a/plugins/out_s3/s3.h
+++ b/plugins/out_s3/s3.h
@@ -32,6 +32,7 @@

 #define MIN_CHUNKED_UPLOAD_SIZE 5242880
 #define MAX_CHUNKED_UPLOAD_SIZE 50000000
+#define UPLOAD_TIMER_MAX_WAIT 60000

 #define MULTIPART_UPLOAD_STATE_NOT_CREATED 0
 #define MULTIPART_UPLOAD_STATE_CREATED 1
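UPLOAD_TIMER_MAX_WAIT caps the period of the upload timer registered in cb_s3_flush(). The initialization that actually picks timer_ms is not part of this excerpt; a plausible shape for it, stated purely as an assumption, is to poll at a fraction of upload_timeout and clamp to the cap:

/* Assumption only: not taken from this patch. upload_timeout is in seconds,
 * timer_ms in milliseconds, so the poll interval tracks the timeout but
 * never exceeds UPLOAD_TIMER_MAX_WAIT (60 seconds). */
ctx->timer_ms = (int) (ctx->upload_timeout / 6) * 1000;
if (ctx->timer_ms > UPLOAD_TIMER_MAX_WAIT) {
    ctx->timer_ms = UPLOAD_TIMER_MAX_WAIT;
}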
@@ -45,6 +46,14 @@

 #define DEFAULT_UPLOAD_TIMEOUT 3600

+/*
+ * If we see repeated errors on an upload, we will discard it.
+ * This saves us from scenarios where something goes wrong and an upload can
+ * not proceed (maybe some other process completed it or deleted the upload);
+ * instead of erroring out forever, we eventually discard the upload.
+ */
+#define MAX_UPLOAD_ERRORS 10
+
 struct multipart_upload {
     flb_sds_t s3_key;
     flb_sds_t tag;
@@ -69,6 +78,10 @@ struct multipart_upload {
     size_t bytes;

     struct mk_list _head;
+
+    /* see note for MAX_UPLOAD_ERRORS */
+    int upload_errors;
+    int complete_errors;
 };

 struct flb_s3 {
@@ -109,6 +122,9 @@ struct flb_s3 {
     size_t upload_chunk_size;
     time_t upload_timeout;

+    int timer_created;
+    int timer_ms;
+
     struct flb_output_instance *ins;
 };
diff --git a/plugins/out_s3/s3_multipart.c b/plugins/out_s3/s3_multipart.c
index 1466f6952c6..73d9d4d17f8 100644
--- a/plugins/out_s3/s3_multipart.c
+++ b/plugins/out_s3/s3_multipart.c
@@ -142,10 +142,6 @@ int complete_multipart_upload(struct flb_s3 *ctx,
     struct flb_http_client *c = NULL;
     struct flb_aws_client *s3_client;

-    /* run in sync mode */
-    ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC);
-    flb_info("[complete_multipart_upload] ID=%s", m_upload->upload_id);
-
     uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 11 +
                               flb_sds_len(m_upload->upload_id));
     if (!uri) {
@@ -167,8 +163,6 @@ int complete_multipart_upload(struct flb_s3 *ctx,
         return -1;
     }

-    flb_info("Raw request: %s", body);
-
     s3_client = ctx->s3_client;
     c = s3_client->client_vtable->request(s3_client, FLB_HTTP_POST,
                                           uri, body, size,
@@ -207,10 +201,6 @@ int create_multipart_upload(struct flb_s3 *ctx,
     struct flb_http_client *c = NULL;
     struct flb_aws_client *s3_client;

-    /* run in sync mode */
-    ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC);
-    flb_info("[create_multipart_upload] ID=%s", m_upload->upload_id);
-
     uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8);
     if (!uri) {
         flb_errno();
@@ -309,10 +299,6 @@ int upload_part(struct flb_s3 *ctx, struct multipart_upload *m_upload,
     struct flb_http_client *c = NULL;
     struct flb_aws_client *s3_client;

-    /* run in sync mode */
-    ctx->s3_client->upstream->flags &= ~(FLB_IO_ASYNC);
-    flb_info("[upload_part] ID=%s", m_upload->upload_id);
-
     uri = flb_sds_create_size(flb_sds_len(m_upload->s3_key) + 8);
     if (!uri) {
         flb_errno();
diff --git a/src/aws/flb_aws_util.c b/src/aws/flb_aws_util.c
index 96971b9f4d3..d6658ed0865 100644
--- a/src/aws/flb_aws_util.c
+++ b/src/aws/flb_aws_util.c
@@ -29,12 +29,16 @@
 #include
 #include
 #include
+#include

 #define AWS_SERVICE_ENDPOINT_FORMAT "%s.%s.amazonaws.com"
 #define AWS_SERVICE_ENDPOINT_BASE_LEN 15

-#define S3_SERVICE_ENDPOINT_FORMAT "%s.s3.amazonaws.com"
-#define S3_SERVICE_ENDPOINT_BASE_LEN 17
+#define S3_SERVICE_GLOBAL_ENDPOINT_FORMAT "%s.s3.amazonaws.com"
+#define S3_SERVICE_GLOBAL_ENDPOINT_BASE_LEN 17
+
+#define S3_SERVICE_ENDPOINT_FORMAT "%s.s3.%s.amazonaws.com"
+#define S3_SERVICE_ENDPOINT_BASE_LEN 18

 #define TAG_PART_DESCRIPTOR "$TAG[%d]"
 #define TAG_DESCRIPTOR "$TAG"
@@ -99,42 +103,40 @@ int flb_read_file(const char *path, char **out_buf, size_t *out_size)
     int ret;
     long bytes;
     char *buf = NULL;
-    FILE *fp = NULL;
     struct stat st;
     int fd;

-    fp = fopen(path, "r");
-    if (!fp) {
+    fd = open(path, O_RDONLY);
+    if (fd < 0) {
         return -1;
     }

-    fd = fileno(fp);
     ret = fstat(fd, &st);
     if (ret == -1) {
         flb_errno();
-        fclose(fp);
+        close(fd);
         return -1;
     }

     buf = flb_malloc(st.st_size + sizeof(char));
     if (!buf) {
         flb_errno();
-        fclose(fp);
+        close(fd);
         return -1;
     }

-    bytes = fread(buf, st.st_size, 1, fp);
-    if (bytes != 1) {
+    bytes = read(fd, buf, st.st_size);
+    if (bytes < 0) {
         flb_errno();
         flb_free(buf);
-        fclose(fp);
+        close(fd);
         return -1;
     }

     /* fread does not add null byte */
     buf[st.st_size] = '\0';

-    fclose(fp);
+    close(fd);

     *out_buf = buf;
     *out_size = st.st_size;
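The rewritten flb_read_file() above switches from stdio to open/fstat/read. Since read(2) may legally return fewer bytes than requested, a variant of the same pattern that loops until the whole file is consumed may be useful for comparison; this is an illustrative sketch only, not the patch's code, and it assumes flb_malloc/flb_free from <fluent-bit/flb_mem.h>:

#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <fluent-bit/flb_mem.h>

static int read_whole_file(const char *path, char **out_buf, size_t *out_size)
{
    int fd;
    char *buf;
    ssize_t n;
    size_t off = 0;
    struct stat st;

    fd = open(path, O_RDONLY);
    if (fd < 0) {
        return -1;
    }
    if (fstat(fd, &st) == -1) {
        close(fd);
        return -1;
    }
    buf = flb_malloc(st.st_size + 1);
    if (!buf) {
        close(fd);
        return -1;
    }
    while (off < (size_t) st.st_size) {
        n = read(fd, buf + off, st.st_size - off);
        if (n <= 0) {              /* error, or the file shrank under us */
            flb_free(buf);
            close(fd);
            return -1;
        }
        off += n;
    }
    buf[st.st_size] = '\0';        /* read() never NUL-terminates */
    close(fd);

    *out_buf = buf;
    *out_size = st.st_size;
    return 0;
}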
@@ -147,10 +149,18 @@ int flb_read_file(const char *path, char **out_buf, size_t *out_size)
 char *flb_s3_endpoint(char* bucket, char* region)
 {
     char *endpoint = NULL;
-    size_t len = S3_SERVICE_ENDPOINT_BASE_LEN;
+    size_t len = 0;
     int is_cn = FLB_FALSE;
     int bytes;

+    if (strcmp("us-east-1", region) == 0) {
+        len = S3_SERVICE_GLOBAL_ENDPOINT_BASE_LEN;
+    }
+    else {
+        len = S3_SERVICE_ENDPOINT_BASE_LEN;
+        len += strlen(region);
+    }
+
     /* In the China regions, ".cn" is appended to the URL */

     if (strcmp("cn-north-1", region) == 0) {
@@ -171,7 +181,13 @@ char *flb_s3_endpoint(char* bucket, char* region)
         return NULL;
     }

-    bytes = snprintf(endpoint, len, S3_SERVICE_ENDPOINT_FORMAT, bucket);
+    if (strcmp("us-east-1", region) == 0) {
+        bytes = snprintf(endpoint, len, S3_SERVICE_GLOBAL_ENDPOINT_FORMAT, bucket);
+    }
+    else {
+        bytes = snprintf(endpoint, len, S3_SERVICE_ENDPOINT_FORMAT, bucket, region);
+    }
+
     if (bytes < 0) {
         flb_errno();
         flb_free(endpoint);
diff --git a/src/aws/flb_s3_local_buffer.c b/src/aws/flb_s3_local_buffer.c
index 12005afc47d..af52a1d9d19 100644
--- a/src/aws/flb_s3_local_buffer.c
+++ b/src/aws/flb_s3_local_buffer.c
@@ -23,6 +23,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -203,13 +205,27 @@ int flb_mkdir_all(const char *dir) {
 static size_t append_data(char *path, char *data, size_t bytes)
 {
     FILE *f;
+    int fd;
     size_t written;

-    f = fopen(path , "a" );
+    fd = open(
+        path,
+        O_CREAT | O_WRONLY | O_APPEND,
+        S_IRWXU
+    );
+    if (fd == -1) {
+        return -1;
+    }
+
+    f = fdopen(fd, "a");
     if (!f) {
+        flb_errno();
+        close(fd);
         return -1;
     }
+
     written = fwrite(data, 1, bytes, f);
-    fclose(f);
+    fflush(f);
+    fclose(f);

     return written;
 }
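Given the two format strings added above, flb_s3_endpoint() produces a virtual-hosted endpoint whose shape depends on the region. A small usage sketch; the bucket and region values are examples only, and it assumes a build with FLB_HAVE_AWS so the declarations in flb_aws_util.h are visible:

#include <stdio.h>
#include <fluent-bit/flb_mem.h>
#include <fluent-bit/flb_aws_util.h>

static void print_example_endpoints(void)
{
    /* "us-east-1" maps to the global endpoint; other regions are embedded in the host */
    char *global   = flb_s3_endpoint("my-bucket", "us-east-1");
    char *regional = flb_s3_endpoint("my-bucket", "eu-west-1");

    if (global) {
        printf("%s\n", global);    /* my-bucket.s3.amazonaws.com */
        flb_free(global);
    }
    if (regional) {
        printf("%s\n", regional);  /* my-bucket.s3.eu-west-1.amazonaws.com */
        flb_free(regional);
    }
}

For the China regions, the comment in the hunk above notes that the same host names additionally receive a ".cn" suffix.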