Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

out_kinesis_firehose: compression extraction and firehose integration -> 1.8 #4400

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ option(FLB_STREAM_PROCESSOR "Enable Stream Processor" Yes)
option(FLB_CORO_STACK_SIZE "Set coroutine stack size")
option(FLB_AVRO_ENCODER "Build with Avro encoding support" No)
option(FLB_AWS_ERROR_REPORTER "Build with aws error reporting support" No)

option(FLB_ARROW "Build with Apache Arrow support" No)

# Native Metrics Support (cmetrics)
option(FLB_METRICS "Enable metrics support" Yes)
Expand Down Expand Up @@ -735,6 +735,16 @@ if(FLB_OUT_PGSQL AND (NOT PostgreSQL_FOUND))
FLB_OPTION(FLB_OUT_PGSQL OFF)
endif()

# Arrow GLib
# ==========
# Probe for Arrow GLib only when the user asked for Arrow support; the
# original unconditionally ran find_package/pkg_check_modules on every
# configure, and silently turned the option off when the probe failed.
if(FLB_ARROW)
  find_package(PkgConfig)
  pkg_check_modules(ARROW_GLIB QUIET arrow-glib)
  if(ARROW_GLIB_FOUND)
    FLB_DEFINITION(FLB_HAVE_ARROW)
  else()
    # Surface why the feature is being disabled instead of failing silently.
    message(STATUS "Apache Arrow GLib (arrow-glib) not found; disabling FLB_ARROW")
    set(FLB_ARROW OFF)
  endif()
endif()

# Pthread Local Storage
# =====================
# By default we expect the compiler already support thread local storage
Expand Down
63 changes: 63 additions & 0 deletions include/fluent-bit/aws/flb_aws_compress.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
 * ==========
 * Copyright (C) 2019-2021 The Fluent Bit Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef FLB_AWS_COMPRESS
#define FLB_AWS_COMPRESS

#include <sys/types.h> /* for size_t */

/*
 * Compression type ids shared by the AWS output plugins (S3, Firehose).
 * NOTE(review): FLB_AWS_COMPRESS_ARROW is defined unconditionally here, but
 * per the plugin option docs 'arrow' is usable only when Fluent Bit was
 * built with Apache Arrow support (FLB_ARROW) — confirm at the call site.
 */
#define FLB_AWS_COMPRESS_NONE 0  /* no compression (default) */
#define FLB_AWS_COMPRESS_GZIP 1  /* gzip */
#define FLB_AWS_COMPRESS_ARROW 2 /* Apache Arrow (compile-time optional) */

/*
 * Get compression type from compression keyword. The return value is used to identify
 * what compression option to utilize.
 *
 * Returns int compression type id - FLB_AWS_COMPRESS_<compression-type>
 * Returns -1 if the keyword is not recognized.
 */
int flb_aws_compression_get_type(const char *compression_keyword);

/*
 * Compress in_data and write result to newly allocated out_data buf.
 * Client is responsible for freeing out_data.
 *
 * Returns -1 on error
 * Returns 0 on success
 */
int flb_aws_compression_compress(int compression_type, void *in_data, size_t in_len,
                                 void **out_data, size_t *out_len);

/*
 * Truncate and compress in_data and convert to b64.
 * If b64 output data is larger than max_out_len, the input is truncated with a
 * [Truncated...] suffix appended to the end, and recompressed. The result is written to a
 * newly allocated out_data buf.
 * Client is responsible for freeing out_data.
 *
 * out_len and max_out_len do not count the null character as a part of out_data's length,
 * though the null character may be included at the end of out_data.
 *
 * Returns -1 on error
 * Returns 0 on success
 */
int flb_aws_compression_b64_truncate_compress(int compression_type, size_t max_out_len,
                                              void *in_data, size_t in_len,
                                              void **out_data, size_t *out_len);

#endif
20 changes: 20 additions & 0 deletions plugins/out_kinesis_firehose/firehose.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
#include <fluent-bit/flb_http_client.h>
#include <fluent-bit/flb_utils.h>

#include <fluent-bit/aws/flb_aws_compress.h>

#include <monkey/mk_core.h>
#include <msgpack.h>
#include <string.h>
Expand Down Expand Up @@ -119,6 +121,15 @@ static int cb_firehose_init(struct flb_output_instance *ins,
ctx->sts_endpoint = (char *) tmp;
}

tmp = flb_output_get_property("compression", ins);
if (tmp) {
ret = flb_aws_compression_get_type(tmp);
if (ret == -1) {
flb_plg_error(ctx->ins, "unknown compression: %s", tmp);
goto error;
}
ctx->compression = ret;
}

tmp = flb_output_get_property("log_key", ins);
if (tmp) {
Expand Down Expand Up @@ -427,6 +438,15 @@ static struct flb_config_map config_map[] = {
"Custom endpoint for the STS API."
},

{
FLB_CONFIG_MAP_STR, "compression", NULL,
0, FLB_FALSE, 0,
"Compression type for Firehose records. Each log record is individually compressed "
"and sent to Firehose. 'gzip' and 'arrow' are the supported values. "
"'arrow' is only an available if Apache Arrow was enabled at compile time. "
"Defaults to no compression."
},

{
FLB_CONFIG_MAP_STR, "log_key", NULL,
0, FLB_TRUE, offsetof(struct flb_firehose, log_key),
Expand Down
1 change: 1 addition & 0 deletions plugins/out_kinesis_firehose/firehose.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ struct flb_firehose {
char *sts_endpoint;
int custom_endpoint;
int retry_requests;
int compression;

/* must be freed on shutdown if custom_endpoint is not set */
char *endpoint;
Expand Down
62 changes: 44 additions & 18 deletions plugins/out_kinesis_firehose/firehose_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
#include <fluent-bit/flb_http_client.h>
#include <fluent-bit/flb_utils.h>

#include <fluent-bit/aws/flb_aws_compress.h>

#include <monkey/mk_core.h>
#include <mbedtls/base64.h>
#include <msgpack.h>
Expand Down Expand Up @@ -160,6 +162,7 @@ static int process_event(struct flb_firehose *ctx, struct flush *buf,
struct tm *tmp;
size_t len;
size_t tmp_size;
void *compressed_tmp_buf;

tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset;
ret = flb_msgpack_to_json(tmp_buf_ptr,
Expand Down Expand Up @@ -260,29 +263,52 @@ static int process_event(struct flb_firehose *ctx, struct flush *buf,
memcpy(tmp_buf_ptr + written, "\n", 1);
written++;

/*
* check if event_buf is initialized and big enough
* Base64 encoding will increase size by ~4/3
*/
size = (written * 1.5) + 4;
if (buf->event_buf == NULL || buf->event_buf_size < size) {
flb_free(buf->event_buf);
buf->event_buf = flb_malloc(size);
buf->event_buf_size = size;
if (buf->event_buf == NULL) {
if (ctx->compression == FLB_AWS_COMPRESS_NONE) {
/*
* check if event_buf is initialized and big enough
* Base64 encoding will increase size by ~4/3
*/
size = (written * 1.5) + 4;
if (buf->event_buf == NULL || buf->event_buf_size < size) {
flb_free(buf->event_buf);
buf->event_buf = flb_malloc(size);
buf->event_buf_size = size;
if (buf->event_buf == NULL) {
flb_errno();
return -1;
}
}

tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset;

ret = mbedtls_base64_encode((unsigned char *) buf->event_buf, size, &b64_len,
(unsigned char *) tmp_buf_ptr, written);
if (ret != 0) {
flb_errno();
return -1;
}
written = b64_len;
}

tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset;
ret = mbedtls_base64_encode((unsigned char *) buf->event_buf, size, &b64_len,
(unsigned char *) tmp_buf_ptr, written);
if (ret != 0) {
flb_errno();
return -1;
else {
/*
* compress event, truncating input if needed
* replace event buffer with compressed buffer
*/
ret = flb_aws_compression_b64_truncate_compress(ctx->compression,
MAX_B64_EVENT_SIZE,
tmp_buf_ptr,
written, &compressed_tmp_buf,
&size); /* evaluate size */
if (ret == -1) {
flb_plg_error(ctx->ins, "Unable to compress record, discarding, "
"%s", written + 1, ctx->delivery_stream);
return 2;
}
flb_free(buf->event_buf);
buf->event_buf = compressed_tmp_buf;
compressed_tmp_buf = NULL;
written = size;
}
written = b64_len;

tmp_buf_ptr = buf->tmp_buf + buf->tmp_buf_offset;
if ((buf->tmp_buf_size - buf->tmp_buf_offset) < written) {
Expand Down
1 change: 1 addition & 0 deletions plugins/out_kinesis_firehose/firehose_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#define PUT_RECORD_BATCH_PAYLOAD_SIZE 4194304
#define MAX_EVENTS_PER_PUT 500
#define MAX_EVENT_SIZE 1024000
#define MAX_B64_EVENT_SIZE 1365336 /* ceil(1024000 / 3) * 4 */

/* number of characters needed to 'start' a PutRecordBatch payload */
#define PUT_RECORD_BATCH_HEADER_LEN 42
Expand Down
35 changes: 20 additions & 15 deletions plugins/out_s3/s3.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <fluent-bit/flb_pack.h>
#include <fluent-bit/flb_config_map.h>
#include <fluent-bit/flb_aws_util.h>
#include <fluent-bit/aws/flb_aws_compress.h>
#include <fluent-bit/flb_signv4.h>
#include <fluent-bit/flb_scheduler.h>
#include <fluent-bit/flb_gzip.h>
Expand Down Expand Up @@ -125,7 +126,7 @@ static int create_headers(struct flb_s3 *ctx, char *body_md5, struct flb_aws_hea
if (ctx->content_type != NULL) {
headers_len++;
}
if (ctx->compression != NULL) {
if (ctx->compression == FLB_AWS_COMPRESS_GZIP) {
headers_len++;
}
if (ctx->canned_acl != NULL) {
Expand All @@ -152,7 +153,7 @@ static int create_headers(struct flb_s3 *ctx, char *body_md5, struct flb_aws_hea
s3_headers[n].val_len = strlen(ctx->content_type);
n++;
}
if (ctx->compression != NULL) {
if (ctx->compression == FLB_AWS_COMPRESS_GZIP) {
s3_headers[n] = content_encoding_header;
n++;
}
Expand Down Expand Up @@ -678,17 +679,17 @@ static int cb_s3_init(struct flb_output_instance *ins,

tmp = flb_output_get_property("compression", ins);
if (tmp) {
if (strcmp((char *) tmp, "gzip") != 0) {
flb_plg_error(ctx->ins,
"'gzip' is currently the only supported value for 'compression'");
return -1;
} else if (ctx->use_put_object == FLB_FALSE) {
if (ctx->use_put_object == FLB_FALSE) {
flb_plg_error(ctx->ins,
"use_put_object must be enabled when compression is enabled");
return -1;
}

ctx->compression = (char *) tmp;
ret = flb_aws_compression_get_type(tmp);
if (ret == -1) {
flb_plg_error(ctx->ins, "unknown compression: %s", tmp);
return -1;
}
ctx->compression = ret;
}

tmp = flb_output_get_property("content_type", ins);
Expand Down Expand Up @@ -1262,15 +1263,17 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time
flb_sds_destroy(s3_key);
uri = tmp;

if (ctx->compression != NULL) {
ret = flb_gzip_compress(body, body_size, &compressed_body, &final_body_size);
if (ctx->compression != FLB_AWS_COMPRESS_NONE) {
ret = flb_aws_compression_compress(ctx->compression, body, body_size,
&compressed_body, &final_body_size);
if (ret == -1) {
flb_plg_error(ctx->ins, "Failed to compress data");
flb_sds_destroy(uri);
return -1;
}
final_body = (char *) compressed_body;
} else {
}
else {
final_body = body;
final_body_size = body_size;
}
Expand Down Expand Up @@ -1313,7 +1316,7 @@ static int s3_put_object(struct flb_s3 *ctx, const char *tag, time_t create_time
c = s3_client->client_vtable->request(s3_client, FLB_HTTP_PUT,
uri, final_body, final_body_size,
headers, num_headers);
if (ctx->compression != NULL) {
if (ctx->compression != FLB_AWS_COMPRESS_NONE) {
flb_free(compressed_body);
}
flb_free(headers);
Expand Down Expand Up @@ -2225,8 +2228,10 @@ static struct flb_config_map config_map[] = {
{
FLB_CONFIG_MAP_STR, "compression", NULL,
0, FLB_FALSE, 0,
"Compression type for S3 objects. 'gzip' is currently the only supported value. "
"The Content-Encoding HTTP Header will be set to 'gzip'."
"Compression type for S3 objects. 'gzip' and 'arrow' are the supported values. "
"'arrow' is only an available if Apache Arrow was enabled at compile time. "
"Defaults to no compression. "
"If 'gzip' is selected, the Content-Encoding HTTP Header will be set to 'gzip'."
},
{
FLB_CONFIG_MAP_STR, "content_type", NULL,
Expand Down
2 changes: 1 addition & 1 deletion plugins/out_s3/s3.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,14 +107,14 @@ struct flb_s3 {
char *endpoint;
char *sts_endpoint;
char *canned_acl;
char *compression;
char *content_type;
char *log_key;
int free_endpoint;
int retry_requests;
int use_put_object;
int send_content_md5;
int static_file_path;
int compression;

struct flb_aws_provider *provider;
struct flb_aws_provider *base_provider;
Expand Down
Loading