From 947ad4e12ea8a92cf095df458ba61a5a1cb27c99 Mon Sep 17 00:00:00 2001 From: Matthew Fala Date: Tue, 14 Dec 2021 11:14:40 -0800 Subject: [PATCH] base64: fork mbedtls 2.25.0 base64 utility to avoid 2.26.0+ performance hit Signed-off-by: Matthew Fala --- include/fluent-bit/flb_base64.h | 106 +++++++++++++ src/CMakeLists.txt | 1 + src/flb_base64.c | 239 ++++++++++++++++++++++++++++++ tests/internal/CMakeLists.txt | 71 +++++---- tests/internal/aws/CMakeLists.txt | 7 + tests/internal/aws/placeholder.c | 11 ++ tests/internal/base64.c | 49 ++++++ 7 files changed, 453 insertions(+), 31 deletions(-) create mode 100644 include/fluent-bit/flb_base64.h create mode 100644 src/flb_base64.c create mode 100644 tests/internal/aws/CMakeLists.txt create mode 100644 tests/internal/aws/placeholder.c create mode 100644 tests/internal/base64.c diff --git a/include/fluent-bit/flb_base64.h b/include/fluent-bit/flb_base64.h new file mode 100644 index 00000000000..2235307df1f --- /dev/null +++ b/include/fluent-bit/flb_base64.h @@ -0,0 +1,106 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2021 The Fluent Bit Authors + * Copyright (C) 2015-2018 Treasure Data Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/** + * \file flb_base64.h + * + * \brief RFC 1521 base64 encoding/decoding + */ + +/* This code is based on base64.h from the mbedtls-2.25.0 Library distribution, + * as originally written by Paul Bakker, et al., and forked by the Fluent Bit + * project to provide performant base64 encoding and decoding routines. + * The 2.25.0 implementation is included rather than 2.26.0+ implementation due + * to performance degradation introduced in 2.26.0. + * + * Method and variable names are changed by the Fluent Bit authors to maintain + * consistency with the Fluent Bit project. + * The self test section of the code was removed by the Fluent Bit authors. + * Other minor changes are made by the Fluent Bit authors. + * + * The original source file base64.h is copyright and licensed as follows; + * + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef FLB_BASE64_H +#define FLB_BASE64_H + +#include + +#define FLB_BASE64_ERR_BUFFER_TOO_SMALL -0x002A /**< Output buffer too small. */ +#define FLB_BASE64_ERR_INVALID_CHARACTER -0x002C /**< Invalid character in input. 
*/ + +/** + * \brief Encode a buffer into base64 format + * + * \param dst destination buffer (can be NULL for checking size) + * \param dlen size of the destination buffer + * \param olen number of bytes written, not counting the terminating NUL + * \param src source buffer + * \param slen amount of data to be encoded + * + * \return 0 if successful, or FLB_BASE64_ERR_BUFFER_TOO_SMALL. + * On success *olen is the encoded length; on + * FLB_BASE64_ERR_BUFFER_TOO_SMALL it is the size needed, NUL included. + * If that length cannot be represented, then no data is + * written to the buffer and *olen is set to the maximum + * length representable as a size_t. + * + * \note Call this function with dlen = 0 to obtain the + * required buffer size in *olen + */ +int flb_base64_encode( unsigned char *dst, size_t dlen, size_t *olen, + const unsigned char *src, size_t slen ); + +/** + * \brief Decode a base64-formatted buffer + * + * \param dst destination buffer (can be NULL for checking size) + * \param dlen size of the destination buffer + * \param olen number of bytes written + * \param src source buffer + * \param slen amount of data to be decoded + * + * \return 0 if successful, FLB_BASE64_ERR_BUFFER_TOO_SMALL, or + * FLB_BASE64_ERR_INVALID_CHARACTER if the input data is + * not correct. *olen is set on success and on + * FLB_BASE64_ERR_BUFFER_TOO_SMALL; invalid input leaves it unwritten. 
+ * + * \note Call this function with dst = NULL or dlen = 0 to obtain + * the required buffer size in *olen + */ +int flb_base64_decode( unsigned char *dst, size_t dlen, size_t *olen, + const unsigned char *src, size_t slen ); + +#endif /* FLB_BASE64_H */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 18a5336ecf1..1a908ae839a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -59,6 +59,7 @@ set(src flb_routes_mask.c flb_typecast.c flb_event.c + flb_base64.c ) # Multiline subsystem diff --git a/src/flb_base64.c b/src/flb_base64.c new file mode 100644 index 00000000000..2bf442fbaea --- /dev/null +++ b/src/flb_base64.c @@ -0,0 +1,239 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2019-2021 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This code is based on base64.c from the mbedtls-2.25.0 Library distribution, + * as originally written by Paul Bakker, et al., and forked by the Fluent Bit + * project to provide performant base64 encoding and decoding routines. + * The 2.25.0 implementation is included rather than 2.26.0+ implementation due + * to performance degradation introduced in 2.26.0. + * + * Method and variable names are changed by the Fluent Bit authors to maintain + * consistency with the Fluent Bit project. + * The self test section of the code was removed by the Fluent Bit authors. 
+ * Other minor changes are made by the Fluent Bit authors. + * + * The original source file base64.c is copyright and licensed as follows; + * + * RFC 1521 base64 encoding/decoding + * + * Copyright The Mbed TLS Contributors + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +/* Encoding alphabet: the index is a 6-bit value, the entry is the base64 character emitted for it. */ static const unsigned char base64_enc_map[64] = +{ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', + 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', + 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', + 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', '+', '/' +}; + +/* Decoding table indexed by 7-bit character code: 0..63 is the value of a base64 digit, 64 is the entry for the padding character '=', and 127 marks a character that is not part of the alphabet. Callers must range-check the index (only 128 entries). */ static const unsigned char base64_dec_map[128] = +{ + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 127, 127, 127, 127, 127, 127, 127, + 127, 127, 127, 62, 127, 127, 127, 63, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 127, 127, + 127, 64, 127, 127, 127, 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 127, 127, 127, 127, 127, 127, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, + 49, 50, 51, 127, 127, 127, 127, 127 +}; + +#define BASE64_SIZE_T_MAX ( (size_t) -1 ) /* largest size_t value; spelled out because SIZE_T_MAX is not standard */ + +/* + * Encode a 
buffer into base64 format + */ +int flb_base64_encode( unsigned char *dst, size_t dlen, size_t *olen, + const unsigned char *src, size_t slen ) +{ + size_t i, n; + int C1, C2, C3; + unsigned char *p; + + if( slen == 0 ) + { + *olen = 0; + return( 0 ); + } + + n = slen / 3 + ( slen % 3 != 0 ); + + if( n > ( BASE64_SIZE_T_MAX - 1 ) / 4 ) + { + *olen = BASE64_SIZE_T_MAX; + return( FLB_BASE64_ERR_BUFFER_TOO_SMALL ); + } + + n *= 4; + + if( ( dlen < n + 1 ) || ( NULL == dst ) ) + { + *olen = n + 1; + return( FLB_BASE64_ERR_BUFFER_TOO_SMALL ); + } + + n = ( slen / 3 ) * 3; + + for( i = 0, p = dst; i < n; i += 3 ) + { + C1 = *src++; + C2 = *src++; + C3 = *src++; + + *p++ = base64_enc_map[(C1 >> 2) & 0x3F]; + *p++ = base64_enc_map[(((C1 & 3) << 4) + (C2 >> 4)) & 0x3F]; + *p++ = base64_enc_map[(((C2 & 15) << 2) + (C3 >> 6)) & 0x3F]; + *p++ = base64_enc_map[C3 & 0x3F]; + } + + if( i < slen ) + { + C1 = *src++; + C2 = ( ( i + 1 ) < slen ) ? *src++ : 0; + + *p++ = base64_enc_map[(C1 >> 2) & 0x3F]; + *p++ = base64_enc_map[(((C1 & 3) << 4) + (C2 >> 4)) & 0x3F]; + + if( ( i + 1 ) < slen ) + *p++ = base64_enc_map[((C2 & 15) << 2) & 0x3F]; + else *p++ = '='; + + *p++ = '='; + } + + *olen = p - dst; + *p = 0; + + return( 0 ); +} + +/* + * Decode a base64-formatted buffer + */ +int flb_base64_decode( unsigned char *dst, size_t dlen, size_t *olen, + const unsigned char *src, size_t slen ) +{ + size_t i, n; + uint32_t j, x; + unsigned char *p; + + /* First pass: check for validity and get output length */ + for( i = n = j = 0; i < slen; i++ ) + { + /* Skip spaces before checking for EOL */ + x = 0; + while( i < slen && src[i] == ' ' ) + { + ++i; + ++x; + } + + /* Spaces at end of buffer are OK */ + if( i == slen ) + break; + + if( ( slen - i ) >= 2 && + src[i] == '\r' && src[i + 1] == '\n' ) + continue; + + if( src[i] == '\n' ) + continue; + + /* Space inside a line is an error */ + if( x != 0 ) + return( FLB_BASE64_ERR_INVALID_CHARACTER ); + + if( src[i] == '=' && ++j > 2 ) + return( 
FLB_BASE64_ERR_INVALID_CHARACTER ); + + if( src[i] > 127 || base64_dec_map[src[i]] == 127 ) + return( FLB_BASE64_ERR_INVALID_CHARACTER ); + + if( base64_dec_map[src[i]] < 64 && j != 0 ) + return( FLB_BASE64_ERR_INVALID_CHARACTER ); + + n++; + } + + if( n == 0 ) + { + *olen = 0; + return( 0 ); + } + + /* The following expression is to calculate the following formula without + * risk of integer overflow in n: + * n = ( ( n * 6 ) + 7 ) >> 3; + */ + n = ( 6 * ( n >> 3 ) ) + ( ( 6 * ( n & 0x7 ) + 7 ) >> 3 ); + n -= j; + + if( dst == NULL || dlen < n ) + { + *olen = n; + return( FLB_BASE64_ERR_BUFFER_TOO_SMALL ); + } + + for( j = 3, n = x = 0, p = dst; i > 0; i--, src++ ) + { + if( *src == '\r' || *src == '\n' || *src == ' ' ) + continue; + + j -= ( base64_dec_map[*src] == 64 ); + x = ( x << 6 ) | ( base64_dec_map[*src] & 0x3F ); + + if( ++n == 4 ) + { + n = 0; + if( j > 0 ) *p++ = (unsigned char)( x >> 16 ); + if( j > 1 ) *p++ = (unsigned char)( x >> 8 ); + if( j > 2 ) *p++ = (unsigned char)( x ); + } + } + + *olen = p - dst; + + return( 0 ); +} diff --git a/tests/internal/CMakeLists.txt b/tests/internal/CMakeLists.txt index 5075b4a4aee..c823b897d08 100644 --- a/tests/internal/CMakeLists.txt +++ b/tests/internal/CMakeLists.txt @@ -22,6 +22,7 @@ set(UNIT_TESTS_FILES flb_time.c multiline.c typecast.c + base64.c ) if (NOT WIN32) @@ -130,41 +131,49 @@ foreach(test_data ${UNIT_TESTS_DATA}) FLB_TEST_COPY_DATA(${test_data}) endforeach() -# Prepare list of unit tests -foreach(source_file ${UNIT_TESTS_FILES}) - get_filename_component(source_file_we ${source_file} NAME_WE) - set(source_file_we flb-it-${source_file_we}) - if(FLB_WITHOUT_${source_file_we}) - message("Skipping test ${source_file_we}") - else() - add_executable( - ${source_file_we} - ${source_file} - ) - add_sanitizers(${source_file_we}) - - if(FLB_JEMALLOC) - target_link_libraries(${source_file_we} libjemalloc ${CMAKE_THREAD_LIBS_INIT}) +# Prepare list of unit tests function +function(prepare_unit_tests TEST_PREFIX 
SOURCEFILES) + foreach(source_file ${SOURCEFILES}) + get_filename_component(source_file_we ${source_file} NAME_WE) + set(source_file_we ${TEST_PREFIX}${source_file_we}) + if(FLB_WITHOUT_${source_file_we}) + message("Skipping test ${source_file_we}") else() - target_link_libraries(${source_file_we} ${CMAKE_THREAD_LIBS_INIT}) + add_executable( + ${source_file_we} + ${source_file} + ) + add_sanitizers(${source_file_we}) + + if(FLB_JEMALLOC) + target_link_libraries(${source_file_we} libjemalloc ${CMAKE_THREAD_LIBS_INIT}) + else() + target_link_libraries(${source_file_we} ${CMAKE_THREAD_LIBS_INIT}) + endif() + + if(FLB_STREAM_PROCESSOR) + target_link_libraries(${source_file_we} flb-sp) + endif() + + target_link_libraries(${source_file_we} fluent-bit-static) + + if(FLB_AVRO_ENCODER) + target_link_libraries(${source_file_we} avro-static jansson) + endif() + + add_test(NAME ${source_file_we} + COMMAND ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${source_file_we} + WORKING_DIRECTORY ${CMAKE_HOME_DIRECTORY}/build) + set_tests_properties(${source_file_we} PROPERTIES LABELS "internal") endif() + endforeach() +endfunction(prepare_unit_tests) - if(FLB_STREAM_PROCESSOR) - target_link_libraries(${source_file_we} flb-sp) - endif() - - target_link_libraries(${source_file_we} fluent-bit-static) - - if(FLB_AVRO_ENCODER) - target_link_libraries(${source_file_we} avro-static jansson) - endif() +prepare_unit_tests(flb-it- "${UNIT_TESTS_FILES}") - add_test(NAME ${source_file_we} - COMMAND ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/${source_file_we} - WORKING_DIRECTORY ${CMAKE_HOME_DIRECTORY}/build) - set_tests_properties(${source_file_we} PROPERTIES LABELS "internal") - endif() -endforeach() +if(FLB_AWS) + add_subdirectory(aws) +endif() if(FLB_TESTS_INTERNAL_FUZZ) add_subdirectory(fuzzers) diff --git a/tests/internal/aws/CMakeLists.txt b/tests/internal/aws/CMakeLists.txt new file mode 100644 index 00000000000..1685c0e24b2 --- /dev/null +++ b/tests/internal/aws/CMakeLists.txt @@ -0,0 +1,7 @@ +# AWS unit 
tests +set(UNIT_TESTS_FILES + placeholder.c + ) + +# Prepare list of unit tests +prepare_unit_tests(flb-it-aws_ "${UNIT_TESTS_FILES}") diff --git a/tests/internal/aws/placeholder.c b/tests/internal/aws/placeholder.c new file mode 100644 index 00000000000..133c9a32d90 --- /dev/null +++ b/tests/internal/aws/placeholder.c @@ -0,0 +1,11 @@ +#include "../flb_tests_internal.h" + +/* Placeholder case: returns at once and checks nothing. */ static void test_placeholder() +{ + return; +} + +TEST_LIST = { + { "placeholder" , test_placeholder }, + { 0 } +}; diff --git a/tests/internal/base64.c b/tests/internal/base64.c new file mode 100644 index 00000000000..d16a2f59848 --- /dev/null +++ b/tests/internal/base64.c @@ -0,0 +1,49 @@ +#include +#include +#include + +#include "flb_tests_internal.h" + +/* Encodes the 11 bytes of "Hello world" into a 100-byte buffer and checks the length, the text and the terminator of the result. NOTE(review): the return code of flb_base64_encode() is not checked, and out[] is uninitialised apart from out[16], so after a failed call strlen()/strcmp() would read indeterminate bytes. */ static void b64_basic_test_encode() +{ + char* data = "Hello world"; + char out[100]; + char* expect = "SGVsbG8gd29ybGQ="; + size_t olen; + out[16] = 'X'; /* sentinel at the index right after the 16 expected characters; checked against 0 below */ + + flb_base64_encode((unsigned char *) out, 100, &olen, (unsigned char *)data, 11); + + TEST_CHECK(strlen(out) == 16 && olen == 16); + TEST_MSG("Base64 encode failed to output result of expected length"); + + TEST_CHECK(strcmp(out, expect) == 0); + TEST_MSG("Base64 encode failed to output result of expected value"); + + TEST_CHECK(out[16] == 0); /* the sentinel must have been replaced by the terminator */ + TEST_MSG("Base64 not null terminated"); + return; +} + +/* Decodes the 16-character text back into a zero-filled 100-byte buffer and checks for the 11-byte original; the zero fill is what lets strlen()/strcmp() be used on the decoded bytes. NOTE(review): the return code of flb_base64_decode() is not checked. */ static void b64_basic_test_decode() +{ + char* data = "SGVsbG8gd29ybGQ="; + char out[100] = { 0 }; + char* expect = "Hello world"; + size_t olen; + + flb_base64_decode((unsigned char *) out, 100, &olen, (unsigned char *)data, 16); + + TEST_CHECK(strlen(out) == 11 && olen == 11); + TEST_MSG("Base64 decode failed to output result of expected length"); + + TEST_CHECK(strcmp(out, expect) == 0); + TEST_MSG("Base64 decode failed to output result of expected value"); + return; +} + +TEST_LIST = { + { "b64_basic_test_encode" , b64_basic_test_encode }, + { "b64_basic_test_decode", b64_basic_test_decode }, + { 0 } +};