From a140a9742d65eb2d4c8293925d922fe953563fd4 Mon Sep 17 00:00:00 2001 From: morningman Date: Wed, 23 Aug 2023 20:38:16 +0800 Subject: [PATCH] remove lz4 config --- be/src/common/config.cpp | 2 -- be/src/common/config.h | 8 -------- be/src/util/load_util.cpp | 12 +++++------- be/src/vec/exec/format/csv/csv_reader.cpp | 6 ++---- .../apache/doris/planner/external/HiveScanNode.java | 11 +++++++++++ 5 files changed, 18 insertions(+), 21 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 072a91901752a5a..7998e153741bbd8 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1061,8 +1061,6 @@ DEFINE_mString(user_files_secure_path, "${DORIS_HOME}"); DEFINE_Int32(partition_topn_partition_threshold, "1024"); -DEFINE_mString(default_lz4_codec, "block"); - #ifdef BE_TEST // test s3 DEFINE_String(test_s3_resource, "resource"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 2fe0f43e634c920..a9f69e2a947ad9b 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1120,14 +1120,6 @@ DECLARE_mString(user_files_secure_path); // and if this threshold is exceeded, the remaining data will be pass through to other node directly. DECLARE_Int32(partition_topn_partition_threshold); -// The default lz4 codec. Options: frame, block -// In previous, we use lz4 "frame" as the default codec -// but the hadoop use lz4 block to write data -// So in v2.0, change the default codec to "block" -// So that we can read lz4 data from hive table by default. -// TODO: find a way to auto detect this. 
-DECLARE_mString(default_lz4_codec); - #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/util/load_util.cpp b/be/src/util/load_util.cpp index 3c51a6ca1074084..1277132378b85aa 100644 --- a/be/src/util/load_util.cpp +++ b/be/src/util/load_util.cpp @@ -44,13 +44,11 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co *format_type = TFileFormatType::FORMAT_CSV_BZ2; *compress_type = TFileCompressType::BZ2; } else if (iequal(compress_type_str, "LZ4")) { - if (config::default_lz4_codec == "block") { - *format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK; - *compress_type = TFileCompressType::LZ4BLOCK; - } else { - *format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME; - *compress_type = TFileCompressType::LZ4FRAME; - } + *format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME; + *compress_type = TFileCompressType::LZ4FRAME; + } else if (iequal(compress_type_str, "LZ4_BLOCK")) { + *format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK; + *compress_type = TFileCompressType::LZ4BLOCK; } else if (iequal(compress_type_str, "LZOP")) { *format_type = TFileFormatType::FORMAT_CSV_LZOP; *compress_type = TFileCompressType::LZO; diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index 3bffa10aa491e07..64749e935cf0725 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -505,8 +505,7 @@ Status CsvReader::_create_decompressor() { compress_type = CompressType::BZIP2; break; case TFileCompressType::LZ4FRAME: - compress_type = config::default_lz4_codec == "block" ? CompressType::LZ4BLOCK - : CompressType::LZ4FRAME; + compress_type = CompressType::LZ4FRAME; break; case TFileCompressType::LZ4BLOCK: compress_type = CompressType::LZ4BLOCK; @@ -534,8 +533,7 @@ Status CsvReader::_create_decompressor() { compress_type = CompressType::BZIP2; break; case TFileFormatType::FORMAT_CSV_LZ4FRAME: - compress_type = config::default_lz4_codec == "block" ? 
CompressType::LZ4BLOCK - : CompressType::LZ4FRAME; + compress_type = CompressType::LZ4FRAME; break; case TFileFormatType::FORMAT_CSV_LZ4BLOCK: compress_type = CompressType::LZ4BLOCK; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java index 7178a585ff17393..5d0033c90ea806b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java @@ -51,6 +51,7 @@ import org.apache.doris.spi.Split; import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TFileAttributes; +import org.apache.doris.thrift.TFileCompressType; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileTextScanRangeParams; import org.apache.doris.thrift.TFileType; @@ -386,4 +387,14 @@ public boolean pushDownAggNoGrouping(FunctionCallExpr aggExpr) { public boolean pushDownAggNoGroupingCheckCol(FunctionCallExpr aggExpr, Column col) { return !col.isAllowNull(); } + + @Override + protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws UserException { + TFileCompressType compressType = super.getFileCompressType(fileSplit); + // Hadoop writes LZ4 data with the block codec, so Hive files reported as LZ4FRAME are read as LZ4BLOCK. + if (compressType == TFileCompressType.LZ4FRAME) { + compressType = TFileCompressType.LZ4BLOCK; + } + return compressType; + } }