Skip to content

Commit

Permalink
remove lz4 config
Browse files Browse the repository at this point in the history
  • Loading branch information
morningman committed Aug 23, 2023
1 parent 1b12dd9 commit a140a97
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 21 deletions.
2 changes: 0 additions & 2 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1061,8 +1061,6 @@ DEFINE_mString(user_files_secure_path, "${DORIS_HOME}");

DEFINE_Int32(partition_topn_partition_threshold, "1024");

DEFINE_mString(default_lz4_codec, "block");

#ifdef BE_TEST
// test s3
DEFINE_String(test_s3_resource, "resource");
Expand Down
8 changes: 0 additions & 8 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1120,14 +1120,6 @@ DECLARE_mString(user_files_secure_path);
// and if this threshold is exceeded, the remaining data will be passed through to other nodes directly.
DECLARE_Int32(partition_topn_partition_threshold);

// The default lz4 codec. Options: frame, block
// Previously, lz4 "frame" was used as the default codec,
// but Hadoop uses lz4 "block" to write data.
// So in v2.0, the default codec is changed to "block"
// so that lz4 data from Hive tables can be read by default.
// TODO: find a way to auto detect this.
DECLARE_mString(default_lz4_codec);

#ifdef BE_TEST
// test s3
DECLARE_String(test_s3_resource);
Expand Down
12 changes: 5 additions & 7 deletions be/src/util/load_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,11 @@ void LoadUtil::parse_format(const std::string& format_str, const std::string& co
*format_type = TFileFormatType::FORMAT_CSV_BZ2;
*compress_type = TFileCompressType::BZ2;
} else if (iequal(compress_type_str, "LZ4")) {
if (config::default_lz4_codec == "block") {
*format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
*compress_type = TFileCompressType::LZ4BLOCK;
} else {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
*compress_type = TFileCompressType::LZ4FRAME;
}
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
*compress_type = TFileCompressType::LZ4FRAME;
} else if (iequal(compress_type_str, "LZ4_BLOCK")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4BLOCK;
*compress_type = TFileCompressType::LZ4BLOCK;
} else if (iequal(compress_type_str, "LZOP")) {
*format_type = TFileFormatType::FORMAT_CSV_LZOP;
*compress_type = TFileCompressType::LZO;
Expand Down
6 changes: 2 additions & 4 deletions be/src/vec/exec/format/csv/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -505,8 +505,7 @@ Status CsvReader::_create_decompressor() {
compress_type = CompressType::BZIP2;
break;
case TFileCompressType::LZ4FRAME:
compress_type = config::default_lz4_codec == "block" ? CompressType::LZ4BLOCK
: CompressType::LZ4FRAME;
compress_type = CompressType::LZ4FRAME;
break;
case TFileCompressType::LZ4BLOCK:
compress_type = CompressType::LZ4BLOCK;
Expand Down Expand Up @@ -534,8 +533,7 @@ Status CsvReader::_create_decompressor() {
compress_type = CompressType::BZIP2;
break;
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
compress_type = config::default_lz4_codec == "block" ? CompressType::LZ4BLOCK
: CompressType::LZ4FRAME;
compress_type = CompressType::LZ4FRAME;
break;
case TFileFormatType::FORMAT_CSV_LZ4BLOCK:
compress_type = CompressType::LZ4BLOCK;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.doris.spi.Split;
import org.apache.doris.statistics.StatisticalType;
import org.apache.doris.thrift.TFileAttributes;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileTextScanRangeParams;
import org.apache.doris.thrift.TFileType;
Expand Down Expand Up @@ -386,4 +387,14 @@ public boolean pushDownAggNoGrouping(FunctionCallExpr aggExpr) {
public boolean pushDownAggNoGroupingCheckCol(FunctionCallExpr aggExpr, Column col) {
    // The answer depends only on the column's nullability; aggExpr is not consulted here.
    final boolean nullable = col.isAllowNull();
    return !nullable;
}

@Override
protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws UserException {
    final TFileCompressType detected = super.getFileCompressType(fileSplit);
    // Hadoop uses the LZ4 block codec, so a split the parent reports as
    // LZ4FRAME is treated as LZ4BLOCK here; every other type is returned unchanged.
    return detected == TFileCompressType.LZ4FRAME ? TFileCompressType.LZ4BLOCK : detected;
}
}

0 comments on commit a140a97

Please sign in to comment.