diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp
index 5cfa078fb89827..1346b14a7b7379 100644
--- a/be/src/exec/text_converter.cpp
+++ b/be/src/exec/text_converter.cpp
@@ -35,16 +35,20 @@
 #include "util/string_parser.hpp"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
+#include "vec/columns/column_map.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/columns/column_string.h"
+#include "vec/columns/column_struct.h"
 #include "vec/columns/column_vector.h"
 #include "vec/core/types.h"
 #include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
 
-TextConverter::TextConverter(char escape_char, char array_delimiter)
-        : _escape_char(escape_char), _array_delimiter(array_delimiter) {}
+TextConverter::TextConverter(char escape_char, char collection_delimiter, char map_kv_delimiter)
+        : _escape_char(escape_char),
+          _collection_delimiter(collection_delimiter),
+          _map_kv_delimiter(map_kv_delimiter) {}
 
 void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
                                         vectorized::MutableColumnPtr* column_ptr, const char* data,
@@ -62,12 +66,15 @@ void TextConverter::write_string_column(const SlotDescriptor* slot_desc,
     }
 }
 
-bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
-                                     vectorized::IColumn* nullable_col_ptr, const char* data,
-                                     size_t len, bool copy_string, bool need_escape, size_t rows) {
+bool TextConverter::_write_data(const TypeDescriptor& type_desc,
+                                vectorized::IColumn* nullable_col_ptr, const char* data, size_t len,
+                                bool copy_string, bool need_escape, size_t rows,
+                                char array_delimiter) {
     vectorized::IColumn* col_ptr = nullable_col_ptr;
     // \N means it's NULL
-    if (slot_desc->is_nullable()) {
+    // No SlotDescriptor is available here, so nullability is detected from the column's type.
+    bool is_null_able = typeid(*nullable_col_ptr) == typeid(vectorized::ColumnNullable);
+    if (is_null_able) {
         auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
         if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len ==
SQL_NULL_DATA) { nullable_column->insert_many_defaults(rows); @@ -82,7 +89,7 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; size_t origin_size = col_ptr->size(); // Parse the raw-text data. Translate the text string to internal format. - switch (slot_desc->type().type) { + switch (type_desc.type) { case TYPE_HLL: { HyperLogLog hyper_log_log(Slice(data, len)); auto& hyper_data = reinterpret_cast(col_ptr)->get_data(); @@ -244,7 +251,7 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, case TYPE_DECIMAL32: { StringParser::ParseResult result = StringParser::PARSE_SUCCESS; int32_t value = StringParser::string_to_decimal( - data, len, slot_desc->type().precision, slot_desc->type().scale, &result); + data, len, type_desc.precision, type_desc.scale, &result); if (result != StringParser::PARSE_SUCCESS) { parse_result = StringParser::PARSE_FAILURE; break; @@ -257,7 +264,7 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, case TYPE_DECIMAL64: { StringParser::ParseResult result = StringParser::PARSE_SUCCESS; int64_t value = StringParser::string_to_decimal( - data, len, slot_desc->type().precision, slot_desc->type().scale, &result); + data, len, type_desc.precision, type_desc.scale, &result); if (result != StringParser::PARSE_SUCCESS) { parse_result = StringParser::PARSE_FAILURE; break; @@ -271,7 +278,7 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, StringParser::ParseResult result = StringParser::PARSE_SUCCESS; vectorized::Int128 value = StringParser::string_to_decimal( - data, len, slot_desc->type().precision, slot_desc->type().scale, &result); + data, len, type_desc.precision, type_desc.scale, &result); if (result != StringParser::PARSE_SUCCESS) { parse_result = StringParser::PARSE_FAILURE; break; @@ -282,200 +289,94 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc, break; } case 
TYPE_ARRAY: { - std::function func = - [&](int left, int right, char split, - const TypeDescriptor& type) -> vectorized::Array { - vectorized::Array array; - int fr = left; - for (int i = left; i <= right + 1; i++) { - auto Sub_type = type.children[0]; - if (i <= right && data[i] != split && data[i] != _array_delimiter) { - continue; - } - if (Sub_type.type == TYPE_ARRAY) { - array.push_back(func(fr, i - 1, split + 1, Sub_type)); - } else { - StringParser::ParseResult local_parse_result = StringParser::PARSE_SUCCESS; - switch (Sub_type.type) { - case TYPE_HLL: { - DCHECK(false) << "not support type: " - << "array\n"; - break; - } - case TYPE_STRING: - case TYPE_VARCHAR: - case TYPE_CHAR: { - size_t sz = i - fr; - if (need_escape) { - unescape_string_on_spot(data + fr, &sz); - } - array.push_back(std::string(data + fr, sz)); - break; - } - case TYPE_BOOLEAN: { - bool num = StringParser::string_to_bool(data + fr, i - fr, - &local_parse_result); - array.push_back((uint8_t)num); - break; - } - case TYPE_TINYINT: { - int8_t num = StringParser::string_to_int(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_SMALLINT: { - int16_t num = StringParser::string_to_int(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_INT: { - int32_t num = StringParser::string_to_int(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_BIGINT: { - int64_t num = StringParser::string_to_int(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_LARGEINT: { - __int128 num = StringParser::string_to_int<__int128>(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_FLOAT: { - float num = StringParser::string_to_float(data + fr, i - fr, - &local_parse_result); - array.push_back(num); - break; - } - case TYPE_DOUBLE: { - double num = StringParser::string_to_float(data + fr, i - fr, - &local_parse_result); 
- array.push_back(num); - break; - } - case TYPE_DATE: { - vectorized::VecDateTimeValue ts_slot; - if (!ts_slot.from_date_str(data + fr, i - fr)) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - ts_slot.cast_to_date(); - array.push_back(*reinterpret_cast(&ts_slot)); - break; - } - case TYPE_DATEV2: { - vectorized::DateV2Value ts_slot; - if (!ts_slot.from_date_str(data + fr, i - fr)) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - uint32_t int_val = ts_slot.to_date_int_val(); - array.push_back(int_val); - break; - } - case TYPE_DATETIME: { - vectorized::VecDateTimeValue ts_slot; - if (!ts_slot.from_date_str(data + fr, i - fr)) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - ts_slot.to_datetime(); - array.push_back((int64_t)ts_slot); - break; - } - case TYPE_DATETIMEV2: { - vectorized::DateV2Value ts_slot; - if (!ts_slot.from_date_str(data + fr, i - fr)) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - uint64_t int_val = ts_slot.to_date_int_val(); - array.push_back(int_val); - break; - } - case TYPE_DECIMALV2: { - DecimalV2Value decimal_slot; - if (decimal_slot.parse_from_str(data + fr, i - fr)) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - array.push_back(decimal_slot.value()); - break; - } - case TYPE_DECIMAL32: { - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - int32_t value = StringParser::string_to_decimal( - data + fr, i - fr, Sub_type.precision, Sub_type.scale, &result); - if (result != StringParser::PARSE_SUCCESS) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - array.push_back(value); - break; - } - case TYPE_DECIMAL64: { - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - int64_t value = StringParser::string_to_decimal( - data + fr, i - fr, Sub_type.precision, Sub_type.scale, &result); - if (result != StringParser::PARSE_SUCCESS) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - 
array.push_back(value); - break; - } - case TYPE_DECIMAL128I: { - StringParser::ParseResult result = StringParser::PARSE_SUCCESS; - vectorized::Int128 value = - StringParser::string_to_decimal( - data + fr, i - fr, Sub_type.precision, Sub_type.scale, - &result); - if (result != StringParser::PARSE_SUCCESS) { - local_parse_result = StringParser::PARSE_FAILURE; - break; - } - array.push_back(value); - break; - } - default: { - DCHECK(false) << "bad slot type: array<" << Sub_type << ">"; - break; - } - } + auto col = reinterpret_cast(col_ptr); + + std::vector> ranges; + for (size_t i = 0, from = 0; i <= len; i++) { + if (i < len && data[i] != array_delimiter && data[i] != _collection_delimiter) { + continue; + } + ranges.push_back({from, i - from}); + from = i + 1; + } + + auto sub_type = type_desc.children[0]; + for (int i = 0; i < rows; i++) { + for (auto range : ranges) { + _write_data(sub_type, &col->get_data(), data + range.first, range.second, + copy_string, need_escape, 1, array_delimiter + 1); + } + col->get_offsets().push_back(col->get_offsets().back() + ranges.size()); + } + + break; + } + case TYPE_MAP: { + auto col = reinterpret_cast(col_ptr); - if (local_parse_result != StringParser::PARSE_SUCCESS) { - parse_result = local_parse_result; - return array; - } - } - fr = i + 1; + std::vector> ranges; + for (size_t i = 0, from = 0, kv = 0; i <= len; i++) { + /* + * In hive , when you special map key and value delimiter as ':' + * for map column , the query result is correct , but + * for map column and map column , the query result is incorrect, + * because this field have many '_map_kv_delimiter'. + * + * So i use 'kv <= from' in order to get _map_kv_delimiter that appears first. 
+             * Entries that contain no key/value delimiter are skipped when ranges are recorded. */
+            if (i < len && data[i] == _map_kv_delimiter && kv <= from) {
+                kv = i;
+                continue;
+            }
+            if (i == len || data[i] == _collection_delimiter) {
+                if (kv >= from && kv < i && data[kv] == _map_kv_delimiter) {
+                    ranges.push_back({from, kv, i - 1});
+                }
+                from = i + 1;
             }
-            return array;
-        };
+        }
 
-        auto array = func(0, len - 1, '\002', slot_desc->type());
+        auto key_type = type_desc.children[0];
+        auto value_type = type_desc.children[1];
         for (int i = 0; i < rows; i++) {
-            reinterpret_cast<vectorized::ColumnArray*>(col_ptr)->insert(array);
+            for (auto range : ranges) {
+                _write_data(key_type, &col->get_keys(), data + range[0], range[1] - range[0],
+                            copy_string, need_escape, 1, array_delimiter + 1);
+                _write_data(value_type, &col->get_values(), data + range[1] + 1,
+                            range[2] - range[1], copy_string, need_escape, 1, array_delimiter + 1);
+            }
+            col->get_offsets().push_back(col->get_offsets().back() + ranges.size());
         }
         break;
     }
+    case TYPE_STRUCT: {
+        auto col = reinterpret_cast<vectorized::ColumnStruct*>(col_ptr);
+        // Each child call is given `rows`, so the fields are written once, not once per row.
+        std::vector<std::pair<size_t, size_t>> ranges;
+        for (size_t i = 0, from = 0; i <= len; i++) {
+            if (i == len || data[i] == _collection_delimiter) {
+                ranges.push_back({from, i - from});
+                from = i + 1;
+            }
+        }
+        // Missing trailing fields are fed as NULL_STR (assumes nullable children; TODO confirm).
+        for (size_t loc = 0; loc < col->get_columns().size(); loc++) {
+            const char* field = loc < ranges.size() ? data + ranges[loc].first : NULL_STR;
+            const size_t field_len = loc < ranges.size() ? ranges[loc].second : 2; // strlen("\\N")
+            _write_data(type_desc.children[loc], &col->get_column(loc), field, field_len,
+                        copy_string, need_escape, rows, array_delimiter + 1);
+        }
+        break;
+    }
     default:
-        DCHECK(false) << "bad slot type: " << slot_desc->type();
+        DCHECK(false) << "bad slot type: " << type_desc;
         break;
     }
 
     if (UNLIKELY(parse_result == StringParser::PARSE_FAILURE)) {
-        if (true == slot_desc->is_nullable()) {
+        if (is_null_able) {
             auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(nullable_col_ptr);
             size_t size = nullable_column->get_null_map_data().size();
             doris::vectorized::NullMap& null_map_data = nullable_column->get_null_map_data();
@@ -489,6 +390,13 @@ bool TextConverter::write_vec_column(const SlotDescriptor* slot_desc,
     return true;
 }
 
+bool TextConverter::write_vec_column(const
SlotDescriptor* slot_desc, + vectorized::IColumn* nullable_col_ptr, const char* data, + size_t len, bool copy_string, bool need_escape, size_t rows) { + return _write_data(slot_desc->type(), nullable_col_ptr, data, len, copy_string, need_escape, + rows, '\2'); +} + void TextConverter::unescape_string_on_spot(const char* src, size_t* len) { const char* start = src; char* dest_ptr = const_cast(src); diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h index 083c7c6881ed61..ef4e87f5a5703e 100644 --- a/be/src/exec/text_converter.h +++ b/be/src/exec/text_converter.h @@ -31,7 +31,7 @@ class TextConverter { public: static constexpr char NULL_STR[3] = {'\\', 'N', '\0'}; - TextConverter(char escape_char, char array_delimiter = '\2'); + TextConverter(char escape_char, char collection_delimiter = '\2', char map_kv_delimiter = '\3'); void write_string_column(const SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr, const char* data, @@ -57,11 +57,23 @@ class TextConverter { size_t rows); void unescape_string_on_spot(const char* src, size_t* len); - void set_array_delimiter(char array_delimiter) { _array_delimiter = array_delimiter; } + void set_collection_delimiter(char collection_delimiter) { + _collection_delimiter = collection_delimiter; + } + void set_map_kv_delimiter(char mapkv_delimiter) { _map_kv_delimiter = mapkv_delimiter; } private: + bool _write_data(const TypeDescriptor& type_desc, vectorized::IColumn* nullable_col_ptr, + const char* data, size_t len, bool copy_string, bool need_escape, size_t rows, + char array_delimiter); + char _escape_char; - char _array_delimiter; + + //struct,array and map delimiter + char _collection_delimiter; + + //map key and value delimiter + char _map_kv_delimiter; }; } // namespace doris diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp index 43c1dde1ced6a2..5e218d867cd88c 100644 --- a/be/src/vec/exec/format/csv/csv_reader.cpp +++ 
b/be/src/vec/exec/format/csv/csv_reader.cpp @@ -200,9 +200,11 @@ Status CsvReader::init_reader(bool is_load) { _line_delimiter = _params.file_attributes.text_params.line_delimiter; _line_delimiter_length = _line_delimiter.size(); - //get array delimiter - _array_delimiter = _params.file_attributes.text_params.array_delimiter; - _text_converter->set_array_delimiter(_array_delimiter[0]); + _collection_delimiter = _params.file_attributes.text_params.collection_delimiter; + _text_converter->set_collection_delimiter(_collection_delimiter[0]); + + _map_kv_delimiter = _params.file_attributes.text_params.mapkv_delimiter; + _text_converter->set_map_kv_delimiter(_map_kv_delimiter[0]); if (_params.file_attributes.__isset.trim_double_quotes) { _trim_double_quotes = _params.file_attributes.trim_double_quotes; @@ -689,9 +691,11 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) { _line_delimiter = _params.file_attributes.text_params.line_delimiter; _line_delimiter_length = _line_delimiter.size(); - //get array delimiter - _array_delimiter = _params.file_attributes.text_params.array_delimiter; - _text_converter->set_array_delimiter(_array_delimiter[0]); + _collection_delimiter = _params.file_attributes.text_params.collection_delimiter; + _text_converter->set_collection_delimiter(_collection_delimiter[0]); + + _map_kv_delimiter = _params.file_attributes.text_params.mapkv_delimiter; + _text_converter->set_map_kv_delimiter(_map_kv_delimiter[0]); // create decompressor. 
// _decompressor may be nullptr if this is not a compressed file diff --git a/be/src/vec/exec/format/csv/csv_reader.h b/be/src/vec/exec/format/csv/csv_reader.h index 42178846f15dc0..a1577a638ec470 100644 --- a/be/src/vec/exec/format/csv/csv_reader.h +++ b/be/src/vec/exec/format/csv/csv_reader.h @@ -144,7 +144,11 @@ class CsvReader : public GenericReader { std::string _value_separator; std::string _line_delimiter; - std::string _array_delimiter; + + // struct, array and map delimiter + std::string _collection_delimiter; + // map key and value delimiter + std::string _map_kv_delimiter; int _value_separator_length; int _line_delimiter_length; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java index 211f6e80569976..61a571358a2a6b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanNode.java @@ -20,11 +20,15 @@ import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.TupleDescriptor; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HiveMetaStoreClientHelper; import org.apache.doris.catalog.ListPartitionItem; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.external.HMSExternalTable; @@ -71,9 +75,12 @@ public class HiveScanNode extends FileQueryScanNode { public static final String PROP_LINE_DELIMITER = "line.delim"; public static final String DEFAULT_LINE_DELIMITER = "\n"; - public static final String PROP_ARRAY_DELIMITER_HIVE2 = 
"colelction.delim"; - public static final String PROP_ARRAY_DELIMITER_HIVE3 = "collection.delim"; - public static final String DEFAULT_ARRAY_DELIMITER = "\2"; + public static final String PROP_COLLECTION_DELIMITER_HIVE2 = "colelction.delim"; + public static final String PROP_COLLECTION_DELIMITER_HIVE3 = "collection.delim"; + public static final String DEFAULT_COLLECTION_DELIMITER = "\2"; + + public static final String PROP_MAP_KV_DELIMITER = "mapkey.delim"; + public static final String DEFAULT_MAP_KV_DELIMITER = "\003"; protected final HMSExternalTable hmsTable; private HiveTransaction hiveTransaction = null; @@ -104,10 +111,46 @@ protected void doInitialize() throws UserException { String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat(); if (inputFormat.contains("TextInputFormat")) { for (SlotDescriptor slot : desc.getSlots()) { - if (slot.getType().isMapType() || slot.getType().isStructType()) { + if (slot.getType().isScalarType()) { + continue; + } + boolean supported = true; + + // support Array and array> + if (slot.getType().isArrayType()) { + ArrayType arraySubType = (ArrayType) slot.getType(); + while (true) { + if (arraySubType.getItemType().isArrayType()) { + arraySubType = (ArrayType) arraySubType.getItemType(); + continue; + } + if (!arraySubType.getItemType().isScalarType()) { + supported = false; + } + break; + } + } else if (slot.getType().isMapType()) { //support map + if (!((MapType) slot.getType()).getValueType().isScalarType()) { + supported = false; + } + } else if (slot.getType().isStructType()) { //support Struct< primitive_type,primitive_type ... 
> + StructType structSubType = (StructType) slot.getType(); + structSubType.getColumnSize(); + for (StructField f : structSubType.getFields()) { + if (!f.getType().isScalarType()) { + supported = false; + } + } + } + + if (supported == false) { throw new UserException("For column `" + slot.getColumn().getName() - + "`, The column types MAP/STRUCT are not supported yet" - + " for text input format of Hive. "); + + "`, The column types are not supported yet" + + " for text input format of Hive.\n" + + "For complex type ,now Support :\n" + + "\t1. array< primitive_type > and array< array< ... > >\n" + + "\t2. map< primitive_type , primitive_type >\n" + + "\t3. Struct< primitive_type , primitive_type ... >\n"); } } } @@ -281,12 +324,15 @@ protected TFileAttributes getFileAttributes() throws UserException { java.util.Map delimiter = hmsTable.getRemoteTable().getSd().getSerdeInfo().getParameters(); textParams.setColumnSeparator(delimiter.getOrDefault(PROP_FIELD_DELIMITER, DEFAULT_FIELD_DELIMITER)); textParams.setLineDelimiter(delimiter.getOrDefault(PROP_LINE_DELIMITER, DEFAULT_LINE_DELIMITER)); - if (delimiter.get(PROP_ARRAY_DELIMITER_HIVE2) != null) { - textParams.setArrayDelimiter(delimiter.get(PROP_ARRAY_DELIMITER_HIVE2)); - } else if (delimiter.get(PROP_ARRAY_DELIMITER_HIVE3) != null) { - textParams.setArrayDelimiter(delimiter.get(PROP_ARRAY_DELIMITER_HIVE3)); + textParams.setMapkvDelimiter(delimiter.getOrDefault(PROP_MAP_KV_DELIMITER, DEFAULT_MAP_KV_DELIMITER)); + + // textParams.collection_delimiter field is map, array and struct delimiter; + if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2) != null) { + textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE2)); + } else if (delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3) != null) { + textParams.setCollectionDelimiter(delimiter.get(PROP_COLLECTION_DELIMITER_HIVE3)); } else { - textParams.setArrayDelimiter(DEFAULT_ARRAY_DELIMITER); + 
textParams.setCollectionDelimiter(DEFAULT_COLLECTION_DELIMITER); } TFileAttributes fileAttributes = new TFileAttributes(); fileAttributes.setTextParams(textParams); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 9318b593a397cb..d3f17845f2e51c 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -243,7 +243,8 @@ struct TEsScanRange { struct TFileTextScanRangeParams { 1: optional string column_separator; 2: optional string line_delimiter; - 3: optional string array_delimiter; + 3: optional string collection_delimiter;// array ,map ,struct delimiter + 4: optional string mapkv_delimiter; } struct TFileScanSlotInfo { diff --git a/regression-test/data/external_table_p2/hive/test_hive_text_complex_type.out b/regression-test/data/external_table_p2/hive/test_hive_text_complex_type.out new file mode 100644 index 00000000000000..a04b9c1def9fe6 --- /dev/null +++ b/regression-test/data/external_table_p2/hive/test_hive_text_complex_type.out @@ -0,0 +1,15 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql1 -- +1 {101:1} {102:10} {"field1":100} {"field2":2000000} {"field3":300000000} {"field4":3.14} {"field5":3.14159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 20, 3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} +2 {201:1} {202:11} {"field1":200} {"field2":9000000} {"field3":8000000000} {"field4":9.13321} {"field5":322.14159} {203:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 201, 300011000, 44444444444, 3.14, 3.14159, "world", 2023-07-28 12:34:56.000000, 2023-06-28} +3 {201:1} {202:10} {"field1":120} {"field2":44440000} {"field3":700000000} {"field4":3.100004} {"field5":3.00014159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 700, 300011000, 3333333334, 3.00014, 3.3314159, "hello world", 2023-07-28 01:34:56.000000, 2023-07-27} +10 {101:1, 102:1, 103:1} {102:10, 104:1, 105:2} {"field1":100, "field0":100} {"field2":3000000} {"field3":300000000} {"field4":3.14, "hello world":0.111, "hell0":7.001} {"field5":3.14159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000, "field000006":2023-07-08 12:34:57.000000, "field2432456":2023-07-28 12:34:50.000000} {"field7":2023-07-28} {1, 1, 20, 3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} +11 {101:1, 102:1, 13:1, 12:1} {102:10, 14:1, 15:2, 12:10} {"field1":100, "fie88ld0":100, "fieweld0":100, "fieeeld1":100, "fieeeld0":100, "feeield0":100, "feeield1":100, "firreld0":100, "field0":100} {"field2":3000000, "abcd":4000000, "1231":3000000} {"fi7eld3":300000000, "field30":300000000, "fielwwd3":300000000, "fi055":300000000, "field7":300000121323} {"field4":3.14, "hello world":0.111, "hell0":7.001} {"field5":3.14159} {103:"Hello", 0:"hello"} {"field6":2023-07-28 12:34:56.000000, "field000006":2023-07-08 12:34:57.000000, "field2432456":2023-07-28 12:34:50.000000} {"field7":2023-07-28} {1, 1, 20, 
3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} + +-- !sql2 -- +1 {101:1} {102:10} {"field1":100} {"field2":2000000} {"field3":300000000} {"field4":3.14} {"field5":3.14159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 20, 3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} +2 {201:1} {202:11} {"field1":200} {"field2":9000000} {"field3":8000000000} {"field4":9.13321} {"field5":322.14159} {203:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 201, 300011000, 44444444444, 3.14, 3.14159, "world", 2023-07-28 12:34:56.000000, 2023-06-28} +3 {201:1} {202:10} {"field1":120} {"field2":44440000} {"field3":700000000} {"field4":3.100004} {"field5":3.00014159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000} {"field7":2023-07-28} {1, 1, 700, 300011000, 3333333334, 3.00014, 3.3314159, "hello world", 2023-07-28 01:34:56.000000, 2023-07-27} +10 {101:1, 102:1, 103:1} {102:10, 104:1, 105:2} {"field1":100, "field0":100} {"field2":3000000} {"field3":300000000} {"field4":3.14, "hello world":0.111, "hell0":7.001} {"field5":3.14159} {103:"Hello"} {"field6":2023-07-28 12:34:56.000000, "field000006":2023-07-08 12:34:57.000000, "field2432456":2023-07-28 12:34:50.000000} {"field7":2023-07-28} {1, 1, 20, 3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} +11 {101:1, 102:1, 13:1, 12:1} {102:10, 14:1, 15:2, 12:10} {"field1":100, "fie88ld0":100, "fieweld0":100, "fieeeld1":100, "fieeeld0":100, "feeield0":100, "feeield1":100, "firreld0":100, "field0":100} {"field2":3000000, "abcd":4000000, "1231":3000000} {"fi7eld3":300000000, "field30":300000000, "fielwwd3":300000000, "fi055":300000000, "field7":300000121323} {"field4":3.14, "hello world":0.111, "hell0":7.001} {"field5":3.14159} {103:"Hello", 0:"hello"} {"field6":2023-07-28 12:34:56.000000, "field000006":2023-07-08 12:34:57.000000, "field2432456":2023-07-28 12:34:50.000000} 
{"field7":2023-07-28} {1, 1, 20, 3000000, 44444444444, 3.14, 3.14159, "Hello", 2023-07-28 12:34:56.000000, 2023-07-28} + diff --git a/regression-test/suites/external_table_p2/hive/test_hive_text_complex_type.groovy b/regression-test/suites/external_table_p2/hive/test_hive_text_complex_type.groovy new file mode 100644 index 00000000000000..8ea9f74135966d --- /dev/null +++ b/regression-test/suites/external_table_p2/hive/test_hive_text_complex_type.groovy @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_hive_text_complex_type", "p2,external,hive,external_remote,external_remote_hive") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_hive_text_complex_type" + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + + sql """ use multi_catalog """ + + qt_sql1 """ select * from hive_text_complex_type order by column1; """ + + qt_sql2 """ select * from hive_text_complex_type_delimiter order by column1; """ + + + } +} +