diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index af1266cbbdce31..de8afb092367d3 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -74,7 +74,7 @@ static void fill_struct_null_map(FieldSchema* field, NullMap& null_map, null_map[pos++] = 1; } } - null_map.resize(pos + 1); + null_map.resize(pos); } static void fill_array_offset(FieldSchema* field, ColumnArray::Offsets64& offsets_data, @@ -394,10 +394,10 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& doris_column, DataType continue; } bool is_null = def_level < _field_schema->definition_level; - if (prev_is_null == is_null) { - if (USHRT_MAX - null_map.back() >= loop_read) { - null_map.back() += loop_read; - } + if (prev_is_null == is_null && (USHRT_MAX - null_map.back() >= loop_read)) { + // If the values in this loop have the same nullness as the previous run and the run counter + // will not exceed USHRT_MAX, extend the last null map entry instead of appending a new one + null_map.back() += loop_read; } else { if (!(prev_is_null ^ is_null)) { null_map.emplace_back(0); @@ -633,10 +633,14 @@ Status MapColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& t bool value_eof = false; RETURN_IF_ERROR(_key_reader->read_column_data(key_column, key_type, select_vector, batch_size, &key_rows, &key_eof, is_dict_filter)); - select_vector.reset(); - RETURN_IF_ERROR(_value_reader->read_column_data(value_column, value_type, select_vector, - batch_size, &value_rows, &value_eof, - is_dict_filter)); + while (value_rows < key_rows && !value_eof) { + size_t loop_rows = 0; + select_vector.reset(); + RETURN_IF_ERROR(_value_reader->read_column_data(value_column, value_type, select_vector, + key_rows - value_rows, &loop_rows, + &value_eof, is_dict_filter)); + value_rows += loop_rows; + } DCHECK_EQ(key_rows, value_rows); DCHECK_EQ(key_eof, value_eof); *read_rows = key_rows; @@ -686,16 +690,24 
@@ Status StructColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr ColumnPtr& doris_field = doris_struct.get_column_ptr(i); DataTypePtr& doris_type = const_cast(doris_struct_type->get_element(i)); select_vector.reset(); - size_t loop_rows = 0; - bool loop_eof = false; - _child_readers[i]->read_column_data(doris_field, doris_type, select_vector, batch_size, - &loop_rows, &loop_eof, is_dict_filter); - if (i != 0) { - DCHECK_EQ(*read_rows, loop_rows); - DCHECK_EQ(*eof, loop_eof); + size_t field_rows = 0; + bool field_eof = false; + if (i == 0) { + _child_readers[i]->read_column_data(doris_field, doris_type, select_vector, batch_size, + &field_rows, &field_eof, is_dict_filter); + *read_rows = field_rows; + *eof = field_eof; } else { - *read_rows = loop_rows; - *eof = loop_eof; + while (field_rows < *read_rows && !field_eof) { + size_t loop_rows = 0; + select_vector.reset(); + _child_readers[i]->read_column_data(doris_field, doris_type, select_vector, + *read_rows - field_rows, &loop_rows, &field_eof, + is_dict_filter); + field_rows += loop_rows; + } + DCHECK_EQ(*read_rows, field_rows); + DCHECK_EQ(*eof, field_eof); } } diff --git a/regression-test/data/external_table_emr_p2/hive/test_complex_types.out b/regression-test/data/external_table_emr_p2/hive/test_complex_types.out new file mode 100644 index 00000000000000..88d62b3841e7a2 --- /dev/null +++ b/regression-test/data/external_table_emr_p2/hive/test_complex_types.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !null_struct_element -- +0 + +-- !map_key_select -- +38111 0.770169659057425 + +-- !map_keys -- +["9wXr9n-TBm9Wyt-r8H-SkAq", "CPDH4G-ZXGPkku-3wY-ktaQ", "RvNlMt-HHjHN5M-VjP-xHAI", "qKIhKy-Ws344os-haX-2pmT", "DOJJ5l-UEkwVMs-x9F-HifD", "m871g8-1eFi7jt-oBq-S0yc", "wXugVP-v2fc6IF-DeU-On3T", "B0mXFX-QvgUgo7-Dih-6rDu", "E9zv3F-xMqSbMa-il4-FuDg", "msuFIN-ZkKO8TY-tu4-veH0", "0rSUyl-Un07aIW-KAx-WHnX", "XvbmO8-WA6oAqc-ihc-s8IL", "G6B6RD-AicAlZb-16u-Pn1I", "coDK0Q-tMg1294-JMQ-ZWQu", "4c0aWh-yhL6BOX-rRu-1n0r", "G4iUcG-ZhWw62v-VLt-n6lH", "IIB7qD-WQistwT-Vux-0c9B", "7cTyuR-5ssXm2S-sJR-JTIZ", "3KPhSW-FICEImf-bba-PCiQ", "qQ7Yup-XBeQGFz-3EP-q0vd", "gjRxRo-Af9Oqx5-IzN-3B9d", "1zSj57-nNZpZ0b-ZKn-BeY0", "sTK0mn-wkp1Xp5-PRS-txVM", "sLrM0s-1KnXLb6-1A3-Z1vJ", "UkYdkP-k7YKiKS-Fxp-qAcI", "v8p0YV-R5pAKZ8-UMr-P1bQ", "RJdTav-jk3os9Z-yRk-WhwV", "lB91ic-pNFZkE4-hBx-e104", "gmRV6e-GKJUg0L-ok7-J6Lz", "o3LUyz-7Toh54O-czG-Xep8", "8fzHhM-4otPAss-qTm-phg8", "kZsHhe-vfClpAR-b3H-7aHl", "TdZnlG-BUgMs7Z-iBM-9c3v", "RipJXn-p4gZkyy-1ZY-xkWe", "ke730M-LmMjGdc-EFy-0LUK", "jBSExJ-GXTc5TB-NSa-xBEd", "kI7Cc8-DSg5RdF-qLo-2bhe", "bAn3VI-x6xXWpB-zWe-G5CJ", "jAil30-kbt6K6z-kbr-8foB", "IHIwNs-1QGqy8l-i8i-vu4G", "p0IbZr-tHCtwiV-0hq-NtIt", "iggdij-M3YNBpd-yiD-a8Ro", "BrJEww-C4LpgaS-AeB-So4U", "xnO3Fi-8rXcpgj-zpm-EmuX", "5w57da-phYtDUx-px2-6frG", "31MfFs-1WyUAr6-gQ0-xLxY", "ryBl2p-rSoPhwd-WPv-NCAU", "KN5TEt-gOfJ4Hy-3pp-HiBa", "ytqxb8-utXXjUf-m41-i6ir", "WhGUGz-zzyvEpD-9BM-2bVf", "dE1tFe-zHClt4u-0cY-TQnC", "MveBhC-g29c0dU-tCT-R6nC", "JTpxue-xSqAhGo-AZk-zB1t", "92TVdU-qDJesPN-0lb-JOd3", "0PODnh-IciBdOZ-0CS-oNeL", "KkkW6x-TiemXQw-OiH-dZ9s", "PIs5Aj-g02HRXw-957-GD2z", "yJIzuw-au6460e-0Tl-XYEJ", "KHvMCD-OQDL0eX-nqK-TmEt", "6QJJgV-Z3IZ1Rf-wyv-rIJ6", "qA9ycc-sR2qm6P-PtB-AIax", "uDeuEb-B0t0Ljr-dWk-jkC4", "5vPy52-ygN0MMH-UB4-nZQL", "zbbmrQ-pT3uAuU-Kae-HjM5", "3QShHS-7RwUB10-0W2-H4Qy", "PMc4QI-5lNajXU-f8m-RGIi", "O9t3dl-q8YHozj-saR-A3Jm", "k4eH3O-aHnTKY7-ADp-4Vsi", 
"RA4epe-lWWnOff-bpM-bSR4", "6ysu2R-gSc5dwU-cv0-LqCJ", "tVl3TY-o42NMVO-k3S-iqOY", "NMgTrr-W1RrCvP-Zaf-paL7", "d1CJmF-CeG5asM-xms-1dwN", "N1D30g-zFjiGzI-eHC-Sof4", "tOhfKu-Gdtf9Ne-KwA-JdHV", "XLzwK0-6ocGDrS-TtU-wlEI", "XDgZfb-Sxc45Zn-mVO-S2QO", "GQD7a0-fnt9BZs-Kvh-dPbJ", "9dJxj9-HFwEQMY-6p9-s8Vt", "1qU9pA-QJGAna9-JoG-H7GS", "rKIkxA-UnGWYSn-0li-ziuB", "tbPazx-IjUrQ8J-NZe-VOPL", "xBpSIv-U6ojkK7-9p5-LviD", "88bnWI-pxrKa7T-n2d-tXk9", "0XviXp-9ksT8s0-fDy-35SW", "e0XauA-GNRALmd-SM2-Y4Gf", "kyvYBk-Bk5M4Xq-gxX-kE1B", "dIiQzS-5sT4ogL-6IV-tLmb", "OlGOyH-dyL1nzj-B2M-z8ir", "zC9Gtn-x8hpfPD-KOu-k31W", "qSq3z2-Lpv0YcB-hBq-Sabd", "LSyNyi-tBZUx1l-hAj-mwsx", "2c9aTP-hXloMK7-ufH-dgq6", "aXksHO-zARQxfo-sgS-8Bf4", "ioOXAL-eVUF0W8-vZx-ZeYX", "DXUkAP-A7SqnHj-V4U-PJfz", "cnzZXk-AOMepfN-hym-qbDH", "CMlAd6-8FF1yXs-fae-Izfv", "qiXnUv-e2PsJWm-tLF-KpjE", "Gfx3k9-JvXa7Wd-rI1-1e1E"] + +-- !map_values -- +[0.98055020292316664, 0.53302915957540542, 0.30024744873379805, 0.48563601750302665, 0.76871064251586241, 0.69935066449251015, 0.28493548088258069, 0.34734174551861408, 0.13500129443045072, 0.97081321037009394, 0.18583042639943448, 0.48863372645520731, 0.36354741695157655, 0.56408452689711752, 0.1374134087807577, 0.77665476474516226, 0.58353232966683177, 0.36544595471103491, 0.54797767099937644, 0.83799325421171922, 0.15665046278350814, 0.03371222042250388, 0.1699781825927229, 0.35796304950750779, 0.028092531855977265, 0.72042470298400274, 0.27604992564232056, 0.676890893219096, 0.035298786567000251, 0.022765783510278581, 0.097949917306254686, 0.52780628846133515, 0.13704041811391021, 0.54403524765808564, 0.72055406294199287, 0.13508529841959427, 0.41609464004318619, 0.29722954545629288, 0.92174265035856928, 0.58103998733474, 0.88454274363774732, 0.10179282672994228, 0.95471869739438919, 0.16801027847083416, 0.00084877454219867143, 0.16952415411069888, 0.67839217494332915, 0.71938183869710837, 0.930443435029246, 0.48466654693905176, 0.9924998940864419, 0.72382884810791481, 0.70535638177590088, 
0.97351607727767553, 0.77824997878692337, 0.74133042805481741, 0.75509839260333067, 0.87136604463221856, 0.92052096787926374, 0.34197248989722773, 0.36968069857555563, 0.030232598171523017, 0.024774526048626844, 0.97641291575255884, 0.59330575594702828, 0.76125115548318434, 0.37875822703363504, 0.93127304595441207, 0.67120835078024121, 0.16508080008436798, 0.22928664639590624, 0.37366653502681058, 0.20480644640806578, 0.083943559374968335, 0.84949796967318236, 0.43215562556626219, 0.35346682671980267, 0.87917004341027716, 0.22745275830152578, 0.048869685073594016, 0.79365981101741634, 0.54497173434159185, 0.76359394459683483, 0.085055861839866242, 0.35091150265891446, 0.96331917452389082, 0.39725339103896173, 0.46597592499192675, 0.15790512463284645, 0.78535655781075941, 0.98949199397456544, 0.93953657306559291, 0.20226076738266596, 0.16196368561927676, 0.51055695298416159, 0.4531109229280732, 0.25791342685970842, 0.79621090899157465, 0.27729692295394215, 0.93159020376070611] + +-- !map_contains_key -- +1077 [0.78055609958738448, 0.93034890022695593, 0.25295229975218769, 0.662270811026298, 0.664725297532439, 0.10194410917644769, 0.96140593006881736, 0.52781260099838434, 0.52875058412167075, 0.426116738236779, 0.42300502393871175, 0.53270263300536513, 0.60254817779426029, 0.27107336472576271, 0.613792118138183, 0.0021003027835629906, 0.32006750487285818, 0.54856110146602044, 0.51215105813137074, 0.51451366528053577] {"9wXr9n-TBm9Wyt-r8H-SkAq":0.93383290104809946, "CPDH4G-ZXGPkku-3wY-ktaQ":0.43552569633508809, "RvNlMt-HHjHN5M-VjP-xHAI":0.32634746118047819, "qKIhKy-Ws344os-haX-2pmT":0.565450203625137, "DOJJ5l-UEkwVMs-x9F-HifD":0.093756220108222377, "m871g8-1eFi7jt-oBq-S0yc":0.8819687247951038, "wXugVP-v2fc6IF-DeU-On3T":0.34482334864473108, "B0mXFX-QvgUgo7-Dih-6rDu":0.1914040395475467, "E9zv3F-xMqSbMa-il4-FuDg":0.38570218910843357, "msuFIN-ZkKO8TY-tu4-veH0":0.66461726530746279, "0rSUyl-Un07aIW-KAx-WHnX":0.35580099104309737, 
"XvbmO8-WA6oAqc-ihc-s8IL":0.40582064344114233, "G6B6RD-AicAlZb-16u-Pn1I":0.72035549468957494, "coDK0Q-tMg1294-JMQ-ZWQu":0.82363286277431857, "4c0aWh-yhL6BOX-rRu-1n0r":0.13980911842304278, "G4iUcG-ZhWw62v-VLt-n6lH":0.18382889782542144, "IIB7qD-WQistwT-Vux-0c9B":0.91743891443094583, "7cTyuR-5ssXm2S-sJR-JTIZ":0.81322372426728373, "3KPhSW-FICEImf-bba-PCiQ":0.63026435799435532, "qQ7Yup-XBeQGFz-3EP-q0vd":0.6109025726752364, "gjRxRo-Af9Oqx5-IzN-3B9d":0.92514684903269162, "1zSj57-nNZpZ0b-ZKn-BeY0":0.56284631091071435, "sTK0mn-wkp1Xp5-PRS-txVM":0.79058081295599958, "sLrM0s-1KnXLb6-1A3-Z1vJ":0.42345986776701572, "UkYdkP-k7YKiKS-Fxp-qAcI":0.75414012666798691, "v8p0YV-R5pAKZ8-UMr-P1bQ":0.29311525651106829, "RJdTav-jk3os9Z-yRk-WhwV":0.52638113097388772, "lB91ic-pNFZkE4-hBx-e104":0.6692292834321788, "gmRV6e-GKJUg0L-ok7-J6Lz":0.059247669596643515, "o3LUyz-7Toh54O-czG-Xep8":0.62841938211272641, "8fzHhM-4otPAss-qTm-phg8":0.89530024415370124, "kZsHhe-vfClpAR-b3H-7aHl":0.17750156127473993, "TdZnlG-BUgMs7Z-iBM-9c3v":0.27498394395046333, "RipJXn-p4gZkyy-1ZY-xkWe":0.054616268950389735, "ke730M-LmMjGdc-EFy-0LUK":0.30781761836448285, "jBSExJ-GXTc5TB-NSa-xBEd":0.66178278500540244, "kI7Cc8-DSg5RdF-qLo-2bhe":0.98357074613234885, "bAn3VI-x6xXWpB-zWe-G5CJ":0.21798212299794562, "jAil30-kbt6K6z-kbr-8foB":0.97880669772451379, "IHIwNs-1QGqy8l-i8i-vu4G":0.49679397412450588, "p0IbZr-tHCtwiV-0hq-NtIt":0.050183795109057017, "iggdij-M3YNBpd-yiD-a8Ro":0.982385582884686, "BrJEww-C4LpgaS-AeB-So4U":0.90248554155536553, "xnO3Fi-8rXcpgj-zpm-EmuX":0.20529118817468572, "5w57da-phYtDUx-px2-6frG":0.29690638791565971, "31MfFs-1WyUAr6-gQ0-xLxY":0.48795551283135086, "ryBl2p-rSoPhwd-WPv-NCAU":0.79544854844958068, "KN5TEt-gOfJ4Hy-3pp-HiBa":0.1533389643648807, "ytqxb8-utXXjUf-m41-i6ir":0.61502086737193573, "WhGUGz-zzyvEpD-9BM-2bVf":0.581040090228354, "dE1tFe-zHClt4u-0cY-TQnC":0.76089996323694564, "MveBhC-g29c0dU-tCT-R6nC":0.33457340282218506, "JTpxue-xSqAhGo-AZk-zB1t":0.35040302774880538, 
"92TVdU-qDJesPN-0lb-JOd3":0.73876949983198048, "0PODnh-IciBdOZ-0CS-oNeL":0.95159059657696443, "KkkW6x-TiemXQw-OiH-dZ9s":0.40824123319990813, "PIs5Aj-g02HRXw-957-GD2z":0.641526116451016, "yJIzuw-au6460e-0Tl-XYEJ":0.75219285303562355, "KHvMCD-OQDL0eX-nqK-TmEt":0.13096167278968263, "6QJJgV-Z3IZ1Rf-wyv-rIJ6":0.70071103877259622, "qA9ycc-sR2qm6P-PtB-AIax":0.44629776556459089, "uDeuEb-B0t0Ljr-dWk-jkC4":0.69046727674079578, "5vPy52-ygN0MMH-UB4-nZQL":0.60575965422000211, "zbbmrQ-pT3uAuU-Kae-HjM5":0.98126574986862791, "3QShHS-7RwUB10-0W2-H4Qy":0.41557608488608533, "PMc4QI-5lNajXU-f8m-RGIi":0.70464209768002883, "O9t3dl-q8YHozj-saR-A3Jm":0.85433449541965856, "k4eH3O-aHnTKY7-ADp-4Vsi":0.26558324547185574, "RA4epe-lWWnOff-bpM-bSR4":0.75232522102228827, "6ysu2R-gSc5dwU-cv0-LqCJ":0.78308993227167323, "tVl3TY-o42NMVO-k3S-iqOY":0.79238234012157993, "NMgTrr-W1RrCvP-Zaf-paL7":0.46869286547569355, "d1CJmF-CeG5asM-xms-1dwN":0.76229087810764928, "N1D30g-zFjiGzI-eHC-Sof4":0.847542878440137, "tOhfKu-Gdtf9Ne-KwA-JdHV":0.49992852174451541, "XLzwK0-6ocGDrS-TtU-wlEI":0.39853544027050947, "XDgZfb-Sxc45Zn-mVO-S2QO":0.057915803376441866, "GQD7a0-fnt9BZs-Kvh-dPbJ":0.663903859916476, "9dJxj9-HFwEQMY-6p9-s8Vt":0.21944075953054343, "1qU9pA-QJGAna9-JoG-H7GS":0.88774019472953825, "rKIkxA-UnGWYSn-0li-ziuB":0.16079062750364659, "tbPazx-IjUrQ8J-NZe-VOPL":0.68091669167975932, "xBpSIv-U6ojkK7-9p5-LviD":0.11956726473799006, "88bnWI-pxrKa7T-n2d-tXk9":0.19560689517877206, "0XviXp-9ksT8s0-fDy-35SW":0.8690659418822626, "e0XauA-GNRALmd-SM2-Y4Gf":0.68408168887520893, "kyvYBk-Bk5M4Xq-gxX-kE1B":0.7744771682336401, "dIiQzS-5sT4ogL-6IV-tLmb":0.0340772833497166, "OlGOyH-dyL1nzj-B2M-z8ir":0.3765608037933722, "zC9Gtn-x8hpfPD-KOu-k31W":0.864392047887076, "qSq3z2-Lpv0YcB-hBq-Sabd":0.15428476092466781, "LSyNyi-tBZUx1l-hAj-mwsx":0.30403432829870103, "2c9aTP-hXloMK7-ufH-dgq6":0.10168525529531069, "aXksHO-zARQxfo-sgS-8Bf4":0.54905330820199594, "ioOXAL-eVUF0W8-vZx-ZeYX":0.45281640384817845, 
"DXUkAP-A7SqnHj-V4U-PJfz":0.36074074474259388, "cnzZXk-AOMepfN-hym-qbDH":0.45873615005925683, "CMlAd6-8FF1yXs-fae-Izfv":0.075550197208259173, "qiXnUv-e2PsJWm-tLF-KpjE":0.94096810653636875, "Gfx3k9-JvXa7Wd-rI1-1e1E":0.74927933121782264} {"r8HXXQM4XHoI", 238221053, 2023-07-26 15:40:37.694000} + +-- !array_max -- +11028 + +-- !array_filter -- +11028 + +-- !array_last -- +0.9899828598260161 + diff --git a/regression-test/suites/external_table_emr_p2/hive/test_complex_types.groovy b/regression-test/suites/external_table_emr_p2/hive/test_complex_types.groovy new file mode 100644 index 00000000000000..2aaaa815fdd494 --- /dev/null +++ b/regression-test/suites/external_table_emr_p2/hive/test_complex_types.groovy @@ -0,0 +1,54 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_complex_types", "p2") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_complex_types" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + + sql """ use multi_catalog """ + + qt_null_struct_element """select count(struct_element(favor, 'tip')) from byd where id % 13 = 0""" + + qt_map_key_select """select id, singles["p0X72J-mkMe40O-vOa-opfI"] as map_key from byd where singles["p0X72J-mkMe40O-vOa-opfI"] is not null""" + + qt_map_keys """select map_keys(singles) from byd where id = 1077""" + + qt_map_values """select map_values(singles) from byd where id = 1433""" + + qt_map_contains_key """select * from byd where map_contains_key(singles, 'B0mXFX-QvgUgo7-Dih-6rDu') = 1""" + + qt_array_max """select count(array_max(capacity)) from byd where array_max(capacity) > 0.99""" + + qt_array_filter """select count(array_size(array_filter(i -> (i > 0.99), capacity))) from byd where array_size(array_filter(i -> (i > 0.99), capacity))""" + + qt_array_last """select max(array_last(i -> i > 0, capacity)) from byd where array_last(i -> i > 0, capacity) < 0.99""" + } +}