Skip to content

Commit

Permalink
[opt](hive) Optimize select count(*) statements by pushing the aggregation down to Parquet files in Hive.
Browse files Browse the repository at this point in the history
  • Loading branch information
hubgeter committed Jul 26, 2023
1 parent 3fe8b2f commit 3800417
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 6 deletions.
2 changes: 1 addition & 1 deletion be/src/vec/exec/scan/new_olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ Status NewOlapScanner::_init_tablet_reader_params(
// if the table with rowset [0-x] or [0-1] [2-y], and [0-1] is empty
const bool single_version = _tablet_reader_params.has_single_version();

if (_state->skip_storage_engine_merge()) {
if (_state->skip_storage_engine_merge()) {
_tablet_reader_params.direct_mode = true;
_aggregation = true;
} else {
Expand Down
10 changes: 5 additions & 5 deletions regression-test/conf/regression-conf.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ defaultDb = "regression_test"
// init cmd like: select @@session.tx_read_only
// at each time we connect.
// add allowLoadLocalInfile so that the jdbc can execute mysql load data from client.
jdbcUrl = "jdbc:mysql://127.0.0.1:9030/?useLocalSessionState=true&allowLoadLocalInfile=true"
targetJdbcUrl = "jdbc:mysql://127.0.0.1:9030/?useLocalSessionState=true&allowLoadLocalInfile=true"
jdbcUrl = "jdbc:mysql://127.0.0.1:55557/?useLocalSessionState=true&allowLoadLocalInfile=true"
targetJdbcUrl = "jdbc:mysql://127.0.0.1:55557/?useLocalSessionState=true&allowLoadLocalInfile=true"
jdbcUser = "root"
jdbcPassword = ""

Expand All @@ -34,7 +34,7 @@ feTargetThriftAddress = "127.0.0.1:9020"
feSyncerUser = "root"
feSyncerPassword = ""

feHttpAddress = "127.0.0.1:8030"
feHttpAddress = "127.0.0.1:55555"
feHttpUser = "root"
feHttpPassword = ""

Expand Down Expand Up @@ -101,8 +101,8 @@ es_8_port=39200


//hive catalog test config for bigdata
enableExternalHiveTest = false
extHiveHmsHost = "***.**.**.**"
enableExternalHiveTest = true
extHiveHmsHost = "172.21.16.47"
extHiveHmsPort = 7004
extHdfsPort = 4007
extHiveHmsUser = "****"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
0 ALGERIA 0 haggle. carefully final deposits detect slyly agai
1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon
2 BRAZIL 1 y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
3 CANADA 1 eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
18 CHINA 2 c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos
4 EGYPT 4 y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
5 ETHIOPIA 0 ven packages wake quickly. regu
6 FRANCE 3 refully final requests. regular, ironi
7 GERMANY 3 l platelets. regular accounts x-ray: unusual, regular acco
8 INDIA 2 ss excuses cajole slyly across the packages. deposits print aroun
9 INDONESIA 2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
10 IRAN 4 efully alongside of the slyly final dependencies.
11 IRAQ 4 nic deposits boost atop the quickly final requests? quickly regula
12 JAPAN 2 ously. final, express gifts cajole a
13 JORDAN 4 ic deposits are blithely about the carefully regular pa
14 KENYA 0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t
15 MOROCCO 0 rns. blithely bold courts among the closely regular packages use furiously bold platelets?
16 MOZAMBIQUE 0 s. ironic, unusual asymptotes wake blithely r
17 PERU 1 platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun
19 ROMANIA 3 ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account
22 RUSSIA 3 requests against the platelets use never according to the quickly regular pint
20 SAUDI ARABIA 4 ts. silent requests haggle. closely express packages sleep across the blithely
23 UNITED KINGDOM 3 eans boost carefully special requests. accounts are. carefull
24 UNITED STATES 1 y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
21 VIETNAM 2 hely enticingly express accounts. even, final

-- !sql --
25

-- !sql --
25

-- !sql --
0

-- !sql --
5

-- !sql --
4

-- !sql --
0

-- !sql --
5
5
5
5
5

-- !sql --
5999989709

-- !sql --
200000000

-- !sql --
200000000

-- !sql --
3995860
3996114
3997119
3997177
3997193
3997623
3998060
3998197
3998199
3998205
3998246
3998259
3998308
3998860
3998903
3999137
3999286
3999411
3999441
3999477
3999643
3999670
3999687
3999830
4000095
4000151
4000164
4000268
4000572
4000594
4000664
4000672
4000711
4001091
4001127
4001273
4001351
4001463
4001520
4001568
4001718
4001940
4001942
4002064
4002067
4002305
4002815
4002966
4003245
4003749

-- !sql --
3999286

-- !sql --
210000000

-- !sql --
1

-- !sql --
200000000

-- !sql --
200000000

-- !sql --
3995860
3996114
3997119

-- !sql --
210000000

-- !sql --
ALGERIA
ARGENTINA

-- !sql --
25

-- !sql --
0

-- !sql --
1

-- !sql --
5
5
5
5
5

Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite (group "p2") that runs count/min/max queries against Hive
// tables reached through an HMS catalog and compares each result with the
// recorded expected output. The whole suite is a no-op unless the regression
// config sets enableExternalHiveTest to "true".
suite("test_select_count_optimize", "p2") {
    String hiveTestSwitch = context.config.otherConfigs.get("enableExternalHiveTest")
    // Guard clause: a missing switch or anything other than "true" (case-insensitive) skips the suite.
    if (!"true".equalsIgnoreCase(hiveTestSwitch)) {
        return
    }

    // Metastore endpoint comes from the regression config.
    String hmsHost = context.config.otherConfigs.get("extHiveHmsHost")
    String hmsPort = context.config.otherConfigs.get("extHiveHmsPort")
    String catalogName = "test_select_count_optimize"

    // Recreate the catalog from scratch so a leftover one from an earlier run cannot interfere.
    sql """drop catalog if exists ${catalogName};"""
    sql """
        create catalog if not exists ${catalogName} properties (
            'type'='hms',
            'hadoop.username' = 'hadoop',
            'hive.metastore.uris' = 'thrift://${hmsHost}:${hmsPort}'
        );
    """
    logger.info("catalog ${catalogName} created")

    sql """switch ${catalogName};"""
    logger.info("switched to catalog ${catalogName}")

    // Raise the query timeout before the queries over the tpch_1000_* tables run.
    sql """ set query_timeout=3600; """

    // ---- Parquet tables ----
    // Full dump of the small nation table, ordered so the output is stable.
    qt_sql """ select * from tpch_1000_parquet.nation order by n_name,n_regionkey,n_nationkey,n_comment ; """

    // count() with star, constant and NULL arguments.
    qt_sql """ select count(*) from tpch_1000_parquet.nation; """

    qt_sql """ select count(1024) from tpch_1000_parquet.nation; """

    qt_sql """ select count(null) from tpch_1000_parquet.nation; """

    // count(*) combined with a filter, plus max/min and group-by variants.
    qt_sql """ select count(*) from tpch_1000_parquet.nation where n_regionkey = 0; """

    qt_sql """ select max(n_regionkey) from tpch_1000_parquet.nation ;"""

    qt_sql """ select min(n_regionkey) from tpch_1000_parquet.nation ; """

    qt_sql """ select count(*) as a from tpch_1000_parquet.nation group by n_regionkey order by a; """

    // Same query shapes on the lineitem and part tables.
    qt_sql """ select count(*) from tpch_1000_parquet.lineitem; """

    qt_sql """ select count(*) from tpch_1000_parquet.part; """

    qt_sql """ select count(p_partkey) from tpch_1000_parquet.part; """

    qt_sql """ select count(*) as sz from tpch_1000_parquet.part group by p_size order by sz ;"""

    qt_sql """ select count(*) from tpch_1000_parquet.part where p_size = 1; """

    qt_sql """ select count(*) from user_profile.hive_hll_user_profile_wide_table_parquet; """

    // ---- ORC tables ----
    qt_sql """ select count(*) from tpch_1000_orc.part where p_partkey=1; """

    qt_sql """ select max(p_partkey) from tpch_1000_orc.part ; """

    qt_sql """ select count(p_comment) from tpch_1000_orc.part; """

    qt_sql """ select count(*) as a from tpch_1000_orc.part group by p_size order by a limit 3 ; """

    qt_sql """ select count(*) from user_profile.hive_hll_user_profile_wide_table_orc ; """

    // ---- Tables in the tpch_1000 database (storage format is not visible from this file) ----
    qt_sql """ select n_name from tpch_1000.nation order by n_name limit 2; """

    qt_sql """ select count(*) from tpch_1000.nation; """

    qt_sql """ select min(n_regionkey) from tpch_1000.nation ; """

    qt_sql """ select count(*) from tpch_1000.nation where n_nationkey=5;"""

    qt_sql """ select count(*) as a from tpch_1000.nation group by n_regionkey order by a;"""
}

0 comments on commit 3800417

Please sign in to comment.