[test](inverted index) Add cases for inverted index format v2 (#38132)
## Proposed changes

1. Throw an exception when attempting to change the inverted index storage format of an existing table (see the sketch after this list)
2. Add test cases for inverted index format v2:
    - test_add_build_index_with_format_v2
    - test_create_table_with_format_v2
    - test_cumulative_compaction_with_format_v2
    - test_drop_column_with_format_v2
    - test_drop_index_with_format_v2
    - test_index_change_format
    - test_mor_table_with_format_v2
    - test_mow_table_with_format_v2
    - test_recover_with_format_v2
    - test_rename_column_with_format_v2
    - test_single_replica_compaction_with_format_v2
3. The test case `test_rename_column_with_format_v2` also covers #38079.
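
For illustration, a minimal sketch of the behavior added in item 1. The table name `example_tbl` is made up for this example; the property name and the rejected operation come from the diff below:

```sql
-- Hypothetical table for illustration only; any table with the property set behaves the same.
CREATE TABLE example_tbl (
    `id` int(11) NULL,
    `name` varchar(255) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
    "replication_allocation" = "tag.location.default: 1",
    "inverted_index_storage_format" = "V2"
);

-- After this change, modifying the property fails analysis with an error like:
--   Property inverted_index_storage_format is not allowed to change
ALTER TABLE example_tbl SET ("inverted_index_storage_format" = "V1");
```
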
qidaye authored and dataroaring committed Jul 24, 2024
1 parent a4928f0 commit 3650189
Showing 22 changed files with 1,985 additions and 0 deletions.
@@ -260,6 +260,10 @@ public void analyze(Analyzer analyzer) throws AnalysisException {
            }
            this.needTableStable = false;
            this.opType = AlterOpType.MODIFY_TABLE_PROPERTY_SYNC;
        } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT)) {
            throw new AnalysisException(
                    "Property "
                    + PropertyAnalyzer.PROPERTIES_INVERTED_INDEX_STORAGE_FORMAT + " is not allowed to change");
        } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_ENABLE_SINGLE_REPLICA_COMPACTION)) {
            if (!properties.get(PropertyAnalyzer.PROPERTIES_ENABLE_SINGLE_REPLICA_COMPACTION).equalsIgnoreCase("true")
                    && !properties.get(PropertyAnalyzer
@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,9 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

-- !select_default2 --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,6 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Shenzhen 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20

-- !select_default2 --
4 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Shenzhen 10 1 \N \N \N \N 2020-01-05T00:00 1 34 20

@@ -0,0 +1,7 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

-- !select_default2 --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

@@ -0,0 +1,11 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 andy 100
2 andy 100
3 andy 100

-- !sql --
1 andy 100
2 andy 100
3 andy 100

@@ -0,0 +1,9 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

-- !select_default2 --
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-01T00:00 2020-01-01T00:00 2017-10-01T11:11:11.170 2017-10-01T11:11:11.110111 2020-01-01T00:00 1 30 20
1 2017-10-01 2017-10-01 2017-10-01T11:11:11.110 2017-10-01T11:11:11.110111 Beijing 10 1 2020-01-02T00:00 2020-01-02T00:00 2017-10-01T11:11:11.160 2017-10-01T11:11:11.100111 2020-01-02T00:00 1 31 19

@@ -0,0 +1,169 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

suite("test_add_build_index_with_format_v2", "inverted_index_format_v2"){
def tableName = "test_add_build_index_with_format_v2"

def calc_file_crc_on_tablet = { ip, port, tablet ->
return curl("GET", String.format("http://%s:%s/api/calc_crc?tablet_id=%s", ip, port, tablet))
}
def backendId_to_backendIP = [:]
def backendId_to_backendHttpPort = [:]
getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);

def timeout = 60000
def delta_time = 1000
def alter_res = "null"
def useTime = 0
def wait_for_latest_op_on_table_finish = { table_name, OpTimeout ->
for(int t = delta_time; t <= OpTimeout; t += delta_time){
alter_res = sql """SHOW ALTER TABLE COLUMN WHERE TableName = "${table_name}" ORDER BY CreateTime DESC LIMIT 1;"""
alter_res = alter_res.toString()
if(alter_res.contains("FINISHED")) {
sleep(3000) // wait change table state to normal
logger.info(table_name + " latest alter job finished, detail: " + alter_res)
break
}
useTime = t
sleep(delta_time)
}
assertTrue(useTime <= OpTimeout, "wait_for_latest_op_on_table_finish timeout")
}

def wait_for_build_index_on_partition_finish = { table_name, OpTimeout ->
for(int t = delta_time; t <= OpTimeout; t += delta_time){
alter_res = sql """SHOW BUILD INDEX WHERE TableName = "${table_name}";"""
def expected_finished_num = alter_res.size();
def finished_num = 0;
for (int i = 0; i < expected_finished_num; i++) {
logger.info(table_name + " build index job state: " + alter_res[i][7] + i)
if (alter_res[i][7] == "FINISHED") {
++finished_num;
}
}
if (finished_num == expected_finished_num) {
logger.info(table_name + " all build index jobs finished, detail: " + alter_res)
break
}
useTime = t
sleep(delta_time)
}
assertTrue(useTime <= OpTimeout, "wait_for_build_index_on_partition_finish timeout")
}

sql "DROP TABLE IF EXISTS ${tableName}"

sql """
CREATE TABLE ${tableName} (
`id` int(11) NULL,
`name` varchar(255) NULL,
`score` int(11) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"inverted_index_storage_format" = "V2",
"disable_auto_compaction" = "true"
);
"""
sql """ INSERT INTO ${tableName} VALUES (1, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (1, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (2, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """

// add index
sql """
ALTER TABLE ${tableName}
ADD INDEX idx_name (name) using inverted;
"""
wait_for_latest_op_on_table_finish(tableName, timeout)

sql """
ALTER TABLE ${tableName}
ADD INDEX idx_score (score) using inverted;
"""
wait_for_latest_op_on_table_finish(tableName, timeout)

// show index after add index
def show_result = sql_return_maparray "show index from ${tableName}"
logger.info("show index from " + tableName + " result: " + show_result)
assertEquals(show_result[0].Key_name, "idx_name")
assertEquals(show_result[1].Key_name, "idx_score")

def tablets = sql_return_maparray """ show tablets from ${tableName}; """
String tablet_id = tablets[0].TabletId
String backend_id = tablets[0].BackendId
String ip = backendId_to_backendIP.get(backend_id)
String port = backendId_to_backendHttpPort.get(backend_id)
def (code, out, err) = calc_file_crc_on_tablet(ip, port, tablet_id)
logger.info("Run calc_file_crc_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
assertTrue(code == 0)
assertTrue(out.contains("crc_value"))
assertTrue(out.contains("used_time_ms"))
assertEquals("0", parseJson(out.trim()).start_version)
assertEquals("7", parseJson(out.trim()).end_version)
assertEquals("7", parseJson(out.trim()).rowset_count)
// In cloud mode, ADD INDEX runs as a direct (full) schema change, while in local mode it is a light schema change,
// so the file count at this point is 12 in cloud mode and 6 in local mode.
if (isCloudMode()) {
assertEquals("12", parseJson(out.trim()).file_count)
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"
return
} else {
assertEquals("6", parseJson(out.trim()).file_count)
}

// build index
sql """
BUILD INDEX idx_name ON ${tableName};
"""
wait_for_build_index_on_partition_finish(tableName, timeout)

(code, out, err) = calc_file_crc_on_tablet(ip, port, tablet_id)
logger.info("Run calc_file_crc_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
assertTrue(code == 0)
assertTrue(out.contains("crc_value"))
assertTrue(out.contains("used_time_ms"))
assertEquals("0", parseJson(out.trim()).start_version)
assertEquals("7", parseJson(out.trim()).end_version)
assertEquals("7", parseJson(out.trim()).rowset_count)
assertEquals("12", parseJson(out.trim()).file_count)

// build index
sql """
BUILD INDEX idx_score ON ${tableName};
"""
wait_for_build_index_on_partition_finish(tableName, timeout)

(code, out, err) = calc_file_crc_on_tablet(ip, port, tablet_id)
logger.info("Run calc_file_crc_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
assertTrue(code == 0)
assertTrue(out.contains("crc_value"))
assertTrue(out.contains("used_time_ms"))
assertEquals("0", parseJson(out.trim()).start_version)
assertEquals("7", parseJson(out.trim()).end_version)
assertEquals("7", parseJson(out.trim()).rowset_count)
assertEquals("12", parseJson(out.trim()).file_count)

qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"
}
@@ -0,0 +1,72 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import org.codehaus.groovy.runtime.IOGroovyMethods

suite("test_create_table_with_format_v2", "inverted_index_format_v2"){
def tableName = "test_create_table_with_format_v2"

def calc_file_crc_on_tablet = { ip, port, tablet ->
return curl("GET", String.format("http://%s:%s/api/calc_crc?tablet_id=%s", ip, port, tablet))
}
def backendId_to_backendIP = [:]
def backendId_to_backendHttpPort = [:]
getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);

sql "DROP TABLE IF EXISTS ${tableName}"

sql """
CREATE TABLE ${tableName} (
`id` int(11) NULL,
`name` varchar(255) NULL,
`score` int(11) NULL,
index index_name (name) using inverted,
index index_score (score) using inverted
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"inverted_index_storage_format" = "V2",
"disable_auto_compaction" = "true"
);
"""
sql """ INSERT INTO ${tableName} VALUES (1, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (1, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (2, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """

qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"

def tablets = sql_return_maparray """ show tablets from ${tableName}; """
String tablet_id = tablets[0].TabletId
String backend_id = tablets[0].BackendId
String ip = backendId_to_backendIP.get(backend_id)
String port = backendId_to_backendHttpPort.get(backend_id)
def (code, out, err) = calc_file_crc_on_tablet(ip, port, tablet_id)
logger.info("Run calc_file_crc_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
assertTrue(code == 0)
assertTrue(out.contains("crc_value"))
assertTrue(out.contains("used_time_ms"))
assertEquals("0", parseJson(out.trim()).start_version)
assertEquals("7", parseJson(out.trim()).end_version)
assertEquals("7", parseJson(out.trim()).rowset_count)
assertEquals("12", parseJson(out.trim()).file_count)
}