From 526a672bae3b08c373af989a21e46d925c051a6b Mon Sep 17 00:00:00 2001 From: wt Date: Mon, 16 Sep 2024 11:03:09 +0800 Subject: [PATCH] test: add more bitmap test cases (#36290) Signed-off-by: wangting0128 --- tests/python_client/base/client_base.py | 48 +++--- tests/python_client/common/code_mapping.py | 1 + tests/python_client/common/common_func.py | 56 +++++++ tests/python_client/common/common_params.py | 7 + tests/python_client/testcases/test_index.py | 113 +++++++++----- .../testcases/test_mix_scenes.py | 143 +++++++++++++++++- 6 files changed, 302 insertions(+), 66 deletions(-) diff --git a/tests/python_client/base/client_base.py b/tests/python_client/base/client_base.py index 2896c0b54c079..cd93925af0eb2 100644 --- a/tests/python_client/base/client_base.py +++ b/tests/python_client/base/client_base.py @@ -86,8 +86,8 @@ def _teardown_objects(self): rgs_list = self.utility_wrap.list_resource_groups()[0] for rg_name in self.resource_group_list: if rg_name is not None and rg_name in rgs_list: - rg = \ - self.utility_wrap.describe_resource_group(name=rg_name, check_task=ct.CheckTasks.check_nothing)[0] + rg = self.utility_wrap.describe_resource_group( + name=rg_name, check_task=ct.CheckTasks.check_nothing)[0] if isinstance(rg, ResourceGroupInfo): if rg.num_available_node > 0: self.utility_wrap.transfer_node(source=rg_name, @@ -443,27 +443,7 @@ def show_indexes(self, collection_obj: ApiCollectionWrapper = None): log.info("[TestcaseBase] Collection: `{0}` index: {1}".format(collection_obj.name, indexes)) return indexes - -class TestCaseClassBase(TestcaseBase): - """ - Setup objects on class - """ - - def setup_class(self): - log.info("[setup_class] " + " Start setup class ".center(100, "~")) - self._setup_objects(self) - - def teardown_class(self): - log.info("[teardown_class]" + " Start teardown class ".center(100, "~")) - self._teardown_objects(self) - - def setup_method(self, method): - log.info(" setup ".center(80, "*")) - log.info("[setup_method] Start setup test case %s." % method.__name__) - - def teardown_method(self, method): - log.info(" teardown ".center(80, "*")) - log.info("[teardown_method] Start teardown test case %s..." % method.__name__) + """ Property """ @property def all_scalar_fields(self): @@ -493,3 +473,25 @@ def bitmap_support_dtype_names(self): @property def bitmap_not_support_dtype_names(self): return list(set(self.all_scalar_fields) - set(self.bitmap_support_dtype_names)) + + +class TestCaseClassBase(TestcaseBase): + """ + Setup objects on class + """ + + def setup_class(self): + log.info("[setup_class] " + " Start setup class ".center(100, "~")) + self._setup_objects(self) + + def teardown_class(self): + log.info("[teardown_class]" + " Start teardown class ".center(100, "~")) + self._teardown_objects(self) + + def setup_method(self, method): + log.info(" setup ".center(80, "*")) + log.info("[setup_method] Start setup test case %s." % method.__name__) + + def teardown_method(self, method): + log.info(" teardown ".center(80, "*")) + log.info("[teardown_method] Start teardown test case %s..." % method.__name__) diff --git a/tests/python_client/common/code_mapping.py b/tests/python_client/common/code_mapping.py index 254b8a08604ca..74f87583e03a4 100644 --- a/tests/python_client/common/code_mapping.py +++ b/tests/python_client/common/code_mapping.py @@ -39,6 +39,7 @@ class IndexErrorMessage(ExceptionsMessage): CheckBitmapIndex = "bitmap index are only supported on bool, int, string and array field" CheckBitmapOnPK = "create bitmap index on primary key not supported" CheckBitmapCardinality = "failed to check bitmap cardinality limit, should be larger than 0 and smaller than 1000" + NotConfigable = "{0} is not configable index param" class QueryErrorMessage(ExceptionsMessage): diff --git a/tests/python_client/common/common_func.py b/tests/python_client/common/common_func.py index 4255e62ff849d..44381bdbc5453 100644 --- a/tests/python_client/common/common_func.py +++ b/tests/python_client/common/common_func.py @@ -17,6 +17,7 @@ from pymilvus import DataType, CollectionSchema from base.schema_wrapper import ApiCollectionSchemaWrapper, ApiFieldSchemaWrapper from common import common_type as ct +from common.common_params import ExprCheckParams from utils.util_log import test_log as log from customize.milvus_operator import MilvusOperator import pickle @@ -2061,6 +2062,48 @@ def gen_varchar_expression(expr_fields): return exprs +def gen_varchar_unicode_expression(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + (Expr.like(field, "国%").value, field, r'^国.*'), + (Expr.LIKE(field, "%中").value, field, r'.*中$'), + (Expr.AND(Expr.like(field, "%江").subset, Expr.LIKE(field, "麚%").subset).value, field, r'^麚.*江$'), + (Expr.And(Expr.like(field, "鄷%").subset, Expr.LIKE(field, "%薞").subset).value, field, r'^鄷.*薞$'), + (Expr.OR(Expr.like(field, "%核%").subset, Expr.LIKE(field, "%臥蜜").subset).value, field, fr'(?:核.*|.*臥蜜$)'), + (Expr.Or(Expr.like(field, "咴矷%").subset, Expr.LIKE(field, "%濉蠬%").subset).value, field, fr'(?:^咴矷.*|.*濉蠬)'), + ]) + return exprs + + +def gen_varchar_unicode_expression_array(expr_fields): + exprs = [] + for field in expr_fields: + exprs.extend([ + ExprCheckParams(field, Expr.ARRAY_CONTAINS(field, '"中"').value, 'set(["中"]).issubset({0})'), + ExprCheckParams(field, Expr.array_contains(field, '"国"').value, 'set(["国"]).issubset({0})'), + ExprCheckParams(field, Expr.ARRAY_CONTAINS_ALL(field, ["华"]).value, 'set(["华"]).issubset({0})'), + ExprCheckParams(field, Expr.array_contains_all(field, ["中", "国"]).value, 'set(["中", "国"]).issubset({0})'), + ExprCheckParams(field, Expr.ARRAY_CONTAINS_ANY(field, ["紅"]).value, 'not set(["紅"]).isdisjoint({0})'), + ExprCheckParams(field, Expr.array_contains_any(field, ["紅", "父", "环", "稵"]).value, + 'not set(["紅", "父", "环", "稵"]).isdisjoint({0})'), + ExprCheckParams(field, Expr.AND(Expr.ARRAY_CONTAINS(field, '"噜"').value, + Expr.ARRAY_CONTAINS_ANY(field, ["浮", "沮", "茫"]).value).value, + 'set(["噜"]).issubset({0}) and not set(["浮", "沮", "茫"]).isdisjoint({0})'), + ExprCheckParams(field, Expr.And(Expr.ARRAY_CONTAINS_ALL(field, ["爤"]).value, + Expr.array_contains_any(field, ["暁", "非", "鸳", "丹"]).value).value, + 'set(["爤"]).issubset({0}) and not set(["暁", "非", "鸳", "丹"]).isdisjoint({0})'), + ExprCheckParams(field, Expr.OR(Expr.array_contains(field, '"草"').value, + Expr.array_contains_all(field, ["昩", "苴"]).value).value, + 'set(["草"]).issubset({0}) or set(["昩", "苴"]).issubset({0})'), + ExprCheckParams(field, Expr.Or(Expr.ARRAY_CONTAINS_ANY(field, ["魡", "展", "隶", "韀", "脠", "噩"]).value, + Expr.array_contains_any(field, ["备", "嘎", "蝐", "秦", "万"]).value).value, + 'not set(["魡", "展", "隶", "韀", "脠", "噩"]).isdisjoint({0}) or ' + + 'not set(["备", "嘎", "蝐", "秦", "万"]).isdisjoint({0})') + ]) + return exprs + + def gen_number_operation(expr_fields): exprs = [] for field in expr_fields: @@ -2747,3 +2790,16 @@ def check_keys(_source, _target): check_keys(source, target) return flag + + +def gen_unicode_string(): + return chr(random.randint(0x4e00, 0x9fbf)) + + +def gen_unicode_string_batch(nb, string_len: int = 1): + return [''.join([gen_unicode_string() for _ in range(string_len)]) for _ in range(nb)] + + +def gen_unicode_string_array_batch(nb, string_len: int = 1, max_capacity: int = ct.default_max_capacity): + return [[''.join([gen_unicode_string() for _ in range(min(random.randint(1, string_len), 50))]) for _ in + range(random.randint(0, max_capacity))] for _ in range(nb)] diff --git a/tests/python_client/common/common_params.py b/tests/python_client/common/common_params.py index cb09a4d2ee8e5..3d1331781ddec 100644 --- a/tests/python_client/common/common_params.py +++ b/tests/python_client/common/common_params.py @@ -388,3 +388,10 @@ def index_offset_cache(enable: bool = True): @staticmethod def index_mmap(enable: bool = True): return {'mmap.enabled': enable} + + +@dataclass +class ExprCheckParams: + field: str + field_expr: str + rex: str diff --git a/tests/python_client/testcases/test_index.py b/tests/python_client/testcases/test_index.py index 5b18651d7f17c..528a116aa4be3 100644 --- a/tests/python_client/testcases/test_index.py +++ b/tests/python_client/testcases/test_index.py @@ -2327,18 +2327,6 @@ def setup_method(self, method): # connect to server before testing self._connect() - @property - def get_bitmap_support_dtype_names(self): - dtypes = [DataType.BOOL, DataType.INT8, DataType.INT16, DataType.INT32, DataType.INT64, DataType.VARCHAR] - dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] - return dtype_names - - @property - def get_bitmap_not_support_dtype_names(self): - dtypes = [DataType.FLOAT, DataType.DOUBLE] - dtype_names = [f"{n.name}" for n in dtypes] + [f"ARRAY_{n.name}" for n in dtypes] + [DataType.JSON.name] - return dtype_names - @pytest.mark.tags(CaseLabel.L0) @pytest.mark.parametrize("auto_id", [True, False]) @pytest.mark.parametrize("primary_field", ["int64_pk", "varchar_pk"]) @@ -2389,7 +2377,7 @@ def test_bitmap_on_not_supported_fields(self, request): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.get_bitmap_not_support_dtype_names], + fields=[primary_field, DataType.SPARSE_FLOAT_VECTOR.name, *self.bitmap_not_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict} ) ) @@ -2407,7 +2395,7 @@ def test_bitmap_on_not_supported_fields(self, request): ) # build `BITMAP` index on not supported scalar fields - for _field_name in self.get_bitmap_not_support_dtype_names: + for _field_name in self.bitmap_not_support_dtype_names: self.collection_wrap.create_index( field_name=_field_name, index_params=IndexPrams(index_type=IndexName.BITMAP).to_dict, check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapIndex} @@ -2438,7 +2426,7 @@ def test_bitmap_on_empty_collection(self, request, primary_field, auto_id): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, auto_id=auto_id ) @@ -2447,7 +2435,7 @@ def test_bitmap_on_empty_collection(self, request, primary_field, auto_id): # build `BITMAP` index on empty collection index_params = { **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -2497,7 +2485,7 @@ def test_bitmap_insert_after_loading(self, request, primary_field, auto_id): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.FLOAT16_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, auto_id=auto_id ) @@ -2506,7 +2494,7 @@ def test_bitmap_insert_after_loading(self, request, primary_field, auto_id): # build `BITMAP` index on empty collection index_params = { **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT16_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -2557,7 +2545,7 @@ def test_bitmap_insert_before_loading(self, request, primary_field, auto_id): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.BFLOAT16_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, auto_id=auto_id ), @@ -2578,7 +2566,7 @@ def test_bitmap_insert_before_loading(self, request, primary_field, auto_id): # build `BITMAP` index index_params = { **DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -2620,7 +2608,7 @@ def test_bitmap_primary_field_data_repeated(self, request, primary_field, shards self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, ), shards_num=shards_num @@ -2640,7 +2628,7 @@ def test_bitmap_primary_field_data_repeated(self, request, primary_field, shards # build `BITMAP` index index_params = { **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -2683,7 +2671,7 @@ def test_bitmap_primary_field_data_not_repeated(self, request, primary_field, sh self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.BINARY_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.BINARY_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, ), shards_num=shards_num @@ -2703,7 +2691,7 @@ def test_bitmap_primary_field_data_not_repeated(self, request, primary_field, sh # build `BITMAP` index on empty collection index_params = { **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) @@ -2757,7 +2745,7 @@ def test_bitmap_alter_index(self, request, extra_params, name): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, ) ) @@ -2765,13 +2753,13 @@ def test_bitmap_alter_index(self, request, extra_params, name): # build `BITMAP` index on empty collection index_params = { **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) # enable offset cache / mmap - for index_name in self.get_bitmap_support_dtype_names: + for index_name in self.bitmap_support_dtype_names: self.collection_wrap.alter_index(index_name=index_name, extra_params=extra_params) # prepare data (> 1024 triggering index building) @@ -2784,20 +2772,58 @@ def test_bitmap_alter_index(self, request, extra_params, name): # rebuild `BITMAP` index index_params = { **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name), - **DefaultScalarIndexParams.list_bitmap(self.get_bitmap_support_dtype_names) + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) # check alter index scalar_indexes = [{i.field_name: i.params} for i in self.collection_wrap.indexes if - i.field_name in self.get_bitmap_support_dtype_names] + i.field_name in self.bitmap_support_dtype_names] msg = f"Scalar indexes: {scalar_indexes}, expected all to contain {extra_params}" assert len([i for i in scalar_indexes for v in i.values() if not cf.check_key_exist(extra_params, v)]) == 0, msg # load collection self.collection_wrap.load() + @pytest.mark.tags(CaseLabel.L2) + def test_bitmap_alter_cardinality_limit(self, request): + """ + target: + 1. alter index `bitmap_cardinality_limit` failed + method: + 1. create a collection with scalar fields + 2. build BITMAP index on scalar fields + 3. altering index `bitmap_cardinality_limit` + expected: + 1. alter index failed with param `bitmap_cardinality_limit` + """ + # init params + collection_name, primary_field, nb = f"{request.function.__name__}", "int64_pk", 3000 + + # create a collection with fields that can build `BITMAP` index + self.collection_wrap.init_collection( + name=collection_name, + schema=cf.set_collection_schema( + fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names], + field_params={primary_field: FieldParams(is_primary=True).to_dict}, + ) + ) + + # build `BITMAP` index on empty collection + index_params = { + **DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name), + **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # alter `bitmap_cardinality_limit` failed + for index_name in self.bitmap_support_dtype_names: + self.collection_wrap.alter_index( + index_name=index_name, extra_params={"bitmap_cardinality_limit": 10}, check_task=CheckTasks.err_res, + check_items={ct.err_code: 1100, ct.err_msg: iem.NotConfigable.format("bitmap_cardinality_limit")}) + @pytest.mark.tags(CaseLabel.L2) @pytest.mark.parametrize("bitmap_cardinality_limit", [-10, 0, 1001]) def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limit): @@ -2830,8 +2856,9 @@ def test_bitmap_cardinality_limit_invalid(self, request, bitmap_cardinality_limi check_task=CheckTasks.err_res, check_items={ct.err_code: 1100, ct.err_msg: iem.CheckBitmapCardinality}) @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("bitmap_cardinality_limit", [1, 1000]) - def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit): + @pytest.mark.parametrize("bitmap_cardinality_limit", [1, 100, 1000]) + @pytest.mark.parametrize("index_params, name", [({"index_type": IndexName.AUTOINDEX}, "AUTOINDEX"), ({}, "None")]) + def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit, index_params, name): """ target: 1. check auto index setting `bitmap_cardinality_limit` not failed @@ -2850,13 +2877,14 @@ def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit but is only used to verify that the index is successfully built. """ # init params - collection_name, primary_field, nb = f"{request.function.__name__}_{bitmap_cardinality_limit}", "int64_pk", 3000 + collection_name = f"{request.function.__name__}_{bitmap_cardinality_limit}_{name}" + primary_field, nb = "int64_pk", 3000 # create a collection with fields that can build `BITMAP` index self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, ) ) @@ -2872,20 +2900,23 @@ def test_bitmap_cardinality_limit_enable(self, request, bitmap_cardinality_limit self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name)) # build scalar index - for scalar_field in self.get_bitmap_support_dtype_names: + for scalar_field in self.bitmap_support_dtype_names: self.collection_wrap.create_index( field_name=scalar_field, index_name=scalar_field, - index_params={"index_type": IndexName.AUTOINDEX, "bitmap_cardinality_limit": bitmap_cardinality_limit}) + index_params={**index_params, "bitmap_cardinality_limit": bitmap_cardinality_limit}) # load collection self.collection_wrap.load() @pytest.mark.tags(CaseLabel.L2) - @pytest.mark.parametrize("config, name", [({"bitmap_cardinality_limit": 1000}, 1000), ({}, None)]) - def test_bitmap_cardinality_limit_low_data(self, request, config, name): + @pytest.mark.parametrize("config, cardinality_data_range, name", + [({"bitmap_cardinality_limit": 1000}, (-128, 127), 1000), + ({"bitmap_cardinality_limit": 100}, (-128, 127), 100), + ({}, (1, 100), "None_100"), ({}, (1, 99), "None_99")]) + def test_bitmap_cardinality_limit_low_data(self, request, config, name, cardinality_data_range): """ target: - 1. check auto index setting `bitmap_cardinality_limit` and insert low cardinality data + 1. check auto index setting `bitmap_cardinality_limit`(default value=100) and insert low cardinality data method: 1. create a collection with scalar fields 2. insert some data and flush @@ -2907,13 +2938,13 @@ def test_bitmap_cardinality_limit_low_data(self, request, config, name): self.collection_wrap.init_collection( name=collection_name, schema=cf.set_collection_schema( - fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.get_bitmap_support_dtype_names], + fields=[primary_field, DataType.FLOAT_VECTOR.name, *self.bitmap_support_dtype_names], field_params={primary_field: FieldParams(is_primary=True).to_dict}, ) ) # prepare data (> 1024 triggering index building) - low_cardinality = [random.randint(-128, 127) for _ in range(nb)] + low_cardinality = [random.randint(*cardinality_data_range) for _ in range(nb)] self.collection_wrap.insert( data=cf.gen_values( self.collection_wrap.schema, nb=nb, @@ -2938,7 +2969,7 @@ def test_bitmap_cardinality_limit_low_data(self, request, config, name): self.build_multi_index(index_params=DefaultVectorIndexParams.IVF_SQ8(DataType.FLOAT_VECTOR.name)) # build scalar index - for scalar_field in self.get_bitmap_support_dtype_names: + for scalar_field in self.bitmap_support_dtype_names: self.collection_wrap.create_index( field_name=scalar_field, index_name=scalar_field, index_params={"index_type": IndexName.AUTOINDEX, **config}) diff --git a/tests/python_client/testcases/test_mix_scenes.py b/tests/python_client/testcases/test_mix_scenes.py index d9eba89fafe3e..daaf57596d3a5 100644 --- a/tests/python_client/testcases/test_mix_scenes.py +++ b/tests/python_client/testcases/test_mix_scenes.py @@ -342,7 +342,7 @@ def prepare_data(self): **DefaultVectorIndexParams.HNSW(DataType.BFLOAT16_VECTOR.name), **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), **DefaultVectorIndexParams.BIN_FLAT(DataType.BINARY_VECTOR.name), - # build Hybrid index + # build INVERTED index **DefaultScalarIndexParams.list_inverted([self.primary_field] + self.inverted_support_dtype_names) } self.build_multi_index(index_params=index_params) @@ -466,7 +466,7 @@ def prepare_data(self): **DefaultVectorIndexParams.DISKANN(DataType.BFLOAT16_VECTOR.name), **DefaultVectorIndexParams.SPARSE_WAND(DataType.SPARSE_FLOAT_VECTOR.name), **DefaultVectorIndexParams.BIN_IVF_FLAT(DataType.BINARY_VECTOR.name), - # build Hybrid index + # build BITMAP index **DefaultScalarIndexParams.list_bitmap(self.bitmap_support_dtype_names) } self.build_multi_index(index_params=index_params) @@ -475,6 +475,32 @@ def prepare_data(self): # load collection self.collection_wrap.load() + # https://github.com/milvus-io/milvus/issues/36221 + @pytest.mark.tags(CaseLabel.L1) + def test_bitmap_index_query_with_invalid_array_params(self): + """ + target: + 1. check query with invalid array params + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different wrong expr + 3. check query result error + expected: + 1. query response check error + """ + # query + self.collection_wrap.query( + expr=Expr.array_contains_any('ARRAY_VARCHAR', [['a', 'b']]).value, limit=1, check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, ct.err_msg: "fail to Query on QueryNode"}) + + self.collection_wrap.query( + expr=Expr.array_contains_all('ARRAY_VARCHAR', [['a', 'b']]).value, limit=1, check_task=CheckTasks.err_res, + check_items={ct.err_code: 65535, ct.err_msg: "fail to Query on QueryNode"}) + + self.collection_wrap.query( + expr=Expr.array_contains('ARRAY_VARCHAR', [['a', 'b']]).value, limit=1, check_task=CheckTasks.err_res, + check_items={ct.err_code: 1100, ct.err_msg: qem.ParseExpressionFailed}) + @pytest.mark.tags(CaseLabel.L1) @pytest.mark.parametrize("expr, expr_field", cf.gen_modulo_expression(['INT8', 'INT16', 'INT32', 'INT64'])) @pytest.mark.parametrize("limit", [1, 10, 3000]) @@ -942,6 +968,119 @@ def test_bitmap_mmap_hybrid_search(self): check_items={"nq": nq, "ids": self.insert_data.get('int64_pk'), "limit": limit}) +@pytest.mark.xdist_group("TestIndexUnicodeString") +class TestIndexUnicodeString(TestCaseClassBase): + """ + Scalar fields build BITMAP index, and verify Unicode string + + Author: Ting.Wang + """ + + def setup_class(self): + super().setup_class(self) + + # connect to server before testing + self._connect(self) + + # init params + self.primary_field, self.nb = "int64_pk", 3000 + + # create a collection with fields + self.collection_wrap.init_collection( + name=cf.gen_unique_str("test_bitmap_index_unicode"), + schema=cf.set_collection_schema( + fields=[self.primary_field, DataType.FLOAT_VECTOR.name, + f"{DataType.VARCHAR.name}_BITMAP", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP", + f"{DataType.VARCHAR.name}_INVERTED", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED", + f"{DataType.VARCHAR.name}_NoIndex", f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_NoIndex"], + field_params={ + self.primary_field: FieldParams(is_primary=True).to_dict + }, + ) + ) + + # prepare data (> 1024 triggering index building) + # insert unicode string + self.insert_data = cf.gen_field_values(self.collection_wrap.schema, nb=self.nb, default_values={ + f"{DataType.VARCHAR.name}_BITMAP": cf.gen_unicode_string_batch(nb=self.nb, string_len=30), + f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP": cf.gen_unicode_string_array_batch( + nb=self.nb, string_len=1, max_capacity=100), + f"{DataType.VARCHAR.name}_INVERTED": cf.gen_unicode_string_batch(nb=self.nb, string_len=30), + f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED": cf.gen_unicode_string_array_batch( + nb=self.nb, string_len=1, max_capacity=100), + f"{DataType.VARCHAR.name}_NoIndex": cf.gen_unicode_string_batch(nb=self.nb, string_len=30), + f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_NoIndex": cf.gen_unicode_string_array_batch( + nb=self.nb, string_len=1, max_capacity=100), + }) + + @pytest.fixture(scope="class", autouse=True) + def prepare_data(self): + self.collection_wrap.insert(data=list(self.insert_data.values()), check_task=CheckTasks.check_insert_result) + + # flush collection, segment sealed + self.collection_wrap.flush() + + # build scalar index + index_params = { + **DefaultVectorIndexParams.HNSW(DataType.FLOAT_VECTOR.name), + # build BITMAP index + **DefaultScalarIndexParams.list_bitmap([f"{DataType.VARCHAR.name}_BITMAP", + f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_BITMAP"]), + # build INVERTED index + **DefaultScalarIndexParams.list_inverted([f"{DataType.VARCHAR.name}_INVERTED", + f"{DataType.ARRAY.name}_{DataType.VARCHAR.name}_INVERTED"]) + } + self.build_multi_index(index_params=index_params) + assert sorted([n.field_name for n in self.collection_wrap.indexes]) == sorted(index_params.keys()) + + # load collection + self.collection_wrap.load() + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("expr, expr_field, rex", + cf.gen_varchar_unicode_expression(['VARCHAR_BITMAP', 'VARCHAR_INVERTED'])) + @pytest.mark.parametrize("limit", [1, 10, 3000]) + def test_index_unicode_string_query(self, expr, expr_field, limit, rex): + """ + target: + 1. check string expression + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(expr_field, []) if re.search(rex, i) is not None]) + + # query + res, _ = self.collection_wrap.query(expr=expr, limit=limit, output_fields=[expr_field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + @pytest.mark.tags(CaseLabel.L2) + @pytest.mark.parametrize("obj", cf.gen_varchar_unicode_expression_array( + ['ARRAY_VARCHAR_BITMAP', 'ARRAY_VARCHAR_INVERTED', 'ARRAY_VARCHAR_NoIndex'])) + @pytest.mark.parametrize("limit", [1]) + def test_index_unicode_string_array_query(self, limit, obj): + """ + target: + 1. check string expression + method: + 1. prepare some data and build `BITMAP index` on scalar fields + 2. query with the different expr and limit + 3. check query result + expected: + 1. query response equal to min(insert data, limit) + """ + # the total number of inserted data that matches the expression + expr_count = len([i for i in self.insert_data.get(obj.field, []) if eval(obj.rex.format(str(i)))]) + + # query + res, _ = self.collection_wrap.query(expr=obj.field_expr, limit=limit, output_fields=[obj.field]) + assert len(res) == min(expr_count, limit), f"actual: {len(res)} == expect: {min(expr_count, limit)}" + + class TestMixScenes(TestcaseBase): """ Testing cross-combination scenarios