test: Add a new range search test for all indexes and align some index params #32724

Merged · 3 commits · Apr 30, 2024
3 changes: 2 additions & 1 deletion tests/python_client/base/client_base.py
@@ -242,7 +242,8 @@ def init_collection_general(self, prefix="test", insert_data=False, nb=ct.defaul
             expected: return collection and raw data, insert ids
         """
         log.info("Test case of search interface: initialize before test case")
-        self._connect()
+        if not self.connection_wrap.has_connection(alias=DefaultConfig.DEFAULT_USING)[0]:
+            self._connect()
         collection_name = cf.gen_unique_str(prefix)
         if name is not None:
             collection_name = name
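
Note: the guard above makes init_collection_general reuse an existing connection instead of reconnecting on every call. A minimal standalone sketch of the same pattern against pymilvus directly (the connect_if_needed helper and its host/port defaults are illustrative, not part of this PR):

```python
from pymilvus import connections

DEFAULT_USING = "default"  # pymilvus's default connection alias


def connect_if_needed(host="localhost", port="19530"):
    """Connect only when no connection is registered under the alias."""
    if not connections.has_connection(DEFAULT_USING):
        connections.connect(alias=DEFAULT_USING, host=host, port=port)
```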
37 changes: 24 additions & 13 deletions tests/python_client/common/common_func.py
@@ -359,9 +359,9 @@ def gen_collection_schema_all_datatype(description=ct.default_desc,
     else:
         multiple_dim_array.insert(0, dim)
     for i in range(len(multiple_dim_array)):
-        fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.vector_data_type_all[i%3]}",
+        fields.append(gen_float_vec_field(name=f"multiple_vector_{ct.all_float_vector_types[i%3]}",
                                           dim=multiple_dim_array[i],
-                                          vector_data_type=ct.vector_data_type_all[i%3]))
+                                          vector_data_type=ct.all_float_vector_types[i%3]))
 
     schema, _ = ApiCollectionSchemaWrapper().init_collection_schema(fields=fields, description=description,
                                                                     primary_field=primary_field, auto_id=auto_id,
@@ -485,8 +485,8 @@ def gen_default_dataframe_data(nb=ct.default_nb, dim=ct.default_dim, start=0, wi
 
 def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0, with_json=True,
                                   random_primary_key=False, multiple_dim_array=[], multiple_vector_field_name=[],
-                                  vector_data_type="FLOAT_VECTOR", auto_id = False,
-                                  primary_field = ct.default_int64_field_name):
+                                  vector_data_type="FLOAT_VECTOR", auto_id=False,
+                                  primary_field=ct.default_int64_field_name):
     insert_list = []
     if not random_primary_key:
         int_values = pd.Series(data=[i for i in range(start, start + nb)])
@@ -496,14 +496,15 @@ def gen_general_default_list_data(nb=ct.default_nb, dim=ct.default_dim, start=0,
     string_values = pd.Series(data=[str(i) for i in range(start, start + nb)], dtype="string")
     json_values = [{"number": i, "float": i*1.0} for i in range(start, start + nb)]
     float_vec_values = gen_vectors(nb, dim, vector_data_type=vector_data_type)
-    insert_list = [int_values, float_values, string_values, json_values, float_vec_values]
+    insert_list = [int_values, float_values, string_values]
 
-    if with_json is False:
-        index = insert_list.index(json_values)
-        del insert_list[index]
+    if with_json is True:
+        insert_list.append(json_values)
+    insert_list.append(float_vec_values)
+
     if auto_id is True:
         if primary_field == ct.default_int64_field_name:
-            index = insert_list.index(int_values)
+            index = 0
         elif primary_field == ct.default_string_field_name:
             index = 2
         del insert_list[index]
@@ -699,7 +700,7 @@ def gen_dataframe_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0, w
         df[ct.default_float_vec_field_name] = float_vec_values
     else:
         for i in range(len(multiple_dim_array)):
-            df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3])
+            df[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3])
 
     if with_json is False:
         df.drop(ct.default_json_field_name, axis=1, inplace=True)
@@ -737,7 +738,7 @@ def gen_general_list_all_data_type(nb=ct.default_nb, dim=ct.default_dim, start=0
         insert_list.append(float_vec_values)
     else:
         for i in range(len(multiple_dim_array)):
-            insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.vector_data_type_all[i%3]))
+            insert_list.append(gen_vectors(nb, multiple_dim_array[i], ct.all_float_vector_types[i%3]))
 
     if with_json is False:
         # index = insert_list.index(json_values)
@@ -782,7 +783,7 @@ def gen_default_rows_data_all_data_type(nb=ct.default_nb, dim=ct.default_dim, st
     else:
         for i in range(len(multiple_dim_array)):
             dict[multiple_vector_field_name[i]] = gen_vectors(nb, multiple_dim_array[i],
-                                                              ct.vector_data_type_all[i])[0]
+                                                              ct.all_float_vector_types[i])[0]
     if len(multiple_dim_array) != 0:
         with open(ct.rows_all_data_type_file_path + f'_{partition_id}' + f'_dim{dim}.txt', 'wb') as json_file:
             pickle.dump(array, json_file)
@@ -1233,7 +1234,7 @@ def gen_simple_index():
         elif ct.all_index_types[i] in ct.sparse_support:
             continue
         dic = {"index_type": ct.all_index_types[i], "metric_type": "L2"}
-        dic.update({"params": ct.default_index_params[i]})
+        dic.update({"params": ct.default_all_indexes_params[i]})
         index_params.append(dic)
     return index_params

@@ -1671,6 +1672,16 @@ def index_to_dict(index):
     }
 
 
+def get_index_params_params(index_type):
+    """get default params of index params by index type"""
+    return ct.default_all_indexes_params[ct.all_index_types.index(index_type)]
+
+
+def get_search_params_params(index_type):
+    """get default params of search params by index type"""
+    return ct.default_all_search_params_params[ct.all_index_types.index(index_type)]
+
+
 def assert_json_contains(expr, list_data):
     opposite = False
     if expr.startswith("not"):
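
The two new helpers resolve an index type to its position in ct.all_index_types, so a test can derive both build params and search params from a single index type string. A usage sketch (the values in the comments follow from the aligned lists added to common_type.py below):

```python
from common import common_func as cf
from common import common_type as ct

index_type = "HNSW"  # position 4 in ct.all_index_types
index = {"index_type": index_type,
         "metric_type": ct.default_L0_metric,
         "params": cf.get_index_params_params(index_type)}    # {"M": 32, "efConstruction": 360}
search = {"metric_type": ct.default_L0_metric,
          "params": cf.get_search_params_params(index_type)}  # {"ef": 100}
```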
57 changes: 34 additions & 23 deletions tests/python_client/common/common_type.py
@@ -14,14 +14,6 @@
 default_limit = 10
 default_batch_size = 1000
 max_limit = 16384
-default_search_params = {"metric_type": "COSINE", "params": {"nprobe": 10}}
-default_search_ip_params = {"metric_type": "IP", "params": {"nprobe": 10}}
-default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 10}}
-default_index = {"index_type": "IVF_SQ8", "metric_type": "COSINE", "params": {"nlist": 64}}
-default_binary_index = {"index_type": "BIN_IVF_FLAT", "params": {"nlist": 128}, "metric_type": "JACCARD"}
-default_diskann_index = {"index_type": "DISKANN", "metric_type": "COSINE", "params": {}}
-default_diskann_search_params = {"metric_type": "COSINE", "params": {"search_list": 30}}
-default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}
 max_top_k = 16384
 max_partition_num = 4096
 max_role_num = 10
@@ -52,7 +44,7 @@
 float_type = "FLOAT_VECTOR"
 float16_type = "FLOAT16_VECTOR"
 bfloat16_type = "BFLOAT16_VECTOR"
-vector_data_type_all = [float_type, float16_type, bfloat16_type]
+all_float_vector_types = [float_type, float16_type, bfloat16_type]
 default_sparse_vec_field_name = "sparse_vector"
 default_partition_name = "_default"
 default_resource_group_name = '__default_resource_group'
@@ -108,11 +100,6 @@
 err_code = "err_code"
 err_msg = "err_msg"
 in_cluster_env = "IN_CLUSTER"
-
-default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": "COSINE"}
-default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
-default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP",
-                                 "params": {"drop_ratio_build": 0.2}}
 default_count_output = "count(*)"
 
 rows_all_data_type_file_path = "/tmp/rows_all_data_type"
@@ -250,26 +237,50 @@
 ]
 
 """ Specially defined list """
-all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ", "HNSW", "SCANN", "DISKANN", "BIN_FLAT", "BIN_IVF_FLAT",
-                   "SPARSE_INVERTED_INDEX", "SPARSE_WAND", "GPU_IVF_FLAT", "GPU_IVF_PQ"]
+L0_index_types = ["IVF_SQ8", "HNSW", "DISKANN"]
+all_index_types = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ",
+                   "HNSW", "SCANN", "DISKANN",
+                   "BIN_FLAT", "BIN_IVF_FLAT",
+                   "SPARSE_INVERTED_INDEX", "SPARSE_WAND",
+                   "GPU_IVF_FLAT", "GPU_IVF_PQ"]
 
-default_index_params = [{"nlist": 128}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
-                        {"M": 48, "efConstruction": 500}, {"nlist": 128}, {}, {"nlist": 128}, {"nlist": 128},
-                        {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2},
-                        {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}]
+default_all_indexes_params = [{}, {"nlist": 128}, {"nlist": 128}, {"nlist": 128, "m": 16, "nbits": 8},
+                              {"M": 32, "efConstruction": 360}, {"nlist": 128}, {},
+                              {}, {"nlist": 64},
+                              {"drop_ratio_build": 0.2}, {"drop_ratio_build": 0.2},
+                              {"nlist": 64}, {"nlist": 64, "m": 16, "nbits": 8}]
+
+default_all_search_params_params = [{}, {"nprobe": 32}, {"nprobe": 32}, {"nprobe": 32},
+                                    {"ef": 100}, {"nprobe": 32, "reorder_k": 100}, {"search_list": 30},
+                                    {}, {"nprobe": 32},
+                                    {"drop_ratio_search": "0.2"}, {"drop_ratio_search": "0.2"},
+                                    {}, {}]
 
 Handler_type = ["GRPC", "HTTP"]
 binary_support = ["BIN_FLAT", "BIN_IVF_FLAT"]
 delete_support = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]
 ivf = ["FLAT", "IVF_FLAT", "IVF_SQ8", "IVF_PQ"]
 skip_pq = ["IVF_PQ"]
 sparse_support = ["SPARSE_INVERTED_INDEX", "SPARSE_WAND"]
+default_L0_metric = "COSINE"
 float_metrics = ["L2", "IP", "COSINE"]
 binary_metrics = ["JACCARD", "HAMMING", "SUBSTRUCTURE", "SUPERSTRUCTURE"]
 structure_metrics = ["SUBSTRUCTURE", "SUPERSTRUCTURE"]
 all_scalar_data_types = ['int8', 'int16', 'int32', 'int64', 'float', 'double', 'bool', 'varchar']
+
+
+default_flat_index = {"index_type": "FLAT", "params": {}, "metric_type": default_L0_metric}
+default_bin_flat_index = {"index_type": "BIN_FLAT", "params": {}, "metric_type": "JACCARD"}
+default_sparse_inverted_index = {"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP",
+                                 "params": {"drop_ratio_build": 0.2}}
+
+default_search_params = {"params": default_all_search_params_params[2]}
+default_search_ip_params = {"metric_type": "IP", "params": default_all_search_params_params[2]}
+default_search_binary_params = {"metric_type": "JACCARD", "params": {"nprobe": 32}}
+default_index = {"index_type": "IVF_SQ8", "metric_type": default_L0_metric, "params": default_all_indexes_params[2]}
+default_binary_index = {"index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD", "params": default_all_indexes_params[8]}
+default_diskann_index = {"index_type": "DISKANN", "metric_type": default_L0_metric, "params": {}}
+default_diskann_search_params = {"params": {"search_list": 30}}
+default_sparse_search_params = {"metric_type": "IP", "params": {"drop_ratio_search": "0.2"}}
 
 
 class CheckTasks:
     """ The name of the method used to check the result """
     check_nothing = "check_nothing"
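
Because all_index_types, default_all_indexes_params, and default_all_search_params_params are aligned by position, each index type maps to exactly one set of build params and one set of search params. A small sanity-check sketch of that invariant (assuming the same import path the tests use):

```python
from common import common_type as ct

assert len(ct.all_index_types) \
       == len(ct.default_all_indexes_params) \
       == len(ct.default_all_search_params_params)

for index_type, build_params, search_params in zip(ct.all_index_types,
                                                   ct.default_all_indexes_params,
                                                   ct.default_all_search_params_params):
    print(f"{index_type}: build={build_params}, search={search_params}")
# e.g. "DISKANN: build={}, search={'search_list': 30}"
```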
188 changes: 94 additions & 94 deletions tests/python_client/load/test_workload.py
@@ -1,94 +1,94 @@
-import datetime
-import pytest
-
-from base.client_base import TestcaseBase
-from common import common_func as cf
-from common import common_type as ct
-from common.common_type import CaseLabel
-from utils.util_log import test_log as log
-from pymilvus import utility
-
-
-rounds = 100
-per_nb = 100000
-default_field_name = ct.default_float_vec_field_name
-default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
-
-
-class TestLoad(TestcaseBase):
-    """ Test case of end to end"""
-    @pytest.mark.tags(CaseLabel.L3)
-    def test_load_default(self):
-        name = 'load_test_collection_1'
-        name2 = 'load_test_collection_2'
-        # create
-        # collection_w = self.init_collection_wrap(name=name)
-        # collection_w2 = self.init_collection_wrap(name=name2)
-        # assert collection_w.name == name
-
-        for i in range(50):
-            name = f"load_collection2_{i}"
-            self.init_collection_wrap(name=name)
-        log.debug(f"total collections: {len(utility.list_collections())}")
-
-        # # insert
-        # data = cf.gen_default_list_data(per_nb)
-        # log.debug(f"data len: {len(data[0])}")
-        # for i in range(rounds):
-        #     t0 = datetime.datetime.now()
-        #     ins_res, res = collection_w.insert(data, timeout=180)
-        #     tt = datetime.datetime.now() - t0
-        #     log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s")
-        #     assert res  # and per_nb == len(ins_res.primary_keys)
-        #
-        #     t0 = datetime.datetime.now()
-        #     ins_res2, res = collection_w2.insert(data, timeout=180)
-        #     tt = datetime.datetime.now() - t0
-        #     log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s")
-        #     assert res
-        #
-        #     # flush
-        #     t0 = datetime.datetime.now()
-        #     log.debug(f"current collection num_entities: {collection_w.num_entities}")
-        #     tt = datetime.datetime.now() - t0
-        #     log.debug(f"round{i} flush in {tt}")
-        #
-        #     t0 = datetime.datetime.now()
-        #     log.debug(f"current collection2 num_entities: {collection_w2.num_entities}")
-        #     tt = datetime.datetime.now() - t0
-        #     log.debug(f"round{i} flush2 in {tt}")
-
-        # index, res = collection_w.create_index(default_field_name, default_index_params, timeout=60)
-        # assert res
-
-        # # search
-        # collection_w.load()
-        # search_vectors = cf.gen_vectors(1, ct.default_dim)
-        # t0 = datetime.datetime.now()
-        # res_1, _ = collection_w.search(data=search_vectors,
-        #                                anns_field=ct.default_float_vec_field_name,
-        #                                param={"nprobe": 16}, limit=1)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert search: {tt}")
-        # assert len(res_1) == 1
-        # # collection_w.release()
-        #
-        # # index
-        # collection_w.insert(cf.gen_default_dataframe_data(nb=5000))
-        # assert collection_w.num_entities == len(data[0]) + 5000
-        # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
-        # t0 = datetime.datetime.now()
-        # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
-        #                                      index_params=_index_params,
-        #                                      name=cf.gen_unique_str())
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert index: {tt}")
-        # assert len(collection_w.indexes) == 1
-        #
-        # # query
-        # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
-        # t0 = datetime.datetime.now()
-        # res, _ = collection_w.query(term_expr)
-        # tt = datetime.datetime.now() - t0
-        # log.debug(f"assert query: {tt}")
-        # assert len(res) == 4
+# import datetime
+# import pytest
+#
+# from base.client_base import TestcaseBase
+# from common import common_func as cf
+# from common import common_type as ct
+# from common.common_type import CaseLabel
+# from utils.util_log import test_log as log
+# from pymilvus import utility
+#
+#
+# rounds = 100
+# per_nb = 100000
+# default_field_name = ct.default_float_vec_field_name
+# default_index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+#
+#
+# class TestLoad(TestcaseBase):
+#     """ Test case of end to end"""
+#     @pytest.mark.tags(CaseLabel.L3)
+#     def test_load_default(self):
+#         name = 'load_test_collection_1'
+#         name2 = 'load_test_collection_2'
+#         # create
+#         # collection_w = self.init_collection_wrap(name=name)
+#         # collection_w2 = self.init_collection_wrap(name=name2)
+#         # assert collection_w.name == name
+#
+#         for i in range(50):
+#             name = f"load_collection2_{i}"
+#             self.init_collection_wrap(name=name)
+#         log.debug(f"total collections: {len(utility.list_collections())}")
+#
+#         # # insert
+#         # data = cf.gen_default_list_data(per_nb)
+#         # log.debug(f"data len: {len(data[0])}")
+#         # for i in range(rounds):
+#         #     t0 = datetime.datetime.now()
+#         #     ins_res, res = collection_w.insert(data, timeout=180)
+#         #     tt = datetime.datetime.now() - t0
+#         #     log.debug(f"round{i} insert: {len(ins_res.primary_keys)} entities in {tt}s")
+#         #     assert res  # and per_nb == len(ins_res.primary_keys)
+#         #
+#         #     t0 = datetime.datetime.now()
+#         #     ins_res2, res = collection_w2.insert(data, timeout=180)
+#         #     tt = datetime.datetime.now() - t0
+#         #     log.debug(f"round{i} insert2: {len(ins_res2.primary_keys)} entities in {tt}s")
+#         #     assert res
+#         #
+#         #     # flush
+#         #     t0 = datetime.datetime.now()
+#         #     log.debug(f"current collection num_entities: {collection_w.num_entities}")
+#         #     tt = datetime.datetime.now() - t0
+#         #     log.debug(f"round{i} flush in {tt}")
+#         #
+#         #     t0 = datetime.datetime.now()
+#         #     log.debug(f"current collection2 num_entities: {collection_w2.num_entities}")
+#         #     tt = datetime.datetime.now() - t0
+#         #     log.debug(f"round{i} flush2 in {tt}")
+#
+#         # index, res = collection_w.create_index(default_field_name, default_all_indexes_params, timeout=60)
+#         # assert res
+#
+#         # # search
+#         # collection_w.load()
+#         # search_vectors = cf.gen_vectors(1, ct.default_dim)
+#         # t0 = datetime.datetime.now()
+#         # res_1, _ = collection_w.search(data=search_vectors,
+#         #                                anns_field=ct.default_float_vec_field_name,
+#         #                                param={"nprobe": 16}, limit=1)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert search: {tt}")
+#         # assert len(res_1) == 1
+#         # # collection_w.release()
+#         #
+#         # # index
+#         # collection_w.insert(cf.gen_default_dataframe_data(nb=5000))
+#         # assert collection_w.num_entities == len(data[0]) + 5000
+#         # _index_params = {"index_type": "IVF_SQ8", "metric_type": "L2", "params": {"nlist": 64}}
+#         # t0 = datetime.datetime.now()
+#         # index, _ = collection_w.create_index(field_name=ct.default_float_vec_field_name,
+#         #                                      index_params=_index_params,
+#         #                                      name=cf.gen_unique_str())
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert index: {tt}")
+#         # assert len(collection_w.indexes) == 1
+#         #
+#         # # query
+#         # term_expr = f'{ct.default_int64_field_name} in [3001,4001,4999,2999]'
+#         # t0 = datetime.datetime.now()
+#         # res, _ = collection_w.query(term_expr)
+#         # tt = datetime.datetime.now() - t0
+#         # log.debug(f"assert query: {tt}")
+#         # assert len(res) == 4