From f7ca3c09f6ae974f085afd14a75f4cd4692ab51b Mon Sep 17 00:00:00 2001 From: "zhenshan.cao" Date: Tue, 30 May 2023 17:17:45 +0800 Subject: [PATCH] Fix bug: failed to insert binary vector Signed-off-by: zhenshan.cao --- examples/binary_example.py | 69 ++++++++++++++++++++++++++++++++ pymilvus/client/entity_helper.py | 10 ++--- 2 files changed, 74 insertions(+), 5 deletions(-) create mode 100644 examples/binary_example.py diff --git a/examples/binary_example.py b/examples/binary_example.py new file mode 100644 index 000000000..80e5da627 --- /dev/null +++ b/examples/binary_example.py @@ -0,0 +1,69 @@ +import time +import random +import numpy as np +from pymilvus import ( + connections, + utility, + FieldSchema, CollectionSchema, DataType, + Collection, + ) + + +bin_index_types = ["BIN_FLAT", "BIN_IVF_FLAT"] + +default_bin_index_params = [{"nlist": 128}, {"nlist": 128}] + +def gen_binary_vectors(num, dim): + raw_vectors = [] + binary_vectors = [] + for _ in range(num): + raw_vector = [random.randint(0, 1) for _ in range(dim)] + raw_vectors.append(raw_vector) + # packs a binary-valued array into bits in a uint8 array, then converts the resulting list of ints to bytes + binary_vectors.append(bytes(np.packbits(raw_vector, axis=-1).tolist())) + return raw_vectors, binary_vectors + + +def binary_vector_search(): + connections.connect() + int64_field = FieldSchema(name="int64", dtype=DataType.INT64, is_primary=True, auto_id=True) + dim = 128 + nb = 3000 + vector_field_name = "binary_vector" + binary_vector = FieldSchema(name=vector_field_name, dtype=DataType.BINARY_VECTOR, dim=dim) + schema = CollectionSchema(fields=[int64_field, binary_vector], enable_dynamic_field=True) + + has = utility.has_collection("hello_milvus") + if has: + hello_milvus = Collection("hello_milvus_bin") + hello_milvus.drop() + else: + hello_milvus = Collection("hello_milvus_bin", schema) + + _, vectors = gen_binary_vectors(nb, dim) + rows = [ + {vector_field_name: vectors[0]}, + {vector_field_name: vectors[1]}, + 
{vector_field_name: vectors[2]}, + {vector_field_name: vectors[3]}, + {vector_field_name: vectors[4]}, + {vector_field_name: vectors[5]}, + ] + + hello_milvus.insert(rows) + hello_milvus.flush() + for i, index_type in enumerate(bin_index_types): + index_params = default_bin_index_params[i] + hello_milvus.create_index(vector_field_name, + index_params={"index_type": index_type, "params": index_params, "metric_type": "HAMMING"}) + hello_milvus.load() + print("index_type = ", index_type) + res = hello_milvus.search(vectors[:1], vector_field_name, {"metric_type": "HAMMING"}, limit=1) + print("res = ", res) + hello_milvus.release() + hello_milvus.drop_index() + hello_milvus.drop() + + +if __name__ == "__main__": + binary_vector_search() diff --git a/pymilvus/client/entity_helper.py b/pymilvus/client/entity_helper.py index cd9508a74..4c1002d62 100644 --- a/pymilvus/client/entity_helper.py +++ b/pymilvus/client/entity_helper.py @@ -84,7 +84,7 @@ def pack_field_value_to_field_data(field_value, field_data, field_info): field_data.vectors.float_vector.data.extend(field_value) elif field_type in (DataType.BINARY_VECTOR,): field_data.vectors.dim = len(field_value) * 8 - field_data.vectors.binary_vector.data.append(b''.join(field_value)) + field_data.vectors.binary_vector += bytes(field_value) elif field_type in (DataType.VARCHAR,): field_data.scalars.string_data.data.append( convert_to_str_array(field_value, field_info, True)) @@ -204,10 +204,10 @@ def extract_row_data_from_fields_data(fields_data, index, dynamic_output_fields= start_pos:end_pos]] elif field_data.type == DataType.BINARY_VECTOR: dim = field_data.vectors.dim - if len(field_data.vectors.binary_vector.data) >= index * (dim / 8): - start_pos = index * (dim / 8) - end_pos = (index + 1) * (dim / 8) + if len(field_data.vectors.binary_vector) >= index * (dim // 8): + start_pos = index * (dim // 8) + end_pos = (index + 1) * (dim // 8) entity_row_data[field_data.field_name] = [ - 
field_data.vectors.binary_vector.data[start_pos:end_pos]] + field_data.vectors.binary_vector[start_pos:end_pos]] return entity_row_data