From 4fc7915c70499c06e954a031dd1e9ad2cf2de853 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Tue, 14 May 2024 14:33:33 +0800 Subject: [PATCH] enhance: unify data generation test APIs (#32955) Issue: #22837 Signed-off-by: Cai Yudong --- internal/proxy/mock_test.go | 411 +-------- internal/proxy/msg_pack_test.go | 5 +- internal/proxy/proxy_test.go | 17 +- internal/proxy/task_insert_test.go | 5 +- internal/proxy/task_query_test.go | 3 +- internal/proxy/task_test.go | 9 +- internal/proxy/task_upsert_test.go | 5 +- internal/proxy/validate_util_test.go | 41 +- internal/querynodev2/pipeline/mock_data.go | 6 +- internal/querynodev2/segments/mock_data.go | 591 +----------- internal/querynodev2/segments/reduce_test.go | 3 +- internal/storage/utils_test.go | 120 +-- internal/util/testutil/test_util.go | 207 ++--- pkg/go.mod | 1 + pkg/go.sum | 2 + pkg/util/testutils/gen_data.go | 912 +++++++++++++++++++ pkg/util/testutils/sparse_test_utils.go | 70 -- tests/integration/util_insert.go | 148 +-- 18 files changed, 1092 insertions(+), 1464 deletions(-) create mode 100644 pkg/util/testutils/gen_data.go delete mode 100644 pkg/util/testutils/sparse_test_utils.go diff --git a/internal/proxy/mock_test.go b/internal/proxy/mock_test.go index e872740a44fc2..5675b100fa54d 100644 --- a/internal/proxy/mock_test.go +++ b/internal/proxy/mock_test.go @@ -18,13 +18,9 @@ package proxy import ( "context" - "encoding/binary" - "math" - "math/rand" "sync" "time" - "github.com/x448/float16" "google.golang.org/grpc" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -36,6 +32,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/uniquegenerator" ) @@ -346,418 +343,28 @@ func newSimpleMockMsgStreamFactory() *simpleMockMsgStreamFactory { } func generateFieldData(dataType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData { - fieldData := &schemapb.FieldData{ - Type: dataType, - FieldName: fieldName, + if dataType < 100 { + return testutils.GenerateScalarFieldData(dataType, fieldName, numRows) } - switch dataType { - case schemapb.DataType_Bool: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: generateBoolArray(numRows), - }, - }, - }, - } - case schemapb.DataType_Int32: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int64: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: generateInt64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Float: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: generateFloat32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Double: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: 
&schemapb.DoubleArray{ - Data: generateFloat64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_VarChar: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: generateVarCharArray(numRows, maxTestStringLen), - }, - }, - }, - } - case schemapb.DataType_FloatVector: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(testVecDim), - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(numRows, testVecDim), - }, - }, - }, - } - case schemapb.DataType_BinaryVector: - fieldData.FieldName = fieldName - fieldData.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(testVecDim), - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(numRows, testVecDim), - }, - }, - } - default: - // TODO:: - } - - return fieldData -} - -func generateBoolArray(numRows int) []bool { - ret := make([]bool, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Int()%2 == 0) - } - return ret -} - -func generateInt8Array(numRows int) []int8 { - ret := make([]int8, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int8(rand.Int())) - } - return ret -} - -func generateInt16Array(numRows int) []int16 { - ret := make([]int16, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int16(rand.Int())) - } - return ret -} - -func generateInt32Array(numRows int) []int32 { - ret := make([]int32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int32(rand.Int())) - } - return ret -} - -func generateInt64Array(numRows int) []int64 { - ret := make([]int64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int64(rand.Int())) - } - return ret -} - -func generateUint64Array(numRows int) []uint64 { - ret := make([]uint64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Uint64()) - } - return ret -} - -func generateFloat32Array(numRows int) []float32 { - ret := make([]float32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateFloat64Array(numRows int) []float64 { - ret := make([]float64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float64()) - } - return ret -} - -func generateFloatVectors(numRows, dim int) []float32 { - total := numRows * dim - ret := make([]float32, 0, total) - for i := 0; i < total; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateBinaryVectors(numRows, dim int) []byte { - total := (numRows * dim) / 8 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func generateFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret := make([]byte, total*2) - for i := 0; i < total; i++ { - v := float16.Fromfloat32(rand.Float32()).Bits() - binary.LittleEndian.PutUint16(ret[i*2:], v) - } - return ret -} - -func generateBFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret16 := make([]uint16, 0, total) - for i := 0; i < total; i++ { - f := rand.Float32() - bits := math.Float32bits(f) - bits >>= 16 - bits &= 0x7FFF - ret16 = append(ret16, uint16(bits)) - } - ret := make([]byte, len(ret16)*2) - for i, value := range ret16 { - binary.LittleEndian.PutUint16(ret[i*2:], value) - } - return 
ret -} - -func generateBFloat16VectorsWithInvalidData(numRows, dim int) []byte { - total := numRows * dim - ret16 := make([]uint16, 0, total) - for i := 0; i < total; i++ { - var f float32 - if i%2 == 0 { - f = float32(math.NaN()) - } else { - f = float32(math.Inf(1)) - } - bits := math.Float32bits(f) - bits >>= 16 - bits &= 0x7FFF - ret16 = append(ret16, uint16(bits)) - } - ret := make([]byte, len(ret16)*2) - for i, value := range ret16 { - binary.LittleEndian.PutUint16(ret[i*2:], value) - } - return ret -} - -func generateFloat16VectorsWithInvalidData(numRows, dim int) []byte { - total := numRows * dim - ret := make([]byte, total*2) - for i := 0; i < total; i++ { - if i%2 == 0 { - binary.LittleEndian.PutUint16(ret[i*2:], uint16(float16.Inf(1))) - } else { - binary.LittleEndian.PutUint16(ret[i*2:], uint16(float16.NaN())) - } - } - return ret -} - -func generateVarCharArray(numRows int, maxLen int) []string { - ret := make([]string, numRows) - for i := 0; i < numRows; i++ { - ret[i] = funcutil.RandomString(rand.Intn(maxLen)) - } - return ret + return testutils.GenerateVectorFieldData(dataType, fieldName, numRows, testVecDim) } func newScalarFieldData(fieldSchema *schemapb.FieldSchema, fieldName string, numRows int) *schemapb.FieldData { - ret := &schemapb.FieldData{ - Type: fieldSchema.DataType, - FieldName: fieldName, - Field: nil, - } - - switch fieldSchema.DataType { - case schemapb.DataType_Bool: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: generateBoolArray(numRows), - }, - }, - }, - } - case schemapb.DataType_Int8: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int16: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int32: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int64: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: generateInt64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Float: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: generateFloat32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Double: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: generateFloat64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_VarChar: - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: generateVarCharArray(numRows, testMaxVarCharLength), - }, - }, - }, - } - } - - return ret + return testutils.GenerateScalarFieldData(fieldSchema.GetDataType(), fieldName, numRows) } func newFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return 
&schemapb.FieldData{ - Type: schemapb.DataType_FloatVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(numRows, dim), - }, - }, - }, - }, - } + return testutils.NewFloatVectorFieldData(fieldName, numRows, dim) } func newBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_BinaryVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(numRows, dim), - }, - }, - }, - } + return testutils.NewBinaryVectorFieldData(fieldName, numRows, dim) } func newFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_Float16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: generateFloat16Vectors(numRows, dim), - }, - }, - }, - } + return testutils.NewFloat16VectorFieldData(fieldName, numRows, dim) } func newBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_BFloat16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: generateBFloat16Vectors(numRows, dim), - }, - }, - }, - } -} - -func generateHashKeys(numRows int) []uint32 { - ret := make([]uint32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Uint32()) - } - return ret + return testutils.NewBFloat16VectorFieldData(fieldName, numRows, dim) } diff --git a/internal/proxy/msg_pack_test.go b/internal/proxy/msg_pack_test.go index 4114660666e93..f41e0516296b7 100644 --- a/internal/proxy/msg_pack_test.go +++ b/internal/proxy/msg_pack_test.go @@ -32,11 +32,12 @@ import ( "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" ) func TestRepackInsertData(t *testing.T) { nb := 10 - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) prefix := "TestRepackInsertData" dbName := "" collectionName := prefix + funcutil.GenRandomStr() @@ -143,7 +144,7 @@ func TestRepackInsertData(t *testing.T) { func TestRepackInsertDataWithPartitionKey(t *testing.T) { nb := 10 - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) prefix := "TestRepackInsertData" collectionName := prefix + funcutil.GenRandomStr() diff --git a/internal/proxy/proxy_test.go b/internal/proxy/proxy_test.go index ebbfd7e2d571f..9877d1243e3ac 100644 --- a/internal/proxy/proxy_test.go +++ b/internal/proxy/proxy_test.go @@ -73,6 +73,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/metric" "github.com/milvus-io/milvus/pkg/util/metricsinfo" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -599,7 +600,7 @@ func TestProxy(t *testing.T) { constructCollectionInsertRequest := func() *milvuspb.InsertRequest { fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := 
newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.InsertRequest{ Base: nil, DbName: dbName, @@ -614,7 +615,7 @@ func TestProxy(t *testing.T) { constructPartitionInsertRequest := func() *milvuspb.InsertRequest { fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.InsertRequest{ Base: nil, DbName: dbName, @@ -629,7 +630,7 @@ func TestProxy(t *testing.T) { constructCollectionUpsertRequest := func() *milvuspb.UpsertRequest { fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.UpsertRequest{ Base: nil, DbName: dbName, @@ -1811,7 +1812,7 @@ func TestProxy(t *testing.T) { Dim: int64(dim), Data: &schemapb.VectorField_FloatVector{ FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(nq, dim), + Data: testutils.GenerateFloatVectors(nq, dim), }, }, }, @@ -1824,7 +1825,7 @@ func TestProxy(t *testing.T) { Dim: int64(dim), Data: &schemapb.VectorField_FloatVector{ FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(nq, dim), + Data: testutils.GenerateFloatVectors(nq, dim), }, }, }, @@ -3723,7 +3724,7 @@ func TestProxy(t *testing.T) { pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum) fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.UpsertRequest{ Base: nil, DbName: dbName, @@ -3739,7 +3740,7 @@ func TestProxy(t *testing.T) { pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum) fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.UpsertRequest{ Base: nil, DbName: dbName, @@ -3755,7 +3756,7 @@ func TestProxy(t *testing.T) { pkFieldData := newScalarFieldData(schema.Fields[0], int64Field, rowNum) fVecColumn := newFloatVectorFieldData(floatVecField, rowNum, dim) bVecColumn := newBinaryVectorFieldData(binaryVecField, rowNum, dim) - hashKeys := generateHashKeys(rowNum) + hashKeys := testutils.GenerateHashKeys(rowNum) return &milvuspb.UpsertRequest{ Base: nil, DbName: dbName, diff --git a/internal/proxy/task_insert_test.go b/internal/proxy/task_insert_test.go index 083398ea985a2..4fa7480d23ca8 100644 --- a/internal/proxy/task_insert_test.go +++ b/internal/proxy/task_insert_test.go @@ -13,6 +13,7 @@ import ( "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" ) func TestInsertTask_CheckAligned(t *testing.T) { @@ -56,8 +57,8 @@ func TestInsertTask_CheckAligned(t *testing.T) { MsgType: commonpb.MsgType_Insert, }, Version: msgpb.InsertDataVersion_ColumnBased, - RowIDs: generateInt64Array(numRows), - Timestamps: generateUint64Array(numRows), + RowIDs: testutils.GenerateInt64Array(numRows), + Timestamps: testutils.GenerateUint64Array(numRows), }, }, schema: 
&schemapb.CollectionSchema{ diff --git a/internal/proxy/task_query_test.go b/internal/proxy/task_query_test.go index fddf9e42c44da..5112b53ac2552 100644 --- a/internal/proxy/task_query_test.go +++ b/internal/proxy/task_query_test.go @@ -39,6 +39,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -195,7 +196,7 @@ func TestQueryTask_all(t *testing.T) { Status: merr.Success(), Ids: &schemapb.IDs{ IdField: &schemapb.IDs_IntId{ - IntId: &schemapb.LongArray{Data: generateInt64Array(hitNum)}, + IntId: &schemapb.LongArray{Data: testutils.GenerateInt64Array(hitNum)}, }, }, } diff --git a/internal/proxy/task_test.go b/internal/proxy/task_test.go index c93843cd5d097..161f5b1bf9f11 100644 --- a/internal/proxy/task_test.go +++ b/internal/proxy/task_test.go @@ -49,6 +49,7 @@ import ( "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/metric" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/uniquegenerator" @@ -1680,7 +1681,7 @@ func TestTask_Int64PrimaryKey(t *testing.T) { defer segAllocator.Close() t.Run("insert", func(t *testing.T) { - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) task := &insertTask{ insertMsg: &BaseInsertTask{ BaseMsg: msgstream.BaseMsg{ @@ -1874,7 +1875,7 @@ func TestTask_VarCharPrimaryKey(t *testing.T) { defer segAllocator.Close() t.Run("insert", func(t *testing.T) { - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) task := &insertTask{ insertMsg: &BaseInsertTask{ BaseMsg: msgstream.BaseMsg{ @@ -1929,7 +1930,7 @@ func TestTask_VarCharPrimaryKey(t *testing.T) { }) t.Run("upsert", func(t *testing.T) { - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) task := &upsertTask{ upsertMsg: &msgstream.UpsertMsg{ InsertMsg: &BaseInsertTask{ @@ -3339,7 +3340,7 @@ func TestPartitionKey(t *testing.T) { }) t.Run("Upsert", func(t *testing.T) { - hash := generateHashKeys(nb) + hash := testutils.GenerateHashKeys(nb) ut := &upsertTask{ ctx: ctx, Condition: NewTaskCondition(ctx), diff --git a/internal/proxy/task_upsert_test.go b/internal/proxy/task_upsert_test.go index 26e1946168be6..032ee18b21ab0 100644 --- a/internal/proxy/task_upsert_test.go +++ b/internal/proxy/task_upsert_test.go @@ -28,6 +28,7 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/commonpbutil" + "github.com/milvus-io/milvus/pkg/util/testutils" ) func TestUpsertTask_CheckAligned(t *testing.T) { @@ -96,8 +97,8 @@ func TestUpsertTask_CheckAligned(t *testing.T) { NumRows: uint32(numRows), FieldsData: []*schemapb.FieldData{}, }, - rowIDs: generateInt64Array(numRows), - timestamps: generateUint64Array(numRows), + rowIDs: testutils.GenerateInt64Array(numRows), + timestamps: testutils.GenerateUint64Array(numRows), schema: schema, upsertMsg: &msgstream.UpsertMsg{ InsertMsg: &msgstream.InsertMsg{ diff --git a/internal/proxy/validate_util_test.go b/internal/proxy/validate_util_test.go index e1363444afe73..a99238cd39968 100644 --- a/internal/proxy/validate_util_test.go +++ b/internal/proxy/validate_util_test.go @@ -12,6 +12,7 @@ import ( 
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -291,8 +292,8 @@ func Test_validateUtil_checkFloatVectorFieldData(t *testing.T) { func Test_validateUtil_checkFloat16VectorFieldData(t *testing.T) { nb := 5 dim := int64(8) - data := generateFloat16Vectors(nb, int(dim)) - invalidData := generateFloat16VectorsWithInvalidData(nb, int(dim)) + data := testutils.GenerateFloat16Vectors(nb, int(dim)) + invalidData := testutils.GenerateFloat16VectorsWithInvalidData(nb, int(dim)) t.Run("not float16 vector", func(t *testing.T) { f := &schemapb.FieldData{} @@ -382,8 +383,8 @@ func Test_validateUtil_checkFloat16VectorFieldData(t *testing.T) { func Test_validateUtil_checkBfloatVectorFieldData(t *testing.T) { nb := 5 dim := int64(8) - data := generateFloat16Vectors(nb, int(dim)) - invalidData := generateBFloat16VectorsWithInvalidData(nb, int(dim)) + data := testutils.GenerateFloat16Vectors(nb, int(dim)) + invalidData := testutils.GenerateBFloat16VectorsWithInvalidData(nb, int(dim)) t.Run("not float vector", func(t *testing.T) { f := &schemapb.FieldData{} v := newValidateUtil() @@ -1196,7 +1197,7 @@ func Test_validateUtil_checkAligned(t *testing.T) { Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_FloatVector{ FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(10, 8), + Data: testutils.GenerateFloatVectors(10, 8), }, }, Dim: 8, @@ -1209,7 +1210,7 @@ func Test_validateUtil_checkAligned(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(10, 8), + BinaryVector: testutils.GenerateBinaryVectors(10, 8), }, Dim: 8, }, @@ -1222,7 +1223,7 @@ func Test_validateUtil_checkAligned(t *testing.T) { Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ StringData: &schemapb.StringArray{ - Data: generateVarCharArray(10, 8), + Data: testutils.GenerateVarCharArray(10, 8), }, }, }, @@ -1356,7 +1357,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(2, 8), + BinaryVector: testutils.GenerateBinaryVectors(2, 8), }, }, }, @@ -1368,7 +1369,7 @@ func Test_validateUtil_Validate(t *testing.T) { Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ StringData: &schemapb.StringArray{ - Data: generateVarCharArray(2, 8), + Data: testutils.GenerateVarCharArray(2, 8), }, }, }, @@ -1380,7 +1381,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: generateFloat16Vectors(2, 8), + Float16Vector: testutils.GenerateFloat16Vectors(2, 8), }, }, }, @@ -1391,7 +1392,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: generateBFloat16Vectors(2, 8), + Bfloat16Vector: testutils.GenerateBFloat16Vectors(2, 8), }, }, }, @@ -1474,7 +1475,7 @@ func Test_validateUtil_Validate(t *testing.T) { Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_FloatVector{ FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(2, 1), + Data: testutils.GenerateFloatVectors(2, 1), }, }, }, @@ 
-1486,7 +1487,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(2, 8), + BinaryVector: testutils.GenerateBinaryVectors(2, 8), }, }, }, @@ -1497,7 +1498,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: generateFloat16Vectors(2, 8), + Float16Vector: testutils.GenerateFloat16Vectors(2, 8), }, }, }, @@ -1508,7 +1509,7 @@ func Test_validateUtil_Validate(t *testing.T) { Field: &schemapb.FieldData_Vectors{ Vectors: &schemapb.VectorField{ Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: generateBFloat16Vectors(2, 8), + Bfloat16Vector: testutils.GenerateBFloat16Vectors(2, 8), }, }, }, @@ -2323,7 +2324,7 @@ func Test_validateUtil_Validate(t *testing.T) { Dim: 8, Data: &schemapb.VectorField_FloatVector{ FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(2, 8), + Data: testutils.GenerateFloatVectors(2, 8), }, }, }, @@ -2336,7 +2337,7 @@ func Test_validateUtil_Validate(t *testing.T) { Vectors: &schemapb.VectorField{ Dim: 8, Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(2, 8), + BinaryVector: testutils.GenerateBinaryVectors(2, 8), }, }, }, @@ -2348,7 +2349,7 @@ func Test_validateUtil_Validate(t *testing.T) { Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ StringData: &schemapb.StringArray{ - Data: generateVarCharArray(2, 8), + Data: testutils.GenerateVarCharArray(2, 8), }, }, }, @@ -2568,7 +2569,7 @@ func Test_validateUtil_Validate(t *testing.T) { Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_FloatData{ FloatData: &schemapb.FloatArray{ - Data: generateFloat32Array(2), + Data: testutils.GenerateFloat32Array(2), }, }, }, @@ -2581,7 +2582,7 @@ func Test_validateUtil_Validate(t *testing.T) { Scalars: &schemapb.ScalarField{ Data: &schemapb.ScalarField_DoubleData{ DoubleData: &schemapb.DoubleArray{ - Data: generateFloat64Array(2), + Data: testutils.GenerateFloat64Array(2), }, }, }, diff --git a/internal/querynodev2/pipeline/mock_data.go b/internal/querynodev2/pipeline/mock_data.go index 1c42314bd1e2b..a26b0d56603c7 100644 --- a/internal/querynodev2/pipeline/mock_data.go +++ b/internal/querynodev2/pipeline/mock_data.go @@ -22,9 +22,9 @@ import ( "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/msgpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/internal/querynodev2/segments" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/commonpbutil" + "github.com/milvus-io/milvus/pkg/util/testutils" ) const defaultDim = 128 @@ -164,9 +164,9 @@ func genFiledDataWithSchema(schema *schemapb.CollectionSchema, numRows int) []*s fieldsData := make([]*schemapb.FieldData, 0) for _, field := range schema.Fields { if field.DataType < 100 { - fieldsData = append(fieldsData, segments.GenTestScalarFieldData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(field.DataType, field.DataType.String(), field.GetFieldID(), numRows)) } else { - fieldsData = append(fieldsData, segments.GenTestVectorFiledData(field.DataType, field.DataType.String(), field.GetFieldID(), numRows, defaultDim)) + fieldsData = append(fieldsData, 
testutils.GenerateVectorFieldDataWithID(field.DataType, field.DataType.String(), field.GetFieldID(), numRows, defaultDim)) } } return fieldsData diff --git a/internal/querynodev2/segments/mock_data.go b/internal/querynodev2/segments/mock_data.go index bbcd8c43796ef..6d15212d047e4 100644 --- a/internal/querynodev2/segments/mock_data.go +++ b/internal/querynodev2/segments/mock_data.go @@ -28,7 +28,6 @@ import ( "github.com/cockroachdb/errors" "github.com/golang/protobuf/proto" - "github.com/x448/float16" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" @@ -397,338 +396,6 @@ func GenTestIndexMeta(collectionID int64, schema *schemapb.CollectionSchema) *se return &indexMeta } -// ---------- unittest util functions ---------- -// gen field data -func generateBoolArray(numRows int) []bool { - ret := make([]bool, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Int()%2 == 0) - } - return ret -} - -func generateInt8Array(numRows int) []int8 { - ret := make([]int8, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int8(rand.Int())) - } - return ret -} - -func generateInt16Array(numRows int) []int16 { - ret := make([]int16, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int16(rand.Int())) - } - return ret -} - -func generateInt32Array(numRows int) []int32 { - ret := make([]int32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Int31()) - } - return ret -} - -func generateInt64Array(numRows int) []int64 { - ret := make([]int64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int64(i)) - } - return ret -} - -func generateFloat32Array(numRows int) []float32 { - ret := make([]float32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateStringArray(numRows int) []string { - ret := make([]string, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, strconv.Itoa(rand.Int())) - } - return ret -} - -func generateArrayArray(numRows int) []*schemapb.ScalarField { - ret := make([]*schemapb.ScalarField, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(10), - }, - }, - }) - } - return ret -} - -func generateJSONArray(numRows int) [][]byte { - ret := make([][]byte, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, []byte(fmt.Sprintf(`{"key":%d}`, i+1))) - } - return ret -} - -func generateFloat64Array(numRows int) []float64 { - ret := make([]float64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float64()) - } - return ret -} - -func generateFloatVectors(numRows, dim int) []float32 { - total := numRows * dim - ret := make([]float32, 0, total) - for i := 0; i < total; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateBinaryVectors(numRows, dim int) []byte { - total := (numRows * dim) / 8 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func generateFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret := make([]byte, total*2) - for i := 0; i < total; i++ { - v := float16.Fromfloat32(rand.Float32()).Bits() - binary.LittleEndian.PutUint16(ret[i*2:], v) - } - return ret -} - -func generateBFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret16 := make([]uint16, 0, total) - for i := 0; i < total; i++ { - f := rand.Float32() - bits 
:= math.Float32bits(f) - bits >>= 16 - bits &= 0x7FFF - ret16 = append(ret16, uint16(bits)) - } - ret := make([]byte, len(ret16)*2) - for i, value := range ret16 { - binary.LittleEndian.PutUint16(ret[i*2:], value) - } - return ret -} - -func GenTestScalarFieldData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData { - ret := &schemapb.FieldData{ - Type: dType, - FieldName: fieldName, - Field: nil, - } - - switch dType { - case schemapb.DataType_Bool: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: generateBoolArray(numRows), - }, - }, - }, - } - case schemapb.DataType_Int8: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int16: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int32: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: generateInt32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Int64: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: generateInt64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Float: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: generateFloat32Array(numRows), - }, - }, - }, - } - case schemapb.DataType_Double: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: generateFloat64Array(numRows), - }, - }, - }, - } - case schemapb.DataType_VarChar: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: generateStringArray(numRows), - }, - }, - }, - } - - case schemapb.DataType_Array: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_ArrayData{ - ArrayData: &schemapb.ArrayArray{ - Data: generateArrayArray(numRows), - }, - }, - }, - } - - case schemapb.DataType_JSON: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_JsonData{ - JsonData: &schemapb.JSONArray{ - Data: generateJSONArray(numRows), - }, - }, - }, - } - - default: - panic("data type not supported") - } - - return ret -} - -// dim is ignored for sparse -func GenTestVectorFiledData(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData { - ret := &schemapb.FieldData{ - Type: dType, - FieldName: fieldName, - Field: nil, - } - switch dType { - case schemapb.DataType_BinaryVector: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Vectors{ - Vectors: 
&schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: generateBinaryVectors(numRows, dim), - }, - }, - } - case schemapb.DataType_FloatVector: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: generateFloatVectors(numRows, dim), - }, - }, - }, - } - case schemapb.DataType_Float16Vector: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: generateFloat16Vectors(numRows, dim), - }, - }, - } - case schemapb.DataType_BFloat16Vector: - ret.FieldId = fieldID - ret.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: generateBFloat16Vectors(numRows, dim), - }, - }, - } - case schemapb.DataType_SparseFloatVector: - ret.FieldId = fieldID - sparseData := testutils.GenerateSparseFloatVectors(numRows) - ret.Field = &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: sparseData.Dim, - Data: &schemapb.VectorField_SparseFloatVector{ - SparseFloatVector: &schemapb.SparseFloatArray{ - Dim: sparseData.Dim, - Contents: sparseData.Contents, - }, - }, - }, - } - default: - panic("data type not supported") - } - return ret -} - func NewTestChunkManagerFactory(params *paramtable.ComponentParam, rootPath string) *storage.ChunkManagerFactory { return storage.NewChunkManagerFactory("minio", storage.RootPath(rootPath), @@ -854,67 +521,67 @@ func genInsertData(msgLength int, schema *schemapb.CollectionSchema) (*storage.I switch f.DataType { case schemapb.DataType_Bool: insertData.Data[f.FieldID] = &storage.BoolFieldData{ - Data: generateBoolArray(msgLength), + Data: testutils.GenerateBoolArray(msgLength), } case schemapb.DataType_Int8: insertData.Data[f.FieldID] = &storage.Int8FieldData{ - Data: generateInt8Array(msgLength), + Data: testutils.GenerateInt8Array(msgLength), } case schemapb.DataType_Int16: insertData.Data[f.FieldID] = &storage.Int16FieldData{ - Data: generateInt16Array(msgLength), + Data: testutils.GenerateInt16Array(msgLength), } case schemapb.DataType_Int32: insertData.Data[f.FieldID] = &storage.Int32FieldData{ - Data: generateInt32Array(msgLength), + Data: testutils.GenerateInt32Array(msgLength), } case schemapb.DataType_Int64: insertData.Data[f.FieldID] = &storage.Int64FieldData{ - Data: generateInt64Array(msgLength), + Data: testutils.GenerateInt64Array(msgLength), } case schemapb.DataType_Float: insertData.Data[f.FieldID] = &storage.FloatFieldData{ - Data: generateFloat32Array(msgLength), + Data: testutils.GenerateFloat32Array(msgLength), } case schemapb.DataType_Double: insertData.Data[f.FieldID] = &storage.DoubleFieldData{ - Data: generateFloat64Array(msgLength), + Data: testutils.GenerateFloat64Array(msgLength), } case schemapb.DataType_String, schemapb.DataType_VarChar: insertData.Data[f.FieldID] = &storage.StringFieldData{ - Data: generateStringArray(msgLength), + Data: testutils.GenerateStringArray(msgLength), } case schemapb.DataType_Array: insertData.Data[f.FieldID] = &storage.ArrayFieldData{ ElementType: schemapb.DataType_Int32, - Data: generateArrayArray(msgLength), + Data: testutils.GenerateArrayOfIntArray(msgLength), } case schemapb.DataType_JSON: insertData.Data[f.FieldID] = &storage.JSONFieldData{ - Data: generateJSONArray(msgLength), + 
Data: testutils.GenerateJSONArray(msgLength), } case schemapb.DataType_FloatVector: dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim insertData.Data[f.FieldID] = &storage.FloatVectorFieldData{ - Data: generateFloatVectors(msgLength, dim), + Data: testutils.GenerateFloatVectors(msgLength, dim), Dim: dim, } case schemapb.DataType_Float16Vector: dim := simpleFloat16VecField.dim insertData.Data[f.FieldID] = &storage.Float16VectorFieldData{ - Data: generateFloat16Vectors(msgLength, dim), + Data: testutils.GenerateFloat16Vectors(msgLength, dim), Dim: dim, } case schemapb.DataType_BFloat16Vector: dim := simpleFloat16VecField.dim insertData.Data[f.FieldID] = &storage.BFloat16VectorFieldData{ - Data: generateBFloat16Vectors(msgLength, dim), + Data: testutils.GenerateBFloat16Vectors(msgLength, dim), Dim: dim, } case schemapb.DataType_BinaryVector: dim := simpleBinVecField.dim insertData.Data[f.FieldID] = &storage.BinaryVectorFieldData{ - Data: generateBinaryVectors(msgLength, dim), + Data: testutils.GenerateBinaryVectors(msgLength, dim), Dim: dim, } case schemapb.DataType_SparseFloatVector: @@ -929,7 +596,7 @@ func genInsertData(msgLength int, schema *schemapb.CollectionSchema) (*storage.I } // set data for rowID field insertData.Data[rowIDFieldID] = &storage.Int64FieldData{ - Data: generateInt64Array(msgLength), + Data: testutils.GenerateInt64Array(msgLength), } // set data for ts field insertData.Data[timestampFieldID] = &storage.Int64FieldData{ @@ -1018,13 +685,13 @@ func GenAndSaveIndexV2(collectionID, partitionID, segmentID, buildID int64, var dataset *indexcgowrapper.Dataset switch fieldSchema.DataType { case schemapb.DataType_BinaryVector: - dataset = indexcgowrapper.GenBinaryVecDataset(generateBinaryVectors(msgLength, defaultDim)) + dataset = indexcgowrapper.GenBinaryVecDataset(testutils.GenerateBinaryVectors(msgLength, defaultDim)) case schemapb.DataType_FloatVector: - dataset = indexcgowrapper.GenFloatVecDataset(generateFloatVectors(msgLength, defaultDim)) + dataset = indexcgowrapper.GenFloatVecDataset(testutils.GenerateFloatVectors(msgLength, defaultDim)) case schemapb.DataType_Float16Vector: - dataset = indexcgowrapper.GenFloat16VecDataset(generateFloat16Vectors(msgLength, defaultDim)) + dataset = indexcgowrapper.GenFloat16VecDataset(testutils.GenerateFloat16Vectors(msgLength, defaultDim)) case schemapb.DataType_BFloat16Vector: - dataset = indexcgowrapper.GenBFloat16VecDataset(generateBFloat16Vectors(msgLength, defaultDim)) + dataset = indexcgowrapper.GenBFloat16VecDataset(testutils.GenerateBFloat16Vectors(msgLength, defaultDim)) case schemapb.DataType_SparseFloatVector: data := testutils.GenerateSparseFloatVectors(msgLength) dataset = indexcgowrapper.GenSparseFloatVecDataset(&storage.SparseFloatVectorFieldData{ @@ -1091,7 +758,7 @@ func GenAndSaveIndex(collectionID, partitionID, segmentID, fieldID int64, msgLen } defer index.Delete() - err = index.Build(indexcgowrapper.GenFloatVecDataset(generateFloatVectors(msgLength, defaultDim))) + err = index.Build(indexcgowrapper.GenFloatVecDataset(testutils.GenerateFloatVectors(msgLength, defaultDim))) if err != nil { return nil, err } @@ -1400,39 +1067,39 @@ func genInsertMsg(collection *Collection, partitionID, segment int64, numRows in for _, f := range collection.Schema().Fields { switch f.DataType { case schemapb.DataType_Bool: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleBoolField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, 
testutils.GenerateScalarFieldDataWithID(f.DataType, simpleBoolField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int8: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt8Field.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt8Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int16: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt16Field.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt16Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int32: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt32Field.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt32Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Int64: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleInt64Field.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleInt64Field.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Float: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleFloatField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleFloatField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Double: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleDoubleField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleDoubleField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_VarChar: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleVarCharField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleVarCharField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_Array: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleArrayField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleArrayField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_JSON: - fieldsData = append(fieldsData, GenTestScalarFieldData(f.DataType, simpleJSONField.fieldName, f.GetFieldID(), numRows)) + fieldsData = append(fieldsData, testutils.GenerateScalarFieldDataWithID(f.DataType, simpleJSONField.fieldName, f.GetFieldID(), numRows)) case schemapb.DataType_FloatVector: dim := simpleFloatVecField.dim // if no dim specified, use simpleFloatVecField's dim - fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) + fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim)) case schemapb.DataType_BinaryVector: dim := simpleBinVecField.dim // if no dim specified, use simpleFloatVecField's dim - fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) + fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim)) case schemapb.DataType_Float16Vector: dim := simpleFloat16VecField.dim // if no dim specified, use simpleFloatVecField's 
dim - fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) + fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim)) case schemapb.DataType_BFloat16Vector: dim := simpleBFloat16VecField.dim // if no dim specified, use simpleFloatVecField's dim - fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, dim)) + fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, dim)) case schemapb.DataType_SparseFloatVector: - fieldsData = append(fieldsData, GenTestVectorFiledData(f.DataType, f.Name, f.FieldID, numRows, 0)) + fieldsData = append(fieldsData, testutils.GenerateVectorFieldDataWithID(f.DataType, f.Name, f.FieldID, numRows, 0)) default: err := errors.New("data type not supported") return nil, err @@ -1544,192 +1211,10 @@ func genSimpleRetrievePlanExpr(schema *schemapb.CollectionSchema) ([]byte, error } func genFieldData(fieldName string, fieldID int64, fieldType schemapb.DataType, fieldValue interface{}, dim int64) *schemapb.FieldData { - var fieldData *schemapb.FieldData - switch fieldType { - case schemapb.DataType_Bool: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Bool, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: fieldValue.([]bool), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Int32: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Int32, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: fieldValue.([]int32), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Int64: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Int64, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: fieldValue.([]int64), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Float: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Float, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: fieldValue.([]float32), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Double: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Double, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: fieldValue.([]float64), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_VarChar: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_VarChar, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: fieldValue.([]string), - }, - }, - }, - }, - FieldId: fieldID, - } - - case schemapb.DataType_BinaryVector: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_BinaryVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: dim, - Data: 
&schemapb.VectorField_BinaryVector{ - BinaryVector: fieldValue.([]byte), - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_FloatVector: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_FloatVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: dim, - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: fieldValue.([]float32), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Float16Vector: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Float16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: dim, - Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: fieldValue.([]byte), - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_BFloat16Vector: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_BFloat16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: dim, - Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: fieldValue.([]byte), - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_JSON: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_JSON, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_JsonData{ - JsonData: &schemapb.JSONArray{ - Data: fieldValue.([][]byte), - }, - }, - }, - }, - FieldId: fieldID, - } - case schemapb.DataType_Array: - fieldData = &schemapb.FieldData{ - Type: schemapb.DataType_Array, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_ArrayData{ - ArrayData: &schemapb.ArrayArray{ - Data: fieldValue.([]*schemapb.ScalarField), - }, - }, - }, - }, - FieldId: fieldID, - } - default: - log.Error("not supported field type", zap.String("field type", fieldType.String())) + if fieldType < 100 { + return testutils.GenerateScalarFieldDataWithValue(fieldType, fieldName, fieldID, fieldValue) } - - return fieldData + return testutils.GenerateVectorFieldDataWithValue(fieldType, fieldName, fieldID, fieldValue, int(dim)) } func genSearchResultData(nq int64, topk int64, ids []int64, scores []float32, topks []int64) *schemapb.SearchResultData { diff --git a/internal/querynodev2/segments/reduce_test.go b/internal/querynodev2/segments/reduce_test.go index eb58ef61fbc39..21defdd64c122 100644 --- a/internal/querynodev2/segments/reduce_test.go +++ b/internal/querynodev2/segments/reduce_test.go @@ -36,6 +36,7 @@ import ( "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/testutils" "github.com/milvus-io/milvus/pkg/util/typeutil" ) @@ -128,7 +129,7 @@ func (suite *ReduceSuite) TestReduceAllFunc() { nq := int64(10) // TODO: replace below by genPlaceholderGroup(nq) - vec := generateFloatVectors(1, defaultDim) + vec := testutils.GenerateFloatVectors(1, defaultDim) var searchRawData []byte for i, ele := range vec { buf := make([]byte, 4) diff --git a/internal/storage/utils_test.go b/internal/storage/utils_test.go index a1b07f6831d78..f9d1be2d136d8 100644 --- a/internal/storage/utils_test.go +++ b/internal/storage/utils_test.go @@ -20,7 +20,6 @@ import ( "bytes" "encoding/binary" "encoding/json" - "fmt" "math/rand" "strconv" "testing" @@ -435,93 +434,6 @@ func genAllFieldsSchema(fVecDim, bVecDim, f16VecDim, 
bf16VecDim int, withSparse return schema, pkFieldID, fieldIDs } -func generateFloatVectors(numRows, dim int) []float32 { - total := numRows * dim - ret := make([]float32, 0, total) - for i := 0; i < total; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateBinaryVectors(numRows, dim int) []byte { - total := (numRows * dim) / 8 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func generateFloat16Vectors(numRows, dim int) []byte { - total := (numRows * dim) * 2 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func generateBFloat16Vectors(numRows, dim int) []byte { - total := (numRows * dim) * 2 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func generateBoolArray(numRows int) []bool { - ret := make([]bool, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Int()%2 == 0) - } - return ret -} - -func generateInt32Array(numRows int) []int32 { - ret := make([]int32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int32(rand.Int())) - } - return ret -} - -func generateInt64Array(numRows int) []int64 { - ret := make([]int64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, int64(rand.Int())) - } - return ret -} - -func generateFloat32Array(numRows int) []float32 { - ret := make([]float32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func generateFloat64Array(numRows int) []float64 { - ret := make([]float64, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Float64()) - } - return ret -} - -func generateBytesArray(numRows int) [][]byte { - ret := make([][]byte, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, []byte(fmt.Sprint(rand.Int()))) - } - return ret -} - func generateInt32ArrayList(numRows int) []*schemapb.ScalarField { ret := make([]*schemapb.ScalarField, 0, numRows) for i := 0; i < numRows; i++ { @@ -546,22 +458,22 @@ func genRowWithAllFields(fVecDim, bVecDim, f16VecDim, bf16VecDim int) (blob *com var buffer bytes.Buffer switch field.DataType { case schemapb.DataType_FloatVector: - fVec := generateFloatVectors(1, fVecDim) + fVec := testutils.GenerateFloatVectors(1, fVecDim) _ = binary.Write(&buffer, common.Endian, fVec) ret.Value = append(ret.Value, buffer.Bytes()...) row = append(row, fVec) case schemapb.DataType_BinaryVector: - bVec := generateBinaryVectors(1, bVecDim) + bVec := testutils.GenerateBinaryVectors(1, bVecDim) _ = binary.Write(&buffer, common.Endian, bVec) ret.Value = append(ret.Value, buffer.Bytes()...) row = append(row, bVec) case schemapb.DataType_Float16Vector: - f16Vec := generateFloat16Vectors(1, f16VecDim) + f16Vec := testutils.GenerateFloat16Vectors(1, f16VecDim) _ = binary.Write(&buffer, common.Endian, f16Vec) ret.Value = append(ret.Value, buffer.Bytes()...) row = append(row, f16Vec) case schemapb.DataType_BFloat16Vector: - bf16Vec := generateBFloat16Vectors(1, bf16VecDim) + bf16Vec := testutils.GenerateBFloat16Vectors(1, bf16VecDim) _ = binary.Write(&buffer, common.Endian, bf16Vec) ret.Value = append(ret.Value, buffer.Bytes()...) 
row = append(row, bf16Vec) @@ -689,7 +601,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim for idx, field := range schema.Fields { switch field.DataType { case schemapb.DataType_Bool: - data := generateBoolArray(numRows) + data := testutils.GenerateBoolArray(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -709,7 +621,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], d) } case schemapb.DataType_Int8: - data := generateInt32Array(numRows) + data := testutils.GenerateInt32Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -729,7 +641,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], int8(d)) } case schemapb.DataType_Int16: - data := generateInt32Array(numRows) + data := testutils.GenerateInt32Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -749,7 +661,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], int16(d)) } case schemapb.DataType_Int32: - data := generateInt32Array(numRows) + data := testutils.GenerateInt32Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -769,7 +681,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], d) } case schemapb.DataType_Int64: - data := generateInt64Array(numRows) + data := testutils.GenerateInt64Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -790,7 +702,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim } pks = data case schemapb.DataType_Float: - data := generateFloat32Array(numRows) + data := testutils.GenerateFloat32Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -810,7 +722,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], d) } case schemapb.DataType_Double: - data := generateFloat64Array(numRows) + data := testutils.GenerateFloat64Array(numRows) f := &schemapb.FieldData{ Type: field.DataType, FieldName: field.Name, @@ -830,7 +742,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], d) } case schemapb.DataType_FloatVector: - data := generateFloatVectors(numRows, fVecDim) + data := testutils.GenerateFloatVectors(numRows, fVecDim) f := &schemapb.FieldData{ Type: schemapb.DataType_FloatVector, FieldName: field.Name, @@ -851,7 +763,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], data[nrows*fVecDim:(nrows+1)*fVecDim]) } case schemapb.DataType_BinaryVector: - data := generateBinaryVectors(numRows, bVecDim) + data := testutils.GenerateBinaryVectors(numRows, bVecDim) f := &schemapb.FieldData{ Type: schemapb.DataType_BinaryVector, FieldName: field.Name, @@ -870,7 +782,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], data[nrows*bVecDim/8:(nrows+1)*bVecDim/8]) } case schemapb.DataType_Float16Vector: - data := generateFloat16Vectors(numRows, f16VecDim) + data := testutils.GenerateFloat16Vectors(numRows, f16VecDim) f := &schemapb.FieldData{ Type: schemapb.DataType_Float16Vector, FieldName: field.Name, @@ 
-889,7 +801,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim columns[idx] = append(columns[idx], data[nrows*f16VecDim*2:(nrows+1)*f16VecDim*2]) } case schemapb.DataType_BFloat16Vector: - data := generateBFloat16Vectors(numRows, bf16VecDim) + data := testutils.GenerateBFloat16Vectors(numRows, bf16VecDim) f := &schemapb.FieldData{ Type: schemapb.DataType_BFloat16Vector, FieldName: field.Name, @@ -950,7 +862,7 @@ func genColumnBasedInsertMsg(schema *schemapb.CollectionSchema, numRows, fVecDim } case schemapb.DataType_JSON: - data := generateBytesArray(numRows) + data := testutils.GenerateBytesArray(numRows) f := &schemapb.FieldData{ Type: schemapb.DataType_Array, FieldName: field.GetName(), diff --git a/internal/util/testutil/test_util.go b/internal/util/testutil/test_util.go index 1da27f9a64986..a9da8ca3b8ac6 100644 --- a/internal/util/testutil/test_util.go +++ b/internal/util/testutil/test_util.go @@ -1,8 +1,6 @@ package testutil import ( - rand2 "crypto/rand" - "encoding/json" "fmt" "math/rand" "strconv" @@ -108,210 +106,117 @@ func CreateInsertData(schema *schemapb.CollectionSchema, rows int) (*storage.Ins if err != nil { return nil, err } - for _, field := range schema.GetFields() { - if field.GetAutoID() { + for _, f := range schema.GetFields() { + if f.GetAutoID() { continue } - switch field.GetDataType() { + switch f.GetDataType() { case schemapb.DataType_Bool: - boolData := make([]bool, 0) - for i := 0; i < rows; i++ { - boolData = append(boolData, i%3 != 0) - } - insertData.Data[field.GetFieldID()] = &storage.BoolFieldData{Data: boolData} - case schemapb.DataType_Float: - floatData := make([]float32, 0) - for i := 0; i < rows; i++ { - floatData = append(floatData, float32(i/2)) + insertData.Data[f.FieldID] = &storage.BoolFieldData{ + Data: testutils.GenerateBoolArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.FloatFieldData{Data: floatData} - case schemapb.DataType_Double: - doubleData := make([]float64, 0) - for i := 0; i < rows; i++ { - doubleData = append(doubleData, float64(i/5)) - } - insertData.Data[field.GetFieldID()] = &storage.DoubleFieldData{Data: doubleData} case schemapb.DataType_Int8: - int8Data := make([]int8, 0) - for i := 0; i < rows; i++ { - int8Data = append(int8Data, int8(i%256)) + insertData.Data[f.FieldID] = &storage.Int8FieldData{ + Data: testutils.GenerateInt8Array(rows), } - insertData.Data[field.GetFieldID()] = &storage.Int8FieldData{Data: int8Data} case schemapb.DataType_Int16: - int16Data := make([]int16, 0) - for i := 0; i < rows; i++ { - int16Data = append(int16Data, int16(i%65536)) + insertData.Data[f.FieldID] = &storage.Int16FieldData{ + Data: testutils.GenerateInt16Array(rows), } - insertData.Data[field.GetFieldID()] = &storage.Int16FieldData{Data: int16Data} case schemapb.DataType_Int32: - int32Data := make([]int32, 0) - for i := 0; i < rows; i++ { - int32Data = append(int32Data, int32(i%1000)) + insertData.Data[f.FieldID] = &storage.Int32FieldData{ + Data: testutils.GenerateInt32Array(rows), } - insertData.Data[field.GetFieldID()] = &storage.Int32FieldData{Data: int32Data} case schemapb.DataType_Int64: - int64Data := make([]int64, 0) - for i := 0; i < rows; i++ { - int64Data = append(int64Data, int64(i)) + insertData.Data[f.FieldID] = &storage.Int64FieldData{ + Data: testutils.GenerateInt64Array(rows), + } + case schemapb.DataType_Float: + insertData.Data[f.FieldID] = &storage.FloatFieldData{ + Data: testutils.GenerateFloat32Array(rows), + } + case schemapb.DataType_Double: + 
insertData.Data[f.FieldID] = &storage.DoubleFieldData{ + Data: testutils.GenerateFloat64Array(rows), } - insertData.Data[field.GetFieldID()] = &storage.Int64FieldData{Data: int64Data} case schemapb.DataType_BinaryVector: - dim, err := typeutil.GetDim(field) + dim, err := typeutil.GetDim(f) if err != nil { return nil, err } - binVecData := make([]byte, 0) - total := rows * int(dim) / 8 - for i := 0; i < total; i++ { - binVecData = append(binVecData, byte(i%256)) + insertData.Data[f.FieldID] = &storage.BinaryVectorFieldData{ + Data: testutils.GenerateBinaryVectors(rows, int(dim)), + Dim: int(dim), } - insertData.Data[field.GetFieldID()] = &storage.BinaryVectorFieldData{Data: binVecData, Dim: int(dim)} case schemapb.DataType_FloatVector: - dim, err := typeutil.GetDim(field) + dim, err := typeutil.GetDim(f) if err != nil { return nil, err } - floatVecData := make([]float32, 0) - total := rows * int(dim) - for i := 0; i < total; i++ { - floatVecData = append(floatVecData, rand.Float32()) + insertData.Data[f.GetFieldID()] = &storage.FloatVectorFieldData{ + Data: testutils.GenerateFloatVectors(rows, int(dim)), + Dim: int(dim), } - insertData.Data[field.GetFieldID()] = &storage.FloatVectorFieldData{Data: floatVecData, Dim: int(dim)} case schemapb.DataType_Float16Vector: - dim, err := typeutil.GetDim(field) + dim, err := typeutil.GetDim(f) if err != nil { return nil, err } - total := int64(rows) * dim * 2 - float16VecData := make([]byte, total) - _, err = rand2.Read(float16VecData) - if err != nil { - return nil, err + insertData.Data[f.FieldID] = &storage.Float16VectorFieldData{ + Data: testutils.GenerateFloat16Vectors(rows, int(dim)), + Dim: int(dim), } - insertData.Data[field.GetFieldID()] = &storage.Float16VectorFieldData{Data: float16VecData, Dim: int(dim)} case schemapb.DataType_BFloat16Vector: - dim, err := typeutil.GetDim(field) + dim, err := typeutil.GetDim(f) if err != nil { return nil, err } - total := int64(rows) * dim * 2 - bfloat16VecData := make([]byte, total) - _, err = rand2.Read(bfloat16VecData) - if err != nil { - return nil, err + insertData.Data[f.FieldID] = &storage.BFloat16VectorFieldData{ + Data: testutils.GenerateBFloat16Vectors(rows, int(dim)), + Dim: int(dim), } - insertData.Data[field.GetFieldID()] = &storage.BFloat16VectorFieldData{Data: bfloat16VecData, Dim: int(dim)} case schemapb.DataType_SparseFloatVector: sparseFloatVecData := testutils.GenerateSparseFloatVectors(rows) - insertData.Data[field.GetFieldID()] = &storage.SparseFloatVectorFieldData{ + insertData.Data[f.FieldID] = &storage.SparseFloatVectorFieldData{ SparseFloatArray: *sparseFloatVecData, } case schemapb.DataType_String, schemapb.DataType_VarChar: - varcharData := make([]string, 0) - for i := 0; i < rows; i++ { - varcharData = append(varcharData, strconv.Itoa(i)) + insertData.Data[f.FieldID] = &storage.StringFieldData{ + Data: testutils.GenerateStringArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.StringFieldData{Data: varcharData} case schemapb.DataType_JSON: - jsonData := make([][]byte, 0) - for i := 0; i < rows; i++ { - if i%4 == 0 { - v, _ := json.Marshal("{\"a\": \"%s\", \"b\": %d}") - jsonData = append(jsonData, v) - } else if i%4 == 1 { - v, _ := json.Marshal(i) - jsonData = append(jsonData, v) - } else if i%4 == 2 { - v, _ := json.Marshal(float32(i) * 0.1) - jsonData = append(jsonData, v) - } else if i%4 == 3 { - v, _ := json.Marshal(strconv.Itoa(i)) - jsonData = append(jsonData, v) - } + insertData.Data[f.FieldID] = &storage.JSONFieldData{ + Data: 
testutils.GenerateJSONArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.JSONFieldData{Data: jsonData} case schemapb.DataType_Array: - arrayData := make([]*schemapb.ScalarField, 0) - switch field.GetElementType() { + switch f.GetElementType() { case schemapb.DataType_Bool: - for i := 0; i < rows; i++ { - data := []bool{i%2 == 0, i%3 == 0, i%4 == 0} - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_BoolData{ - BoolData: &schemapb.BoolArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfBoolArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: - for i := 0; i < rows; i++ { - data := []int32{int32(i), int32(i + 1), int32(i + 2)} - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_IntData{ - IntData: &schemapb.IntArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfIntArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} case schemapb.DataType_Int64: - for i := 0; i < rows; i++ { - data := []int64{int64(i), int64(i + 1), int64(i + 2)} - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_LongData{ - LongData: &schemapb.LongArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfLongArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} case schemapb.DataType_Float: - for i := 0; i < rows; i++ { - data := []float32{float32(i) * 0.1, float32(i+1) * 0.1, float32(i+2) * 0.1} - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_FloatData{ - FloatData: &schemapb.FloatArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfFloatArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} case schemapb.DataType_Double: - for i := 0; i < rows; i++ { - data := []float64{float64(i) * 0.02, float64(i+1) * 0.02, float64(i+2) * 0.02} - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_DoubleData{ - DoubleData: &schemapb.DoubleArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfDoubleArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} case schemapb.DataType_String, schemapb.DataType_VarChar: - for i := 0; i < rows; i++ { - data := []string{ - randomString(5) + "-" + fmt.Sprintf("%d", i), - randomString(5) + "-" + fmt.Sprintf("%d", i), - randomString(5) + "-" + fmt.Sprintf("%d", i), - } - arrayData = append(arrayData, &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: data, - }, - }, - }) + insertData.Data[f.FieldID] = &storage.ArrayFieldData{ + Data: testutils.GenerateArrayOfStringArray(rows), } - insertData.Data[field.GetFieldID()] = &storage.ArrayFieldData{Data: arrayData} } default: - panic(fmt.Sprintf("unexpected data type: %s", field.GetDataType().String())) + panic(fmt.Sprintf("unsupported data type: %s", f.GetDataType().String())) } } return insertData, nil diff --git a/pkg/go.mod b/pkg/go.mod index 2361c2f259fdd..c3019c2f685f6 100644 --- 
a/pkg/go.mod +++ b/pkg/go.mod @@ -146,6 +146,7 @@ require ( github.com/tklauser/numcpus v0.4.0 // indirect github.com/tmc/grpc-websocket-proxy v0.0.0-20201229170055-e5319fda7802 // indirect github.com/twmb/murmur3 v1.1.3 // indirect + github.com/x448/float16 v0.8.4 // indirect github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect go.etcd.io/bbolt v1.3.6 // indirect diff --git a/pkg/go.sum b/pkg/go.sum index ae3f741149028..8903eeb3ca7c4 100644 --- a/pkg/go.sum +++ b/pkg/go.sum @@ -730,6 +730,8 @@ github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBn github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= diff --git a/pkg/util/testutils/gen_data.go b/pkg/util/testutils/gen_data.go new file mode 100644 index 0000000000000..ce48aad783201 --- /dev/null +++ b/pkg/util/testutils/gen_data.go @@ -0,0 +1,912 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package testutils + +import ( + "encoding/binary" + "encoding/json" + "fmt" + "math" + "math/rand" + "sort" + "strconv" + + "github.com/x448/float16" + + "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/util/funcutil" + "github.com/milvus-io/milvus/pkg/util/typeutil" +) + +const elemCountOfArray = 10 + +// generate data +func GenerateBoolArray(numRows int) []bool { + ret := make([]bool, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, i%2 == 0) + } + return ret +} + +func GenerateInt8Array(numRows int) []int8 { + ret := make([]int8, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, int8(i)) + } + return ret +} + +func GenerateInt16Array(numRows int) []int16 { + ret := make([]int16, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, int16(i)) + } + return ret +} + +func GenerateInt32Array(numRows int) []int32 { + ret := make([]int32, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, int32(i)) + } + return ret +} + +func GenerateInt64Array(numRows int) []int64 { + ret := make([]int64, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, int64(i)) + } + return ret +} + +func GenerateUint64Array(numRows int) []uint64 { + ret := make([]uint64, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, uint64(i)) + } + return ret +} + +func GenerateFloat32Array(numRows int) []float32 { + ret := make([]float32, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, float32(i)) + } + return ret +} + +func GenerateFloat64Array(numRows int) []float64 { + ret := make([]float64, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, float64(i)) + } + return ret +} + +func GenerateVarCharArray(numRows int, maxLen int) []string { + ret := make([]string, numRows) + for i := 0; i < numRows; i++ { + ret[i] = funcutil.RandomString(rand.Intn(maxLen)) + } + return ret +} + +func GenerateStringArray(numRows int) []string { + ret := make([]string, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, strconv.Itoa(i)) + } + return ret +} + +func GenerateJSONArray(numRows int) [][]byte { + ret := make([][]byte, 0, numRows) + for i := 0; i < numRows; i++ { + if i%4 == 0 { + v, _ := json.Marshal("{\"a\": \"%s\", \"b\": %d}") + ret = append(ret, v) + } else if i%4 == 1 { + v, _ := json.Marshal(i) + ret = append(ret, v) + } else if i%4 == 2 { + v, _ := json.Marshal(float32(i) * 0.1) + ret = append(ret, v) + } else if i%4 == 3 { + v, _ := json.Marshal(strconv.Itoa(i)) + ret = append(ret, v) + } + } + return ret +} + +func GenerateArrayOfBoolArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{ + Data: GenerateBoolArray(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateArrayOfIntArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: GenerateInt32Array(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateArrayOfLongArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{ 
+ Data: GenerateInt64Array(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateArrayOfFloatArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{ + Data: GenerateFloat32Array(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateArrayOfDoubleArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{ + Data: GenerateFloat64Array(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateArrayOfStringArray(numRows int) []*schemapb.ScalarField { + ret := make([]*schemapb.ScalarField, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: GenerateStringArray(elemCountOfArray), + }, + }, + }) + } + return ret +} + +func GenerateBytesArray(numRows int) [][]byte { + ret := make([][]byte, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, []byte(fmt.Sprint(rand.Int()))) + } + return ret +} + +func GenerateBinaryVectors(numRows, dim int) []byte { + total := (numRows * dim) / 8 + ret := make([]byte, total) + _, err := rand.Read(ret) + if err != nil { + panic(err) + } + return ret +} + +func GenerateFloatVectors(numRows, dim int) []float32 { + total := numRows * dim + ret := make([]float32, 0, total) + for i := 0; i < total; i++ { + ret = append(ret, rand.Float32()) + } + return ret +} + +func GenerateFloat16Vectors(numRows, dim int) []byte { + total := numRows * dim + ret := make([]byte, total*2) + for i := 0; i < total; i++ { + v := float16.Fromfloat32(rand.Float32()).Bits() + binary.LittleEndian.PutUint16(ret[i*2:], v) + } + return ret +} + +func GenerateBFloat16Vectors(numRows, dim int) []byte { + total := numRows * dim + ret16 := make([]uint16, 0, total) + for i := 0; i < total; i++ { + f := rand.Float32() + bits := math.Float32bits(f) + bits >>= 16 + bits &= 0x7FFF + ret16 = append(ret16, uint16(bits)) + } + ret := make([]byte, len(ret16)*2) + for i, value := range ret16 { + binary.LittleEndian.PutUint16(ret[i*2:], value) + } + return ret +} + +func GenerateBFloat16VectorsWithInvalidData(numRows, dim int) []byte { + total := numRows * dim + ret16 := make([]uint16, 0, total) + for i := 0; i < total; i++ { + var f float32 + if i%2 == 0 { + f = float32(math.NaN()) + } else { + f = float32(math.Inf(1)) + } + bits := math.Float32bits(f) + bits >>= 16 + bits &= 0x7FFF + ret16 = append(ret16, uint16(bits)) + } + ret := make([]byte, len(ret16)*2) + for i, value := range ret16 { + binary.LittleEndian.PutUint16(ret[i*2:], value) + } + return ret +} + +func GenerateFloat16VectorsWithInvalidData(numRows, dim int) []byte { + total := numRows * dim + ret := make([]byte, total*2) + for i := 0; i < total; i++ { + if i%2 == 0 { + binary.LittleEndian.PutUint16(ret[i*2:], uint16(float16.Inf(1))) + } else { + binary.LittleEndian.PutUint16(ret[i*2:], uint16(float16.NaN())) + } + } + return ret +} + +func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray { + dim := 700 + avgNnz := 20 + var contents [][]byte + maxDim := 0 + + uniqueAndSort := func(indices []uint32) []uint32 { + seen := make(map[uint32]bool) + var result []uint32 + for _, value := 
range indices { + if _, ok := seen[value]; !ok { + seen[value] = true + result = append(result, value) + } + } + sort.Slice(result, func(i, j int) bool { + return result[i] < result[j] + }) + return result + } + + for i := 0; i < numRows; i++ { + nnz := rand.Intn(avgNnz*2) + 1 + indices := make([]uint32, 0, nnz) + for j := 0; j < nnz; j++ { + indices = append(indices, uint32(rand.Intn(dim))) + } + indices = uniqueAndSort(indices) + values := make([]float32, 0, len(indices)) + for j := 0; j < len(indices); j++ { + values = append(values, rand.Float32()) + } + if len(indices) > 0 && int(indices[len(indices)-1])+1 > maxDim { + maxDim = int(indices[len(indices)-1]) + 1 + } + rowBytes := typeutil.CreateSparseFloatRow(indices, values) + + contents = append(contents, rowBytes) + } + return &schemapb.SparseFloatArray{ + Dim: int64(maxDim), + Contents: contents, + } +} + +func GenerateHashKeys(numRows int) []uint32 { + ret := make([]uint32, 0, numRows) + for i := 0; i < numRows; i++ { + ret = append(ret, rand.Uint32()) + } + return ret +} + +// generate FieldData +func NewBoolFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Bool, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{ + Data: GenerateBoolArray(numRows), + }, + }, + }, + }, + } +} + +func NewBoolFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Bool, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{ + Data: fieldValue.([]bool), + }, + }, + }, + }, + } +} + +func NewInt8FieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int8, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: GenerateInt32Array(numRows), + }, + }, + }, + }, + } +} + +func NewInt16FieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int16, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: GenerateInt32Array(numRows), + }, + }, + }, + }, + } +} + +func NewInt32FieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int32, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: GenerateInt32Array(numRows), + }, + }, + }, + }, + } +} + +func NewInt32FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int32, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: fieldValue.([]int32), + }, + }, + }, + }, + } +} + +func NewInt64FieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int64, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: 
&schemapb.ScalarField{ + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{ + Data: GenerateInt64Array(numRows), + }, + }, + }, + }, + } +} + +func NewInt64FieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Int64, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{ + Data: fieldValue.([]int64), + }, + }, + }, + }, + } +} + +func NewFloatFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Float, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{ + Data: GenerateFloat32Array(numRows), + }, + }, + }, + }, + } +} + +func NewFloatFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Float, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{ + Data: fieldValue.([]float32), + }, + }, + }, + }, + } +} + +func NewDoubleFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Double, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{ + Data: GenerateFloat64Array(numRows), + }, + }, + }, + }, + } +} + +func NewDoubleFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Double, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{ + Data: fieldValue.([]float64), + }, + }, + }, + }, + } +} + +func NewVarCharFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_VarChar, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: GenerateVarCharArray(numRows, 10), + }, + }, + }, + }, + } +} + +func NewVarCharFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_VarChar, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: fieldValue.([]string), + }, + }, + }, + }, + } +} + +func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_String, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: GenerateStringArray(numRows), + }, + }, + }, + }, + } +} + +func NewJSONFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_JSON, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ 
+ JsonData: &schemapb.JSONArray{ + Data: GenerateJSONArray(numRows), + }, + }, + }, + }, + } +} + +func NewJSONFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_JSON, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: fieldValue.([][]byte), + }, + }, + }, + }, + } +} + +func NewArrayFieldData(fieldName string, numRows int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: GenerateArrayOfIntArray(numRows), + }, + }, + }, + }, + } +} + +func NewArrayFieldDataWithValue(fieldName string, fieldValue interface{}) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: fieldName, + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: fieldValue.([]*schemapb.ScalarField), + }, + }, + }, + }, + } +} + +func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_BinaryVector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_BinaryVector{ + BinaryVector: GenerateBinaryVectors(numRows, dim), + }, + }, + }, + } +} + +func NewBinaryVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_BinaryVector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_BinaryVector{ + BinaryVector: fieldValue.([]byte), + }, + }, + }, + } +} + +func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_FloatVector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_FloatVector{ + FloatVector: &schemapb.FloatArray{ + Data: GenerateFloatVectors(numRows, dim), + }, + }, + }, + }, + } +} + +func NewFloatVectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_FloatVector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_FloatVector{ + FloatVector: &schemapb.FloatArray{ + Data: fieldValue.([]float32), + }, + }, + }, + }, + } +} + +func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Float16Vector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Float16Vector{ + Float16Vector: GenerateFloat16Vectors(numRows, dim), + }, + }, + }, + } +} + +func NewFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_Float16Vector, + FieldName: fieldName, + Field: 
&schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Float16Vector{ + Float16Vector: fieldValue.([]byte), + }, + }, + }, + } +} + +func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_BFloat16Vector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Bfloat16Vector{ + Bfloat16Vector: GenerateBFloat16Vectors(numRows, dim), + }, + }, + }, + } +} + +func NewBFloat16VectorFieldDataWithValue(fieldName string, fieldValue interface{}, dim int) *schemapb.FieldData { + return &schemapb.FieldData{ + Type: schemapb.DataType_BFloat16Vector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: int64(dim), + Data: &schemapb.VectorField_Bfloat16Vector{ + Bfloat16Vector: fieldValue.([]byte), + }, + }, + }, + } +} + +func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData { + sparseData := GenerateSparseFloatVectors(numRows) + return &schemapb.FieldData{ + Type: schemapb.DataType_SparseFloatVector, + FieldName: fieldName, + Field: &schemapb.FieldData_Vectors{ + Vectors: &schemapb.VectorField{ + Dim: sparseData.Dim, + Data: &schemapb.VectorField_SparseFloatVector{ + SparseFloatVector: &schemapb.SparseFloatArray{ + Dim: sparseData.Dim, + Contents: sparseData.Contents, + }, + }, + }, + }, + } +} + +func GenerateScalarFieldData(dType schemapb.DataType, fieldName string, numRows int) *schemapb.FieldData { + switch dType { + case schemapb.DataType_Bool: + return NewBoolFieldData(fieldName, numRows) + case schemapb.DataType_Int8: + return NewInt8FieldData(fieldName, numRows) + case schemapb.DataType_Int16: + return NewInt16FieldData(fieldName, numRows) + case schemapb.DataType_Int32: + return NewInt32FieldData(fieldName, numRows) + case schemapb.DataType_Int64: + return NewInt64FieldData(fieldName, numRows) + case schemapb.DataType_Float: + return NewFloatFieldData(fieldName, numRows) + case schemapb.DataType_Double: + return NewDoubleFieldData(fieldName, numRows) + case schemapb.DataType_VarChar: + return NewVarCharFieldData(fieldName, numRows) + case schemapb.DataType_String: + return NewStringFieldData(fieldName, numRows) + case schemapb.DataType_Array: + return NewArrayFieldData(fieldName, numRows) + case schemapb.DataType_JSON: + return NewJSONFieldData(fieldName, numRows) + default: + panic("unsupported data type") + } +} + +func GenerateScalarFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int) *schemapb.FieldData { + fieldData := GenerateScalarFieldData(dType, fieldName, numRows) + fieldData.FieldId = fieldID + return fieldData +} + +func GenerateScalarFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}) *schemapb.FieldData { + var fieldData *schemapb.FieldData + switch dType { + case schemapb.DataType_Bool: + fieldData = NewBoolFieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_Int32: + fieldData = NewInt32FieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_Int64: + fieldData = NewInt64FieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_Float: + fieldData = NewFloatFieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_Double: + fieldData = NewDoubleFieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_VarChar: + fieldData = 
NewVarCharFieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_Array: + fieldData = NewArrayFieldDataWithValue(fieldName, fieldValue) + case schemapb.DataType_JSON: + fieldData = NewJSONFieldDataWithValue(fieldName, fieldValue) + default: + panic("unsupported data type") + } + fieldData.FieldId = fieldID + return fieldData +} + +func GenerateVectorFieldData(dType schemapb.DataType, fieldName string, numRows int, dim int) *schemapb.FieldData { + switch dType { + case schemapb.DataType_BinaryVector: + return NewBinaryVectorFieldData(fieldName, numRows, dim) + case schemapb.DataType_FloatVector: + return NewFloatVectorFieldData(fieldName, numRows, dim) + case schemapb.DataType_Float16Vector: + return NewFloat16VectorFieldData(fieldName, numRows, dim) + case schemapb.DataType_BFloat16Vector: + return NewBFloat16VectorFieldData(fieldName, numRows, dim) + case schemapb.DataType_SparseFloatVector: + return NewSparseFloatVectorFieldData(fieldName, numRows) + default: + panic("unsupported data type") + } +} + +func GenerateVectorFieldDataWithID(dType schemapb.DataType, fieldName string, fieldID int64, numRows int, dim int) *schemapb.FieldData { + fieldData := GenerateVectorFieldData(dType, fieldName, numRows, dim) + fieldData.FieldId = fieldID + return fieldData +} + +func GenerateVectorFieldDataWithValue(dType schemapb.DataType, fieldName string, fieldID int64, fieldValue interface{}, dim int) *schemapb.FieldData { + var fieldData *schemapb.FieldData + switch dType { + case schemapb.DataType_BinaryVector: + fieldData = NewBinaryVectorFieldDataWithValue(fieldName, fieldValue, dim) + case schemapb.DataType_FloatVector: + fieldData = NewFloatVectorFieldDataWithValue(fieldName, fieldValue, dim) + case schemapb.DataType_Float16Vector: + fieldData = NewFloat16VectorFieldDataWithValue(fieldName, fieldValue, dim) + case schemapb.DataType_BFloat16Vector: + fieldData = NewBFloat16VectorFieldDataWithValue(fieldName, fieldValue, dim) + default: + panic("unsupported data type") + } + fieldData.FieldId = fieldID + return fieldData +} diff --git a/pkg/util/testutils/sparse_test_utils.go b/pkg/util/testutils/sparse_test_utils.go deleted file mode 100644 index 488a84f7caa6d..0000000000000 --- a/pkg/util/testutils/sparse_test_utils.go +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the LF AI & Data foundation under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package testutils - -import ( - "math/rand" - "sort" - - "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" - "github.com/milvus-io/milvus/pkg/util/typeutil" -) - -func GenerateSparseFloatVectors(numRows int) *schemapb.SparseFloatArray { - dim := 700 - avgNnz := 20 - var contents [][]byte - maxDim := 0 - - uniqueAndSort := func(indices []uint32) []uint32 { - seen := make(map[uint32]bool) - var result []uint32 - for _, value := range indices { - if _, ok := seen[value]; !ok { - seen[value] = true - result = append(result, value) - } - } - sort.Slice(result, func(i, j int) bool { - return result[i] < result[j] - }) - return result - } - - for i := 0; i < numRows; i++ { - nnz := rand.Intn(avgNnz*2) + 1 - indices := make([]uint32, 0, nnz) - for j := 0; j < nnz; j++ { - indices = append(indices, uint32(rand.Intn(dim))) - } - indices = uniqueAndSort(indices) - values := make([]float32, 0, len(indices)) - for j := 0; j < len(indices); j++ { - values = append(values, rand.Float32()) - } - if len(indices) > 0 && int(indices[len(indices)-1])+1 > maxDim { - maxDim = int(indices[len(indices)-1]) + 1 - } - rowBytes := typeutil.CreateSparseFloatRow(indices, values) - - contents = append(contents, rowBytes) - } - return &schemapb.SparseFloatArray{ - Dim: int64(maxDim), - Contents: contents, - } -} diff --git a/tests/integration/util_insert.go b/tests/integration/util_insert.go index 8ee642bde4f02..4c1aebd39993e 100644 --- a/tests/integration/util_insert.go +++ b/tests/integration/util_insert.go @@ -18,14 +18,8 @@ package integration import ( "context" - "encoding/binary" - "fmt" - "math" - "math/rand" "time" - "github.com/x448/float16" - "github.com/milvus-io/milvus-proto/go-api/v2/milvuspb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/pkg/util/testutils" @@ -120,97 +114,27 @@ func NewVarCharSameFieldData(fieldName string, numRows int, value string) *schem } func NewStringFieldData(fieldName string, numRows int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_Int64, - FieldName: fieldName, - Field: &schemapb.FieldData_Scalars{ - Scalars: &schemapb.ScalarField{ - Data: &schemapb.ScalarField_StringData{ - StringData: &schemapb.StringArray{ - Data: GenerateStringArray(numRows), - }, - }, - }, - }, - } + return testutils.NewStringFieldData(fieldName, numRows) } func NewFloatVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_FloatVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_FloatVector{ - FloatVector: &schemapb.FloatArray{ - Data: GenerateFloatVectors(numRows, dim), - }, - }, - }, - }, - } + return testutils.NewFloatVectorFieldData(fieldName, numRows, dim) } func NewFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_Float16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Float16Vector{ - Float16Vector: GenerateFloat16Vectors(numRows, dim), - }, - }, - }, - } + return testutils.NewFloat16VectorFieldData(fieldName, numRows, dim) } func NewBFloat16VectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_BFloat16Vector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: 
&schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_Bfloat16Vector{ - Bfloat16Vector: GenerateBFloat16Vectors(numRows, dim), - }, - }, - }, - } + return testutils.NewBFloat16VectorFieldData(fieldName, numRows, dim) } func NewBinaryVectorFieldData(fieldName string, numRows, dim int) *schemapb.FieldData { - return &schemapb.FieldData{ - Type: schemapb.DataType_BinaryVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: int64(dim), - Data: &schemapb.VectorField_BinaryVector{ - BinaryVector: GenerateBinaryVectors(numRows, dim), - }, - }, - }, - } + return testutils.NewBinaryVectorFieldData(fieldName, numRows, dim) } func NewSparseFloatVectorFieldData(fieldName string, numRows int) *schemapb.FieldData { - sparseVecs := GenerateSparseFloatArray(numRows) - return &schemapb.FieldData{ - Type: schemapb.DataType_SparseFloatVector, - FieldName: fieldName, - Field: &schemapb.FieldData_Vectors{ - Vectors: &schemapb.VectorField{ - Dim: sparseVecs.Dim, - Data: &schemapb.VectorField_SparseFloatVector{ - SparseFloatVector: sparseVecs, - }, - }, - }, - } + return testutils.NewSparseFloatVectorFieldData(fieldName, numRows) } func GenerateInt64Array(numRows int, start int64) []int64 { @@ -237,68 +161,10 @@ func GenerateSameStringArray(numRows int, value string) []string { return ret } -func GenerateStringArray(numRows int) []string { - ret := make([]string, numRows) - for i := 0; i < numRows; i++ { - ret[i] = fmt.Sprintf("%d", i) - } - return ret -} - -func GenerateFloatVectors(numRows, dim int) []float32 { - total := numRows * dim - ret := make([]float32, 0, total) - for i := 0; i < total; i++ { - ret = append(ret, rand.Float32()) - } - return ret -} - -func GenerateBinaryVectors(numRows, dim int) []byte { - total := (numRows * dim) / 8 - ret := make([]byte, total) - _, err := rand.Read(ret) - if err != nil { - panic(err) - } - return ret -} - -func GenerateFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret := make([]byte, total*2) - for i := 0; i < total; i++ { - v := float16.Fromfloat32(rand.Float32()).Bits() - binary.LittleEndian.PutUint16(ret[i*2:], v) - } - return ret -} - -func GenerateBFloat16Vectors(numRows, dim int) []byte { - total := numRows * dim - ret16 := make([]uint16, 0, total) - for i := 0; i < total; i++ { - f := rand.Float32() - bits := math.Float32bits(f) - bits >>= 16 - bits &= 0x7FFF - ret16 = append(ret16, uint16(bits)) - } - ret := make([]byte, total*2) - for i, value := range ret16 { - binary.LittleEndian.PutUint16(ret[i*2:], value) - } - return ret -} - func GenerateSparseFloatArray(numRows int) *schemapb.SparseFloatArray { return testutils.GenerateSparseFloatVectors(numRows) } func GenerateHashKeys(numRows int) []uint32 { - ret := make([]uint32, 0, numRows) - for i := 0; i < numRows; i++ { - ret = append(ret, rand.Uint32()) - } - return ret + return testutils.GenerateHashKeys(numRows) }
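For reviewers who want to see the new surface in use, here is a minimal caller-side sketch of the unified generators added in pkg/util/testutils. Everything specific to the example (the package name, field names, the field ID 102, row count, and dim) is invented for illustration; only the testutils functions and schemapb types are the ones introduced or referenced by this patch.

// Hypothetical example file; not part of this patch.
package example

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/pkg/util/testutils"
)

// buildExampleFieldData assembles column data for an imaginary collection
// using the unified helpers instead of per-package generate* functions.
func buildExampleFieldData() []*schemapb.FieldData {
	const (
		numRows = 8
		dim     = 16
	)
	return []*schemapb.FieldData{
		// Scalar columns: one helper call per field, selected by data type.
		testutils.GenerateScalarFieldData(schemapb.DataType_Int64, "pk", numRows),
		testutils.GenerateScalarFieldData(schemapb.DataType_VarChar, "name", numRows),
		// Vector columns: dim is passed explicitly; the sparse variant ignores it.
		testutils.GenerateVectorFieldData(schemapb.DataType_FloatVector, "vec", numRows, dim),
		// Caller-supplied values plus an explicit field ID instead of generated data.
		testutils.GenerateScalarFieldDataWithValue(schemapb.DataType_Int32, "age", 102,
			[]int32{10, 20, 30, 40, 50, 60, 70, 80}),
	}
}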
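Likewise, the storage-level path keeps its entry point: testutil.CreateInsertData now fills its columns through the same shared generators. The rough sketch below shows how a test might call it; the schema (field IDs, names, and the "dim" type param) is an assumed minimal example rather than code taken from the patch.

// Hypothetical example file; not part of this patch.
package example

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/internal/util/testutil"
)

// buildExampleInsertData creates storage.InsertData for a two-field schema.
// CreateInsertData skips auto-ID fields, so the primary key here is declared
// without AutoID and receives generated int64 values.
func buildExampleInsertData() (*storage.InsertData, error) {
	schema := &schemapb.CollectionSchema{
		Name: "example_collection",
		Fields: []*schemapb.FieldSchema{
			{FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
			{FieldID: 101, Name: "vec", DataType: schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "8"}}},
		},
	}
	return testutil.CreateInsertData(schema, 16)
}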