enhance: provide a static checking method for index config #888

Merged
merged 2 commits on Oct 15, 2024
2 changes: 2 additions & 0 deletions include/knowhere/config.h
@@ -501,6 +501,7 @@ const float defaultRangeFilter = 1.0f / 0.0;

class BaseConfig : public Config {
public:
CFG_INT dim;  // only used for config verification
liliu-z marked this conversation as resolved.

Collaborator:

dim could be larger than the int32 max for sparse. should use a larger type

Collaborator:

currently sparse dim range is uint32max

Collaborator:

> currently sparse dim range is uint32max

Plz create an issue to track

Collaborator:

chatted offline, fixed in #903

CFG_STRING metric_type;
CFG_INT k;
CFG_INT num_build_thread;
@@ -535,6 +536,7 @@ class BaseConfig : public Config {
CFG_FLOAT bm25_b;
CFG_FLOAT bm25_avgdl;
KNOHWERE_DECLARE_CONFIG(BaseConfig) {
KNOWHERE_CONFIG_DECLARE_FIELD(dim).allow_empty_without_default().description("vector dim").for_train();
KNOWHERE_CONFIG_DECLARE_FIELD(metric_type)
.set_default("L2")
.description("metric type")
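A side note on the dim discussion in the review thread above: the concern is purely about integer width. A minimal standalone sketch, assuming CFG_INT is backed by a 32-bit signed integer (this diff does not show its definition):

#include <cstdint>
#include <iostream>
#include <limits>

int main() {
    // A sparse vector may legally use a dim up to uint32 max (~4.29e9)...
    uint32_t sparse_dim = std::numeric_limits<uint32_t>::max();
    // ...which does not fit into a 32-bit signed config field and wraps to -1 on two's-complement targets.
    int32_t as_cfg_int = static_cast<int32_t>(sparse_dim);
    std::cout << "sparse dim: " << sparse_dim << ", stored as int32: " << as_cfg_int << "\n";
    return 0;
}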
54 changes: 54 additions & 0 deletions include/knowhere/feature.h
@@ -0,0 +1,54 @@
// Copyright (C) 2019-2023 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#ifndef FEATURE_H
#define FEATURE_H

// these features have been reported to the outside (milvus); please sync the feature codes when they need to be changed.
namespace knowhere::feature {
// vector datatype support : binary
constexpr uint64_t BINARY = 1UL << 0;
// vector datatype support : float32
constexpr uint64_t FLOAT32 = 1UL << 1;
// vector datatype support : fp16
constexpr uint64_t FP16 = 1UL << 2;
// vector datatype support : bf16
constexpr uint64_t BF16 = 1UL << 3;
// vector datatype support : sparse_float32
constexpr uint64_t SPARSE_FLOAT32 = 1UL << 4;

// This flag indicates that there is no need to create any index structure (build stage can be skipped)
constexpr uint64_t NO_TRAIN = 1UL << 16;
// This flag indicates that the index defaults to KNN search, meaning the recall is 100% (no precision loss compared
// with original data)
constexpr uint64_t KNN = 1UL << 17;
// This flag indicates that the index search stage will be performed on GPU (requires GPU devices)
constexpr uint64_t GPU = 1UL << 18;
// This flag indicates that the index supports using mmap to manage its main memory, which can significantly improve
// the capacity
constexpr uint64_t MMAP = 1UL << 19;
// This flag indicates that the index supports using materialized views to accelerate filtered search
constexpr uint64_t MV = 1UL << 20;
// This flag indicates that the index needs disk access during the search stage
constexpr uint64_t DISK = 1UL << 21;

constexpr uint64_t ALL_TYPE = BINARY | FLOAT32 | FP16 | BF16 | SPARSE_FLOAT32;
constexpr uint64_t ALL_DENSE_TYPE = BINARY | FLOAT32 | FP16 | BF16;
constexpr uint64_t ALL_DENSE_FLOAT_TYPE = FLOAT32 | FP16 | BF16;

constexpr uint64_t NO_TRAIN_INDEX = NO_TRAIN;
constexpr uint64_t GPU_KNN_FLOAT_INDEX = FLOAT32 | GPU | KNN;
constexpr uint64_t GPU_ANN_FLOAT_INDEX = FLOAT32 | GPU;
} // namespace knowhere::feature
#endif /* FEATURE_H */
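Since the flags above are plain uint64_t bits, an index's capabilities compose with bitwise OR and are queried with bitwise AND. A small sketch (the composed mask below is hypothetical; only the flag names come from feature.h):

#include <cstdint>

#include "knowhere/feature.h"

// Hypothetical capability mask: all dense float types, no train stage, mmap support.
constexpr uint64_t kMyIndexFeatures = knowhere::feature::ALL_DENSE_FLOAT_TYPE |
                                      knowhere::feature::NO_TRAIN |
                                      knowhere::feature::MMAP;

// Capability queries are single AND operations.
static_assert((kMyIndexFeatures & knowhere::feature::FP16) != 0, "fp16 is part of ALL_DENSE_FLOAT_TYPE");
static_assert((kMyIndexFeatures & knowhere::feature::GPU) == 0, "no GPU requirement was declared");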
4 changes: 3 additions & 1 deletion include/knowhere/index/index.h
@@ -203,8 +203,10 @@ class Index {
if (node == nullptr)
return;
node->DecRef();
if (!node->Ref())
if (!node->Ref()) {
delete node;
node = nullptr;
}
}

private:
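The extra braces and the node = nullptr assignment above make the release path safe to call more than once through the same handle. A minimal standalone sketch of that pattern (names here are illustrative, not knowhere's internal API):

#include <atomic>

// Illustrative release helper following the pattern in the diff above:
// decrement, and only when the count drops to zero delete and null the pointer.
struct RefCounted {
    std::atomic<int> refs{1};
    void DecRef() { refs.fetch_sub(1, std::memory_order_acq_rel); }
    int Ref() const { return refs.load(std::memory_order_acquire); }
};

inline void Release(RefCounted*& node) {
    if (node == nullptr)
        return;
    node->DecRef();
    if (!node->Ref()) {
        delete node;
        node = nullptr;  // prevents a double delete through this handle
    }
}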
93 changes: 79 additions & 14 deletions include/knowhere/index/index_factory.h
@@ -27,12 +27,22 @@ class IndexFactory {
template <typename DataType>
expected<Index<IndexNode>>
Create(const std::string& name, const int32_t& version, const Object& object = nullptr);

template <typename DataType>
const IndexFactory&
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func);
Register(const std::string& name, std::function<Index<IndexNode>(const int32_t&, const Object&)> func,
const uint64_t features);

static IndexFactory&
Instance();
typedef std::tuple<std::set<std::pair<std::string, VecType>>, std::set<std::string>> GlobalIndexTable;

bool
FeatureCheck(const std::string& name, uint64_t feature) const;

static const std::map<std::string, uint64_t>&
GetIndexFeatures();

static GlobalIndexTable&
StaticIndexTableInstance();

@@ -47,48 +57,103 @@
}
std::function<T1(const int32_t&, const Object&)> fun_value;
};
typedef std::map<std::string, std::unique_ptr<FunMapValueBase>> FuncMap;
using FuncMap = std::map<std::string, std::unique_ptr<FunMapValueBase>>;
using FeatureMap = std::map<std::string, uint64_t>;
IndexFactory();

static FuncMap&
MapInstance();
};

static FeatureMap&
FeatureMapInstance();
};
// register the index adapter corresponding to indexType
#define KNOWHERE_FACTOR_CONCAT(x, y) index_factory_ref_##x##y
#define KNOWHERE_STATIC_CONCAT(x, y) index_static_ref_##x##y
#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type, condition, features) \
const IndexFactory& KNOWHERE_FACTOR_CONCAT(name, data_type) = \
condition ? IndexFactory::Instance().Register<data_type>(#name, func, features) : IndexFactory::Instance();

// register some static methods that are bound to indexType
#define KNOWHERE_STATIC_CONCAT(x, y) index_static_ref_##x##y
#define KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ...) \
const IndexStaticFaced<data_type>& KNOWHERE_STATIC_CONCAT(name, data_type) = \
IndexStaticFaced<data_type>::Instance().RegisterStaticFunc<index_node<data_type, ##__VA_ARGS__>>(#name);

#define KNOWHERE_REGISTER_GLOBAL(name, func, data_type) \
const IndexFactory& KNOWHERE_FACTOR_CONCAT(name, data_type) = \
IndexFactory::Instance().Register<data_type>(#name, func)

#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, ...) \
// register the index implementation along with its associated features. Please carefully check the types and features
// supported by the index; the two must be consistent, otherwise the registration will be skipped
#define KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ##__VA_ARGS__) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
(static_cast<Index<index_node<data_type, ##__VA_ARGS__>> (*)(const int32_t&, const Object&)>( \
&Index<index_node<data_type, ##__VA_ARGS__>>::Create)), \
data_type)
#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, ...) \
data_type, typeCheck<data_type>(features), features)

#define KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, data_type, features, ...) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type, ##__VA_ARGS__) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeDataMockWrapper<data_type>>::Create( \
std::make_unique<index_node<MockData<data_type>::type, ##__VA_ARGS__>>(version, object))); \
}, \
data_type)
#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, thread_size) \
data_type, typeCheck<data_type>(features), features)

// Below are grouped registration macros for batch-registering indexes that support multiple data types.
// Please review carefully and choose with caution

// register vector index supporting ALL_TYPE(binary, bf16, fp16, fp32, sparse_float32) data types
#define KNOWHERE_SIMPLE_REGISTER_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_TYPE), ##__VA_ARGS__);

// register vector index supporting sparse_float32
#define KNOWHERE_SIMPLE_REGISTER_SPARSE_FLOAT_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::SPARSE_FLOAT32), \
##__VA_ARGS__);

// register vector index supporting ALL_DENSE_TYPE(binary, bf16, fp16, fp32) data types
#define KNOWHERE_SIMPLE_REGISTER_DENSE_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_TYPE), \
##__VA_ARGS__);
// register vector index supporting binary data type
#define KNOWHERE_SIMPLE_REGISTER_DENSE_BIN_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bin1, (features | knowhere::feature::BINARY), ##__VA_ARGS__);

// register vector index supporting ALL_DENSE_FLOAT_TYPE(float32, bf16, fp16) data types
#define KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);
// register vector index supporting ALL_DENSE_FLOAT_TYPE(float32, bf16, fp16) data types, with bf16 and fp16 mocked
#define KNOWHERE_MOCK_REGISTER_DENSE_FLOAT_ALL_GLOBAL(name, index_node, features, ...) \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, bf16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_MOCK_REGISTER_GLOBAL(name, index_node, fp16, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__); \
KNOWHERE_SIMPLE_REGISTER_GLOBAL(name, index_node, fp32, (features | knowhere::feature::ALL_DENSE_FLOAT_TYPE), \
##__VA_ARGS__);

#define KNOWHERE_REGISTER_GLOBAL_WITH_THREAD_POOL(name, index_node, data_type, features, thread_size) \
KNOWHERE_REGISTER_STATIC(name, index_node, data_type) \
KNOWHERE_REGISTER_GLOBAL( \
name, \
[](const int32_t& version, const Object& object) { \
return (Index<IndexNodeThreadPoolWrapper>::Create( \
std::make_unique<index_node<MockData<data_type>::type>>(version, object), thread_size)); \
}, \
data_type)
data_type, typeCheck<data_type>(features), features)
#define KNOWHERE_SET_STATIC_GLOBAL_INDEX_TABLE(table_index, name, index_table) \
static int name = []() -> int { \
auto& static_index_table = std::get<table_index>(IndexFactory::StaticIndexTableInstance()); \
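To make the macro layering concrete, a registration might look like the sketch below; the index name and node class are made up, and only the macros, typeCheck, and the feature flags come from this diff. Because KNOWHERE_REGISTER_GLOBAL now takes typeCheck<data_type>(features) as its condition, a data type missing from the feature mask is skipped at registration time rather than failing later.

// Hypothetical registration (MY_IVF_FLAT / MyIvfFlatNode are placeholders, not real knowhere symbols):
// registers the index for bf16, fp16 and fp32 and advertises KNN plus mmap support.
KNOWHERE_SIMPLE_REGISTER_DENSE_FLOAT_ALL_GLOBAL(MY_IVF_FLAT, MyIvfFlatNode,
                                                knowhere::feature::KNN | knowhere::feature::MMAP);

// Callers can later ask the factory whether the registered index carries a feature:
inline bool
MyIvfFlatSupportsMmap() {
    return knowhere::IndexFactory::Instance().FeatureCheck("MY_IVF_FLAT", knowhere::feature::MMAP);
}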
14 changes: 14 additions & 0 deletions include/knowhere/index/index_static.h
@@ -47,6 +47,7 @@ struct Resource {
DEFINE_HAS_STATIC_FUNC(StaticCreateConfig)
DEFINE_HAS_STATIC_FUNC(StaticEstimateLoadResource)
DEFINE_HAS_STATIC_FUNC(StaticHasRawData)
DEFINE_HAS_STATIC_FUNC(StaticConfigCheck)

template <typename DataType>
class IndexStaticFaced {
@@ -61,6 +62,10 @@ class IndexStaticFaced {
static std::unique_ptr<BaseConfig>
CreateConfig(const knowhere::IndexType& indexType, const knowhere::IndexVersion& version);

static knowhere::Status
ConfigCheck(const knowhere::IndexType& indexType, const knowhere::IndexVersion& version,
const knowhere::Json& params, std::string& msg);

/**
* @brief estimate the memory and disk resource usage before index loading by index params
* @param indexType vector index type (HNSW, IVFFLAT, etc)
@@ -103,6 +108,11 @@ class IndexStaticFaced {
staticHasRawDataMap[indexType] = VecIndexNode::StaticHasRawData;
}

if constexpr (has_static_StaticConfigCheck<VecIndexNode,
decltype(IndexStaticFaced<DataType>::InternalConfigCheck)>::value) {
staticConfigCheckMap[indexType] = VecIndexNode::StaticConfigCheck;
}

return Instance();
}

@@ -117,12 +127,16 @@
static bool
InternalStaticHasRawData(const knowhere::BaseConfig& config, const IndexVersion& version);

static knowhere::Status
InternalConfigCheck(const knowhere::BaseConfig& config, const IndexVersion& version, std::string& msg);

static std::unique_ptr<BaseConfig>
InternalStaticCreateConfig();

std::map<std::string, std::function<decltype(InternalStaticCreateConfig)>> staticCreateConfigMap;
std::map<std::string, std::function<decltype(InternalStaticHasRawData)>> staticHasRawDataMap;
std::map<std::string, std::function<decltype(InternalEstimateLoadResource)>> staticEstimateLoadResourceMap;
std::map<std::string, std::function<decltype(InternalConfigCheck)>> staticConfigCheckMap;
};

} // namespace knowhere
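For orientation, the sketch below shows the intended call path for the new static check: an index node that defines StaticConfigCheck with the matching signature is picked up by RegisterStaticFunc through the has_static_StaticConfigCheck trait, and callers validate parameters through IndexStaticFaced without instantiating the index. The index name, data type tag, and error handling here are assumptions, not code from this PR.

#include <string>

#include "knowhere/index/index_static.h"

// Hypothetical caller-side validation: check index params before any build/load happens.
knowhere::Status
ValidateIndexParams(const knowhere::Json& params, const knowhere::IndexVersion& version) {
    std::string msg;
    // "HNSW" is assumed to be a registered index type; fp32 is assumed to be the float32 tag type.
    auto status = knowhere::IndexStaticFaced<knowhere::fp32>::ConfigCheck("HNSW", version, params, msg);
    if (status != knowhere::Status::success) {
        // msg carries the human-readable reason produced by the index's StaticConfigCheck.
    }
    return status;
}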
21 changes: 21 additions & 0 deletions include/knowhere/operands.h
@@ -19,6 +19,8 @@
#include <cstdint>
#include <cstring>

#include "feature.h"

namespace {
union fp32_bits {
uint32_t as_bits;
@@ -143,6 +145,25 @@ struct bf16 {
}
};

template <typename T>
bool
typeCheck(uint64_t features) {
if constexpr (std::is_same_v<T, bin1>) {
return features & knowhere::feature::BINARY;
}
if constexpr (std::is_same_v<T, fp16>) {
return features & knowhere::feature::FP16;
}
if constexpr (std::is_same_v<T, bf16>) {
return features & knowhere::feature::BF16;
}
// TODO : add sparse_fp32 data type
if constexpr (std::is_same_v<T, fp32>) {
return (features & knowhere::feature::FLOAT32) || (features & knowhere::feature::SPARSE_FLOAT32);
}
return false;
}

template <typename InType, typename... Types>
using TypeMatch = std::bool_constant<(... | std::is_same_v<InType, Types>)>;
template <typename InType>
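typeCheck<T>(features) dispatches on the element type at compile time and tests the feature mask at run time; this is what the registration macros pass as their condition. A few illustrative calls (assuming, as in those macros, that the tag types and typeCheck are visible unqualified here):

// Illustrative expectations for typeCheck, using only flags defined in feature.h.
inline void
TypeCheckExamples() {
    bool a = typeCheck<fp16>(knowhere::feature::ALL_DENSE_TYPE);         // true: the FP16 bit is set
    bool b = typeCheck<fp16>(knowhere::feature::BINARY);                 // false: the mask lacks FP16
    bool c = typeCheck<fp32>(knowhere::feature::SPARSE_FLOAT32);         // true: fp32 also stands in for sparse for now
    bool d = typeCheck<bin1>(knowhere::feature::ALL_DENSE_FLOAT_TYPE);   // false: no BINARY bit in the mask
    (void)a; (void)b; (void)c; (void)d;
}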