From a75bb85f3acfda2a768be6fa6070e35fe7c5565a Mon Sep 17 00:00:00 2001 From: Bingyi Sun Date: Sat, 12 Oct 2024 15:04:52 +0800 Subject: [PATCH] feat: support chunked column for sealed segment (#35764) This PR splits sealed segment to chunked data to avoid unnecessary memory copy and save memory usage when loading segments so that loading can be accelerated. To support rollback to previous version, we add an option `multipleChunkedEnable` which is false by default. Signed-off-by: sunby --- configs/milvus.yaml | 1 + .../core/src/bitset/detail/element_wise.h | 1 + internal/core/src/common/Chunk.cpp | 29 +- internal/core/src/common/Chunk.h | 156 +- internal/core/src/common/ChunkTarget.cpp | 44 + internal/core/src/common/ChunkTarget.h | 43 +- internal/core/src/common/ChunkWriter.cpp | 257 +- internal/core/src/common/ChunkWriter.h | 42 +- internal/core/src/common/Common.h | 15 +- internal/core/src/common/FieldData.h | 16 + internal/core/src/common/FieldDataInterface.h | 15 - internal/core/src/common/type_c.h | 1 + .../core/src/exec/expression/CompareExpr.cpp | 288 ++- .../core/src/exec/expression/CompareExpr.h | 184 +- internal/core/src/exec/expression/Expr.h | 142 +- .../core/src/exec/expression/UnaryExpr.cpp | 30 +- .../operator/groupby/SearchGroupByOperator.h | 7 +- internal/core/src/index/SkipIndex.cpp | 25 - internal/core/src/index/SkipIndex.h | 29 +- internal/core/src/mmap/ChunkedColumn.h | 427 ++++ internal/core/src/mmap/Column.h | 153 +- internal/core/src/mmap/Types.h | 25 +- internal/core/src/query/SearchOnSealed.cpp | 93 + internal/core/src/query/SearchOnSealed.h | 10 + .../src/segcore/ChunkedSegmentSealedImpl.cpp | 2222 +++++++++++++++++ .../src/segcore/ChunkedSegmentSealedImpl.h | 392 +++ internal/core/src/segcore/ConcurrentVector.h | 6 +- internal/core/src/segcore/InsertRecord.h | 50 +- .../core/src/segcore/SegmentGrowingImpl.cpp | 2 +- .../core/src/segcore/SegmentGrowingImpl.h | 18 +- .../core/src/segcore/SegmentInterface.cpp | 8 - 
internal/core/src/segcore/SegmentInterface.h | 58 +- internal/core/src/segcore/SegmentSealed.h | 7 +- .../core/src/segcore/SegmentSealedImpl.cpp | 118 +- internal/core/src/segcore/SegmentSealedImpl.h | 60 +- internal/core/src/segcore/Utils.cpp | 39 +- internal/core/src/segcore/Utils.h | 5 +- internal/core/src/segcore/segment_c.cpp | 21 +- internal/core/src/storage/ChunkCache.cpp | 111 +- internal/core/src/storage/ChunkCache.h | 11 +- internal/core/src/storage/DataCodec.cpp | 22 +- internal/core/src/storage/DataCodec.h | 29 +- internal/core/src/storage/Event.cpp | 11 +- internal/core/src/storage/Event.h | 5 +- internal/core/src/storage/InsertData.h | 5 + internal/core/src/storage/PayloadReader.cpp | 32 +- internal/core/src/storage/PayloadReader.h | 19 +- internal/core/src/storage/Util.cpp | 9 +- internal/core/src/storage/Util.h | 3 +- internal/core/unittest/test_chunk.cpp | 75 +- internal/core/unittest/test_sealed.cpp | 6 +- internal/core/unittest/test_span.cpp | 2 +- internal/querynodev2/segments/segment.go | 7 +- pkg/util/paramtable/component_param.go | 10 + 54 files changed, 4912 insertions(+), 484 deletions(-) create mode 100644 internal/core/src/mmap/ChunkedColumn.h create mode 100644 internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp create mode 100644 internal/core/src/segcore/ChunkedSegmentSealedImpl.h diff --git a/configs/milvus.yaml b/configs/milvus.yaml index f67fbb16be318..55939c50098da 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -400,6 +400,7 @@ queryNode: nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist memExpansionRate: 1.15 # extra memory needed by building interim index buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num + multipleChunkedEnable: false # Enable multiple chunked search knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic loadMemoryUsageFactor: 1 # The multiply factor of calculating 
the memory usage while loading segments enableDisk: false # enable querynode load disk index, and search on disk index diff --git a/internal/core/src/bitset/detail/element_wise.h b/internal/core/src/bitset/detail/element_wise.h index 62e49b5a93ae1..120657908cab6 100644 --- a/internal/core/src/bitset/detail/element_wise.h +++ b/internal/core/src/bitset/detail/element_wise.h @@ -25,6 +25,7 @@ #include "ctz.h" #include "popcount.h" +#include "bitset/common.h" namespace milvus { namespace bitset { namespace detail { diff --git a/internal/core/src/common/Chunk.cpp b/internal/core/src/common/Chunk.cpp index 8e957afd18748..6032c6b930d9c 100644 --- a/internal/core/src/common/Chunk.cpp +++ b/internal/core/src/common/Chunk.cpp @@ -18,15 +18,13 @@ namespace milvus { -std::vector -StringChunk::StringViews() const { +std::pair, FixedVector> +StringChunk::StringViews() { std::vector ret; - for (int i = 0; i < row_nums_ - 1; i++) { + for (int i = 0; i < row_nums_; i++) { ret.emplace_back(data_ + offsets_[i], offsets_[i + 1] - offsets_[i]); } - ret.emplace_back(data_ + offsets_[row_nums_ - 1], - size_ - MMAP_STRING_PADDING - offsets_[row_nums_ - 1]); - return ret; + return {ret, valid_}; } void @@ -34,20 +32,22 @@ ArrayChunk::ConstructViews() { views_.reserve(row_nums_); for (int i = 0; i < row_nums_; ++i) { - auto data_ptr = data_ + offsets_[i]; - auto next_data_ptr = i == row_nums_ - 1 - ? 
data_ + size_ - MMAP_ARRAY_PADDING - : data_ + offsets_[i + 1]; - auto offsets_len = lens_[i] * sizeof(uint64_t); + int offset = offsets_lens_[2 * i]; + int next_offset = offsets_lens_[2 * (i + 1)]; + int len = offsets_lens_[2 * i + 1]; + + auto data_ptr = data_ + offset; + auto offsets_len = 0; std::vector element_indices = {}; if (IsStringDataType(element_type_)) { + offsets_len = len * sizeof(uint64_t); std::vector tmp( reinterpret_cast(data_ptr), reinterpret_cast(data_ptr + offsets_len)); element_indices = std::move(tmp); } views_.emplace_back(data_ptr + offsets_len, - next_data_ptr - data_ptr - offsets_len, + next_offset - offset - offsets_len, element_type_, std::move(element_indices)); } @@ -55,7 +55,10 @@ ArrayChunk::ConstructViews() { SpanBase ArrayChunk::Span() const { - return SpanBase(views_.data(), views_.size(), sizeof(ArrayView)); + return SpanBase(views_.data(), + nullable_ ? valid_.data() : nullptr, + views_.size(), + sizeof(ArrayView)); } } // namespace milvus diff --git a/internal/core/src/common/Chunk.h b/internal/core/src/common/Chunk.h index facc0cd4c0408..24db41dcd0198 100644 --- a/internal/core/src/common/Chunk.h +++ b/internal/core/src/common/Chunk.h @@ -21,60 +21,126 @@ #include "arrow/record_batch.h" #include "common/Array.h" #include "common/ChunkTarget.h" +#include "common/EasyAssert.h" #include "common/FieldDataInterface.h" #include "common/Json.h" #include "common/Span.h" #include "knowhere/sparse_utils.h" #include "simdjson/common_defs.h" #include "sys/mman.h" +#include "common/Types.h" namespace milvus { -constexpr size_t MMAP_STRING_PADDING = 1; -constexpr size_t MMAP_ARRAY_PADDING = 1; +constexpr uint64_t MMAP_STRING_PADDING = 1; +constexpr uint64_t MMAP_ARRAY_PADDING = 1; class Chunk { public: Chunk() = default; - Chunk(int64_t row_nums, char* data, size_t size) - : row_nums_(row_nums), data_(data), size_(size) { + Chunk(int64_t row_nums, char* data, uint64_t size, bool nullable) + : row_nums_(row_nums), data_(data), size_(size), 
nullable_(nullable) { + if (nullable) { + valid_.reserve(row_nums); + for (int i = 0; i < row_nums; i++) { + valid_.push_back((data[i >> 3] >> (i & 0x07)) & 1); + } + } } virtual ~Chunk() { munmap(data_, size_); } + uint64_t + Size() const { + return size_; + } + + int64_t + RowNums() const { + return row_nums_; + } + + virtual const char* + ValueAt(int64_t idx) const = 0; + + virtual const char* + Data() const { + return data_; + } + + virtual bool + isValid(int offset) { + return valid_[offset]; + }; + protected: char* data_; int64_t row_nums_; - size_t size_; + uint64_t size_; + bool nullable_; + FixedVector + valid_; // parse null bitmap to valid_ to be compatible with SpanBase }; // for fixed size data, includes fixed size array -template class FixedWidthChunk : public Chunk { public: - FixedWidthChunk(int32_t row_nums, int32_t dim, char* data, size_t size) - : Chunk(row_nums, data, size), dim_(dim){}; + FixedWidthChunk(int32_t row_nums, + int32_t dim, + char* data, + uint64_t size, + uint64_t element_size, + bool nullable) + : Chunk(row_nums, data, size, nullable), + dim_(dim), + element_size_(element_size){}; milvus::SpanBase Span() const { auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; - return milvus::SpanBase( - data_ + null_bitmap_bytes_num, row_nums_, sizeof(T) * dim_); + return milvus::SpanBase(data_ + null_bitmap_bytes_num, + nullable_ ? 
valid_.data() : nullptr, + row_nums_, + element_size_ * dim_); + } + + const char* + ValueAt(int64_t idx) const override { + auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; + return data_ + null_bitmap_bytes_num + idx * element_size_ * dim_; + } + + const char* + Data() const override { + auto null_bitmap_bytes_num = (row_nums_ + 7) / 8; + return data_ + null_bitmap_bytes_num; } private: int dim_; + int element_size_; }; class StringChunk : public Chunk { public: StringChunk() = default; - StringChunk(int32_t row_nums, char* data, size_t size) - : Chunk(row_nums, data, size) { + StringChunk(int32_t row_nums, char* data, uint64_t size, bool nullable) + : Chunk(row_nums, data, size, nullable) { auto null_bitmap_bytes_num = (row_nums + 7) / 8; offsets_ = reinterpret_cast(data + null_bitmap_bytes_num); } - std::vector - StringViews() const; + std::pair, FixedVector> + StringViews(); + + const char* + ValueAt(int64_t idx) const override { + PanicInfo(ErrorCode::Unsupported, + "StringChunk::ValueAt is not supported"); + } + + uint64_t* + Offsets() { + return offsets_; + } protected: uint64_t* offsets_; @@ -86,63 +152,83 @@ class ArrayChunk : public Chunk { public: ArrayChunk(int32_t row_nums, char* data, - size_t size, - milvus::DataType element_type) - : Chunk(row_nums, data, size), element_type_(element_type) { + uint64_t size, + milvus::DataType element_type, + bool nullable) + : Chunk(row_nums, data, size, nullable), element_type_(element_type) { auto null_bitmap_bytes_num = (row_nums + 7) / 8; - offsets_ = reinterpret_cast(data + null_bitmap_bytes_num); - lens_ = offsets_ + row_nums; + offsets_lens_ = + reinterpret_cast(data + null_bitmap_bytes_num); ConstructViews(); } SpanBase Span() const; + ArrayView + View(int64_t idx) const { + return views_[idx]; + } + void ConstructViews(); + const char* + ValueAt(int64_t idx) const override { + PanicInfo(ErrorCode::Unsupported, + "ArrayChunk::ValueAt is not supported"); + } + private: milvus::DataType element_type_; - 
uint64_t* offsets_; - uint64_t* lens_; + uint64_t* offsets_lens_; std::vector views_; }; class SparseFloatVectorChunk : public Chunk { public: - SparseFloatVectorChunk(int32_t row_nums, char* data, size_t size) - : Chunk(row_nums, data, size) { + SparseFloatVectorChunk(int32_t row_nums, + char* data, + uint64_t size, + bool nullable) + : Chunk(row_nums, data, size, nullable) { vec_.resize(row_nums); auto null_bitmap_bytes_num = (row_nums + 7) / 8; auto offsets_ptr = reinterpret_cast(data + null_bitmap_bytes_num); for (int i = 0; i < row_nums; i++) { - int vec_size = 0; - if (i == row_nums - 1) { - vec_size = size - offsets_ptr[i]; - } else { - vec_size = offsets_ptr[i + 1] - offsets_ptr[i]; - } - - vec_[i] = { - vec_size / knowhere::sparse::SparseRow::element_size(), - (uint8_t*)(data + offsets_ptr[i]), - false}; + vec_[i] = {(offsets_ptr[i + 1] - offsets_ptr[i]) / + knowhere::sparse::SparseRow::element_size(), + (uint8_t*)(data + offsets_ptr[i]), + false}; + dim_ = std::max(dim_, vec_[i].dim()); } } const char* - Data() const { + Data() const override { return static_cast(static_cast(vec_.data())); } + const char* + ValueAt(int64_t i) const override { + return static_cast( + static_cast(vec_.data() + i)); + } + // only for test std::vector>& Vec() { return vec_; } + int64_t + Dim() { + return dim_; + } + private: + int64_t dim_ = 0; std::vector> vec_; }; } // namespace milvus \ No newline at end of file diff --git a/internal/core/src/common/ChunkTarget.cpp b/internal/core/src/common/ChunkTarget.cpp index abe47dd819f8d..068b1ccabb147 100644 --- a/internal/core/src/common/ChunkTarget.cpp +++ b/internal/core/src/common/ChunkTarget.cpp @@ -10,10 +10,13 @@ // or implied. 
See the License for the specific language governing permissions and limitations under the License #include +#include #include #include "common/EasyAssert.h" #include +#include +const auto PAGE_SIZE = sysconf(_SC_PAGE_SIZE); namespace milvus { void MemChunkTarget::write(const void* data, size_t size, bool append) { @@ -42,8 +45,33 @@ MemChunkTarget::tell() { return size_; } +void +MmapChunkTarget::flush() { + if (buffer_.pos == 0) { + return; + } + + auto n = file_.Write(buffer_.buf, buffer_.pos); + AssertInfo(n != -1, "failed to write data to file"); + buffer_.clear(); +} + void MmapChunkTarget::write(const void* data, size_t size, bool append) { + if (buffer_.sufficient(size)) { + buffer_.write(data, size); + size_ += append ? size : 0; + return; + } + + flush(); + + if (buffer_.sufficient(size)) { + buffer_.write(data, size); + size_ += append ? size : 0; + return; + } + auto n = file_.Write(data, size); AssertInfo(n != -1, "failed to write data to file"); size_ += append ? size : 0; @@ -51,19 +79,35 @@ MmapChunkTarget::write(const void* data, size_t size, bool append) { void MmapChunkTarget::skip(size_t size) { + flush(); file_.Seek(size, SEEK_CUR); size_ += size; } void MmapChunkTarget::seek(size_t offset) { + flush(); file_.Seek(offset_ + offset, SEEK_SET); } std::pair MmapChunkTarget::get() { + // Write padding to align with the page size, ensuring the offset_ aligns with the page size. 
+ auto padding_size = + (size_ / PAGE_SIZE + (size_ % PAGE_SIZE != 0)) * PAGE_SIZE - size_; + char padding[padding_size]; + memset(padding, 0, sizeof(padding)); + write(padding, padding_size); + + flush(); + auto m = mmap( nullptr, size_, PROT_READ, MAP_SHARED, file_.Descriptor(), offset_); + AssertInfo(m != MAP_FAILED, + "failed to map: {}, map_size={}, offset={}", + strerror(errno), + size_, + offset_); return {(char*)m, size_}; } diff --git a/internal/core/src/common/ChunkTarget.h b/internal/core/src/common/ChunkTarget.h index 3419e40cb202a..91b0655c63373 100644 --- a/internal/core/src/common/ChunkTarget.h +++ b/internal/core/src/common/ChunkTarget.h @@ -37,9 +37,34 @@ class ChunkTarget { }; class MmapChunkTarget : public ChunkTarget { + struct Buffer { + char buf[1 << 14]; + size_t pos = 0; + + bool + sufficient(size_t size) { + return pos + size <= sizeof(buf); + } + + void + write(const void* data, size_t size) { + memcpy(buf + pos, data, size); + pos += size; + } + + void + clear() { + pos = 0; + } + }; + public: MmapChunkTarget(File& file, size_t offset) : file_(file), offset_(offset) { } + + void + flush(); + void write(const void* data, size_t size, bool append = true) override; @@ -59,17 +84,23 @@ class MmapChunkTarget : public ChunkTarget { File& file_; size_t offset_ = 0; size_t size_ = 0; + Buffer buffer_; }; class MemChunkTarget : public ChunkTarget { public: MemChunkTarget(size_t cap) : cap_(cap) { - data_ = reinterpret_cast(mmap(nullptr, - cap, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, - -1, - 0)); + auto m = mmap(nullptr, + cap, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, + -1, + 0); + AssertInfo(m != MAP_FAILED, + "failed to map: {}, map_size={}", + strerror(errno), + size_); + data_ = reinterpret_cast(m); } void diff --git a/internal/core/src/common/ChunkWriter.cpp b/internal/core/src/common/ChunkWriter.cpp index 52b339feb2a23..d7ad87db17834 100644 --- a/internal/core/src/common/ChunkWriter.cpp +++ 
b/internal/core/src/common/ChunkWriter.cpp @@ -44,7 +44,7 @@ StringChunkWriter::write(std::shared_ptr data) { size += null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_ + MMAP_STRING_PADDING; + size += sizeof(uint64_t) * (row_nums_ + 1) + MMAP_STRING_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -63,11 +63,19 @@ StringChunkWriter::write(std::shared_ptr data) { } // write data - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto str : strs) { + offsets.push_back(offset_start_pos); + offset_start_pos += str.size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); for (auto str : strs) { - offsets_.push_back(target_->tell()); target_->write(str.data(), str.size()); } } @@ -78,12 +86,8 @@ StringChunkWriter::finish() { // FIXME char padding[MMAP_STRING_PADDING]; target_->write(padding, MMAP_STRING_PADDING); - - // seek back to write offsets - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared(row_nums_, data, size, nullable_); } void @@ -101,14 +105,14 @@ JSONChunkWriter::write(std::shared_ptr data) { size += json.data().size(); jsons.push_back(std::move(json)); } - AssertInfo(data->length() % 8 == 0, - "String length should be multiple of 8"); + // AssertInfo(data->length() % 8 == 0, + // "String length should be multiple of 8"); auto null_bitmap_n = (data->length() + 7) / 8; null_bitmaps.emplace_back(data->null_bitmap_data(), null_bitmap_n); size += null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_ + simdjson::SIMDJSON_PADDING; + size += sizeof(uint64_t) * (row_nums_ + 
1) + simdjson::SIMDJSON_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -126,12 +130,20 @@ JSONChunkWriter::write(std::shared_ptr data) { } } - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto json : jsons) { + offsets.push_back(offset_start_pos); + offset_start_pos += json.data().size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); // write data for (auto json : jsons) { - offsets_.push_back(target_->tell()); target_->write(json.data().data(), json.data().size()); } } @@ -141,17 +153,15 @@ JSONChunkWriter::finish() { char padding[simdjson::SIMDJSON_PADDING]; target_->write(padding, simdjson::SIMDJSON_PADDING); - // write offsets and padding - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared(row_nums_, data, size, nullable_); } void ArrayChunkWriter::write(std::shared_ptr data) { auto size = 0; + auto is_string = IsStringDataType(element_type_); std::vector arrays; std::vector> null_bitmaps; for (auto batch : *data) { @@ -164,8 +174,10 @@ ArrayChunkWriter::write(std::shared_ptr data) { auto arr = Array(scalar_array); size += arr.byte_size(); arrays.push_back(std::move(arr)); - // element offsets size - size += sizeof(uint64_t) * arr.length(); + if (is_string) { + // element offsets size + size += sizeof(uint64_t) * arr.length(); + } } row_nums_ += array->length(); auto null_bitmap_n = (data->length() + 7) / 8; @@ -173,10 +185,8 @@ ArrayChunkWriter::write(std::shared_ptr data) { size += null_bitmap_n; } - auto is_string = IsStringDataType(element_type_); // offsets + lens - size += is_string ? 
sizeof(uint64_t) * row_nums_ * 2 + MMAP_ARRAY_PADDING - : sizeof(uint64_t) * row_nums_ + MMAP_ARRAY_PADDING; + size += sizeof(uint64_t) * (row_nums_ * 2 + 1) + MMAP_ARRAY_PADDING; if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -193,16 +203,35 @@ ArrayChunkWriter::write(std::shared_ptr data) { } } - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_ * 2); + int offsets_num = row_nums_ + 1; + int len_num = row_nums_; + int offset_start_pos = + target_->tell() + sizeof(uint64_t) * (offsets_num + len_num); + std::vector offsets; + std::vector lens; + for (auto& arr : arrays) { + offsets.push_back(offset_start_pos); + lens.push_back(arr.length()); + offset_start_pos += + is_string ? sizeof(uint64_t) * arr.get_offsets().size() : 0; + offset_start_pos += arr.byte_size(); + } + offsets.push_back(offset_start_pos); + + for (int i = 0; i < offsets.size(); i++) { + if (i == offsets.size() - 1) { + target_->write(&offsets[i], sizeof(uint64_t)); + break; + } + target_->write(&offsets[i], sizeof(uint64_t)); + target_->write(&lens[i], sizeof(uint64_t)); + } + for (auto& arr : arrays) { - // write elements offsets - offsets_.push_back(target_->tell()); if (is_string) { target_->write(arr.get_offsets().data(), arr.get_offsets().size() * sizeof(uint64_t)); } - lens_.push_back(arr.length()); target_->write(arr.data(), arr.byte_size()); } } @@ -212,14 +241,9 @@ ArrayChunkWriter::finish() { char padding[MMAP_ARRAY_PADDING]; target_->write(padding, MMAP_ARRAY_PADDING); - // write offsets and lens - target_->seek(offsets_pos_); - for (size_t i = 0; i < offsets_.size(); i++) { - target_->write(&offsets_[i], sizeof(uint64_t)); - target_->write(&lens_[i], sizeof(uint64_t)); - } auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size, element_type_); + return std::make_shared( + row_nums_, data, size, element_type_, nullable_); } void @@ -241,7 +265,7 @@ SparseFloatVectorChunkWriter::write( size += 
null_bitmap_n; row_nums_ += array->length(); } - size += sizeof(uint64_t) * row_nums_; + size += sizeof(uint64_t) * (row_nums_ + 1); if (file_) { target_ = std::make_shared(*file_, file_offset_); } else { @@ -260,95 +284,210 @@ SparseFloatVectorChunkWriter::write( } // write data - offsets_pos_ = target_->tell(); - target_->skip(sizeof(uint64_t) * row_nums_); + + int offset_num = row_nums_ + 1; + int offset_start_pos = target_->tell() + sizeof(uint64_t) * offset_num; + std::vector offsets; + + for (auto str : strs) { + offsets.push_back(offset_start_pos); + offset_start_pos += str.size(); + } + offsets.push_back(offset_start_pos); + + target_->write(offsets.data(), offsets.size() * sizeof(uint64_t)); for (auto str : strs) { - offsets_.push_back(target_->tell()); target_->write(str.data(), str.size()); } } std::shared_ptr SparseFloatVectorChunkWriter::finish() { - // seek back to write offsets - target_->seek(offsets_pos_); - target_->write(offsets_.data(), offsets_.size() * sizeof(uint64_t)); auto [data, size] = target_->get(); - return std::make_shared(row_nums_, data, size); + return std::make_shared( + row_nums_, data, size, nullable_); +} + +std::shared_ptr +create_chunk(const FieldMeta& field_meta, + int dim, + std::shared_ptr r) { + std::shared_ptr w; + bool nullable = field_meta.is_nullable(); + + switch (field_meta.get_data_type()) { + case milvus::DataType::BOOL: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT8: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT16: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT32: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::INT64: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::FLOAT: { + w = std::make_shared>( + dim, nullable); + break; + } + case milvus::DataType::DOUBLE: { + w = std::make_shared>( + dim, nullable); + break; + } + case 
milvus::DataType::VECTOR_FLOAT: { + w = std::make_shared< + ChunkWriter>(dim, nullable); + break; + } + case milvus::DataType::VECTOR_BINARY: { + w = std::make_shared< + ChunkWriter>(dim / 8, + nullable); + break; + } + case milvus::DataType::VECTOR_FLOAT16: { + w = std::make_shared< + ChunkWriter>( + dim, nullable); + break; + } + case milvus::DataType::VECTOR_BFLOAT16: { + w = std::make_shared< + ChunkWriter>( + dim, nullable); + break; + } + case milvus::DataType::VARCHAR: + case milvus::DataType::STRING: { + w = std::make_shared(nullable); + break; + } + case milvus::DataType::JSON: { + w = std::make_shared(nullable); + break; + } + case milvus::DataType::ARRAY: { + w = std::make_shared( + field_meta.get_element_type(), nullable); + break; + } + case milvus::DataType::VECTOR_SPARSE_FLOAT: { + w = std::make_shared(nullable); + break; + } + default: + PanicInfo(Unsupported, "Unsupported data type"); + } + + w->write(r); + return w->finish(); } std::shared_ptr create_chunk(const FieldMeta& field_meta, int dim, + File& file, + size_t file_offset, std::shared_ptr r) { std::shared_ptr w; + bool nullable = field_meta.is_nullable(); switch (field_meta.get_data_type()) { case milvus::DataType::BOOL: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT8: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT16: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT32: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::INT64: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::FLOAT: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case 
milvus::DataType::DOUBLE: { - w = std::make_shared>(dim); + w = std::make_shared>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_FLOAT: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_BINARY: { w = std::make_shared< - ChunkWriter>(dim / 8); + ChunkWriter>( + dim / 8, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_FLOAT16: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VECTOR_BFLOAT16: { w = std::make_shared< - ChunkWriter>(dim); + ChunkWriter>( + dim, file, file_offset, nullable); break; } case milvus::DataType::VARCHAR: case milvus::DataType::STRING: { - w = std::make_shared(); + w = std::make_shared( + file, file_offset, nullable); break; } case milvus::DataType::JSON: { - w = std::make_shared(); + w = std::make_shared(file, file_offset, nullable); break; } case milvus::DataType::ARRAY: { w = std::make_shared( - field_meta.get_element_type()); + field_meta.get_element_type(), file, file_offset, nullable); break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - w = std::make_shared(); + w = std::make_shared( + file, file_offset, nullable); break; } default: diff --git a/internal/core/src/common/ChunkWriter.h b/internal/core/src/common/ChunkWriter.h index a16b9bae47448..c389b0e799096 100644 --- a/internal/core/src/common/ChunkWriter.h +++ b/internal/core/src/common/ChunkWriter.h @@ -25,10 +25,11 @@ namespace milvus { class ChunkWriterBase { public: - ChunkWriterBase() = default; + explicit ChunkWriterBase(bool nullable) : nullable_(nullable) { + } - ChunkWriterBase(File& file, size_t offset) - : file_(&file), file_offset_(offset) { + ChunkWriterBase(File& file, size_t offset, bool nullable) + : file_(&file), file_offset_(offset), nullable_(nullable) { } virtual void @@ -46,17 +47,18 @@ class ChunkWriterBase { int row_nums_ = 0; File* file_ = 
nullptr; size_t file_offset_ = 0; + bool nullable_ = false; std::shared_ptr target_; }; template class ChunkWriter : public ChunkWriterBase { public: - ChunkWriter(int dim) : dim_(dim) { + ChunkWriter(int dim, bool nullable) : ChunkWriterBase(nullable), dim_(dim) { } - ChunkWriter(int dim, File& file, size_t offset) - : ChunkWriterBase(file, offset), dim_(dim){}; + ChunkWriter(int dim, File& file, size_t offset, bool nullable) + : ChunkWriterBase(file, offset, nullable), dim_(dim){}; void write(std::shared_ptr data) override { @@ -104,8 +106,8 @@ class ChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override { auto [data, size] = target_->get(); - return std::make_shared>( - row_nums_, dim_, data, size); + return std::make_shared( + row_nums_, dim_, data, size, sizeof(T), nullable_); } private: @@ -165,10 +167,6 @@ class StringChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - protected: - std::vector offsets_; - size_t offsets_pos_ = 0; }; class JSONChunkWriter : public ChunkWriterBase { @@ -180,21 +178,18 @@ class JSONChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - private: - std::vector offsets_; - size_t offsets_pos_ = 0; }; class ArrayChunkWriter : public ChunkWriterBase { public: - ArrayChunkWriter(const milvus::DataType element_type) - : element_type_(element_type) { + ArrayChunkWriter(const milvus::DataType element_type, bool nullable) + : ChunkWriterBase(nullable), element_type_(element_type) { } ArrayChunkWriter(const milvus::DataType element_type, File& file, - size_t offset) - : ChunkWriterBase(file, offset), element_type_(element_type) { + size_t offset, + bool nullable) + : ChunkWriterBase(file, offset, nullable), element_type_(element_type) { } void @@ -205,9 +200,6 @@ class ArrayChunkWriter : public ChunkWriterBase { private: const milvus::DataType element_type_; - std::vector offsets_; - std::vector lens_; - size_t offsets_pos_; }; class SparseFloatVectorChunkWriter : public 
ChunkWriterBase { @@ -219,10 +211,6 @@ class SparseFloatVectorChunkWriter : public ChunkWriterBase { std::shared_ptr finish() override; - - private: - uint64_t offsets_pos_ = 0; - std::vector offsets_; }; std::shared_ptr diff --git a/internal/core/src/common/Common.h b/internal/core/src/common/Common.h index c398c161d58ea..49fcbcb7c8592 100644 --- a/internal/core/src/common/Common.h +++ b/internal/core/src/common/Common.h @@ -17,6 +17,8 @@ #pragma once #include +#include +#include #include "common/Consts.h" namespace milvus { @@ -47,11 +49,14 @@ void SetDefaultExecEvalExprBatchSize(int64_t val); struct BufferView { - char* data_; - size_t size_; - - BufferView(char* data_ptr, size_t size) : data_(data_ptr), size_(size) { - } + struct Element { + const char* data_; + uint64_t* offsets_; + int start_; + int end_; + }; + + std::variant, std::pair> data_; }; } // namespace milvus diff --git a/internal/core/src/common/FieldData.h b/internal/core/src/common/FieldData.h index de796fa3c5e8a..334a46190f02e 100644 --- a/internal/core/src/common/FieldData.h +++ b/internal/core/src/common/FieldData.h @@ -23,6 +23,7 @@ #include "common/FieldDataInterface.h" #include "common/Channel.h" +#include "parquet/arrow/reader.h" namespace milvus { @@ -143,6 +144,21 @@ using FieldDataPtr = std::shared_ptr; using FieldDataChannel = Channel; using FieldDataChannelPtr = std::shared_ptr; +struct ArrowDataWrapper { + ArrowDataWrapper() = default; + ArrowDataWrapper(std::shared_ptr reader, + std::shared_ptr arrow_reader, + std::shared_ptr file_data) + : reader(reader), arrow_reader(arrow_reader), file_data(file_data) { + } + std::shared_ptr reader; + // file reader must outlive the record batch reader + std::shared_ptr arrow_reader; + // underlying file data memory, must outlive the arrow reader + std::shared_ptr file_data; +}; +using ArrowReaderChannel = Channel>; + FieldDataPtr InitScalarFieldData(const DataType& type, bool nullable, int64_t cap_rows); diff --git 
a/internal/core/src/common/FieldDataInterface.h b/internal/core/src/common/FieldDataInterface.h index 926a1bb16e3d9..72aff36da8b82 100644 --- a/internal/core/src/common/FieldDataInterface.h +++ b/internal/core/src/common/FieldDataInterface.h @@ -395,21 +395,6 @@ class FieldDataImpl : public FieldDataBase { return &data_[offset]; } - // std::optional - // Value(ssize_t offset) { - // if (!is_type_entire_row) { - // return RawValue(offset); - // } - // AssertInfo(offset < get_num_rows(), - // "field data subscript out of range"); - // AssertInfo(offset < length(), - // "subscript position don't has valid value"); - // if (nullable_ && !valid_data_[offset]) { - // return std::nullopt; - // } - // return &field_data_[offset]; - // } - int64_t Size() const override { return DataSize() + ValidDataSize(); diff --git a/internal/core/src/common/type_c.h b/internal/core/src/common/type_c.h index bf19e0dac962d..77bc563698933 100644 --- a/internal/core/src/common/type_c.h +++ b/internal/core/src/common/type_c.h @@ -28,6 +28,7 @@ enum SegmentType { Growing = 1, Sealed = 2, Indexing = 3, + ChunkedSealed = 4, }; typedef enum SegmentType SegmentType; diff --git a/internal/core/src/exec/expression/CompareExpr.cpp b/internal/core/src/exec/expression/CompareExpr.cpp index 0c412ac82b64b..467df6654a929 100644 --- a/internal/core/src/exec/expression/CompareExpr.cpp +++ b/internal/core/src/exec/expression/CompareExpr.cpp @@ -15,6 +15,7 @@ // limitations under the License. #include "CompareExpr.h" +#include "common/type_c.h" #include "query/Relational.h" namespace milvus { @@ -28,15 +29,248 @@ PhyCompareFilterExpr::IsStringExpr() { int64_t PhyCompareFilterExpr::GetNextBatchSize() { - auto current_rows = - segment_->type() == SegmentType::Growing - ? current_chunk_id_ * size_per_chunk_ + current_chunk_pos_ - : current_chunk_pos_; + auto current_rows = GetCurrentRows(); + return current_rows + batch_size_ >= active_count_ ? 
active_count_ - current_rows : batch_size_; } +template +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + if (index) { + auto& indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, current_chunk_id)); + auto current_chunk_size = segment_->type() == SegmentType::Growing + ? size_per_chunk_ + : active_count_; + + if (indexing.HasRawData()) { + return [&, current_chunk_size]() -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, + current_chunk_id)); + } + return indexing.Reverse_Lookup(current_chunk_pos++); + }; + } + } + auto chunk_data = + segment_->chunk_data(field_id, current_chunk_id).data(); + auto current_chunk_size = segment_->chunk_size(field_id, current_chunk_id); + return + [=, ¤t_chunk_id, ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = + segment_->chunk_data(field_id, current_chunk_id).data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return chunk_data[current_chunk_pos++]; + }; +} + +template <> +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + if (index) { + auto& indexing = const_cast&>( + segment_->chunk_scalar_index(field_id, + current_chunk_id)); + auto current_chunk_size = segment_->type() == SegmentType::Growing + ? 
size_per_chunk_ + : active_count_; + + if (indexing.HasRawData()) { + return [&, current_chunk_size]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + indexing = const_cast&>( + segment_->chunk_scalar_index( + field_id, current_chunk_id)); + } + return indexing.Reverse_Lookup(current_chunk_pos++); + }; + } + } + if (segment_->type() == SegmentType::Growing && + !storage::MmapManager::GetInstance() + .GetMmapConfig() + .growing_enable_mmap) { + auto chunk_data = + segment_->chunk_data(field_id, current_chunk_id) + .data(); + auto current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + return [=, + ¤t_chunk_id, + ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = + segment_ + ->chunk_data(field_id, current_chunk_id) + .data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return chunk_data[current_chunk_pos++]; + }; + } else { + auto chunk_data = + segment_->chunk_view(field_id, current_chunk_id) + .first.data(); + auto current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + return [=, + ¤t_chunk_id, + ¤t_chunk_pos]() mutable -> const number { + if (current_chunk_pos >= current_chunk_size) { + current_chunk_id++; + current_chunk_pos = 0; + chunk_data = segment_ + ->chunk_view( + field_id, current_chunk_id) + .first.data(); + current_chunk_size = + segment_->chunk_size(field_id, current_chunk_id); + } + + return std::string(chunk_data[current_chunk_pos++]); + }; + } +} + +MultipleChunkDataAccessor +PhyCompareFilterExpr::GetChunkData(DataType data_type, + FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos) { + switch (data_type) { + case DataType::BOOL: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT8: + return GetChunkData( + 
field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT16: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT32: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::INT64: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::FLOAT: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::DOUBLE: + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + case DataType::VARCHAR: { + return GetChunkData( + field_id, index, current_chunk_id, current_chunk_pos); + } + default: + PanicInfo(DataTypeInvalid, "unsupported data type: {}", data_type); + } +} + +template +VectorPtr +PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { + if (segment_->is_chunked()) { + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = + std::make_shared(TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + + auto left = GetChunkData(expr_->left_data_type_, + expr_->left_field_id_, + is_left_indexed_, + left_current_chunk_id_, + left_current_chunk_pos_); + auto right = GetChunkData(expr_->right_data_type_, + expr_->right_field_id_, + is_right_indexed_, + right_current_chunk_id_, + right_current_chunk_pos_); + for (int i = 0; i < real_batch_size; ++i) { + res[i] = boost::apply_visitor( + milvus::query::Relational{}, left(), right()); + } + return res_vec; + } else { + auto real_batch_size = GetNextBatchSize(); + if (real_batch_size == 0) { + return nullptr; + } + + auto res_vec = + std::make_shared(TargetBitmap(real_batch_size)); + TargetBitmapView res(res_vec->GetRawData(), real_batch_size); + + auto left_data_barrier = + segment_->num_chunk_data(expr_->left_field_id_); + auto right_data_barrier = + segment_->num_chunk_data(expr_->right_field_id_); + + int64_t 
processed_rows = 0; + for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; + ++chunk_id) { + auto chunk_size = chunk_id == num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + auto left = GetChunkData(expr_->left_data_type_, + expr_->left_field_id_, + chunk_id, + left_data_barrier); + auto right = GetChunkData(expr_->right_data_type_, + expr_->right_field_id_, + chunk_id, + right_data_barrier); + + for (int i = chunk_id == current_chunk_id_ ? current_chunk_pos_ : 0; + i < chunk_size; + ++i) { + res[processed_rows++] = boost::apply_visitor( + milvus::query::Relational{}, + left(i), + right(i)); + + if (processed_rows >= batch_size_) { + current_chunk_id_ = chunk_id; + current_chunk_pos_ = i + 1; + return res_vec; + } + } + } + return res_vec; + } +} + template ChunkDataAccessor PhyCompareFilterExpr::GetChunkData(FieldId field_id, @@ -113,52 +347,6 @@ PhyCompareFilterExpr::GetChunkData(DataType data_type, } } -template -VectorPtr -PhyCompareFilterExpr::ExecCompareExprDispatcher(OpType op) { - auto real_batch_size = GetNextBatchSize(); - if (real_batch_size == 0) { - return nullptr; - } - - auto res_vec = - std::make_shared(TargetBitmap(real_batch_size)); - TargetBitmapView res(res_vec->GetRawData(), real_batch_size); - - auto left_data_barrier = segment_->num_chunk_data(expr_->left_field_id_); - auto right_data_barrier = segment_->num_chunk_data(expr_->right_field_id_); - - int64_t processed_rows = 0; - for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; - ++chunk_id) { - auto chunk_size = chunk_id == num_chunk_ - 1 - ? active_count_ - chunk_id * size_per_chunk_ - : size_per_chunk_; - auto left = GetChunkData(expr_->left_data_type_, - expr_->left_field_id_, - chunk_id, - left_data_barrier); - auto right = GetChunkData(expr_->right_data_type_, - expr_->right_field_id_, - chunk_id, - right_data_barrier); - - for (int i = chunk_id == current_chunk_id_ ? 
current_chunk_pos_ : 0; - i < chunk_size; - ++i) { - res[processed_rows++] = boost::apply_visitor( - milvus::query::Relational{}, left(i), right(i)); - - if (processed_rows >= batch_size_) { - current_chunk_id_ = chunk_id; - current_chunk_pos_ = i + 1; - return res_vec; - } - } - } - return res_vec; -} - void PhyCompareFilterExpr::Eval(EvalCtx& context, VectorPtr& result) { // For segment both fields has no index, can use SIMD to speed up. diff --git a/internal/core/src/exec/expression/CompareExpr.h b/internal/core/src/exec/expression/CompareExpr.h index ff6069665182f..fd9ef751387cb 100644 --- a/internal/core/src/exec/expression/CompareExpr.h +++ b/internal/core/src/exec/expression/CompareExpr.h @@ -22,6 +22,7 @@ #include "common/EasyAssert.h" #include "common/Types.h" #include "common/Vector.h" +#include "common/type_c.h" #include "exec/expression/Expr.h" #include "segcore/SegmentInterface.h" @@ -37,6 +38,7 @@ using number = boost::variant; using ChunkDataAccessor = std::function; +using MultipleChunkDataAccessor = std::function; template struct CompareElementFunc { @@ -114,9 +116,26 @@ class PhyCompareFilterExpr : public Expr { is_left_indexed_ = segment_->HasIndex(left_field_); is_right_indexed_ = segment_->HasIndex(right_field_); size_per_chunk_ = segment_->size_per_chunk(); - num_chunk_ = is_left_indexed_ - ? segment_->num_chunk_index(expr_->left_field_id_) - : upper_div(active_count_, size_per_chunk_); + if (segment_->is_chunked()) { + left_num_chunk_ = + is_left_indexed_ + ? segment_->num_chunk_index(expr_->left_field_id_) + : segment_->type() == SegmentType::Growing + ? upper_div(active_count_, size_per_chunk_) + : segment_->num_chunk_data(left_field_); + right_num_chunk_ = + is_right_indexed_ + ? segment_->num_chunk_index(expr_->right_field_id_) + : segment_->type() == SegmentType::Growing + ? upper_div(active_count_, size_per_chunk_) + : segment_->num_chunk_data(right_field_); + num_chunk_ = left_num_chunk_; + } else { + num_chunk_ = is_left_indexed_ + ? 
segment_->num_chunk_index(expr_->left_field_id_) + : upper_div(active_count_, size_per_chunk_); + } + AssertInfo( batch_size_ > 0, fmt::format("expr batch size should greater than zero, but now: {}", @@ -128,6 +147,67 @@ class PhyCompareFilterExpr : public Expr { void MoveCursor() override { + if (segment_->is_chunked()) { + MoveCursorForMultipleChunk(); + } else { + MoveCursorForSingleChunk(); + } + } + + void + MoveCursorForMultipleChunk() { + int64_t processed_rows = 0; + for (int64_t chunk_id = left_current_chunk_id_; + chunk_id < left_num_chunk_; + ++chunk_id) { + auto chunk_size = 0; + if (segment_->type() == SegmentType::Growing) { + chunk_size = chunk_id == left_num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + } else { + chunk_size = segment_->chunk_size(left_field_, chunk_id); + } + + for (int i = chunk_id == left_current_chunk_id_ + ? left_current_chunk_pos_ + : 0; + i < chunk_size; + ++i) { + if (++processed_rows >= batch_size_) { + left_current_chunk_id_ = chunk_id; + left_current_chunk_pos_ = i + 1; + } + } + } + processed_rows = 0; + for (int64_t chunk_id = right_current_chunk_id_; + chunk_id < right_num_chunk_; + ++chunk_id) { + auto chunk_size = 0; + if (segment_->type() == SegmentType::Growing) { + chunk_size = chunk_id == right_num_chunk_ - 1 + ? active_count_ - chunk_id * size_per_chunk_ + : size_per_chunk_; + } else { + chunk_size = segment_->chunk_size(right_field_, chunk_id); + } + + for (int i = chunk_id == right_current_chunk_id_ + ? 
right_current_chunk_pos_ + : 0; + i < chunk_size; + ++i) { + if (++processed_rows >= batch_size_) { + right_current_chunk_id_ = chunk_id; + right_current_chunk_pos_ = i + 1; + } + } + } + } + + void + MoveCursorForSingleChunk() { int64_t processed_rows = 0; for (int64_t chunk_id = current_chunk_id_; chunk_id < num_chunk_; ++chunk_id) { @@ -146,6 +226,24 @@ class PhyCompareFilterExpr : public Expr { } } + int64_t + GetCurrentRows() { + if (segment_->is_chunked()) { + auto current_rows = + is_left_indexed_ && segment_->type() == SegmentType::Sealed + ? left_current_chunk_pos_ + : segment_->num_rows_until_chunk(left_field_, + left_current_chunk_id_) + + left_current_chunk_pos_; + return current_rows; + } else { + return segment_->type() == SegmentType::Growing + ? current_chunk_id_ * size_per_chunk_ + + current_chunk_pos_ + : current_chunk_pos_; + } + } + private: int64_t GetNextBatchSize(); @@ -153,6 +251,13 @@ class PhyCompareFilterExpr : public Expr { bool IsStringExpr(); + template + MultipleChunkDataAccessor + GetChunkData(FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos); + template ChunkDataAccessor GetChunkData(FieldId field_id, int chunk_id, int data_barrier); @@ -160,6 +265,23 @@ class PhyCompareFilterExpr : public Expr { template int64_t ProcessBothDataChunks(FUNC func, TargetBitmapView res, ValTypes... values) { + if (segment_->is_chunked()) { + return ProcessBothDataChunksForMultipleChunk( + func, res, values...); + } else { + return ProcessBothDataChunksForSingleChunk( + func, res, values...); + } + } + + template + int64_t + ProcessBothDataChunksForSingleChunk(FUNC func, + TargetBitmapView res, + ValTypes... values) { int64_t processed_size = 0; for (size_t i = current_chunk_id_; i < num_chunk_; i++) { @@ -194,6 +316,56 @@ class PhyCompareFilterExpr : public Expr { return processed_size; } + template + int64_t + ProcessBothDataChunksForMultipleChunk(FUNC func, + TargetBitmapView res, + ValTypes... 
values) { + int64_t processed_size = 0; + + // only call this function when left and right are not indexed, so they have the same number of chunks + for (size_t i = left_current_chunk_id_; i < left_num_chunk_; i++) { + auto left_chunk = segment_->chunk_data(left_field_, i); + auto right_chunk = segment_->chunk_data(right_field_, i); + auto data_pos = + (i == left_current_chunk_id_) ? left_current_chunk_pos_ : 0; + auto size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (left_num_chunk_ - 1)) + ? (active_count_ % size_per_chunk_ == 0 + ? size_per_chunk_ - data_pos + : active_count_ % size_per_chunk_ - data_pos) + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(left_field_, i) - data_pos; + } + + if (processed_size + size >= batch_size_) { + size = batch_size_ - processed_size; + } + + const T* left_data = left_chunk.data() + data_pos; + const U* right_data = right_chunk.data() + data_pos; + func(left_data, right_data, size, res + processed_size, values...); + processed_size += size; + + if (processed_size >= batch_size_) { + left_current_chunk_id_ = i; + left_current_chunk_pos_ = data_pos + size; + break; + } + } + + return processed_size; + } + + MultipleChunkDataAccessor + GetChunkData(DataType data_type, + FieldId field_id, + bool index, + int64_t& current_chunk_id, + int64_t& current_chunk_pos); + ChunkDataAccessor GetChunkData(DataType data_type, FieldId field_id, @@ -225,6 +397,12 @@ class PhyCompareFilterExpr : public Expr { bool is_right_indexed_; int64_t active_count_{0}; int64_t num_chunk_{0}; + int64_t left_num_chunk_{0}; + int64_t right_num_chunk_{0}; + int64_t left_current_chunk_id_{0}; + int64_t left_current_chunk_pos_{0}; + int64_t right_current_chunk_id_{0}; + int64_t right_current_chunk_pos_{0}; int64_t current_chunk_id_{0}; int64_t current_chunk_pos_{0}; int64_t size_per_chunk_{0}; diff --git a/internal/core/src/exec/expression/Expr.h b/internal/core/src/exec/expression/Expr.h index 
b80d376c78ede..25f90db4a249f 100644 --- a/internal/core/src/exec/expression/Expr.h +++ b/internal/core/src/exec/expression/Expr.h @@ -122,12 +122,43 @@ class SegmentExpr : public Expr { } // if index not include raw data, also need load data if (segment_->HasFieldData(field_id_)) { - num_data_chunk_ = upper_div(active_count_, size_per_chunk_); + if (segment_->is_chunked()) { + num_data_chunk_ = segment_->num_chunk_data(field_id_); + } else { + num_data_chunk_ = upper_div(active_count_, size_per_chunk_); + } } } void - MoveCursorForData() { + MoveCursorForDataMultipleChunk() { + int64_t processed_size = 0; + for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) { + auto data_pos = + (i == current_data_chunk_) ? current_data_chunk_pos_ : 0; + int64_t size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (num_data_chunk_ - 1) && + active_count_ % size_per_chunk_ != 0) + ? active_count_ % size_per_chunk_ - data_pos + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(field_id_, i) - data_pos; + } + + size = std::min(size, batch_size_ - processed_size); + + processed_size += size; + if (processed_size >= batch_size_) { + current_data_chunk_ = i; + current_data_chunk_pos_ = data_pos + size; + break; + } + // } + } + } + void + MoveCursorForDataSingleChunk() { if (segment_->type() == SegmentType::Sealed) { auto size = std::min(active_count_ - current_data_chunk_pos_, batch_size_); @@ -154,6 +185,15 @@ class SegmentExpr : public Expr { } } + void + MoveCursorForData() { + if (segment_->is_chunked()) { + MoveCursorForDataMultipleChunk(); + } else { + MoveCursorForDataSingleChunk(); + } + } + void MoveCursorForIndex() { AssertInfo(segment_->type() == SegmentType::Sealed, @@ -183,7 +223,17 @@ class SegmentExpr : public Expr { auto current_chunk_pos = is_index_mode_ && use_index_ ? 
current_index_chunk_pos_ : current_data_chunk_pos_; - auto current_rows = current_chunk * size_per_chunk_ + current_chunk_pos; + auto current_rows = 0; + if (segment_->is_chunked()) { + current_rows = + is_index_mode_ && use_index_ && + segment_->type() == SegmentType::Sealed + ? current_chunk_pos + : segment_->num_rows_until_chunk(field_id_, current_chunk) + + current_chunk_pos; + } else { + current_rows = current_chunk * size_per_chunk_ + current_chunk_pos; + } return current_rows + batch_size_ >= active_count_ ? active_count_ - current_rows : batch_size_; @@ -220,7 +270,7 @@ class SegmentExpr : public Expr { template int64_t - ProcessDataChunks( + ProcessDataChunksForSingleChunk( FUNC func, std::function skip_func, TargetBitmapView res, @@ -266,6 +316,90 @@ class SegmentExpr : public Expr { return processed_size; } + template + int64_t + ProcessDataChunksForMultipleChunk( + FUNC func, + std::function skip_func, + TargetBitmapView res, + ValTypes... values) { + int64_t processed_size = 0; + + // if constexpr (std::is_same_v || + // std::is_same_v) { + // if (segment_->type() == SegmentType::Sealed) { + // return ProcessChunkForSealedSeg( + // func, skip_func, res, values...); + // } + // } + + for (size_t i = current_data_chunk_; i < num_data_chunk_; i++) { + auto data_pos = + (i == current_data_chunk_) ? current_data_chunk_pos_ : 0; + + int64_t size = 0; + if (segment_->type() == SegmentType::Growing) { + size = (i == (num_data_chunk_ - 1)) + ? (active_count_ % size_per_chunk_ == 0 + ? 
size_per_chunk_ - data_pos + : active_count_ % size_per_chunk_ - data_pos) + : size_per_chunk_ - data_pos; + } else { + size = segment_->chunk_size(field_id_, i) - data_pos; + } + + size = std::min(size, batch_size_ - processed_size); + + auto& skip_index = segment_->GetSkipIndex(); + if (!skip_func || !skip_func(skip_index, field_id_, i)) { + bool is_seal = false; + if constexpr (std::is_same_v || + std::is_same_v) { + if (segment_->type() == SegmentType::Sealed) { + auto data_vec = segment_ + ->get_batch_views( + field_id_, i, data_pos, size) + .first; + func(data_vec.data(), + size, + res + processed_size, + values...); + is_seal = true; + } + } + if (!is_seal) { + auto chunk = segment_->chunk_data(field_id_, i); + const T* data = chunk.data() + data_pos; + func(data, size, res + processed_size, values...); + } + } + + processed_size += size; + if (processed_size >= batch_size_) { + current_data_chunk_ = i; + current_data_chunk_pos_ = data_pos + size; + break; + } + } + + return processed_size; + } + + template + int64_t + ProcessDataChunks( + FUNC func, + std::function skip_func, + TargetBitmapView res, + ValTypes... values) { + if (segment_->is_chunked()) { + return ProcessDataChunksForMultipleChunk( + func, skip_func, res, values...); + } else { + return ProcessDataChunksForSingleChunk( + func, skip_func, res, values...); + } + } int ProcessIndexOneChunk(TargetBitmap& result, diff --git a/internal/core/src/exec/expression/UnaryExpr.cpp b/internal/core/src/exec/expression/UnaryExpr.cpp index f53475e14e192..3b7c2116244fb 100644 --- a/internal/core/src/exec/expression/UnaryExpr.cpp +++ b/internal/core/src/exec/expression/UnaryExpr.cpp @@ -363,15 +363,27 @@ PhyUnaryRangeFilterExpr::ExecArrayEqualForIndex(bool reverse) { } // filtering by index, get candidates. 
- auto size_per_chunk = segment_->size_per_chunk(); - auto retrieve = [ size_per_chunk, this ](int64_t offset) -> auto { - auto chunk_idx = offset / size_per_chunk; - auto chunk_offset = offset % size_per_chunk; - const auto& chunk = - segment_->template chunk_data(field_id_, - chunk_idx); - return chunk.data() + chunk_offset; - }; + std::function retrieve; + if (segment_->is_chunked()) { + retrieve = [this](int64_t offset) -> const milvus::ArrayView* { + auto [chunk_idx, chunk_offset] = + segment_->get_chunk_by_offset(field_id_, offset); + const auto& chunk = + segment_->template chunk_data( + field_id_, chunk_idx); + return chunk.data() + chunk_offset; + }; + } else { + auto size_per_chunk = segment_->size_per_chunk(); + retrieve = [ size_per_chunk, this ](int64_t offset) -> auto { + auto chunk_idx = offset / size_per_chunk; + auto chunk_offset = offset % size_per_chunk; + const auto& chunk = + segment_->template chunk_data( + field_id_, chunk_idx); + return chunk.data() + chunk_offset; + }; + } // compare the array via the raw data. 
auto filter = [&retrieve, &val, reverse](size_t offset) -> bool { diff --git a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h index 61888ae2874ee..78833a8d34cd5 100644 --- a/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h +++ b/internal/core/src/exec/operator/groupby/SearchGroupByOperator.h @@ -62,8 +62,7 @@ class SealedDataGetter : public DataGetter { const index::ScalarIndex* field_index_; public: - SealedDataGetter(const segcore::SegmentSealedImpl& segment, - FieldId& field_id) { + SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id) { if (segment.HasFieldData(field_id)) { if constexpr (std::is_same_v) { str_field_data_ = @@ -114,8 +113,8 @@ GetDataGetter(const segcore::SegmentInternalInterface& segment, dynamic_cast(&segment)) { return std::make_shared>(*growing_segment, fieldId); - } else if (const segcore::SegmentSealedImpl* sealed_segment = - dynamic_cast(&segment)) { + } else if (const segcore::SegmentSealed* sealed_segment = + dynamic_cast(&segment)) { return std::make_shared>(*sealed_segment, fieldId); } else { PanicInfo(UnexpectedError, diff --git a/internal/core/src/index/SkipIndex.cpp b/internal/core/src/index/SkipIndex.cpp index 20780a4bbc159..82357e5b6ff21 100644 --- a/internal/core/src/index/SkipIndex.cpp +++ b/internal/core/src/index/SkipIndex.cpp @@ -111,29 +111,4 @@ SkipIndex::LoadPrimitive(milvus::FieldId field_id, fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); } -void -SkipIndex::LoadString(milvus::FieldId field_id, - int64_t chunk_id, - const milvus::VariableColumn& var_column) { - int num_rows = var_column.NumRows(); - auto chunkMetrics = std::make_unique(); - if (num_rows > 0) { - auto info = ProcessStringFieldMetrics(var_column); - chunkMetrics->min_ = Metrics(std::move(info.min_)); - chunkMetrics->max_ = Metrics(std::move(info.max_)); - chunkMetrics->null_count_ = info.null_count_; - } - - 
chunkMetrics->hasValue_ = - chunkMetrics->null_count_ == num_rows ? false : true; - - std::unique_lock lck(mutex_); - if (fieldChunkMetrics_.count(field_id) == 0) { - fieldChunkMetrics_.insert(std::make_pair( - field_id, - std::unordered_map>())); - } - fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); -} - } // namespace milvus diff --git a/internal/core/src/index/SkipIndex.h b/internal/core/src/index/SkipIndex.h index 754a18b8dd1b4..92ee34fd53ea4 100644 --- a/internal/core/src/index/SkipIndex.h +++ b/internal/core/src/index/SkipIndex.h @@ -16,6 +16,7 @@ #include "common/Types.h" #include "log/Log.h" #include "mmap/Column.h" +#include "mmap/ChunkedColumn.h" namespace milvus { @@ -100,10 +101,32 @@ class SkipIndex { const bool* valid_data, int64_t count); + template void LoadString(milvus::FieldId field_id, int64_t chunk_id, - const milvus::VariableColumn& var_column); + const T& var_column) { + int num_rows = var_column.NumRows(); + auto chunkMetrics = std::make_unique(); + if (num_rows > 0) { + auto info = ProcessStringFieldMetrics(var_column); + chunkMetrics->min_ = Metrics(info.min_); + chunkMetrics->max_ = Metrics(info.max_); + chunkMetrics->null_count_ = info.null_count_; + } + + chunkMetrics->hasValue_ = + chunkMetrics->null_count_ == num_rows ? 
false : true; + + std::unique_lock lck(mutex_); + if (fieldChunkMetrics_.count(field_id) == 0) { + fieldChunkMetrics_.insert(std::make_pair( + field_id, + std::unordered_map>())); + } + fieldChunkMetrics_[field_id].emplace(chunk_id, std::move(chunkMetrics)); + } private: const FieldChunkMetrics& @@ -269,9 +292,9 @@ class SkipIndex { return {minValue, maxValue, null_count}; } + template metricInfo - ProcessStringFieldMetrics( - const milvus::VariableColumn& var_column) { + ProcessStringFieldMetrics(const T& var_column) { int num_rows = var_column.NumRows(); // find first not null value int64_t start = 0; diff --git a/internal/core/src/mmap/ChunkedColumn.h b/internal/core/src/mmap/ChunkedColumn.h new file mode 100644 index 0000000000000..a069c9cf8c4a4 --- /dev/null +++ b/internal/core/src/mmap/ChunkedColumn.h @@ -0,0 +1,427 @@ +// Licensed to the LF AI & Data foundation under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/Array.h" +#include "common/Chunk.h" +#include "common/Common.h" +#include "common/EasyAssert.h" +#include "common/File.h" +#include "common/FieldMeta.h" +#include "common/FieldData.h" +#include "common/Span.h" +#include "fmt/format.h" +#include "log/Log.h" +#include "mmap/Utils.h" +#include "common/FieldData.h" +#include "common/FieldDataInterface.h" +#include "common/Array.h" +#include "knowhere/dataset.h" +#include "monitor/prometheus_client.h" +#include "storage/MmapChunkManager.h" + +#include "mmap/Column.h" +namespace milvus { + +class ChunkedColumnBase : public ColumnBase { + public: + ChunkedColumnBase() = default; + // memory mode ctor + ChunkedColumnBase(const FieldMeta& field_meta) { + if (field_meta.is_nullable()) { + nullable_ = true; + } + } + + virtual ~ChunkedColumnBase(){}; + + ChunkedColumnBase(ChunkedColumnBase&& column) noexcept + : nullable_(column.nullable_), num_rows_(column.num_rows_) { + column.num_rows_ = 0; + column.nullable_ = false; + } + + virtual void + AppendBatch(const FieldDataPtr data) override { + PanicInfo(ErrorCode::Unsupported, "AppendBatch not supported"); + } + + virtual const char* + Data(int chunk_id) const override { + chunks_[chunk_id]->Data(); + } + + virtual const char* + ValueAt(int64_t offset) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(offset); + return chunks_[chunk_id]->ValueAt(offset_in_chunk); + }; + + // MmappedData() returns the mmaped address + const char* + MmappedData() const override { + AssertInfo(chunks_.size() == 1, + "only support one chunk, but got {} chunk(s)", + chunks_.size()); + return chunks_[0]->Data(); + } + + bool + IsValid(size_t offset) const { + if (nullable_) { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(offset); + return chunks_[chunk_id]->isValid(offset_in_chunk); + } + return true; + } + + bool + 
IsNullable() const { + return nullable_; + } + + size_t + NumRows() const { + return num_rows_; + }; + + int64_t + num_chunks() const { + return chunks_.size(); + } + + virtual void + AddChunk(std::shared_ptr chunk) { + num_rows_until_chunk_.push_back(num_rows_); + num_rows_ += chunk->RowNums(); + chunks_.push_back(chunk); + } + + virtual uint64_t + DataByteSize() const override { + auto size = 0; + for (auto& chunk : chunks_) { + size += chunk->Size(); + } + return size; + } + + int64_t + chunk_row_nums(int64_t chunk_id) const { + return chunks_[chunk_id]->RowNums(); + } + + virtual SpanBase + Span(int64_t chunk_id) const = 0; + + // used for sequential access for search + virtual BufferView + GetBatchBuffer(int64_t start_offset, int64_t length) { + PanicInfo(ErrorCode::Unsupported, + "GetBatchBuffer only supported for VariableColumn"); + } + + virtual std::pair, FixedVector> + StringViews(int64_t chunk_id) const { + PanicInfo(ErrorCode::Unsupported, + "StringViews only supported for VariableColumn"); + } + + std::pair + GetChunkIDByOffset(int64_t offset) const { + int chunk_id = 0; + for (auto& chunk : chunks_) { + if (offset < chunk->RowNums()) { + break; + } + offset -= chunk->RowNums(); + chunk_id++; + } + return {chunk_id, offset}; + } + + int64_t + GetNumRowsUntilChunk(int64_t chunk_id) const { + return num_rows_until_chunk_[chunk_id]; + } + + protected: + bool nullable_{false}; + size_t num_rows_{0}; + std::vector num_rows_until_chunk_; + + private: + // void + // UpdateMetricWhenMmap(size_t mmaped_size) { + // UpdateMetricWhenMmap(mapping_type_, mmaped_size); + // } + + // void + // UpdateMetricWhenMmap(bool is_map_anonymous, size_t mapped_size) { + // if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) { + // milvus::monitor::internal_mmap_allocated_space_bytes_anon.Observe( + // mapped_size); + // milvus::monitor::internal_mmap_in_used_space_bytes_anon.Increment( + // mapped_size); + // } else { + // 
milvus::monitor::internal_mmap_allocated_space_bytes_file.Observe( + // mapped_size); + // milvus::monitor::internal_mmap_in_used_space_bytes_file.Increment( + // mapped_size); + // } + // } + + // void + // UpdateMetricWhenMunmap(size_t mapped_size) { + // if (mapping_type_ == MappingType::MAP_WITH_ANONYMOUS) { + // milvus::monitor::internal_mmap_in_used_space_bytes_anon.Decrement( + // mapped_size); + // } else { + // milvus::monitor::internal_mmap_in_used_space_bytes_file.Decrement( + // mapped_size); + // } + // } + + private: + storage::MmapChunkManagerPtr mcm_ = nullptr; + + protected: + std::vector> chunks_; +}; + +class ChunkedColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedColumn(const FieldMeta& field_meta) : ChunkedColumnBase(field_meta) { + } + + ChunkedColumn(ChunkedColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ChunkedColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ~ChunkedColumn() override = default; + + virtual SpanBase + Span(int64_t chunk_id) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->Span(); + } +}; + +// when mmap is used, size_, data_ and num_rows_ of ColumnBase are used. 
+class ChunkedSparseFloatColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedSparseFloatColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedSparseFloatColumn(ChunkedSparseFloatColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)), + dim_(column.dim_), + vec_(std::move(column.vec_)) { + } + + ChunkedSparseFloatColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ~ChunkedSparseFloatColumn() override = default; + + void + AddChunk(std::shared_ptr chunk) override { + num_rows_until_chunk_.push_back(num_rows_); + num_rows_ += chunk->RowNums(); + chunks_.push_back(chunk); + dim_ = std::max( + dim_, + std::dynamic_pointer_cast(chunk)->Dim()); + } + + // This is used to advice mmap prefetch, we don't currently support mmap for + // sparse float vector thus not implemented for now. + size_t + DataByteSize() const override { + PanicInfo(ErrorCode::Unsupported, + "ByteSize not supported for sparse float column"); + } + + SpanBase + Span(int64_t chunk_id) const override { + PanicInfo(ErrorCode::Unsupported, + "Span not supported for sparse float column"); + } + + int64_t + Dim() const { + return dim_; + } + + private: + int64_t dim_ = 0; + std::vector> vec_; +}; + +template +class ChunkedVariableColumn : public ChunkedColumnBase { + public: + using ViewType = + std::conditional_t, std::string_view, T>; + + // memory mode ctor + ChunkedVariableColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedVariableColumn(std::vector> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + ChunkedVariableColumn(ChunkedVariableColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ~ChunkedVariableColumn() override = default; + + SpanBase + Span(int64_t chunk_id) const override { + PanicInfo(ErrorCode::NotImplemented, + "span() interface is not implemented for variable column"); + } + + std::pair, FixedVector> 
+ StringViews(int64_t chunk_id) const override { + return std::dynamic_pointer_cast<StringChunk>(chunks_[chunk_id]) + ->StringViews(); + } + + BufferView + GetBatchBuffer(int64_t start_offset, int64_t length) override { + if (start_offset < 0 || start_offset > num_rows_ || + start_offset + length > num_rows_) { + PanicInfo(ErrorCode::OutOfRange, "index out of range"); + } + + int chunk_num = chunks_.size(); + + auto [start_chunk_id, start_offset_in_chunk] = + GetChunkIDByOffset(start_offset); + BufferView buffer_view; + + std::vector<BufferView::Element> elements; + for (; start_chunk_id < chunk_num && length > 0; ++start_chunk_id) { + int chunk_size = chunks_[start_chunk_id]->RowNums(); + int len = + std::min(int64_t(chunk_size - start_offset_in_chunk), length); + elements.push_back( + {chunks_[start_chunk_id]->Data(), + std::dynamic_pointer_cast<StringChunk>(chunks_[start_chunk_id]) + ->Offsets(), + start_offset_in_chunk, + start_offset_in_chunk + len}); + + start_offset_in_chunk = 0; + length -= len; + } + + buffer_view.data_ = elements; + return buffer_view; + } + + ViewType + operator[](const int i) const { + if (i < 0 || i > num_rows_) { + PanicInfo(ErrorCode::OutOfRange, "index out of range"); + } + + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + auto data = chunks_[chunk_id]->Data(); + auto offsets = std::dynamic_pointer_cast<StringChunk>(chunks_[chunk_id]) + ->Offsets(); + auto len = offsets[offset_in_chunk + 1] - offsets[offset_in_chunk]; + + return ViewType(data + offsets[offset_in_chunk], len); + } + + std::string_view + RawAt(const int i) const { + return std::string_view((*this)[i]); + } +}; + +class ChunkedArrayColumn : public ChunkedColumnBase { + public: + // memory mode ctor + ChunkedArrayColumn(const FieldMeta& field_meta) + : ChunkedColumnBase(field_meta) { + } + + ChunkedArrayColumn(ChunkedArrayColumn&& column) noexcept + : ChunkedColumnBase(std::move(column)) { + } + + ChunkedArrayColumn(std::vector<std::shared_ptr<Chunk>> chunks) { + for (auto& chunk : chunks) { + AddChunk(chunk); + } + } + + 
~ChunkedArrayColumn() override = default; + + SpanBase + Span(int64_t chunk_id) const override { + return std::dynamic_pointer_cast(chunks_[chunk_id])->Span(); + } + + ArrayView + operator[](const int i) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->View(offset_in_chunk); + } + + ScalarArray + RawAt(const int i) const { + auto [chunk_id, offset_in_chunk] = GetChunkIDByOffset(i); + return std::dynamic_pointer_cast(chunks_[chunk_id]) + ->View(offset_in_chunk) + .output_data(); + } +}; +} // namespace milvus \ No newline at end of file diff --git a/internal/core/src/mmap/Column.h b/internal/core/src/mmap/Column.h index 7049c01f2e7b2..698097d30677e 100644 --- a/internal/core/src/mmap/Column.h +++ b/internal/core/src/mmap/Column.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "common/Array.h" @@ -121,13 +122,27 @@ class ColumnBase { * */ public: - enum class MappingType { + virtual size_t + DataByteSize() const = 0; + + virtual const char* + MmappedData() const = 0; + + virtual void + AppendBatch(const FieldDataPtr data) = 0; + + virtual const char* + Data(int chunk_id = 0) const = 0; +}; +class SingleChunkColumnBase : public ColumnBase { + public: + enum MappingType { MAP_WITH_ANONYMOUS = 0, MAP_WITH_FILE = 1, MAP_WITH_MANAGER = 2, }; // MAP_WITH_ANONYMOUS ctor - ColumnBase(size_t reserve_rows, const FieldMeta& field_meta) + SingleChunkColumnBase(size_t reserve_rows, const FieldMeta& field_meta) : mapping_type_(MappingType::MAP_WITH_ANONYMOUS) { auto data_type = field_meta.get_data_type(); SetPaddingSize(data_type); @@ -161,11 +176,11 @@ class ColumnBase { // MAP_WITH_MANAGER ctor // reserve is number of bytes to allocate(without padding) - ColumnBase(size_t reserve, - const DataType& data_type, - storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor, - bool nullable) + SingleChunkColumnBase(size_t reserve, + const DataType& data_type, + 
storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor, + bool nullable) : mcm_(mcm), mmap_descriptor_(descriptor), num_rows_(0), @@ -193,7 +208,9 @@ class ColumnBase { // !!! The incoming file must have padding written at the end of the file. // Subclasses of variable length data type, if they used this constructor, // must set num_rows_ by themselves. - ColumnBase(const File& file, size_t size, const FieldMeta& field_meta) + SingleChunkColumnBase(const File& file, + size_t size, + const FieldMeta& field_meta) : nullable_(field_meta.is_nullable()), mapping_type_(MappingType::MAP_WITH_FILE) { auto data_type = field_meta.get_data_type(); @@ -229,7 +246,7 @@ class ColumnBase { UpdateMetricWhenMmap(size); } - virtual ~ColumnBase() { + virtual ~SingleChunkColumnBase() { if (data_ != nullptr) { size_t mapped_size = data_cap_size_ + padding_; if (mapping_type_ != MappingType::MAP_WITH_MANAGER) { @@ -246,17 +263,17 @@ class ColumnBase { } } - ColumnBase(ColumnBase&&) = delete; + SingleChunkColumnBase(ColumnBase&&) = delete; // Data() points at an addr that contains the elements virtual const char* - Data() const { + Data(int chunk_id = 0) const override { return data_; } // MmappedData() returns the mmaped address const char* - MmappedData() const { + MmappedData() const override { return data_; } @@ -481,28 +498,30 @@ class ColumnBase { storage::MmapChunkManagerPtr mcm_ = nullptr; }; -class Column : public ColumnBase { +class SingleChunkColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - Column(size_t cap, const FieldMeta& field_meta) - : ColumnBase(cap, field_meta) { + SingleChunkColumn(size_t cap, const FieldMeta& field_meta) + : SingleChunkColumnBase(cap, field_meta) { } // MAP_WITH_FILE ctor - Column(const File& file, size_t size, const FieldMeta& field_meta) - : ColumnBase(file, size, field_meta) { + SingleChunkColumn(const File& file, + size_t size, + const FieldMeta& field_meta) + : SingleChunkColumnBase(file, size, 
field_meta) { } // MAP_WITH_MANAGER ctor - Column(size_t reserve, - const DataType& data_type, - storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor, - bool nullable) - : ColumnBase(reserve, data_type, mcm, descriptor, nullable) { + SingleChunkColumn(size_t reserve, + const DataType& data_type, + storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor, + bool nullable) + : SingleChunkColumnBase(reserve, data_type, mcm, descriptor, nullable) { } - ~Column() override = default; + ~SingleChunkColumn() override = default; SpanBase Span() const override { @@ -511,19 +530,18 @@ class Column : public ColumnBase { } }; -class SparseFloatColumn : public ColumnBase { +class SingleChunkSparseFloatColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - SparseFloatColumn(const FieldMeta& field_meta) - : ColumnBase(/*reserve_rows= */ 0, field_meta) { + SingleChunkSparseFloatColumn(const FieldMeta& field_meta) + : SingleChunkColumnBase(0, field_meta) { } - // MAP_WITH_FILE ctor - SparseFloatColumn(const File& file, - size_t size, - const FieldMeta& field_meta, - std::vector&& indices = {}) - : ColumnBase(file, size, field_meta) { + SingleChunkSparseFloatColumn(const File& file, + size_t size, + const FieldMeta& field_meta, + std::vector&& indices = {}) + : SingleChunkColumnBase(file, size, field_meta) { AssertInfo(!indices.empty(), "SparseFloatColumn indices should not be empty."); num_rows_ = indices.size(); @@ -545,22 +563,18 @@ class SparseFloatColumn : public ColumnBase { dim_ = std::max(dim_, vec_.back().dim()); } } - // MAP_WITH_MANAGER ctor - SparseFloatColumn(storage::MmapChunkManagerPtr mcm, - storage::MmapChunkDescriptorPtr descriptor) - : ColumnBase(/*reserve= */ 0, - DataType::VECTOR_SPARSE_FLOAT, - mcm, - descriptor, - false) { + SingleChunkSparseFloatColumn(storage::MmapChunkManagerPtr mcm, + storage::MmapChunkDescriptorPtr descriptor) + : SingleChunkColumnBase( + 0, 
DataType::VECTOR_SPARSE_FLOAT, mcm, descriptor, false) { } - ~SparseFloatColumn() override = default; + ~SingleChunkSparseFloatColumn() override = default; // returned pointer points at a list of knowhere::sparse::SparseRow const char* - Data() const override { + Data(int chunk_id = 0) const override { return static_cast(static_cast(vec_.data())); } @@ -635,27 +649,29 @@ class SparseFloatColumn : public ColumnBase { }; template -class VariableColumn : public ColumnBase { +class SingleChunkVariableColumn : public SingleChunkColumnBase { public: using ViewType = std::conditional_t, std::string_view, T>; // MAP_WITH_ANONYMOUS ctor - VariableColumn(size_t reserve_rows, - const FieldMeta& field_meta, - size_t block_size) - : ColumnBase(reserve_rows, field_meta), block_size_(block_size) { + SingleChunkVariableColumn(size_t reserve_rows, + const FieldMeta& field_meta, + size_t block_size) + : SingleChunkColumnBase(reserve_rows, field_meta), + block_size_(block_size) { } // MAP_WITH_FILE ctor - VariableColumn(const File& file, - size_t size, - const FieldMeta& field_meta, - size_t block_size) - : ColumnBase(file, size, field_meta), block_size_(block_size) { + SingleChunkVariableColumn(const File& file, + size_t size, + const FieldMeta& field_meta, + size_t block_size) + : SingleChunkColumnBase(file, size, field_meta), + block_size_(block_size) { } - ~VariableColumn() override = default; + ~SingleChunkVariableColumn() override = default; SpanBase Span() const override { @@ -705,7 +721,9 @@ class VariableColumn : public ColumnBase { pos += sizeof(uint32_t) + size; } - return BufferView{pos, data_size_ - (pos - data_)}; + BufferView res; + res.data_ = std::pair{pos, 0}; + return res; } ViewType @@ -809,21 +827,23 @@ class VariableColumn : public ColumnBase { std::vector indices_{}; }; -class ArrayColumn : public ColumnBase { +class SingleChunkArrayColumn : public SingleChunkColumnBase { public: // MAP_WITH_ANONYMOUS ctor - ArrayColumn(size_t reserve_rows, const FieldMeta& 
field_meta) - : ColumnBase(reserve_rows, field_meta), + SingleChunkArrayColumn(size_t reserve_rows, const FieldMeta& field_meta) + : SingleChunkColumnBase(reserve_rows, field_meta), element_type_(field_meta.get_element_type()) { } // MAP_WITH_FILE ctor - ArrayColumn(const File& file, size_t size, const FieldMeta& field_meta) - : ColumnBase(file, size, field_meta), + SingleChunkArrayColumn(const File& file, + size_t size, + const FieldMeta& field_meta) + : SingleChunkColumnBase(file, size, field_meta), element_type_(field_meta.get_element_type()) { } - ~ArrayColumn() override = default; + ~SingleChunkArrayColumn() override = default; SpanBase Span() const override { @@ -853,12 +873,13 @@ class ArrayColumn : public ColumnBase { indices_.emplace_back(data_size_); element_indices_.emplace_back(array.get_offsets()); if (nullable_) { - return ColumnBase::Append(static_cast(array.data()), - valid_data, - array.byte_size()); + return SingleChunkColumnBase::Append( + static_cast(array.data()), + valid_data, + array.byte_size()); } - ColumnBase::Append(static_cast(array.data()), - array.byte_size()); + SingleChunkColumnBase::Append(static_cast(array.data()), + array.byte_size()); } void diff --git a/internal/core/src/mmap/Types.h b/internal/core/src/mmap/Types.h index c2f8c1a9e45f2..77c6ec3ee88bc 100644 --- a/internal/core/src/mmap/Types.h +++ b/internal/core/src/mmap/Types.h @@ -19,22 +19,30 @@ #include #include #include +#include "arrow/record_batch.h" #include "common/FieldData.h" +#include "storage/DataCodec.h" namespace milvus { struct FieldDataInfo { FieldDataInfo() { channel = std::make_shared(); + arrow_reader_channel = std::make_shared(); } FieldDataInfo(int64_t field_id, size_t row_count, - std::string mmap_dir_path = "") + std::string mmap_dir_path = "", + bool growing = true) : field_id(field_id), row_count(row_count), mmap_dir_path(std::move(mmap_dir_path)) { - channel = std::make_shared(); + if (growing) { + channel = std::make_shared(); + } else { + 
arrow_reader_channel = std::make_shared(); + } } FieldDataInfo(int64_t field_id, @@ -66,6 +74,18 @@ struct FieldDataInfo { channel->close(); } + FieldDataInfo( + int64_t field_id, + size_t row_count, + const std::vector>& batch) + : field_id(field_id), row_count(row_count) { + arrow_reader_channel = std::make_shared(); + for (auto& data : batch) { + arrow_reader_channel->push(data); + } + arrow_reader_channel->close(); + } + FieldDataInfo(int64_t field_id, size_t row_count, std::string mmap_dir_path, @@ -84,5 +104,6 @@ struct FieldDataInfo { size_t row_count; std::string mmap_dir_path; FieldDataChannelPtr channel; + std::shared_ptr arrow_reader_channel; }; } // namespace milvus diff --git a/internal/core/src/query/SearchOnSealed.cpp b/internal/core/src/query/SearchOnSealed.cpp index ba0554ca872f7..c519f480ca696 100644 --- a/internal/core/src/query/SearchOnSealed.cpp +++ b/internal/core/src/query/SearchOnSealed.cpp @@ -9,11 +9,15 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. See the License for the specific language governing permissions and limitations under the License +#include #include #include +#include "bitset/detail/element_wise.h" +#include "common/BitsetView.h" #include "common/QueryInfo.h" #include "common/Types.h" +#include "mmap/Column.h" #include "query/SearchBruteForce.h" #include "query/SearchOnSealed.h" #include "query/helper.h" @@ -73,6 +77,95 @@ SearchOnSealedIndex(const Schema& schema, search_result.unity_topK_ = topK; } +void +SearchOnSealed(const Schema& schema, + std::shared_ptr column, + const SearchInfo& search_info, + const void* query_data, + int64_t num_queries, + int64_t row_count, + const BitsetView& bitview, + SearchResult& result) { + auto field_id = search_info.field_id_; + auto& field = schema[field_id]; + + // TODO(SPARSE): see todo in PlanImpl.h::PlaceHolder. + auto dim = field.get_data_type() == DataType::VECTOR_SPARSE_FLOAT + ? 
0 + : field.get_dim(); + + query::dataset::SearchDataset dataset{search_info.metric_type_, + num_queries, + search_info.topk_, + search_info.round_decimal_, + dim, + query_data}; + + auto data_type = field.get_data_type(); + CheckBruteForceSearchParam(field, search_info); + auto num_chunk = column->num_chunks(); + + SubSearchResult final_qr(num_queries, + search_info.topk_, + search_info.metric_type_, + search_info.round_decimal_); + + auto offset = 0; + for (int i = 0; i < num_chunk; ++i) { + auto vec_data = column->Data(i); + auto chunk_size = column->chunk_row_nums(i); + const uint8_t* bitset_ptr = nullptr; + bool aligned = false; + if ((offset & 0x7) == 0) { + bitset_ptr = bitview.data() + (offset >> 3); + aligned = true; + } else { + char* bitset_data = new char[(chunk_size + 7) / 8]; + std::fill(bitset_data, bitset_data + sizeof(bitset_data), 0); + bitset::detail::ElementWiseBitsetPolicy::op_copy( + reinterpret_cast(bitview.data()), + offset, + bitset_data, + 0, + chunk_size); + bitset_ptr = reinterpret_cast(bitset_data); + } + offset += chunk_size; + BitsetView bitset_view(bitset_ptr, chunk_size); + + if (search_info.group_by_field_id_.has_value()) { + auto sub_qr = BruteForceSearchIterators(dataset, + vec_data, + row_count, + search_info, + bitset_view, + data_type); + final_qr.merge(sub_qr); + } else { + auto sub_qr = BruteForceSearch(dataset, + vec_data, + row_count, + search_info, + bitset_view, + data_type); + final_qr.merge(sub_qr); + } + + if (!aligned) { + delete[] bitset_ptr; + } + } + if (search_info.group_by_field_id_.has_value()) { + result.AssembleChunkVectorIterators( + num_queries, 1, -1, final_qr.chunk_iterators()); + } else { + result.distances_ = std::move(final_qr.mutable_distances()); + result.seg_offsets_ = std::move(final_qr.mutable_seg_offsets()); + } + result.unity_topK_ = dataset.topk; + result.total_nq_ = dataset.num_queries; +} + void SearchOnSealed(const Schema& schema, const void* vec_data, diff --git 
a/internal/core/src/query/SearchOnSealed.h b/internal/core/src/query/SearchOnSealed.h index 73528c4b60fb5..a9261c793f88e 100644 --- a/internal/core/src/query/SearchOnSealed.h +++ b/internal/core/src/query/SearchOnSealed.h @@ -27,6 +27,16 @@ SearchOnSealedIndex(const Schema& schema, const BitsetView& view, SearchResult& search_result); +void +SearchOnSealed(const Schema& schema, + std::shared_ptr column, + const SearchInfo& search_info, + const void* query_data, + int64_t num_queries, + int64_t row_count, + const BitsetView& bitset, + SearchResult& result); + void SearchOnSealed(const Schema& schema, const void* vec_data, diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp new file mode 100644 index 0000000000000..a95ae1ecd1665 --- /dev/null +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.cpp @@ -0,0 +1,2222 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. 
See the License for the specific language governing permissions and limitations under the License + +#include "ChunkedSegmentSealedImpl.h" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Utils.h" +#include "Types.h" +#include "common/Array.h" +#include "common/Chunk.h" +#include "common/ChunkWriter.h" +#include "common/Consts.h" +#include "common/EasyAssert.h" +#include "common/FieldData.h" +#include "common/FieldMeta.h" +#include "common/File.h" +#include "common/Json.h" +#include "common/LoadInfo.h" +#include "common/Tracer.h" +#include "common/Types.h" +#include "google/protobuf/message_lite.h" +#include "index/VectorMemIndex.h" +#include "mmap/ChunkedColumn.h" +#include "mmap/Utils.h" +#include "mmap/Types.h" +#include "log/Log.h" +#include "pb/schema.pb.h" +#include "query/ScalarIndex.h" +#include "query/SearchBruteForce.h" +#include "query/SearchOnSealed.h" +#include "storage/DataCodec.h" +#include "storage/Util.h" +#include "storage/ThreadPools.h" +#include "storage/MmapManager.h" + +namespace milvus::segcore { + +static inline void +set_bit(BitsetType& bitset, FieldId field_id, bool flag = true) { + auto pos = field_id.get() - START_USER_FIELDID; + AssertInfo(pos >= 0, "invalid field id"); + bitset[pos] = flag; +} + +static inline bool +get_bit(const BitsetType& bitset, FieldId field_id) { + auto pos = field_id.get() - START_USER_FIELDID; + AssertInfo(pos >= 0, "invalid field id"); + + return bitset[pos]; +} + +void +ChunkedSegmentSealedImpl::LoadIndex(const LoadIndexInfo& info) { + // print(info); + // NOTE: lock only when data is ready to avoid starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + if (field_meta.is_vector()) { + LoadVecIndex(info); + } else { + LoadScalarIndex(info); + } +} + +void +ChunkedSegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) { + // NOTE: lock only when data is ready to avoid 
starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + AssertInfo(info.index_params.count("metric_type"), + "Can't get metric_type in index_params"); + auto metric_type = info.index_params.at("metric_type"); + auto row_count = info.index->Count(); + AssertInfo(row_count > 0, "Index count is 0"); + + std::unique_lock lck(mutex_); + AssertInfo( + !get_bit(index_ready_bitset_, field_id), + "vector index has been exist at " + std::to_string(field_id.get())); + if (num_rows_.has_value()) { + AssertInfo(num_rows_.value() == row_count, + "field (" + std::to_string(field_id.get()) + + ") data has different row count (" + + std::to_string(row_count) + + ") than other column's row count (" + + std::to_string(num_rows_.value()) + ")"); + } + LOG_INFO( + "Before setting field_bit for field index, fieldID:{}. segmentID:{}, ", + info.field_id, + id_); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } else if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + update_row_count(row_count); + vector_indexings_.append_field_indexing( + field_id, + metric_type, + std::move(const_cast(info).index)); + set_bit(index_ready_bitset_, field_id, true); + LOG_INFO("Has load vec index done, fieldID:{}. 
segmentID:{}, ", + info.field_id, + id_); +} + +void +ChunkedSegmentSealedImpl::WarmupChunkCache(const FieldId field_id, + bool mmap_enabled) { + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), "vector field is not vector type"); + + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { + return; + } + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(field_id); + auto vec_index = + dynamic_cast(field_indexing->indexing_.get()); + AssertInfo(vec_index, "invalid vector indexing"); + + auto it = field_data_info_.field_infos.find(field_id.get()); + AssertInfo(it != field_data_info_.field_infos.end(), + "cannot find binlog file for field: {}, seg: {}", + field_id.get(), + id_); + auto field_info = it->second; + + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + for (const auto& data_path : field_info.insert_files) { + auto column = cc->Read(data_path, mmap_descriptor_, field_meta); + } +} + +void +ChunkedSegmentSealedImpl::LoadScalarIndex(const LoadIndexInfo& info) { + // NOTE: lock only when data is ready to avoid starvation + auto field_id = FieldId(info.field_id); + auto& field_meta = schema_->operator[](field_id); + + auto row_count = info.index->Count(); + AssertInfo(row_count > 0, "Index count is 0"); + + std::unique_lock lck(mutex_); + AssertInfo( + !get_bit(index_ready_bitset_, field_id), + "scalar index has been exist at " + std::to_string(field_id.get())); + if (num_rows_.has_value()) { + AssertInfo(num_rows_.value() == row_count, + "field (" + std::to_string(field_id.get()) + + ") data has different row count (" + + std::to_string(row_count) + + ") than other column's row count (" + + std::to_string(num_rows_.value()) + ")"); + } + + scalar_indexings_[field_id] = + std::move(const_cast(info).index); + // reverse pk from scalar index and set pks to offset + if 
(schema_->get_primary_field_id() == field_id) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + switch (field_meta.get_data_type()) { + case DataType::INT64: { + auto int64_index = dynamic_cast*>( + scalar_indexings_[field_id].get()); + if (!is_sorted_by_pk_ && insert_record_.empty_pks() && + int64_index->HasRawData()) { + for (int i = 0; i < row_count; ++i) { + insert_record_.insert_pk(int64_index->Reverse_Lookup(i), + i); + } + insert_record_.seal_pks(); + } + break; + } + case DataType::VARCHAR: { + auto string_index = + dynamic_cast*>( + scalar_indexings_[field_id].get()); + if (!is_sorted_by_pk_ && insert_record_.empty_pks() && + string_index->HasRawData()) { + for (int i = 0; i < row_count; ++i) { + insert_record_.insert_pk( + string_index->Reverse_Lookup(i), i); + } + insert_record_.seal_pks(); + } + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported primary key type {}", + field_meta.get_data_type())); + } + } + } + + set_bit(index_ready_bitset_, field_id, true); + update_row_count(row_count); + // release field column if the index contains raw data + if (scalar_indexings_[field_id]->HasRawData() && + get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } + + lck.unlock(); +} + +void +ChunkedSegmentSealedImpl::LoadFieldData(const LoadFieldDataInfo& load_info) { + // NOTE: lock only when data is ready to avoid starvation + // only one field for now, parallel load field data in golang + size_t num_rows = storage::GetNumRowsForLoadInfo(load_info); + + for (auto& [id, info] : load_info.field_infos) { + AssertInfo(info.row_count > 0, "The row count of field data is 0"); + + auto field_id = FieldId(id); + auto insert_files = info.insert_files; + std::sort(insert_files.begin(), + insert_files.end(), + [](const std::string& a, const std::string& b) { + return std::stol(a.substr(a.find_last_of('/') + 1)) < + std::stol(b.substr(b.find_last_of('/') + 
1)); + }); + + auto field_data_info = FieldDataInfo( + field_id.get(), num_rows, load_info.mmap_dir_path, false); + LOG_INFO("segment {} loads field {} with num_rows {}", + this->get_segment_id(), + field_id.get(), + num_rows); + + auto parallel_degree = static_cast( + DEFAULT_FIELD_MAX_MEMORY_LIMIT / FILE_SLICE_SIZE); + field_data_info.arrow_reader_channel->set_capacity(parallel_degree * 2); + auto& pool = + ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::MIDDLE); + pool.Submit(LoadArrowReaderFromRemote, + insert_files, + field_data_info.arrow_reader_channel); + + LOG_INFO("segment {} submits load field {} task to thread pool", + this->get_segment_id(), + field_id.get()); + bool use_mmap = false; + if (!info.enable_mmap || + SystemProperty::Instance().IsSystem(field_id)) { + LoadFieldData(field_id, field_data_info); + } else { + MapFieldData(field_id, field_data_info); + use_mmap = true; + } + LOG_INFO("segment {} loads field {} mmap {} done", + this->get_segment_id(), + field_id.get(), + use_mmap); + } +} + +void +ChunkedSegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { + auto num_rows = data.row_count; + if (SystemProperty::Instance().IsSystem(field_id)) { + auto system_field_type = + SystemProperty::Instance().GetSystemFieldType(field_id); + if (system_field_type == SystemFieldType::Timestamp) { + std::vector timestamps(num_rows); + int64_t offset = 0; + FieldMeta field_meta( + FieldName(""), FieldId(0), DataType::INT64, false); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = std::dynamic_pointer_cast( + create_chunk(field_meta, 1, r->reader)); + std::copy_n(static_cast(chunk->Span().data()), + chunk->Span().row_count(), + timestamps.data() + offset); + offset += chunk->Span().row_count(); + } + + // for (auto& data : field_data) { + // int64_t row_count = data->get_num_rows(); + // std::copy_n(static_cast(data->Data()), + // row_count, + // timestamps.data() + offset); + // offset += row_count; + 
// } + + TimestampIndex index; + auto min_slice_length = num_rows < 4096 ? 1 : 4096; + auto meta = GenerateFakeSlices( + timestamps.data(), num_rows, min_slice_length); + index.set_length_meta(std::move(meta)); + // todo ::opt to avoid copy timestamps from field data + index.build_with(timestamps.data(), num_rows); + + // use special index + std::unique_lock lck(mutex_); + AssertInfo(insert_record_.timestamps_.empty(), "already exists"); + insert_record_.timestamps_.set_data_raw( + 0, timestamps.data(), timestamps.size()); + insert_record_.timestamp_index_ = std::move(index); + AssertInfo(insert_record_.timestamps_.num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + stats_.mem_size += sizeof(Timestamp) * data.row_count; + } else { + AssertInfo(system_field_type == SystemFieldType::RowId, + "System field type of id column is not RowId"); + // Consume rowid field data but not really load it + // storage::CollectFieldDataChannel(data.arrow_reader_channel); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + } + } + ++system_ready_count_; + } else { + // prepare data + auto& field_meta = (*schema_)[field_id]; + auto data_type = field_meta.get_data_type(); + + // Don't allow raw data and index exist at the same time + // AssertInfo(!get_bit(index_ready_bitset_, field_id), + // "field data can't be loaded when indexing exists"); + auto get_block_size = [&]() -> size_t { + return schema_->get_primary_field_id() == field_id + ? 
DEFAULT_PK_VRCOL_BLOCK_SIZE + : DEFAULT_MEM_VRCOL_BLOCK_SIZE; + }; + + std::shared_ptr column{}; + if (IsVariableDataType(data_type)) { + int64_t field_data_size = 0; + switch (data_type) { + case milvus::DataType::STRING: + case milvus::DataType::VARCHAR: { + auto var_column = + std::make_shared>( + field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + field_data_size = var_column->DataByteSize(); + stats_.mem_size += var_column->DataByteSize(); + LoadStringSkipIndex(field_id, 0, *var_column); + column = std::move(var_column); + break; + } + case milvus::DataType::JSON: { + auto var_column = + std::make_shared>( + field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + stats_.mem_size += var_column->DataByteSize(); + field_data_size = var_column->DataByteSize(); + column = std::move(var_column); + break; + } + case milvus::DataType::ARRAY: { + auto var_column = + std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + // for (auto i = 0; i < field_data->get_num_rows(); i++) { + // auto rawValue = field_data->RawValue(i); + // auto array = + // static_cast(rawValue); + // if (field_data->IsNullable()) { + // var_column->Append(*array, + // field_data->is_valid(i)); + // } else { + // var_column->Append(*array); + // } + + // // we stores the offset for each array element, so there is a additional uint64_t for each array element + // field_data_size = + // array->byte_size() + sizeof(uint64_t); + // stats_.mem_size += + // array->byte_size() + sizeof(uint64_t); + // } + + auto chunk = create_chunk(field_meta, 1, r->reader); + var_column->AddChunk(chunk); + } + // var_column->Seal(); + column = std::move(var_column); + break; + } + case 
milvus::DataType::VECTOR_SPARSE_FLOAT: { + auto col = + std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = create_chunk(field_meta, 1, r->reader); + col->AddChunk(chunk); + } + column = std::move(col); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", data_type)); + } + } + + // update average row data size + SegmentInternalInterface::set_field_avg_size( + field_id, num_rows, field_data_size); + } else { + column = std::make_shared(field_meta); + std::shared_ptr r; + while (data.arrow_reader_channel->pop(r)) { + auto chunk = + create_chunk(field_meta, + IsVectorDataType(field_meta.get_data_type()) + ? field_meta.get_dim() + : 1, + r->reader); + // column->AppendBatch(field_data); + // stats_.mem_size += field_data->Size(); + column->AddChunk(chunk); + } + + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + LoadPrimitiveSkipIndex(field_id, + i, + data_type, + column->Span(i).data(), + column->Span(i).valid_data(), + column->Span(i).row_count()); + } + } + + AssertInfo(column->NumRows() == num_rows, + fmt::format("data lost while loading column {}: loaded " + "num rows {} but expected {}", + data.field_id, + column->NumRows(), + num_rows)); + + { + std::unique_lock lck(mutex_); + fields_.emplace(field_id, column); + } + + // set pks to offset + if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + AssertInfo(insert_record_.empty_pks(), "already exists"); + insert_record_.insert_pks(data_type, column); + insert_record_.seal_pks(); + } + + bool use_temp_index = false; + { + // update num_rows to build temperate binlog index + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } + + if (generate_interim_index(field_id)) { + std::unique_lock lck(mutex_); + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + use_temp_index 
= true; + } + + if (!use_temp_index) { + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); + } + } + { + std::unique_lock lck(mutex_); + update_row_count(num_rows); + } +} + +void +ChunkedSegmentSealedImpl::MapFieldData(const FieldId field_id, + FieldDataInfo& data) { + auto filepath = std::filesystem::path(data.mmap_dir_path) / + std::to_string(get_segment_id()) / + std::to_string(field_id.get()); + auto dir = filepath.parent_path(); + std::filesystem::create_directories(dir); + + auto file = File::Open(filepath.string(), O_CREAT | O_TRUNC | O_RDWR); + + auto& field_meta = (*schema_)[field_id]; + auto data_type = field_meta.get_data_type(); + + // write the field data to disk + uint64_t total_written = 0; + std::vector indices{}; + std::vector> element_indices{}; + // FixedVector valid_data{}; + std::shared_ptr r; + + size_t file_offset = 0; + std::vector> chunks; + while (data.arrow_reader_channel->pop(r)) { + // WriteFieldData(file, + // data_type, + // field_data, + // total_written, + // indices, + // element_indices, + // valid_data); + auto chunk = create_chunk(field_meta, + IsVectorDataType(field_meta.get_data_type()) + ? 
field_meta.get_dim() + : 1, + file, + file_offset, + r->reader); + file_offset += chunk->Size(); + chunks.push_back(chunk); + } + // WriteFieldPadding(file, data_type, total_written); + std::shared_ptr column{}; + auto num_rows = data.row_count; + if (IsVariableDataType(data_type)) { + switch (data_type) { + case milvus::DataType::STRING: + case milvus::DataType::VARCHAR: { + // auto var_column = std::make_shared>( + // file, + // total_written, + // field_meta, + // DEFAULT_MMAP_VRCOL_BLOCK_SIZE); + auto var_column = + std::make_shared>( + chunks); + // var_column->Seal(std::move(indices)); + column = std::move(var_column); + break; + } + case milvus::DataType::JSON: { + auto var_column = + std::make_shared>( + chunks); + // var_column->Seal(std::move(indices)); + column = std::move(var_column); + break; + } + case milvus::DataType::ARRAY: { + auto arr_column = std::make_shared(chunks); + // arr_column->Seal(std::move(indices), + // std::move(element_indices)); + column = std::move(arr_column); + break; + } + case milvus::DataType::VECTOR_SPARSE_FLOAT: { + auto sparse_column = + std::make_shared(chunks); + // sparse_column->Seal(std::move(indices)); + column = std::move(sparse_column); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", data_type)); + } + } + } else { + column = std::make_shared(chunks); + } + + // column->SetValidData(std::move(valid_data)); + + { + std::unique_lock lck(mutex_); + fields_.emplace(field_id, column); + mmap_fields_.insert(field_id); + } + + auto ok = unlink(filepath.c_str()); + AssertInfo(ok == 0, + fmt::format("failed to unlink mmap data file {}, err: {}", + filepath.c_str(), + strerror(errno))); + + // set pks to offset + if (schema_->get_primary_field_id() == field_id && !is_sorted_by_pk_) { + AssertInfo(field_id.get() != -1, "Primary key is -1"); + AssertInfo(insert_record_.empty_pks(), "already exists"); + insert_record_.insert_pks(data_type, column); + insert_record_.seal_pks(); + 
} + + std::unique_lock lck(mutex_); + set_bit(field_data_ready_bitset_, field_id, true); +} + +void +ChunkedSegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) { + AssertInfo(info.row_count > 0, "The row count of deleted record is 0"); + AssertInfo(info.primary_keys, "Deleted primary keys is null"); + AssertInfo(info.timestamps, "Deleted timestamps is null"); + // step 1: get pks and timestamps + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = schema_->operator[](field_id); + int64_t size = info.row_count; + std::vector pks(size); + ParsePksFromIDs(pks, field_meta.get_data_type(), *info.primary_keys); + auto timestamps = reinterpret_cast(info.timestamps); + + std::vector> ordering(size); + for (int i = 0; i < size; i++) { + ordering[i] = std::make_tuple(timestamps[i], pks[i]); + } + + if (!insert_record_.empty_pks()) { + auto end = std::remove_if( + ordering.begin(), + ordering.end(), + [&](const std::tuple& record) { + return !insert_record_.contain(std::get<1>(record)); + }); + size = end - ordering.begin(); + ordering.resize(size); + } + + // all record filtered + if (size == 0) { + return; + } + + std::sort(ordering.begin(), ordering.end()); + std::vector sort_pks(size); + std::vector sort_timestamps(size); + + for (int i = 0; i < size; i++) { + auto [t, pk] = ordering[i]; + sort_timestamps[i] = t; + sort_pks[i] = pk; + } + + deleted_record_.push(sort_pks, sort_timestamps.data()); +} + +void +ChunkedSegmentSealedImpl::AddFieldDataInfoForSealed( + const LoadFieldDataInfo& field_data_info) { + // copy assignment + field_data_info_ = field_data_info; +} + +// internal API: support scalar index only +int64_t +ChunkedSegmentSealedImpl::num_chunk_index(FieldId field_id) const { + auto& field_meta = schema_->operator[](field_id); + if (field_meta.is_vector()) { + return int64_t(vector_indexings_.is_ready(field_id)); + } + + return 
scalar_indexings_.count(field_id); +} + +int64_t +ChunkedSegmentSealedImpl::num_chunk_data(FieldId field_id) const { + return fields_.at(field_id)->num_chunks(); +} + +int64_t +ChunkedSegmentSealedImpl::num_chunk(FieldId field_id) const { + return get_bit(field_data_ready_bitset_, field_id) + ? fields_.find(field_id) != fields_.end() + ? fields_.at(field_id)->num_chunks() + : 1 + : 0; +} + +int64_t +ChunkedSegmentSealedImpl::size_per_chunk() const { + return get_row_count(); +} + +int64_t +ChunkedSegmentSealedImpl::chunk_size(FieldId field_id, int64_t chunk_id) const { + return get_bit(field_data_ready_bitset_, field_id) + ? fields_.find(field_id) != fields_.end() + ? fields_.at(field_id)->chunk_row_nums(chunk_id) + : num_rows_.value() + : 0; +} + +std::pair +ChunkedSegmentSealedImpl::get_chunk_by_offset(FieldId field_id, + int64_t offset) const { + return fields_.at(field_id)->GetChunkIDByOffset(offset); +} + +int64_t +ChunkedSegmentSealedImpl::num_rows_until_chunk(FieldId field_id, + int64_t chunk_id) const { + return fields_.at(field_id)->GetNumRowsUntilChunk(chunk_id); +} + +std::pair> +ChunkedSegmentSealedImpl::get_chunk_buffer(FieldId field_id, + int64_t chunk_id, + int64_t start_offset, + int64_t length) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + FixedVector valid_data; + if (field_data->IsNullable()) { + valid_data.reserve(length); + for (int i = 0; i < length; i++) { + valid_data.push_back(field_data->IsValid(start_offset + i)); + } + } + return std::make_pair(field_data->GetBatchBuffer(start_offset, length), + valid_data); + } + PanicInfo(ErrorCode::UnexpectedError, + "get_chunk_buffer only used for variable column field"); +} + +bool +ChunkedSegmentSealedImpl::is_mmap_field(FieldId 
field_id) const { + std::shared_lock lck(mutex_); + return mmap_fields_.find(field_id) != mmap_fields_.end(); +} + +SpanBase +ChunkedSegmentSealedImpl::chunk_data_impl(FieldId field_id, + int64_t chunk_id) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->Span(chunk_id); + } + auto field_data = insert_record_.get_data_base(field_id); + AssertInfo(field_data->num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + // system field + return field_data->get_span_base(0); +} + +std::pair, FixedVector> +ChunkedSegmentSealedImpl::chunk_view_impl(FieldId field_id, + int64_t chunk_id) const { + std::shared_lock lck(mutex_); + AssertInfo(get_bit(field_data_ready_bitset_, field_id), + "Can't get bitset element at " + std::to_string(field_id.get())); + auto& field_meta = schema_->operator[](field_id); + if (auto it = fields_.find(field_id); it != fields_.end()) { + auto& field_data = it->second; + return field_data->StringViews(chunk_id); + } + PanicInfo(ErrorCode::UnexpectedError, + "chunk_view_impl only used for variable column field "); +} + +const index::IndexBase* +ChunkedSegmentSealedImpl::chunk_index_impl(FieldId field_id, + int64_t chunk_id) const { + AssertInfo(scalar_indexings_.find(field_id) != scalar_indexings_.end(), + "Cannot find scalar_indexing with field_id: " + + std::to_string(field_id.get())); + auto ptr = scalar_indexings_.at(field_id).get(); + return ptr; +} + +int64_t +ChunkedSegmentSealedImpl::get_row_count() const { + std::shared_lock lck(mutex_); + return num_rows_.value_or(0); +} + +int64_t +ChunkedSegmentSealedImpl::get_deleted_count() const { + std::shared_lock lck(mutex_); + return deleted_record_.size(); +} + +const Schema& 
+ChunkedSegmentSealedImpl::get_schema() const { + return *schema_; +} + +void +ChunkedSegmentSealedImpl::mask_with_delete(BitsetTypeView& bitset, + int64_t ins_barrier, + Timestamp timestamp) const { + auto del_barrier = get_barrier(get_deleted_record(), timestamp); + if (del_barrier == 0) { + return; + } + + auto bitmap_holder = std::shared_ptr(); + + if (!is_sorted_by_pk_) { + bitmap_holder = get_deleted_bitmap(del_barrier, + ins_barrier, + deleted_record_, + insert_record_, + timestamp); + } else { + bitmap_holder = get_deleted_bitmap_s( + del_barrier, ins_barrier, deleted_record_, timestamp); + } + if (!bitmap_holder || !bitmap_holder->bitmap_ptr) { + return; + } + auto& delete_bitset = *bitmap_holder->bitmap_ptr; + AssertInfo( + delete_bitset.size() == bitset.size(), + fmt::format( + "Deleted bitmap size:{} not equal to filtered bitmap size:{}", + delete_bitset.size(), + bitset.size())); + bitset |= delete_bitset; +} + +void +ChunkedSegmentSealedImpl::vector_search(SearchInfo& search_info, + const void* query_data, + int64_t query_count, + Timestamp timestamp, + const BitsetView& bitset, + SearchResult& output) const { + AssertInfo(is_system_field_ready(), "System field is not ready"); + auto field_id = search_info.field_id_; + auto& field_meta = schema_->operator[](field_id); + + AssertInfo(field_meta.is_vector(), + "The meta type of vector field is not vector type"); + if (get_bit(binlog_index_bitset_, field_id)) { + AssertInfo( + vec_binlog_config_.find(field_id) != vec_binlog_config_.end(), + "The binlog params is not generate."); + auto binlog_search_info = + vec_binlog_config_.at(field_id)->GetSearchConf(search_info); + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + binlog_search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent( + 
"finish_searching_vector_temperate_binlog_index"); + } else if (get_bit(index_ready_bitset_, field_id)) { + AssertInfo(vector_indexings_.is_ready(field_id), + "vector indexes isn't ready for field " + + std::to_string(field_id.get())); + query::SearchOnSealedIndex(*schema_, + vector_indexings_, + search_info, + query_data, + query_count, + bitset, + output); + milvus::tracer::AddEvent("finish_searching_vector_index"); + } else { + AssertInfo( + get_bit(field_data_ready_bitset_, field_id), + "Field Data is not loaded: " + std::to_string(field_id.get())); + AssertInfo(num_rows_.has_value(), "Can't get row count value"); + auto row_count = num_rows_.value(); + auto vec_data = fields_.at(field_id); + query::SearchOnSealed(*schema_, + vec_data, + search_info, + query_data, + query_count, + row_count, + bitset, + output); + milvus::tracer::AddEvent("finish_searching_vector_data"); + } +} + +std::tuple +ChunkedSegmentSealedImpl::GetFieldDataPath(FieldId field_id, + int64_t offset) const { + auto offset_in_binlog = offset; + auto data_path = std::string(); + auto it = field_data_info_.field_infos.find(field_id.get()); + AssertInfo(it != field_data_info_.field_infos.end(), + fmt::format("cannot find binlog file for field: {}, seg: {}", + field_id.get(), + id_)); + auto field_info = it->second; + + for (auto i = 0; i < field_info.insert_files.size(); i++) { + if (offset_in_binlog < field_info.entries_nums[i]) { + data_path = field_info.insert_files[i]; + break; + } else { + offset_in_binlog -= field_info.entries_nums[i]; + } + } + return {data_path, offset_in_binlog}; +} + +std::tuple< + std::string, + std::shared_ptr< + ChunkedColumnBase>> static ReadFromChunkCache(const storage:: + ChunkCachePtr& cc, + const std::string& + data_path, + const storage:: + MmapChunkDescriptorPtr& + descriptor, + const FieldMeta& + field_meta) { + auto column = cc->Read(data_path, descriptor, field_meta); + cc->Prefetch(data_path); + return {data_path, std::dynamic_pointer_cast(column)}; +} + 
+std::unique_ptr +ChunkedSegmentSealedImpl::get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), "vector field is not vector type"); + + if (!get_bit(index_ready_bitset_, field_id) && + !get_bit(binlog_index_bitset_, field_id)) { + return fill_with_empty(field_id, count); + } + + AssertInfo(vector_indexings_.is_ready(field_id), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(field_id); + auto vec_index = + dynamic_cast(field_indexing->indexing_.get()); + AssertInfo(vec_index, "invalid vector indexing"); + + auto index_type = vec_index->GetIndexType(); + auto metric_type = vec_index->GetMetricType(); + auto has_raw_data = vec_index->HasRawData(); + + if (has_raw_data && !TEST_skip_index_for_retrieve_) { + // If index has raw data, get vector from memory. + auto ids_ds = GenIdsDataset(count, ids); + if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { + auto res = vec_index->GetSparseVector(ids_ds); + return segcore::CreateVectorDataArrayFrom( + res.get(), count, field_meta); + } else { + // dense vector: + auto vector = vec_index->GetVector(ids_ds); + return segcore::CreateVectorDataArrayFrom( + vector.data(), count, field_meta); + } + } + + // If index doesn't have raw data, get vector from chunk cache. 
+ auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + + // group by data_path + auto id_to_data_path = + std::unordered_map>{}; + auto path_to_column = + std::unordered_map>{}; + for (auto i = 0; i < count; i++) { + const auto& tuple = GetFieldDataPath(field_id, ids[i]); + id_to_data_path.emplace(ids[i], tuple); + path_to_column.emplace(std::get<0>(tuple), nullptr); + } + + // read and prefetch + auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH); + std::vector>>> + futures; + futures.reserve(path_to_column.size()); + for (const auto& iter : path_to_column) { + const auto& data_path = iter.first; + futures.emplace_back(pool.Submit( + ReadFromChunkCache, cc, data_path, mmap_descriptor_, field_meta)); + } + + for (int i = 0; i < futures.size(); ++i) { + const auto& [data_path, column] = futures[i].get(); + path_to_column[data_path] = column; + } + + if (field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT) { + auto buf = std::vector>(count); + for (auto i = 0; i < count; ++i) { + const auto& [data_path, offset_in_binlog] = + id_to_data_path.at(ids[i]); + const auto& column = path_to_column.at(data_path); + AssertInfo( + offset_in_binlog < column->NumRows(), + "column idx out of range, idx: {}, size: {}, data_path: {}", + offset_in_binlog, + column->NumRows(), + data_path); + auto sparse_column = + std::dynamic_pointer_cast(column); + AssertInfo(sparse_column, "incorrect column created"); + buf[i] = *static_cast*>( + static_cast( + sparse_column->ValueAt(offset_in_binlog))); + } + return segcore::CreateVectorDataArrayFrom( + buf.data(), count, field_meta); + } else { + // assign to data array + auto row_bytes = field_meta.get_sizeof(); + auto buf = std::vector(count * row_bytes); + for (auto i = 0; i < count; ++i) { + AssertInfo(id_to_data_path.count(ids[i]) != 0, "id not found"); + const auto& [data_path, offset_in_binlog] = + id_to_data_path.at(ids[i]); + AssertInfo(path_to_column.count(data_path) != 0, + "column not 
found"); + const auto& column = path_to_column.at(data_path); + AssertInfo( + offset_in_binlog * row_bytes < column->DataByteSize(), + "column idx out of range, idx: {}, size: {}, data_path: {}", + offset_in_binlog * row_bytes, + column->DataByteSize(), + data_path); + auto vector = column->ValueAt(offset_in_binlog); + std::memcpy(buf.data() + i * row_bytes, vector, row_bytes); + } + return segcore::CreateVectorDataArrayFrom( + buf.data(), count, field_meta); + } +} + +void +ChunkedSegmentSealedImpl::DropFieldData(const FieldId field_id) { + if (SystemProperty::Instance().IsSystem(field_id)) { + auto system_field_type = + SystemProperty::Instance().GetSystemFieldType(field_id); + + std::unique_lock lck(mutex_); + --system_ready_count_; + if (system_field_type == SystemFieldType::Timestamp) { + insert_record_.timestamps_.clear(); + } + lck.unlock(); + } else { + auto& field_meta = schema_->operator[](field_id); + std::unique_lock lck(mutex_); + if (get_bit(field_data_ready_bitset_, field_id)) { + fields_.erase(field_id); + set_bit(field_data_ready_bitset_, field_id, false); + } + if (get_bit(binlog_index_bitset_, field_id)) { + set_bit(binlog_index_bitset_, field_id, false); + vector_indexings_.drop_field_indexing(field_id); + } + lck.unlock(); + } +} + +void +ChunkedSegmentSealedImpl::DropIndex(const FieldId field_id) { + AssertInfo(!SystemProperty::Instance().IsSystem(field_id), + "Field id:" + std::to_string(field_id.get()) + + " isn't one of system type when drop index"); + auto& field_meta = schema_->operator[](field_id); + AssertInfo(field_meta.is_vector(), + "Field meta of offset:" + std::to_string(field_id.get()) + + " is not vector type"); + + std::unique_lock lck(mutex_); + vector_indexings_.drop_field_indexing(field_id); + set_bit(index_ready_bitset_, field_id, false); +} + +void +ChunkedSegmentSealedImpl::check_search(const query::Plan* plan) const { + AssertInfo(plan, "Search plan is null"); + AssertInfo(plan->extra_info_opt_.has_value(), + "Extra info 
of search plan doesn't have value"); + + if (!is_system_field_ready()) { + PanicInfo( + FieldNotLoaded, + "failed to load row ID or timestamp, potential missing bin logs or " + "empty segments. Segment ID = " + + std::to_string(this->id_)); + } + + auto& request_fields = plan->extra_info_opt_.value().involved_fields_; + auto field_ready_bitset = + field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_; + AssertInfo(request_fields.size() == field_ready_bitset.size(), + "Request fields size not equal to field ready bitset size when " + "check search"); + auto absent_fields = request_fields - field_ready_bitset; + + if (absent_fields.any()) { + // absent_fields.find_first() returns std::optional<> + auto field_id = + FieldId(absent_fields.find_first().value() + START_USER_FIELDID); + auto& field_meta = schema_->operator[](field_id); + PanicInfo( + FieldNotLoaded, + "User Field(" + field_meta.get_name().get() + ") is not loaded"); + } +} + +std::vector +ChunkedSegmentSealedImpl::search_pk(const PkType& pk, + Timestamp timestamp) const { + auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); + auto pk_column = fields_.at(pk_field_id); + std::vector pk_offsets; + switch (schema_->get_fields().at(pk_field_id).get_data_type()) { + case DataType::INT64: { + auto target = std::get(pk); + // get int64 pks + auto num_chunk = pk_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto src = reinterpret_cast(pk_column->Data(i)); + auto chunk_row_num = pk_column->chunk_row_nums(i); + auto it = std::lower_bound( + src, + src + chunk_row_num, + target, + [](const int64_t& elem, const int64_t& value) { + return elem < value; + }); + for (; it != src + chunk_row_num && *it == target; it++) { + auto offset = it - src; + if (insert_record_.timestamps_[offset] <= timestamp) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + case DataType::VARCHAR: { + auto target = 
std::get(pk); + // get varchar pks + auto var_column = + std::dynamic_pointer_cast>( + pk_column); + auto num_chunk = var_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto views = var_column->StringViews(i).first; + auto it = std::lower_bound(views.begin(), views.end(), target); + for (; it != views.end() && *it == target; it++) { + auto offset = std::distance(views.begin(), it); + if (insert_record_.timestamps_[offset] <= timestamp) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + default: { + PanicInfo( + DataTypeInvalid, + fmt::format( + "unsupported type {}", + schema_->get_fields().at(pk_field_id).get_data_type())); + } + } + + return pk_offsets; +} + +std::vector +ChunkedSegmentSealedImpl::search_pk(const PkType& pk, + int64_t insert_barrier) const { + auto pk_field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(pk_field_id.get() != -1, "Primary key is -1"); + auto pk_column = fields_.at(pk_field_id); + std::vector pk_offsets; + switch (schema_->get_fields().at(pk_field_id).get_data_type()) { + case DataType::INT64: { + auto target = std::get(pk); + // get int64 pks + + auto num_chunk = pk_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto src = reinterpret_cast(pk_column->Data(i)); + auto chunk_row_num = pk_column->chunk_row_nums(i); + auto it = std::lower_bound( + src, + src + chunk_row_num, + target, + [](const int64_t& elem, const int64_t& value) { + return elem < value; + }); + for (; it != src + chunk_row_num && *it == target; it++) { + auto offset = it - src; + if (offset < insert_barrier) { + pk_offsets.emplace_back(offset); + } + } + } + + break; + } + case DataType::VARCHAR: { + auto target = std::get(pk); + // get varchar pks + auto var_column = + std::dynamic_pointer_cast>( + pk_column); + + auto num_chunk = var_column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto views = var_column->StringViews(i).first; + auto it = std::lower_bound(views.begin(), 
views.end(), target); + for (; it != views.end() && *it == target; it++) { + auto offset = std::distance(views.begin(), it); + if (offset < insert_barrier) { + pk_offsets.emplace_back(offset); + } + } + } + break; + } + default: { + PanicInfo( + DataTypeInvalid, + fmt::format( + "unsupported type {}", + schema_->get_fields().at(pk_field_id).get_data_type())); + } + } + + return pk_offsets; +} + +std::shared_ptr +ChunkedSegmentSealedImpl::get_deleted_bitmap_s( + int64_t del_barrier, + int64_t insert_barrier, + DeletedRecord& delete_record, + Timestamp query_timestamp) const { + // if insert_barrier and del_barrier have not changed, use cache data directly + bool hit_cache = false; + int64_t old_del_barrier = 0; + auto current = delete_record.clone_lru_entry( + insert_barrier, del_barrier, old_del_barrier, hit_cache); + if (hit_cache) { + return current; + } + + auto bitmap = current->bitmap_ptr; + + int64_t start, end; + if (del_barrier < old_del_barrier) { + // in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp + // so these deletion records do not take effect in query/search + // so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] will be reset to 0 + // for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0] + start = del_barrier; + end = old_del_barrier; + } else { + // the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier] + // for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0] + start = old_del_barrier; + end = del_barrier; + } + + // Avoid invalid calculations when there are a lot of repeated delete pks + std::unordered_map delete_timestamps; + for (auto del_index = start; del_index < end; ++del_index) { + auto pk = delete_record.pks()[del_index]; + auto timestamp = delete_record.timestamps()[del_index]; + + 
delete_timestamps[pk] = timestamp > delete_timestamps[pk] + ? timestamp + : delete_timestamps[pk]; + } + + for (auto& [pk, timestamp] : delete_timestamps) { + auto segOffsets = search_pk(pk, insert_barrier); + for (auto offset : segOffsets) { + int64_t insert_row_offset = offset.get(); + + // The deletion record does not take effect in search/query, + // and reset bitmap to 0 + if (timestamp > query_timestamp) { + bitmap->reset(insert_row_offset); + continue; + } + // Insert after delete with same pk, delete will not take effect on this insert record, + // and reset bitmap to 0 + if (insert_record_.timestamps_[offset.get()] >= timestamp) { + bitmap->reset(insert_row_offset); + continue; + } + // insert data corresponding to the insert_row_offset will be ignored in search/query + bitmap->set(insert_row_offset); + } + } + + delete_record.insert_lru_entry(current); + return current; +} + +std::pair, bool> +ChunkedSegmentSealedImpl::find_first(int64_t limit, + const BitsetType& bitset) const { + if (!is_sorted_by_pk_) { + return insert_record_.pk2offset_->find_first(limit, bitset); + } + if (limit == Unlimited || limit == NoLimit) { + limit = num_rows_.value(); + } + + int64_t hit_num = 0; // avoid counting the number everytime. + auto size = bitset.size(); + int64_t cnt = size - bitset.count(); + auto more_hit_than_limit = cnt > limit; + limit = std::min(limit, cnt); + std::vector seg_offsets; + seg_offsets.reserve(limit); + + int64_t offset = 0; + for (; hit_num < limit && offset < num_rows_.value(); offset++) { + if (offset >= size) { + // In fact, this case won't happen on sealed segments. 
+ continue; + } + + if (!bitset[offset]) { + seg_offsets.push_back(offset); + hit_num++; + } + } + + return {seg_offsets, more_hit_than_limit && offset != num_rows_.value()}; +} + +ChunkedSegmentSealedImpl::ChunkedSegmentSealedImpl( + SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id, + bool TEST_skip_index_for_retrieve, + bool is_sorted_by_pk) + : segcore_config_(segcore_config), + field_data_ready_bitset_(schema->size()), + index_ready_bitset_(schema->size()), + binlog_index_bitset_(schema->size()), + scalar_indexings_(schema->size()), + insert_record_(*schema, MAX_ROW_COUNT), + schema_(schema), + id_(segment_id), + col_index_meta_(index_meta), + TEST_skip_index_for_retrieve_(TEST_skip_index_for_retrieve), + is_sorted_by_pk_(is_sorted_by_pk) { + mmap_descriptor_ = std::shared_ptr( + new storage::MmapChunkDescriptor({segment_id, SegmentType::Sealed})); + auto mcm = storage::MmapManager::GetInstance().GetMmapChunkManager(); + mcm->Register(mmap_descriptor_); +} + +ChunkedSegmentSealedImpl::~ChunkedSegmentSealedImpl() { + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + // munmap and remove binlog from chunk cache + for (const auto& iter : field_data_info_.field_infos) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + } + if (mmap_descriptor_ != nullptr) { + auto mm = storage::MmapManager::GetInstance().GetMmapChunkManager(); + mm->UnRegister(mmap_descriptor_); + } +} + +void +ChunkedSegmentSealedImpl::bulk_subscript(SystemFieldType system_type, + const int64_t* seg_offsets, + int64_t count, + void* output) const { + AssertInfo(is_system_field_ready(), + "System field isn't ready when do bulk_insert, segID:{}", + id_); + switch (system_type) { + case SystemFieldType::Timestamp: + AssertInfo( + insert_record_.timestamps_.num_chunk() == 1, + "num chunk of timestamp not equal to 1 for sealed segment"); + bulk_subscript_impl( + 
this->insert_record_.timestamps_.get_chunk_data(0), + seg_offsets, + count, + static_cast(output)); + break; + case SystemFieldType::RowId: + PanicInfo(ErrorCode::Unsupported, "RowId retrieve not supported"); + break; + default: + PanicInfo(DataTypeInvalid, + fmt::format("unknown subscript fields", system_type)); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const void* src_raw, + const int64_t* seg_offsets, + int64_t count, + T* dst) { + static_assert(IsScalar); + auto src = static_cast(src_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = src[offset]; + } +} +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + T* dst) { + static_assert(IsScalar); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = *static_cast( + static_cast(field->ValueAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw) { + auto field = reinterpret_cast*>(column); + auto dst = reinterpret_cast(dst_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = std::move(T(field->RawAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_ptr_impl( + const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst) { + auto field = reinterpret_cast*>(column); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst->at(i) = std::move(T(field->RawAt(offset))); + } +} + +template +void +ChunkedSegmentSealedImpl::bulk_subscript_array_impl( + const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst) { + auto field = reinterpret_cast(column); + for (int64_t i = 0; i < count; ++i) { + auto offset = 
seg_offsets[i]; + dst->at(i) = std::move(field->RawAt(offset)); + } +} + +// for dense vector +void +ChunkedSegmentSealedImpl::bulk_subscript_impl(int64_t element_sizeof, + const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw) { + auto dst_vec = reinterpret_cast(dst_raw); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + auto src = field->ValueAt(offset); + auto dst = dst_vec + i * element_sizeof; + memcpy(dst, src, element_sizeof); + } +} + +void +ChunkedSegmentSealedImpl::ClearData() { + { + std::unique_lock lck(mutex_); + field_data_ready_bitset_.reset(); + index_ready_bitset_.reset(); + binlog_index_bitset_.reset(); + system_ready_count_ = 0; + num_rows_ = std::nullopt; + scalar_indexings_.clear(); + vector_indexings_.clear(); + insert_record_.clear(); + fields_.clear(); + variable_fields_avg_size_.clear(); + stats_.mem_size = 0; + } + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + // munmap and remove binlog from chunk cache + for (const auto& iter : field_data_info_.field_infos) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + } +} + +std::unique_ptr +ChunkedSegmentSealedImpl::fill_with_empty(FieldId field_id, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + if (IsVectorDataType(field_meta.get_data_type())) { + return CreateVectorDataArray(count, field_meta); + } + return CreateScalarDataArray(count, field_meta); +} + +void +ChunkedSegmentSealedImpl::CreateTextIndex(FieldId field_id) { + std::unique_lock lck(mutex_); + + const auto& field_meta = schema_->operator[](field_id); + auto& cfg = storage::MmapManager::GetInstance().GetMmapConfig(); + std::unique_ptr index; + if (!cfg.GetScalarIndexEnableMmap()) { + // build text index in ram. 
+ index = std::make_unique( + std::numeric_limits::max(), + "milvus_tokenizer", + field_meta.get_tokenizer_params()); + } else { + // build text index using mmap. + index = std::make_unique( + cfg.GetMmapPath(), + "milvus_tokenizer", + field_meta.get_tokenizer_params()); + } + + { + // build + auto iter = fields_.find(field_id); + if (iter != fields_.end()) { + auto column = + std::dynamic_pointer_cast>( + iter->second); + AssertInfo( + column != nullptr, + "failed to create text index, field is not of text type: {}", + field_id.get()); + auto n = column->NumRows(); + for (size_t i = 0; i < n; i++) { + index->AddText(std::string(column->RawAt(i)), i); + } + } else { // fetch raw data from index. + auto field_index_iter = scalar_indexings_.find(field_id); + AssertInfo(field_index_iter != scalar_indexings_.end(), + "failed to create text index, neither raw data nor " + "index are found"); + auto ptr = field_index_iter->second.get(); + AssertInfo(ptr->HasRawData(), + "text raw data not found, trying to create text index " + "from index, but this index don't contain raw data"); + auto impl = dynamic_cast*>(ptr); + AssertInfo(impl != nullptr, + "failed to create text index, field index cannot be " + "converted to string index"); + auto n = impl->Size(); + for (size_t i = 0; i < n; i++) { + index->AddText(impl->Reverse_Lookup(i), i); + } + } + } + + // create index reader. + index->CreateReader(); + // release index writer. 
+ index->Finish(); + + index->Reload(); + + index->RegisterTokenizer("milvus_tokenizer", + field_meta.get_tokenizer_params()); + + text_indexes_[field_id] = std::move(index); +} + +void +ChunkedSegmentSealedImpl::LoadTextIndex( + FieldId field_id, std::unique_ptr index) { + std::unique_lock lck(mutex_); + const auto& field_meta = schema_->operator[](field_id); + index->RegisterTokenizer("milvus_tokenizer", + field_meta.get_tokenizer_params()); + text_indexes_[field_id] = std::move(index); +} + +std::unique_ptr +ChunkedSegmentSealedImpl::get_raw_data(FieldId field_id, + const FieldMeta& field_meta, + const int64_t* seg_offsets, + int64_t count) const { + // DO NOT directly access the column by map like: `fields_.at(field_id)->Data()`, + // we have to clone the shared pointer, + // to make sure it won't get released if segment released + auto column = fields_.at(field_id); + auto ret = fill_with_empty(field_id, count); + if (column->IsNullable()) { + auto dst = ret->mutable_valid_data()->mutable_data(); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = column->IsValid(offset); + } + } + switch (field_meta.get_data_type()) { + case DataType::VARCHAR: + case DataType::STRING: { + bulk_subscript_ptr_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_string_data()->mutable_data()); + break; + } + + case DataType::JSON: { + bulk_subscript_ptr_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_json_data()->mutable_data()); + break; + } + + case DataType::ARRAY: { + bulk_subscript_array_impl( + column.get(), + seg_offsets, + count, + ret->mutable_scalars()->mutable_array_data()->mutable_data()); + break; + } + + case DataType::BOOL: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_bool_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT8: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + 
ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT16: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT32: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_int_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::INT64: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_long_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::FLOAT: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_float_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::DOUBLE: { + bulk_subscript_impl(column.get(), + seg_offsets, + count, + ret->mutable_scalars() + ->mutable_double_data() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::VECTOR_FLOAT: { + bulk_subscript_impl(field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors() + ->mutable_float_vector() + ->mutable_data() + ->mutable_data()); + break; + } + case DataType::VECTOR_FLOAT16: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_float16_vector()->data()); + break; + } + case DataType::VECTOR_BFLOAT16: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_bfloat16_vector()->data()); + break; + } + case DataType::VECTOR_BINARY: { + bulk_subscript_impl( + field_meta.get_sizeof(), + column.get(), + seg_offsets, + count, + ret->mutable_vectors()->mutable_binary_vector()->data()); + break; + } + case DataType::VECTOR_SPARSE_FLOAT: { + auto dst = ret->mutable_vectors()->mutable_sparse_float_vector(); + SparseRowsToProto( + 
[&](size_t i) { + auto offset = seg_offsets[i]; + auto row = + static_cast*>( + static_cast(column->ValueAt(offset))); + return offset != INVALID_SEG_OFFSET ? row : nullptr; + }, + count, + dst); + ret->mutable_vectors()->set_dim(dst->dim()); + break; + } + + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported data type {}", + field_meta.get_data_type())); + } + } + return ret; +} + +std::unique_ptr +ChunkedSegmentSealedImpl::bulk_subscript(FieldId field_id, + const int64_t* seg_offsets, + int64_t count) const { + auto& field_meta = schema_->operator[](field_id); + // if count == 0, return empty data array + if (count == 0) { + return fill_with_empty(field_id, count); + } + + if (HasIndex(field_id)) { + // if field has load scalar index, reverse raw data from index + if (!IsVectorDataType(field_meta.get_data_type())) { + // AssertInfo(num_chunk() == 1, + // "num chunk not equal to 1 for sealed segment"); + auto index = chunk_index_impl(field_id, 0); + if (index->HasRawData()) { + return ReverseDataFromIndex( + index, seg_offsets, count, field_meta); + } + return get_raw_data(field_id, field_meta, seg_offsets, count); + } + return get_vector(field_id, seg_offsets, count); + } + + Assert(get_bit(field_data_ready_bitset_, field_id)); + + return get_raw_data(field_id, field_meta, seg_offsets, count); +} + +std::unique_ptr +ChunkedSegmentSealedImpl::bulk_subscript( + FieldId field_id, + const int64_t* seg_offsets, + int64_t count, + const std::vector& dynamic_field_names) const { + Assert(!dynamic_field_names.empty()); + auto& field_meta = schema_->operator[](field_id); + if (count == 0) { + return fill_with_empty(field_id, 0); + } + + auto column = fields_.at(field_id); + auto ret = fill_with_empty(field_id, count); + if (column->IsNullable()) { + auto dst = ret->mutable_valid_data()->mutable_data(); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst[i] = column->IsValid(offset); + } + } + auto dst = 
ret->mutable_scalars()->mutable_json_data()->mutable_data(); + auto field = + reinterpret_cast*>(column.get()); + for (int64_t i = 0; i < count; ++i) { + auto offset = seg_offsets[i]; + dst->at(i) = ExtractSubJson(std::string(field->RawAt(offset)), + dynamic_field_names); + } + return ret; +} + +bool +ChunkedSegmentSealedImpl::HasIndex(FieldId field_id) const { + std::shared_lock lck(mutex_); + return get_bit(index_ready_bitset_, field_id) | + get_bit(binlog_index_bitset_, field_id); +} + +bool +ChunkedSegmentSealedImpl::HasFieldData(FieldId field_id) const { + std::shared_lock lck(mutex_); + if (SystemProperty::Instance().IsSystem(field_id)) { + return is_system_field_ready(); + } else { + return get_bit(field_data_ready_bitset_, field_id); + } +} + +bool +ChunkedSegmentSealedImpl::HasRawData(int64_t field_id) const { + std::shared_lock lck(mutex_); + auto fieldID = FieldId(field_id); + const auto& field_meta = schema_->operator[](fieldID); + if (IsVectorDataType(field_meta.get_data_type())) { + if (get_bit(index_ready_bitset_, fieldID) | + get_bit(binlog_index_bitset_, fieldID)) { + AssertInfo(vector_indexings_.is_ready(fieldID), + "vector index is not ready"); + auto field_indexing = vector_indexings_.get_field_indexing(fieldID); + auto vec_index = dynamic_cast( + field_indexing->indexing_.get()); + return vec_index->HasRawData(); + } + } else { + auto scalar_index = scalar_indexings_.find(fieldID); + if (scalar_index != scalar_indexings_.end()) { + return scalar_index->second->HasRawData(); + } + } + return true; +} + +DataType +ChunkedSegmentSealedImpl::GetFieldDataType(milvus::FieldId field_id) const { + auto& field_meta = schema_->operator[](field_id); + return field_meta.get_data_type(); +} + +std::pair, std::vector> +ChunkedSegmentSealedImpl::search_ids(const IdArray& id_array, + Timestamp timestamp) const { + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = 
schema_->operator[](field_id); + auto data_type = field_meta.get_data_type(); + auto ids_size = GetSizeOfIdArray(id_array); + std::vector pks(ids_size); + ParsePksFromIDs(pks, data_type, id_array); + + auto res_id_arr = std::make_unique(); + std::vector res_offsets; + res_offsets.reserve(pks.size()); + for (auto& pk : pks) { + std::vector pk_offsets; + if (!is_sorted_by_pk_) { + pk_offsets = insert_record_.search_pk(pk, timestamp); + } else { + pk_offsets = search_pk(pk, timestamp); + } + for (auto offset : pk_offsets) { + switch (data_type) { + case DataType::INT64: { + res_id_arr->mutable_int_id()->add_data( + std::get(pk)); + break; + } + case DataType::VARCHAR: { + res_id_arr->mutable_str_id()->add_data( + std::get(std::move(pk))); + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported type {}", data_type)); + } + } + res_offsets.push_back(offset); + } + } + return {std::move(res_id_arr), std::move(res_offsets)}; +} + +SegcoreError +ChunkedSegmentSealedImpl::Delete(int64_t reserved_offset, // deprecated + int64_t size, + const IdArray* ids, + const Timestamp* timestamps_raw) { + auto field_id = schema_->get_primary_field_id().value_or(FieldId(-1)); + AssertInfo(field_id.get() != -1, "Primary key is -1"); + auto& field_meta = schema_->operator[](field_id); + std::vector pks(size); + ParsePksFromIDs(pks, field_meta.get_data_type(), *ids); + + // filter out the deletions that the primary key not exists + std::vector> ordering(size); + for (int i = 0; i < size; i++) { + ordering[i] = std::make_tuple(timestamps_raw[i], pks[i]); + } + // if insert_record_ is empty (may be only-load meta but not data for lru-cache at go side), + // filtering may cause the deletion lost, skip the filtering to avoid it. 
+ if (!insert_record_.empty_pks()) { + auto end = std::remove_if( + ordering.begin(), + ordering.end(), + [&](const std::tuple& record) { + return !insert_record_.contain(std::get<1>(record)); + }); + size = end - ordering.begin(); + ordering.resize(size); + } + if (size == 0) { + return SegcoreError::success(); + } + + // step 1: sort timestamp + std::sort(ordering.begin(), ordering.end()); + std::vector sort_pks(size); + std::vector sort_timestamps(size); + + for (int i = 0; i < size; i++) { + auto [t, pk] = ordering[i]; + sort_timestamps[i] = t; + sort_pks[i] = pk; + } + + deleted_record_.push(sort_pks, sort_timestamps.data()); + return SegcoreError::success(); +} + +std::string +ChunkedSegmentSealedImpl::debug() const { + std::string log_str; + log_str += "Sealed\n"; + log_str += "\n"; + return log_str; +} + +void +ChunkedSegmentSealedImpl::LoadSegmentMeta( + const proto::segcore::LoadSegmentMeta& segment_meta) { + std::unique_lock lck(mutex_); + std::vector slice_lengths; + for (auto& info : segment_meta.metas()) { + slice_lengths.push_back(info.row_count()); + } + insert_record_.timestamp_index_.set_length_meta(std::move(slice_lengths)); + PanicInfo(NotImplemented, "unimplemented"); +} + +int64_t +ChunkedSegmentSealedImpl::get_active_count(Timestamp ts) const { + // TODO optimize here to reduce expr search range + return this->get_row_count(); +} + +void +ChunkedSegmentSealedImpl::mask_with_timestamps(BitsetTypeView& bitset_chunk, + Timestamp timestamp) const { + // TODO change the + AssertInfo(insert_record_.timestamps_.num_chunk() == 1, + "num chunk not equal to 1 for sealed segment"); + auto timestamps_data = + (const milvus::Timestamp*)insert_record_.timestamps_.get_chunk_data(0); + auto timestamps_data_size = insert_record_.timestamps_.get_chunk_size(0); + + AssertInfo(timestamps_data_size == get_row_count(), + fmt::format("Timestamp size not equal to row count: {}, {}", + timestamps_data_size, + get_row_count())); + auto range = 
insert_record_.timestamp_index_.get_active_range(timestamp); + + // range == (size_, size_) and size_ is this->timestamps_.size(). + // it means these data are all useful, we don't need to update bitset_chunk. + // It can be thought of as an OR operation with another bitmask that is all 0s, but it is not necessary to do so. + if (range.first == range.second && range.first == timestamps_data_size) { + // just skip + return; + } + // range == (0, 0). it means these data can not be used, directly set bitset_chunk to all 1s. + // It can be thought of as an OR operation with another bitmask that is all 1s. + if (range.first == range.second && range.first == 0) { + bitset_chunk.set(); + return; + } + auto mask = TimestampIndex::GenerateBitset( + timestamp, range, timestamps_data, timestamps_data_size); + bitset_chunk |= mask; +} + +bool +ChunkedSegmentSealedImpl::generate_interim_index(const FieldId field_id) { + if (col_index_meta_ == nullptr || !col_index_meta_->HasFiled(field_id)) { + return false; + } + auto& field_meta = schema_->operator[](field_id); + auto& field_index_meta = col_index_meta_->GetFieldIndexMeta(field_id); + auto& index_params = field_index_meta.GetIndexParams(); + + bool is_sparse = + field_meta.get_data_type() == DataType::VECTOR_SPARSE_FLOAT; + + auto enable_binlog_index = [&]() { + // checkout config + if (!segcore_config_.get_enable_interim_segment_index()) { + return false; + } + // check data type + if (field_meta.get_data_type() != DataType::VECTOR_FLOAT && + !is_sparse) { + return false; + } + // check index type + if (index_params.find(knowhere::meta::INDEX_TYPE) == + index_params.end() || + field_index_meta.IsFlatIndex()) { + return false; + } + // check index exist + if (vector_indexings_.is_ready(field_id)) { + return false; + } + return true; + }; + if (!enable_binlog_index()) { + return false; + } + try { + // get binlog data and meta + int64_t row_count; + { + std::shared_lock lck(mutex_); + row_count = num_rows_.value(); + } + + // 
generate index params + auto field_binlog_config = std::unique_ptr( + new VecIndexConfig(row_count, + field_index_meta, + segcore_config_, + SegmentType::Sealed, + is_sparse)); + if (row_count < field_binlog_config->GetBuildThreshold()) { + return false; + } + std::shared_ptr vec_data{}; + { + std::shared_lock lck(mutex_); + vec_data = fields_.at(field_id); + } + auto dim = + is_sparse + ? dynamic_cast(vec_data.get())->Dim() + : field_meta.get_dim(); + + auto build_config = field_binlog_config->GetBuildBaseParams(); + build_config[knowhere::meta::DIM] = std::to_string(dim); + build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1); + auto index_metric = field_binlog_config->GetMetricType(); + + auto vec_index = std::make_unique>( + field_binlog_config->GetIndexType(), + index_metric, + knowhere::Version::GetCurrentVersion().VersionNumber()); + auto num_chunk = fields_.at(field_id)->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto dataset = knowhere::GenDataSet( + vec_data->chunk_row_nums(i), dim, vec_data->Data(i)); + dataset->SetIsOwner(false); + dataset->SetIsSparse(is_sparse); + + if (i == 0) { + vec_index->BuildWithDataset(dataset, build_config); + } else { + vec_index->AddWithDataset(dataset, build_config); + } + } + + if (enable_binlog_index()) { + std::unique_lock lck(mutex_); + vector_indexings_.append_field_indexing( + field_id, index_metric, std::move(vec_index)); + + vec_binlog_config_[field_id] = std::move(field_binlog_config); + set_bit(binlog_index_bitset_, field_id, true); + LOG_INFO( + "replace binlog with binlog index in segment {}, field {}.", + this->get_segment_id(), + field_id.get()); + } + return true; + } catch (std::exception& e) { + LOG_WARN("fail to generate binlog index, because {}", e.what()); + return false; + } +} +void +ChunkedSegmentSealedImpl::RemoveFieldFile(const FieldId field_id) { + auto cc = storage::MmapManager::GetInstance().GetChunkCache(); + if (cc == nullptr) { + return; + } + for (const auto& iter : 
field_data_info_.field_infos) { + if (iter.second.field_id == field_id.get()) { + for (const auto& binlog : iter.second.insert_files) { + cc->Remove(binlog); + } + return; + } + } +} + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/ChunkedSegmentSealedImpl.h b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h new file mode 100644 index 0000000000000..fb07c1594b553 --- /dev/null +++ b/internal/core/src/segcore/ChunkedSegmentSealedImpl.h @@ -0,0 +1,392 @@ +// Copyright (C) 2019-2020 Zilliz. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under the License + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "ConcurrentVector.h" +#include "DeletedRecord.h" +#include "SealedIndexingRecord.h" +#include "SegmentSealed.h" +#include "TimestampIndex.h" +#include "common/EasyAssert.h" +#include "google/protobuf/message_lite.h" +#include "mmap/ChunkedColumn.h" +#include "index/ScalarIndex.h" +#include "sys/mman.h" +#include "common/Types.h" +#include "common/IndexMeta.h" + +namespace milvus::segcore { + +class ChunkedSegmentSealedImpl : public SegmentSealed { + public: + explicit ChunkedSegmentSealedImpl(SchemaPtr schema, + IndexMetaPtr index_meta, + const SegcoreConfig& segcore_config, + int64_t segment_id, + bool TEST_skip_index_for_retrieve = false, + bool is_sorted_by_pk = false); + ~ChunkedSegmentSealedImpl() override; + void + LoadIndex(const LoadIndexInfo& info) override; + 
void + LoadFieldData(const LoadFieldDataInfo& info) override; + void + LoadDeletedRecord(const LoadDeletedRecordInfo& info) override; + void + LoadSegmentMeta( + const milvus::proto::segcore::LoadSegmentMeta& segment_meta) override; + void + DropIndex(const FieldId field_id) override; + void + DropFieldData(const FieldId field_id) override; + bool + HasIndex(FieldId field_id) const override; + bool + HasFieldData(FieldId field_id) const override; + + bool + Contain(const PkType& pk) const override { + return insert_record_.contain(pk); + } + + void + LoadFieldData(FieldId field_id, FieldDataInfo& data) override; + void + MapFieldData(const FieldId field_id, FieldDataInfo& data) override; + void + AddFieldDataInfoForSealed( + const LoadFieldDataInfo& field_data_info) override; + + int64_t + get_segment_id() const override { + return id_; + } + + bool + HasRawData(int64_t field_id) const override; + + DataType + GetFieldDataType(FieldId fieldId) const override; + + void + RemoveFieldFile(const FieldId field_id) override; + + void + CreateTextIndex(FieldId field_id) override; + + void + LoadTextIndex(FieldId field_id, + std::unique_ptr index) override; + + public: + size_t + GetMemoryUsageInBytes() const override { + return stats_.mem_size.load() + deleted_record_.mem_size(); + } + + int64_t + get_row_count() const override; + + int64_t + get_deleted_count() const override; + + const Schema& + get_schema() const override; + + std::vector + search_pk(const PkType& pk, Timestamp timestamp) const; + + std::vector + search_pk(const PkType& pk, int64_t insert_barrier) const; + + std::shared_ptr + get_deleted_bitmap_s(int64_t del_barrier, + int64_t insert_barrier, + DeletedRecord& delete_record, + Timestamp query_timestamp) const; + + std::unique_ptr + get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const override; + + bool + is_nullable(FieldId field_id) const override { + auto it = fields_.find(field_id); + AssertInfo(it != fields_.end(), + "Cannot 
find field with field_id: " + + std::to_string(field_id.get())); + return it->second->IsNullable(); + }; + + bool + is_chunked() const override { + return true; + } + + public: + int64_t + num_chunk_index(FieldId field_id) const override; + + // count of chunk that has raw data + int64_t + num_chunk_data(FieldId field_id) const override; + + int64_t + num_chunk(FieldId field_id) const override; + + // return size_per_chunk for each chunk, renaming against confusion + int64_t + size_per_chunk() const override; + + int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const override; + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override; + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override; + + std::string + debug() const override; + + SegcoreError + Delete(int64_t reserved_offset, + int64_t size, + const IdArray* pks, + const Timestamp* timestamps) override; + + std::pair, bool> + find_first(int64_t limit, const BitsetType& bitset) const override; + + // Calculate: output[i] = Vec[seg_offset[i]] + // where Vec is determined from field_offset + std::unique_ptr + bulk_subscript(FieldId field_id, + const int64_t* seg_offsets, + int64_t count) const override; + + std::unique_ptr + bulk_subscript( + FieldId field_id, + const int64_t* seg_offsets, + int64_t count, + const std::vector& dynamic_field_names) const override; + + bool + is_mmap_field(FieldId id) const override; + + void + ClearData(); + + protected: + // blob and row_count + SpanBase + chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; + + std::pair, FixedVector> + chunk_view_impl(FieldId field_id, int64_t chunk_id) const override; + + std::pair> + get_chunk_buffer(FieldId field_id, + int64_t chunk_id, + int64_t start_offset, + int64_t length) const override; + + const index::IndexBase* + chunk_index_impl(FieldId field_id, int64_t chunk_id) const override; + + // Calculate: output[i] = Vec[seg_offset[i]], + // where Vec is 
determined from field_offset + void + bulk_subscript(SystemFieldType system_type, + const int64_t* seg_offsets, + int64_t count, + void* output) const override; + + void + check_search(const query::Plan* plan) const override; + + int64_t + get_active_count(Timestamp ts) const override; + + const ConcurrentVector& + get_timestamps() const override { + return insert_record_.timestamps_; + } + + private: + template + static void + bulk_subscript_impl(const void* src_raw, + const int64_t* seg_offsets, + int64_t count, + T* dst_raw); + + template + static void + bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + T* dst_raw); + + template + static void + bulk_subscript_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw); + + template + static void + bulk_subscript_ptr_impl(const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst_raw); + + template + static void + bulk_subscript_array_impl(const ChunkedColumnBase* column, + const int64_t* seg_offsets, + int64_t count, + google::protobuf::RepeatedPtrField* dst); + + static void + bulk_subscript_impl(int64_t element_sizeof, + const ChunkedColumnBase* field, + const int64_t* seg_offsets, + int64_t count, + void* dst_raw); + + std::unique_ptr + fill_with_empty(FieldId field_id, int64_t count) const; + + std::unique_ptr + get_raw_data(FieldId field_id, + const FieldMeta& field_meta, + const int64_t* seg_offsets, + int64_t count) const; + + void + update_row_count(int64_t row_count) { + // if (row_count_opt_.has_value()) { + // AssertInfo(row_count_opt_.value() == row_count, "load data has different row count from other columns"); + // } else { + num_rows_ = row_count; + // } + } + + void + mask_with_timestamps(BitsetTypeView& bitset_chunk, + Timestamp timestamp) const override; + + void + vector_search(SearchInfo& search_info, + const void* query_data, + int64_t 
query_count, + Timestamp timestamp, + const BitsetView& bitset, + SearchResult& output) const override; + + void + mask_with_delete(BitsetTypeView& bitset, + int64_t ins_barrier, + Timestamp timestamp) const override; + + bool + is_system_field_ready() const { + return system_ready_count_ == 2; + } + + const DeletedRecord& + get_deleted_record() const { + return deleted_record_; + } + + std::pair, std::vector> + search_ids(const IdArray& id_array, Timestamp timestamp) const override; + + std::tuple + GetFieldDataPath(FieldId field_id, int64_t offset) const; + + void + LoadVecIndex(const LoadIndexInfo& info); + + void + LoadScalarIndex(const LoadIndexInfo& info); + + virtual void + WarmupChunkCache(const FieldId field_id, bool mmap_enabled) override; + + bool + generate_interim_index(const FieldId field_id); + + private: + // mmap descriptor, used in chunk cache + storage::MmapChunkDescriptorPtr mmap_descriptor_ = nullptr; + // segment loading state + BitsetType field_data_ready_bitset_; + BitsetType index_ready_bitset_; + BitsetType binlog_index_bitset_; + std::atomic system_ready_count_ = 0; + // segment data + + // TODO: generate index for scalar + std::optional num_rows_; + + // scalar field index + std::unordered_map scalar_indexings_; + // vector field index + SealedIndexingRecord vector_indexings_; + + // inserted fields data and row_ids, timestamps + InsertRecord insert_record_; + + // deleted pks + mutable DeletedRecord deleted_record_; + + LoadFieldDataInfo field_data_info_; + + SchemaPtr schema_; + int64_t id_; + std::unordered_map> fields_; + std::unordered_set mmap_fields_; + + // only useful in binlog + IndexMetaPtr col_index_meta_; + SegcoreConfig segcore_config_; + std::unordered_map> + vec_binlog_config_; + + SegmentStats stats_{}; + + // for sparse vector unit test only! Once a type of sparse index that + // doesn't has raw data is added, this should be removed. 
+ bool TEST_skip_index_for_retrieve_ = false; + + // whether the segment is sorted by the pk + bool is_sorted_by_pk_ = false; +}; + +} // namespace milvus::segcore diff --git a/internal/core/src/segcore/ConcurrentVector.h b/internal/core/src/segcore/ConcurrentVector.h index 52971063ad02e..484ff7d293c04 100644 --- a/internal/core/src/segcore/ConcurrentVector.h +++ b/internal/core/src/segcore/ConcurrentVector.h @@ -234,9 +234,11 @@ class ConcurrentVectorImpl : public VectorBase { if (element_count == 0) { return; } + auto size = + size_per_chunk_ == MAX_ROW_COUNT ? element_count : size_per_chunk_; chunks_ptr_->emplace_to_at_least( - upper_div(element_offset + element_count, size_per_chunk_), - elements_per_row_ * size_per_chunk_); + upper_div(element_offset + element_count, size), + elements_per_row_ * size); set_data( element_offset, static_cast(source), element_count); } diff --git a/internal/core/src/segcore/InsertRecord.h b/internal/core/src/segcore/InsertRecord.h index a731e84bab1f6..76a1dcf2cd33d 100644 --- a/internal/core/src/segcore/InsertRecord.h +++ b/internal/core/src/segcore/InsertRecord.h @@ -27,6 +27,7 @@ #include "common/Schema.h" #include "common/Types.h" #include "fmt/format.h" +#include "mmap/ChunkedColumn.h" #include "mmap/Column.h" #include "segcore/AckResponder.h" #include "segcore/ConcurrentVector.h" @@ -487,12 +488,52 @@ struct InsertRecord { void insert_pks(milvus::DataType data_type, - const std::shared_ptr& data) { + const std::shared_ptr& data) { std::lock_guard lck(shared_mutex_); int64_t offset = 0; switch (data_type) { case DataType::INT64: { - auto column = std::dynamic_pointer_cast(data); + auto column = std::dynamic_pointer_cast(data); + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto pks = + reinterpret_cast(column->Data(i)); + for (int i = 0; i < column->NumRows(); ++i) { + pk2offset_->insert(pks[i], offset++); + } + } + break; + } + case DataType::VARCHAR: { + auto column = 
std::dynamic_pointer_cast< + ChunkedVariableColumn>(data); + + auto num_chunk = column->num_chunks(); + for (int i = 0; i < num_chunk; ++i) { + auto pks = column->StringViews(i).first; + for (auto& pk : pks) { + pk2offset_->insert(std::string(pk), offset++); + } + } + break; + } + default: { + PanicInfo(DataTypeInvalid, + fmt::format("unsupported primary key data type {}", + data_type)); + } + } + } + + void + insert_pks(milvus::DataType data_type, + const std::shared_ptr& data) { + std::lock_guard lck(shared_mutex_); + int64_t offset = 0; + switch (data_type) { + case DataType::INT64: { + auto column = + std::dynamic_pointer_cast(data); auto pks = reinterpret_cast(column->Data()); for (int i = 0; i < column->NumRows(); ++i) { pk2offset_->insert(pks[i], offset++); @@ -500,9 +541,8 @@ struct InsertRecord { break; } case DataType::VARCHAR: { - auto column = - std::dynamic_pointer_cast>( - data); + auto column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(data); auto pks = column->Views(); for (int i = 0; i < column->NumRows(); ++i) { diff --git a/internal/core/src/segcore/SegmentGrowingImpl.cpp b/internal/core/src/segcore/SegmentGrowingImpl.cpp index 8dc1304e6143e..b90953c858066 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.cpp +++ b/internal/core/src/segcore/SegmentGrowingImpl.cpp @@ -399,7 +399,7 @@ SegmentGrowingImpl::chunk_view_impl(FieldId field_id, int64_t chunk_id) const { } int64_t -SegmentGrowingImpl::num_chunk() const { +SegmentGrowingImpl::num_chunk(FieldId field_id) const { auto size = get_insert_record().ack_responder_.GetAck(); return upper_div(size, segcore_config_.get_chunk_rows()); } diff --git a/internal/core/src/segcore/SegmentGrowingImpl.h b/internal/core/src/segcore/SegmentGrowingImpl.h index 163b64da21097..f90bba0f5df1d 100644 --- a/internal/core/src/segcore/SegmentGrowingImpl.h +++ b/internal/core/src/segcore/SegmentGrowingImpl.h @@ -135,6 +135,22 @@ class SegmentGrowingImpl : public SegmentGrowing { return
segcore_config_.get_chunk_rows(); } + virtual int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const final { + return segcore_config_.get_chunk_rows(); + } + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override { + auto size_per_chunk = segcore_config_.get_chunk_rows(); + return {offset / size_per_chunk, offset % size_per_chunk}; + } + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override { + return chunk_id * segcore_config_.get_chunk_rows(); + } + void try_remove_chunks(FieldId fieldId); @@ -320,7 +336,7 @@ class SegmentGrowingImpl : public SegmentGrowing { protected: int64_t - num_chunk() const override; + num_chunk(FieldId field_id) const override; SpanBase chunk_data_impl(FieldId field_id, int64_t chunk_id) const override; diff --git a/internal/core/src/segcore/SegmentInterface.cpp b/internal/core/src/segcore/SegmentInterface.cpp index b7b8efbf2418d..ee31b16d5fab4 100644 --- a/internal/core/src/segcore/SegmentInterface.cpp +++ b/internal/core/src/segcore/SegmentInterface.cpp @@ -392,14 +392,6 @@ SegmentInternalInterface::LoadPrimitiveSkipIndex(milvus::FieldId field_id, field_id, chunk_id, data_type, chunk_data, valid_data, count); } -void -SegmentInternalInterface::LoadStringSkipIndex( - milvus::FieldId field_id, - int64_t chunk_id, - const milvus::VariableColumn& var_column) { - skip_index_.LoadString(field_id, chunk_id, var_column); -} - index::TextMatchIndex* SegmentInternalInterface::GetTextIndex(FieldId field_id) const { std::shared_lock lock(mutex_); diff --git a/internal/core/src/segcore/SegmentInterface.h b/internal/core/src/segcore/SegmentInterface.h index 2d4e02d2f5fd4..fe09f7c3afb79 100644 --- a/internal/core/src/segcore/SegmentInterface.h +++ b/internal/core/src/segcore/SegmentInterface.h @@ -21,6 +21,7 @@ #include "DeletedRecord.h" #include "FieldIndexing.h" +#include "common/Common.h" #include "common/Schema.h" #include "common/Span.h" #include "common/SystemProperty.h" @@ 
-179,13 +180,24 @@ class SegmentInternalInterface : public SegmentInterface { BufferView buffer = chunk_info.first; std::vector res; res.reserve(length); - char* pos = buffer.data_; - for (size_t j = 0; j < length; j++) { - uint32_t size; - size = *reinterpret_cast(pos); - pos += sizeof(uint32_t); - res.emplace_back(ViewType(pos, size)); - pos += size; + if (buffer.data_.index() == 1) { + char* pos = std::get<1>(buffer.data_).first; + for (size_t j = 0; j < length; j++) { + uint32_t size; + size = *reinterpret_cast(pos); + pos += sizeof(uint32_t); + res.emplace_back(ViewType(pos, size)); + pos += size; + } + } else { + auto elements = std::get<0>(buffer.data_); + for (auto& element : elements) { + for (int i = element.start_; i < element.end_; i++) { + res.emplace_back(ViewType( + element.data_ + element.offsets_[i], + element.offsets_[i + 1] - element.offsets_[i])); + } + } } return std::make_pair(res, chunk_info.second); } @@ -246,6 +258,10 @@ class SegmentInternalInterface : public SegmentInterface { set_field_avg_size(FieldId field_id, int64_t num_rows, int64_t field_size) override; + virtual bool + is_chunked() const { + return false; + } const SkipIndex& GetSkipIndex() const; @@ -258,10 +274,13 @@ class SegmentInternalInterface : public SegmentInterface { const bool* valid_data, int64_t count); + template void LoadStringSkipIndex(FieldId field_id, int64_t chunk_id, - const milvus::VariableColumn& var_column); + const T& var_column) { + skip_index_.LoadString(field_id, chunk_id, var_column); + } virtual DataType GetFieldDataType(FieldId fieldId) const = 0; @@ -291,6 +310,9 @@ class SegmentInternalInterface : public SegmentInterface { virtual int64_t num_chunk_data(FieldId field_id) const = 0; + virtual int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const = 0; + // bitset 1 means not hit. 0 means hit. 
virtual void mask_with_timestamps(BitsetTypeView& bitset_chunk, @@ -298,7 +320,13 @@ class SegmentInternalInterface : public SegmentInterface { // count of chunks virtual int64_t - num_chunk() const = 0; + num_chunk(FieldId field_id) const = 0; + + virtual int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const = 0; + + virtual std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const = 0; // element size in each chunk virtual int64_t @@ -384,7 +412,13 @@ class SegmentInternalInterface : public SegmentInterface { // internal API: return chunk_index in span, support scalar index only virtual const index::IndexBase* chunk_index_impl(FieldId field_id, int64_t chunk_id) const = 0; + virtual void + check_search(const query::Plan* plan) const = 0; + + virtual const ConcurrentVector& + get_timestamps() const = 0; + public: // calculate output[i] = Vec[seg_offsets[i]}, where Vec binds to system_type virtual void bulk_subscript(SystemFieldType system_type, @@ -405,12 +439,6 @@ class SegmentInternalInterface : public SegmentInterface { int64_t count, const std::vector& dynamic_field_names) const = 0; - virtual void - check_search(const query::Plan* plan) const = 0; - - virtual const ConcurrentVector& - get_timestamps() const = 0; - protected: mutable std::shared_mutex mutex_; // fieldID -> std::pair diff --git a/internal/core/src/segcore/SegmentSealed.h b/internal/core/src/segcore/SegmentSealed.h index a3c8cf951a5db..b84b3b9b94d5c 100644 --- a/internal/core/src/segcore/SegmentSealed.h +++ b/internal/core/src/segcore/SegmentSealed.h @@ -19,7 +19,6 @@ #include "pb/segcore.pb.h" #include "segcore/SegmentInterface.h" #include "segcore/Types.h" -#include "mmap/Column.h" namespace milvus::segcore { @@ -42,6 +41,12 @@ class SegmentSealed : public SegmentInternalInterface { AddFieldDataInfoForSealed(const LoadFieldDataInfo& field_data_info) = 0; virtual void WarmupChunkCache(const FieldId field_id, bool mmap_enabled) = 0; + virtual void + RemoveFieldFile(const 
FieldId field_id) = 0; + virtual void + ClearData() = 0; + virtual std::unique_ptr + get_vector(FieldId field_id, const int64_t* ids, int64_t count) const = 0; virtual void LoadTextIndex(FieldId field_id, diff --git a/internal/core/src/segcore/SegmentSealedImpl.cpp b/internal/core/src/segcore/SegmentSealedImpl.cpp index de7643751f73b..9fff1a9d09410 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.cpp +++ b/internal/core/src/segcore/SegmentSealedImpl.cpp @@ -345,15 +345,15 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { : DEFAULT_MEM_VRCOL_BLOCK_SIZE; }; - std::shared_ptr column{}; + std::shared_ptr column{}; if (IsVariableDataType(data_type)) { int64_t field_data_size = 0; switch (data_type) { case milvus::DataType::STRING: case milvus::DataType::VARCHAR: { - auto var_column = - std::make_shared>( - num_rows, field_meta, get_block_size()); + auto var_column = std::make_shared< + SingleChunkVariableColumn>( + num_rows, field_meta, get_block_size()); FieldDataPtr field_data; while (data.channel->pop(field_data)) { var_column->Append(std::move(field_data)); @@ -366,9 +366,9 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::JSON: { - auto var_column = - std::make_shared>( - num_rows, field_meta, get_block_size()); + auto var_column = std::make_shared< + SingleChunkVariableColumn>( + num_rows, field_meta, get_block_size()); FieldDataPtr field_data; while (data.channel->pop(field_data)) { var_column->Append(std::move(field_data)); @@ -380,8 +380,8 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::ARRAY: { - auto var_column = - std::make_shared(num_rows, field_meta); + auto var_column = std::make_shared( + num_rows, field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { for (auto i = 0; i < field_data->get_num_rows(); i++) { @@ -407,7 +407,8 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, 
FieldDataInfo& data) { break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - auto col = std::make_shared(field_meta); + auto col = std::make_shared( + field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { stats_.mem_size += field_data->Size(); @@ -426,7 +427,7 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) { SegmentInternalInterface::set_field_avg_size( field_id, num_rows, field_data_size); } else { - column = std::make_shared(num_rows, field_meta); + column = std::make_shared(num_rows, field_meta); FieldDataPtr field_data; while (data.channel->pop(field_data)) { column->AppendBatch(field_data); @@ -516,24 +517,25 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { valid_data); } WriteFieldPadding(file, data_type, total_written); - std::shared_ptr column{}; + std::shared_ptr column{}; auto num_rows = data.row_count; if (IsVariableDataType(data_type)) { switch (data_type) { case milvus::DataType::STRING: case milvus::DataType::VARCHAR: { - auto var_column = std::make_shared>( - file, - total_written, - field_meta, - DEFAULT_MMAP_VRCOL_BLOCK_SIZE); + auto var_column = + std::make_shared>( + file, + total_written, + field_meta, + DEFAULT_MMAP_VRCOL_BLOCK_SIZE); var_column->Seal(std::move(indices)); column = std::move(var_column); break; } case milvus::DataType::JSON: { auto var_column = - std::make_shared>( + std::make_shared>( file, total_written, field_meta, @@ -543,7 +545,7 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::ARRAY: { - auto arr_column = std::make_shared( + auto arr_column = std::make_shared( file, total_written, field_meta); arr_column->Seal(std::move(indices), std::move(element_indices)); @@ -551,8 +553,9 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { break; } case milvus::DataType::VECTOR_SPARSE_FLOAT: { - auto sparse_column = std::make_shared( - file, total_written, 
field_meta, std::move(indices)); + auto sparse_column = + std::make_shared( + file, total_written, field_meta, std::move(indices)); column = std::move(sparse_column); break; } @@ -562,7 +565,8 @@ SegmentSealedImpl::MapFieldData(const FieldId field_id, FieldDataInfo& data) { } } } else { - column = std::make_shared(file, total_written, field_meta); + column = std::make_shared( + file, total_written, field_meta); } column->SetValidData(std::move(valid_data)); @@ -664,7 +668,7 @@ SegmentSealedImpl::num_chunk_data(FieldId field_id) const { } int64_t -SegmentSealedImpl::num_chunk() const { +SegmentSealedImpl::num_chunk(FieldId field_id) const { return 1; } @@ -790,9 +794,8 @@ SegmentSealedImpl::search_pk(const PkType& pk, Timestamp timestamp) const { case DataType::VARCHAR: { auto target = std::get(pk); // get varchar pks - auto var_column = - std::dynamic_pointer_cast>( - pk_column); + auto var_column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(pk_column); auto views = var_column->Views(); auto it = std::lower_bound(views.begin(), views.end(), target); for (; it != views.end() && *it == target; it++) { @@ -843,9 +846,8 @@ SegmentSealedImpl::search_pk(const PkType& pk, int64_t insert_barrier) const { case DataType::VARCHAR: { auto target = std::get(pk); // get varchar pks - auto var_column = - std::dynamic_pointer_cast>( - pk_column); + auto var_column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(pk_column); auto views = var_column->Views(); auto it = std::lower_bound(views.begin(), views.end(), target); while (it != views.end() && *it == target) { @@ -1057,17 +1059,24 @@ SegmentSealedImpl::GetFieldDataPath(FieldId field_id, int64_t offset) const { return {data_path, offset_in_binlog}; } -std::tuple> static ReadFromChunkCache( - const storage::ChunkCachePtr& cc, - const std::string& data_path, - const storage::MmapChunkDescriptorPtr& descriptor) { +std::tuple< + std::string, + std::shared_ptr< + SingleChunkColumnBase>> static 
ReadFromChunkCache(const storage:: + ChunkCachePtr& cc, + const std::string& + data_path, + const storage:: + MmapChunkDescriptorPtr& + descriptor) { // For mmap mode, field_meta is unused, so just construct a fake field meta. auto fm = FieldMeta(FieldName(""), FieldId(0), milvus::DataType::NONE, false); // TODO: add Load() interface for chunk cache when support retrieve_enable, make Read() raise error if cache miss auto column = cc->Read(data_path, descriptor, fm, true); cc->Prefetch(data_path); - return {data_path, column}; + return {data_path, + std::dynamic_pointer_cast(column)}; } std::unique_ptr @@ -1115,7 +1124,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, auto id_to_data_path = std::unordered_map>{}; auto path_to_column = - std::unordered_map>{}; + std::unordered_map>{}; for (auto i = 0; i < count; i++) { const auto& tuple = GetFieldDataPath(field_id, ids[i]); id_to_data_path.emplace(ids[i], tuple); @@ -1124,8 +1134,8 @@ SegmentSealedImpl::get_vector(FieldId field_id, // read and prefetch auto& pool = ThreadPools::GetThreadPool(milvus::ThreadPoolPriority::HIGH); - std::vector< - std::future>>> + std::vector>>> futures; futures.reserve(path_to_column.size()); for (const auto& iter : path_to_column) { @@ -1152,7 +1162,7 @@ SegmentSealedImpl::get_vector(FieldId field_id, column->NumRows(), data_path); auto sparse_column = - std::dynamic_pointer_cast(column); + std::dynamic_pointer_cast(column); AssertInfo(sparse_column, "incorrect column created"); buf[i] = static_cast*>( static_cast( @@ -1344,11 +1354,11 @@ SegmentSealedImpl::bulk_subscript_impl(const void* src_raw, template void -SegmentSealedImpl::bulk_subscript_impl(const ColumnBase* column, +SegmentSealedImpl::bulk_subscript_impl(const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, void* dst_raw) { - auto field = reinterpret_cast*>(column); + auto field = reinterpret_cast*>(column); auto dst = reinterpret_cast(dst_raw); for (int64_t i = 0; i < count; ++i) { auto offset = 
seg_offsets[i]; @@ -1359,11 +1369,11 @@ SegmentSealedImpl::bulk_subscript_impl(const ColumnBase* column, template void SegmentSealedImpl::bulk_subscript_ptr_impl( - const ColumnBase* column, + const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst) { - auto field = reinterpret_cast*>(column); + auto field = reinterpret_cast*>(column); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = std::move(T(field->RawAt(offset))); @@ -1373,11 +1383,11 @@ SegmentSealedImpl::bulk_subscript_ptr_impl( template void SegmentSealedImpl::bulk_subscript_array_impl( - const ColumnBase* column, + const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst) { - auto field = reinterpret_cast(column); + auto field = reinterpret_cast(column); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = std::move(field->RawAt(offset)); @@ -1630,7 +1640,7 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id, if (HasIndex(field_id)) { // if field has load scalar index, reverse raw data from index if (!IsVectorDataType(field_meta.get_data_type())) { - AssertInfo(num_chunk() == 1, + AssertInfo(num_chunk(field_id) == 1, "num chunk not equal to 1 for sealed segment"); auto index = chunk_index_impl(field_id, 0); if (index->HasRawData()) { @@ -1669,7 +1679,8 @@ SegmentSealedImpl::bulk_subscript( } } auto dst = ret->mutable_scalars()->mutable_json_data()->mutable_data(); - auto field = reinterpret_cast*>(column.get()); + auto field = + reinterpret_cast*>(column.get()); for (int64_t i = 0; i < count; ++i) { auto offset = seg_offsets[i]; dst->at(i) = ExtractSubJson(std::string(field->RawAt(offset)), @@ -1965,14 +1976,16 @@ SegmentSealedImpl::generate_interim_index(const FieldId field_id) { if (row_count < field_binlog_config->GetBuildThreshold()) { return false; } - std::shared_ptr vec_data{}; + std::shared_ptr vec_data{}; { 
std::shared_lock lck(mutex_); vec_data = fields_.at(field_id); } - auto dim = is_sparse - ? dynamic_cast(vec_data.get())->Dim() - : field_meta.get_dim(); + auto dim = + is_sparse + ? dynamic_cast(vec_data.get()) + ->Dim() + : field_meta.get_dim(); auto build_config = field_binlog_config->GetBuildBaseParams(); build_config[knowhere::meta::DIM] = std::to_string(dim); @@ -2049,9 +2062,8 @@ SegmentSealedImpl::CreateTextIndex(FieldId field_id) { // build auto iter = fields_.find(field_id); if (iter != fields_.end()) { - auto column = - std::dynamic_pointer_cast>( - iter->second); + auto column = std::dynamic_pointer_cast< + SingleChunkVariableColumn>(iter->second); AssertInfo( column != nullptr, "failed to create text index, field is not of text type: {}", diff --git a/internal/core/src/segcore/SegmentSealedImpl.h b/internal/core/src/segcore/SegmentSealedImpl.h index 4e44a57e472f5..1c07c1047a7e1 100644 --- a/internal/core/src/segcore/SegmentSealedImpl.h +++ b/internal/core/src/segcore/SegmentSealedImpl.h @@ -31,6 +31,7 @@ #include "google/protobuf/message_lite.h" #include "mmap/Column.h" #include "index/ScalarIndex.h" +#include "segcore/ChunkedSegmentSealedImpl.h" #include "sys/mman.h" #include "common/Types.h" #include "common/IndexMeta.h" @@ -127,7 +128,9 @@ class SegmentSealedImpl : public SegmentSealed { Timestamp query_timestamp) const; std::unique_ptr - get_vector(FieldId field_id, const int64_t* ids, int64_t count) const; + get_vector(FieldId field_id, + const int64_t* ids, + int64_t count) const override; bool is_nullable(FieldId field_id) const override { @@ -147,12 +150,30 @@ class SegmentSealedImpl : public SegmentSealed { num_chunk_data(FieldId field_id) const override; int64_t - num_chunk() const override; + num_chunk(FieldId field_id) const override; // return size_per_chunk for each chunk, renaming against confusion int64_t size_per_chunk() const override; + int64_t + chunk_size(FieldId field_id, int64_t chunk_id) const override { + 
PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } + bool + is_chunked() const override { + return false; + } + + std::pair + get_chunk_by_offset(FieldId field_id, int64_t offset) const override { + PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } + + int64_t + num_rows_until_chunk(FieldId field_id, int64_t chunk_id) const override { + PanicInfo(ErrorCode::Unsupported, "Not implemented"); + } std::string debug() const override; @@ -231,21 +252,21 @@ class SegmentSealedImpl : public SegmentSealed { template static void - bulk_subscript_impl(const ColumnBase* field, + bulk_subscript_impl(const SingleChunkColumnBase* field, const int64_t* seg_offsets, int64_t count, void* dst_raw); template static void - bulk_subscript_ptr_impl(const ColumnBase* field, + bulk_subscript_ptr_impl(const SingleChunkColumnBase* field, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst_raw); template static void - bulk_subscript_array_impl(const ColumnBase* column, + bulk_subscript_array_impl(const SingleChunkColumnBase* column, const int64_t* seg_offsets, int64_t count, google::protobuf::RepeatedPtrField* dst); @@ -348,7 +369,7 @@ class SegmentSealedImpl : public SegmentSealed { SchemaPtr schema_; int64_t id_; - std::unordered_map> fields_; + std::unordered_map> fields_; std::unordered_set mmap_fields_; // only useful in binlog @@ -374,13 +395,24 @@ CreateSealedSegment( int64_t segment_id = -1, const SegcoreConfig& segcore_config = SegcoreConfig::default_config(), bool TEST_skip_index_for_retrieve = false, - bool is_sorted_by_pk = false) { - return std::make_unique(schema, - index_meta, - segcore_config, - segment_id, - TEST_skip_index_for_retrieve, - is_sorted_by_pk); + bool is_sorted_by_pk = false, + bool is_multi_chunk = false) { + if (!is_multi_chunk) { + return std::make_unique(schema, + index_meta, + segcore_config, + segment_id, + TEST_skip_index_for_retrieve, + is_sorted_by_pk); + } else { + return std::make_unique( + schema, + 
index_meta, + segcore_config, + segment_id, + TEST_skip_index_for_retrieve, + is_sorted_by_pk); + } } -} // namespace milvus::segcore +} // namespace milvus::segcore \ No newline at end of file diff --git a/internal/core/src/segcore/Utils.cpp b/internal/core/src/segcore/Utils.cpp index a9ff746c2ae98..e0bd00007b461 100644 --- a/internal/core/src/segcore/Utils.cpp +++ b/internal/core/src/segcore/Utils.cpp @@ -10,6 +10,7 @@ // or implied. See the License for the specific language governing permissions and limitations under the License #include "segcore/Utils.h" +#include #include #include @@ -22,6 +23,7 @@ #include "index/ScalarIndex.h" #include "mmap/Utils.h" #include "log/Log.h" +#include "storage/DataCodec.h" #include "storage/RemoteChunkManagerSingleton.h" #include "storage/ThreadPools.h" #include "storage/Util.h" @@ -783,6 +785,42 @@ ReverseDataFromIndex(const index::IndexBase* index, // init segcore storage config first, and create default remote chunk manager // segcore use default remote chunk manager to load data from minio/s3 +void +LoadArrowReaderFromRemote(const std::vector& remote_files, + std::shared_ptr channel) { + try { + auto rcm = storage::RemoteChunkManagerSingleton::GetInstance() + .GetRemoteChunkManager(); + auto& pool = ThreadPools::GetThreadPool(ThreadPoolPriority::HIGH); + + std::vector>> + futures; + futures.reserve(remote_files.size()); + for (const auto& file : remote_files) { + auto future = pool.Submit([&]() { + auto fileSize = rcm->Size(file); + auto buf = std::shared_ptr(new uint8_t[fileSize]); + rcm->Read(file, buf.get(), fileSize); + auto result = + storage::DeserializeFileData(buf, fileSize, false); + result->SetData(buf); + return result->GetReader(); + }); + futures.emplace_back(std::move(future)); + } + + for (auto& future : futures) { + auto field_data = future.get(); + channel->push(field_data); + } + + channel->close(); + } catch (std::exception& e) { + LOG_INFO("failed to load data from remote: {}", e.what()); + 
channel->close(std::current_exception()); + } +} + void LoadFieldDatasFromRemote(const std::vector& remote_files, FieldDataChannelPtr channel) { @@ -815,7 +853,6 @@ LoadFieldDatasFromRemote(const std::vector& remote_files, channel->close(std::current_exception()); } } - int64_t upper_bound(const ConcurrentVector& timestamps, int64_t first, diff --git a/internal/core/src/segcore/Utils.h b/internal/core/src/segcore/Utils.h index c32210d660dae..226e0da6441f0 100644 --- a/internal/core/src/segcore/Utils.h +++ b/internal/core/src/segcore/Utils.h @@ -184,10 +184,13 @@ ReverseDataFromIndex(const index::IndexBase* index, int64_t count, const FieldMeta& field_meta); +void +LoadArrowReaderFromRemote(const std::vector& remote_files, + std::shared_ptr channel); + void LoadFieldDatasFromRemote(const std::vector& remote_files, FieldDataChannelPtr channel); - /** * Returns an index pointing to the first element in the range [first, last) such that `value < element` is true * (i.e. that is strictly greater than value), or last if no such element is found. 
diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 0baa75345dbd2..fd7180d1ef184 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -26,9 +26,11 @@ #include "log/Log.h" #include "mmap/Types.h" #include "segcore/Collection.h" +#include "segcore/SegcoreConfig.h" #include "segcore/SegmentGrowingImpl.h" #include "segcore/SegmentSealedImpl.h" #include "segcore/Utils.h" +#include "storage/Event.h" #include "storage/Util.h" #include "futures/Future.h" #include "futures/Executor.h" @@ -59,8 +61,20 @@ NewSegment(CCollection collection, segment_id, milvus::segcore::SegcoreConfig::default_config(), false, - is_sorted_by_pk); + is_sorted_by_pk, + false); break; + case ChunkedSealed: + segment = milvus::segcore::CreateSealedSegment( + col->get_schema(), + col->get_index_meta(), + segment_id, + milvus::segcore::SegcoreConfig::default_config(), + false, + is_sorted_by_pk, + true); + break; + default: PanicInfo(milvus::UnexpectedError, "invalid segment type: {}", @@ -82,7 +96,7 @@ DeleteSegment(CSegmentInterface c_segment) { void ClearSegmentData(CSegmentInterface c_segment) { - auto s = static_cast(c_segment); + auto s = static_cast(c_segment); s->ClearData(); } @@ -549,8 +563,7 @@ WarmupChunkCache(CSegmentInterface c_segment, void RemoveFieldFile(CSegmentInterface c_segment, int64_t field_id) { - auto segment = - reinterpret_cast(c_segment); + auto segment = reinterpret_cast(c_segment); segment->RemoveFieldFile(milvus::FieldId(field_id)); } diff --git a/internal/core/src/storage/ChunkCache.cpp b/internal/core/src/storage/ChunkCache.cpp index 4b85011a578a2..3fd68868d6ddb 100644 --- a/internal/core/src/storage/ChunkCache.cpp +++ b/internal/core/src/storage/ChunkCache.cpp @@ -18,9 +18,97 @@ #include #include "ChunkCache.h" +#include "common/ChunkWriter.h" +#include "common/FieldMeta.h" #include "common/Types.h" +#include "log/Log.h" namespace milvus::storage { +std::shared_ptr 
+ChunkCache::Read(const std::string& filepath, + const MmapChunkDescriptorPtr& descriptor, + const FieldMeta& field_meta) { + // use rlock to get future + { + std::shared_lock lck(mutex_); + auto it = columns_.find(filepath); + if (it != columns_.end()) { + lck.unlock(); + auto result = it->second.second.get(); + AssertInfo(result, "unexpected null column, file={}", filepath); + return result; + } + } + + // lock for mutation + std::unique_lock lck(mutex_); + // double check no-futurn + auto it = columns_.find(filepath); + if (it != columns_.end()) { + lck.unlock(); + auto result = it->second.second.get(); + AssertInfo(result, "unexpected null column, file={}", filepath); + return result; + } + + std::promise> p; + std::shared_future> f = p.get_future(); + columns_.emplace(filepath, std::make_pair(std::move(p), f)); + lck.unlock(); + + // release lock and perform download and decode + // other thread request same path shall get the future. + bool allocate_success = false; + ErrorCode err_code = Success; + std::string err_msg = ""; + std::shared_ptr column; + try { + auto field_data = + DownloadAndDecodeRemoteFile(cm_.get(), filepath, false); + + auto chunk = create_chunk( + field_meta, field_meta.get_dim(), field_data->GetReader()->reader); + + auto data_type = field_meta.get_data_type(); + if (IsSparseFloatVectorDataType(data_type)) { + auto sparse_column = + std::make_shared(field_meta); + sparse_column->AddChunk(chunk); + column = std::move(sparse_column); + } else if (IsVariableDataType(data_type)) { + AssertInfo(false, + "TODO: unimplemented for variable data type: {}", + data_type); + } else { + std::vector> chunks{chunk}; + column = std::make_shared(chunks); + } + } catch (const SegcoreError& e) { + err_code = e.get_error_code(); + err_msg = fmt::format("failed to read for chunkCache, seg_core_err:{}", + e.what()); + } + std::unique_lock mmap_lck(mutex_); + + it = columns_.find(filepath); + if (it != columns_.end()) { + // check pair exists then set value + 
it->second.first.set_value(column); + if (allocate_success) { + AssertInfo(column, "unexpected null column, file={}", filepath); + } + } else { + PanicInfo(UnexpectedError, + "Wrong code, the thread to download for cache should get the " + "target entry"); + } + if (err_code != Success) { + columns_.erase(filepath); + throw SegcoreError(err_code, err_msg); + } + return column; +} + std::shared_ptr ChunkCache::Read(const std::string& filepath, const MmapChunkDescriptorPtr& descriptor, @@ -98,7 +186,8 @@ ChunkCache::Read(const std::string& filepath, } } else { PanicInfo(UnexpectedError, - "Wrong code, the thread to download for cache should get the " + "Wrong code, the thread to download for " + "cache should get the " "target entry"); } if (err_code != Success) { @@ -148,23 +237,25 @@ ChunkCache::ConvertToColumn(const FieldDataPtr& field_data, if (IsSparseFloatVectorDataType(data_type)) { if (mmap_enabled) { - column = std::make_shared(mcm_, descriptor); + column = std::make_shared(mcm_, + descriptor); } else { - column = std::make_shared(field_meta); + column = std::make_shared(field_meta); } } else if (IsVariableDataType(data_type)) { AssertInfo( false, "TODO: unimplemented for variable data type: {}", data_type); } else { if (mmap_enabled) { - column = std::make_shared(field_data->Size(), - data_type, - mcm_, - descriptor, - field_data->IsNullable()); + column = + std::make_shared(field_data->Size(), + data_type, + mcm_, + descriptor, + field_data->IsNullable()); } else { - column = std::make_shared(field_data->get_num_rows(), - field_meta); + column = std::make_shared( + field_data->get_num_rows(), field_meta); } } column->AppendBatch(field_data); diff --git a/internal/core/src/storage/ChunkCache.h b/internal/core/src/storage/ChunkCache.h index 0c03dcac633de..fecb8e5bac58c 100644 --- a/internal/core/src/storage/ChunkCache.h +++ b/internal/core/src/storage/ChunkCache.h @@ -17,8 +17,9 @@ #pragma once #include #include +#include "common/FieldMeta.h" #include 
"storage/MmapChunkManager.h" -#include "mmap/Column.h" +#include "mmap/ChunkedColumn.h" namespace milvus::storage { @@ -44,6 +45,11 @@ class ChunkCache { ~ChunkCache() = default; public: + std::shared_ptr + Read(const std::string& filepath, + const MmapChunkDescriptorPtr& descriptor, + const FieldMeta& field_meta); + std::shared_ptr Read(const std::string& filepath, const MmapChunkDescriptorPtr& descriptor, @@ -58,6 +64,9 @@ class ChunkCache { Prefetch(const std::string& filepath); private: + std::string + CachePath(const std::string& filepath); + std::shared_ptr ConvertToColumn(const FieldDataPtr& field_data, const MmapChunkDescriptorPtr& descriptor, diff --git a/internal/core/src/storage/DataCodec.cpp b/internal/core/src/storage/DataCodec.cpp index 96f0aeac73570..5035a07cd9d14 100644 --- a/internal/core/src/storage/DataCodec.cpp +++ b/internal/core/src/storage/DataCodec.cpp @@ -27,7 +27,7 @@ namespace milvus::storage { // deserialize remote insert and index file std::unique_ptr -DeserializeRemoteFileData(BinlogReaderPtr reader) { +DeserializeRemoteFileData(BinlogReaderPtr reader, bool is_field_data) { DescriptorEvent descriptor_event(reader); DataType data_type = DataType(descriptor_event.event_data.fix_part.data_type); @@ -45,10 +45,17 @@ DeserializeRemoteFileData(BinlogReaderPtr reader) { case EventType::InsertEvent: { auto event_data_length = header.event_length_ - GetEventHeaderSize(header); - auto insert_event_data = - InsertEventData(reader, event_data_length, data_type, nullable); - auto insert_data = - std::make_unique(insert_event_data.field_data); + auto insert_event_data = InsertEventData( + reader, event_data_length, data_type, nullable, is_field_data); + + std::unique_ptr insert_data; + if (is_field_data) { + insert_data = + std::make_unique(insert_event_data.field_data); + } else { + insert_data = std::make_unique( + insert_event_data.payload_reader); + } insert_data->SetFieldDataMeta(data_meta); 
insert_data->SetTimestamps(insert_event_data.start_timestamp, insert_event_data.end_timestamp); @@ -105,13 +112,14 @@ DeserializeLocalFileData(BinlogReaderPtr reader) { std::unique_ptr DeserializeFileData(const std::shared_ptr input_data, - int64_t length) { + int64_t length, + bool is_field_data) { auto binlog_reader = std::make_shared(input_data, length); auto medium_type = ReadMediumType(binlog_reader); std::unique_ptr res; switch (medium_type) { case StorageType::Remote: { - res = DeserializeRemoteFileData(binlog_reader); + res = DeserializeRemoteFileData(binlog_reader, is_field_data); break; } case StorageType::LocalDisk: { diff --git a/internal/core/src/storage/DataCodec.h b/internal/core/src/storage/DataCodec.h index 74fe0a65c4c4c..51d11a9db5de6 100644 --- a/internal/core/src/storage/DataCodec.h +++ b/internal/core/src/storage/DataCodec.h @@ -16,11 +16,14 @@ #pragma once +#include +#include #include #include #include #include "common/FieldData.h" +#include "storage/PayloadReader.h" #include "storage/Types.h" #include "storage/PayloadStream.h" #include "storage/BinlogReader.h" @@ -33,6 +36,10 @@ class DataCodec { : field_data_(std::move(data)), codec_type_(type) { } + explicit DataCodec(std::shared_ptr reader, CodecType type) + : payload_reader_(reader), codec_type_(type) { + } + virtual ~DataCodec() = default; // Serialized data can be written directly to remote or local disk @@ -69,18 +76,36 @@ class DataCodec { return field_data_; } + virtual std::shared_ptr + GetReader() { + auto ret = std::make_shared(); + ret->reader = payload_reader_->get_reader(); + ret->arrow_reader = payload_reader_->get_file_reader(); + ret->file_data = data_; + return ret; + } + + void + SetData(std::shared_ptr data) { + data_ = data; + } + protected: CodecType codec_type_; std::pair time_range_; FieldDataPtr field_data_; + std::shared_ptr payload_reader_; + std::shared_ptr data_; }; // Deserialize the data stream of the file obtained from remote or local std::unique_ptr 
-DeserializeFileData(const std::shared_ptr input, int64_t length); +DeserializeFileData(const std::shared_ptr input, + int64_t length, + bool is_field_data = true); std::unique_ptr -DeserializeRemoteFileData(BinlogReaderPtr reader); +DeserializeRemoteFileData(BinlogReaderPtr reader, bool is_field_data); std::unique_ptr DeserializeLocalFileData(BinlogReaderPtr reader); diff --git a/internal/core/src/storage/Event.cpp b/internal/core/src/storage/Event.cpp index 607191ab010f0..b76657de3fd12 100644 --- a/internal/core/src/storage/Event.cpp +++ b/internal/core/src/storage/Event.cpp @@ -210,7 +210,8 @@ DescriptorEventData::Serialize() { BaseEventData::BaseEventData(BinlogReaderPtr reader, int event_length, DataType data_type, - bool nullable) { + bool nullable, + bool is_field_data) { auto ast = reader->Read(sizeof(start_timestamp), &start_timestamp); AssertInfo(ast.ok(), "read start timestamp failed"); ast = reader->Read(sizeof(end_timestamp), &end_timestamp); @@ -220,9 +221,11 @@ BaseEventData::BaseEventData(BinlogReaderPtr reader, event_length - sizeof(start_timestamp) - sizeof(end_timestamp); auto res = reader->Read(payload_length); AssertInfo(res.first.ok(), "read payload failed"); - auto payload_reader = std::make_shared( - res.second.get(), payload_length, data_type, nullable); - field_data = payload_reader->get_field_data(); + payload_reader = std::make_shared( + res.second.get(), payload_length, data_type, nullable, is_field_data); + if (is_field_data) { + field_data = payload_reader->get_field_data(); + } } std::vector diff --git a/internal/core/src/storage/Event.h b/internal/core/src/storage/Event.h index b974331394f9c..b87f8117f5b93 100644 --- a/internal/core/src/storage/Event.h +++ b/internal/core/src/storage/Event.h @@ -24,6 +24,7 @@ #include "common/FieldData.h" #include "common/Types.h" +#include "storage/PayloadReader.h" #include "storage/Types.h" #include "storage/BinlogReader.h" @@ -76,12 +77,14 @@ struct BaseEventData { Timestamp start_timestamp; 
Timestamp end_timestamp; FieldDataPtr field_data; + std::shared_ptr payload_reader; BaseEventData() = default; explicit BaseEventData(BinlogReaderPtr reader, int event_length, DataType data_type, - bool nullable); + bool nullable, + bool is_field_data = true); std::vector Serialize(); diff --git a/internal/core/src/storage/InsertData.h b/internal/core/src/storage/InsertData.h index eaccee1fe4802..92c906693b91d 100644 --- a/internal/core/src/storage/InsertData.h +++ b/internal/core/src/storage/InsertData.h @@ -20,6 +20,7 @@ #include #include "storage/DataCodec.h" +#include "storage/PayloadReader.h" namespace milvus::storage { @@ -29,6 +30,10 @@ class InsertData : public DataCodec { : DataCodec(data, CodecType::InsertDataType) { } + explicit InsertData(std::shared_ptr payload_reader) + : DataCodec(payload_reader, CodecType::InsertDataType) { + } + std::vector Serialize(StorageType medium) override; diff --git a/internal/core/src/storage/PayloadReader.cpp b/internal/core/src/storage/PayloadReader.cpp index b7fe5117edf8f..4d38ac69bfbe2 100644 --- a/internal/core/src/storage/PayloadReader.cpp +++ b/internal/core/src/storage/PayloadReader.cpp @@ -28,14 +28,16 @@ namespace milvus::storage { PayloadReader::PayloadReader(const uint8_t* data, int length, DataType data_type, - bool nullable) + bool nullable, + bool is_field_data) : column_type_(data_type), nullable_(nullable) { auto input = std::make_shared(data, length); - init(input); + init(input, is_field_data); } void -PayloadReader::init(std::shared_ptr input) { +PayloadReader::init(std::shared_ptr input, + bool is_field_data) { arrow::MemoryPool* pool = arrow::default_memory_pool(); // Configure general Parquet reader settings @@ -73,17 +75,21 @@ PayloadReader::init(std::shared_ptr input) { st = arrow_reader->GetRecordBatchReader(&rb_reader); AssertInfo(st.ok(), "get record batch reader"); - field_data_ = - CreateFieldData(column_type_, nullable_, dim_, total_num_rows); - for (arrow::Result> maybe_batch : - *rb_reader) 
{ - AssertInfo(maybe_batch.ok(), "get batch record success"); - auto array = maybe_batch.ValueOrDie()->column(column_index); - // to read - field_data_->FillFieldData(array); + if (is_field_data) { + field_data_ = + CreateFieldData(column_type_, nullable_, dim_, total_num_rows); + for (arrow::Result> maybe_batch : + *rb_reader) { + AssertInfo(maybe_batch.ok(), "get batch record success"); + auto array = maybe_batch.ValueOrDie()->column(column_index); + // to read + field_data_->FillFieldData(array); + } + AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); + } else { + arrow_reader_ = std::move(arrow_reader); + record_batch_reader_ = std::move(rb_reader); } - AssertInfo(field_data_->IsFull(), "field data hasn't been filled done"); - // LOG_INFO("Peak arrow memory pool size {}", pool)->max_memory(); } } // namespace milvus::storage diff --git a/internal/core/src/storage/PayloadReader.h b/internal/core/src/storage/PayloadReader.h index 1e75dcd8cb2d2..214ac4c907d90 100644 --- a/internal/core/src/storage/PayloadReader.h +++ b/internal/core/src/storage/PayloadReader.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include "common/FieldData.h" @@ -29,23 +30,37 @@ class PayloadReader { explicit PayloadReader(const uint8_t* data, int length, DataType data_type, - bool nullable_); + bool nullable, + bool is_field_data = true); ~PayloadReader() = default; void - init(std::shared_ptr buffer); + init(std::shared_ptr buffer, bool is_field_data); const FieldDataPtr get_field_data() const { return field_data_; } + std::shared_ptr + get_reader() { + return record_batch_reader_; + } + + std::shared_ptr + get_file_reader() { + return arrow_reader_; + } + private: DataType column_type_; int dim_; bool nullable_; FieldDataPtr field_data_; + + std::shared_ptr arrow_reader_; + std::shared_ptr record_batch_reader_; }; } // namespace milvus::storage diff --git a/internal/core/src/storage/Util.cpp b/internal/core/src/storage/Util.cpp index 
4efdd45d8cc0e..0ccf13b45fe9d 100644 --- a/internal/core/src/storage/Util.cpp +++ b/internal/core/src/storage/Util.cpp @@ -539,12 +539,15 @@ GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id) { std::unique_ptr DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, - const std::string& file) { + const std::string& file, + bool is_field_data) { auto fileSize = chunk_manager->Size(file); auto buf = std::shared_ptr(new uint8_t[fileSize]); chunk_manager->Read(file, buf.get(), fileSize); - return DeserializeFileData(buf, fileSize); + auto res = DeserializeFileData(buf, fileSize, is_field_data); + res->SetData(buf); + return res; } std::pair @@ -599,7 +602,7 @@ GetObjectData(ChunkManager* remote_chunk_manager, futures.reserve(remote_files.size()); for (auto& file : remote_files) { futures.emplace_back(pool.Submit( - DownloadAndDecodeRemoteFile, remote_chunk_manager, file)); + DownloadAndDecodeRemoteFile, remote_chunk_manager, file, true)); } return futures; } diff --git a/internal/core/src/storage/Util.h b/internal/core/src/storage/Util.h index 4a62096bb7370..b3a6a124fbe70 100644 --- a/internal/core/src/storage/Util.h +++ b/internal/core/src/storage/Util.h @@ -102,7 +102,8 @@ GetSegmentRawDataPathPrefix(ChunkManagerPtr cm, int64_t segment_id); std::unique_ptr DownloadAndDecodeRemoteFile(ChunkManager* chunk_manager, - const std::string& file); + const std::string& file, + bool is_field_data = true); std::pair EncodeAndUploadIndexSlice(ChunkManager* chunk_manager, diff --git a/internal/core/unittest/test_chunk.cpp b/internal/core/unittest/test_chunk.cpp index 543284d16b1ab..126f11cc4739e 100644 --- a/internal/core/unittest/test_chunk.cpp +++ b/internal/core/unittest/test_chunk.cpp @@ -9,17 +9,23 @@ // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express // or implied. 
See the License for the specific language governing permissions and limitations under the License +#include #include #include #include #include +#include #include +#include +#include "boost/filesystem/operations.hpp" +#include "boost/filesystem/path.hpp" #include "common/Chunk.h" #include "common/ChunkWriter.h" #include "common/EasyAssert.h" #include "common/FieldDataInterface.h" #include "common/FieldMeta.h" +#include "common/File.h" #include "common/Types.h" #include "storage/Event.h" #include "storage/Util.h" @@ -53,8 +59,7 @@ TEST(chunk, test_int64_field) { FieldMeta field_meta( FieldName("a"), milvus::FieldId(1), DataType::INT64, false); auto chunk = create_chunk(field_meta, 1, rb_reader); - auto span = - std::dynamic_pointer_cast>(chunk)->Span(); + auto span = std::dynamic_pointer_cast(chunk)->Span(); EXPECT_EQ(span.row_count(), data.size()); for (size_t i = 0; i < data.size(); ++i) { auto n = *(int64_t*)((char*)span.data() + i * span.element_sizeof()); @@ -92,7 +97,7 @@ TEST(chunk, test_variable_field) { auto chunk = create_chunk(field_meta, 1, rb_reader); auto views = std::dynamic_pointer_cast(chunk)->StringViews(); for (size_t i = 0; i < data.size(); ++i) { - EXPECT_EQ(views[i], data[i]); + EXPECT_EQ(views.first[i], data[i]); } } @@ -183,4 +188,68 @@ TEST(chunk, test_sparse_float) { EXPECT_EQ(v1[j].val, v2[j].val); } } +} + +class TempDir { + public: + TempDir() { + auto path = boost::filesystem::unique_path("%%%%_%%%%"); + auto abs_path = boost::filesystem::temp_directory_path() / path; + boost::filesystem::create_directory(abs_path); + dir_ = abs_path; + } + + ~TempDir() { + boost::filesystem::remove_all(dir_); + } + + std::string + dir() { + return dir_.string(); + } + + private: + boost::filesystem::path dir_; +}; + +TEST(chunk, multiple_chunk_mmap) { + TempDir temp; + std::string temp_dir = temp.dir(); + auto file = File::Open(temp_dir + "/multi_chunk_mmap", O_CREAT | O_RDWR); + + FixedVector data = {1, 2, 3, 4, 5}; + auto field_data = + 
 milvus::storage::CreateFieldData(storage::DataType::INT64); + field_data->FillFieldData(data.data(), data.size()); + storage::InsertEventData event_data; + event_data.field_data = field_data; + auto ser_data = event_data.Serialize(); + auto buffer = std::make_shared( + ser_data.data() + 2 * sizeof(milvus::Timestamp), + ser_data.size() - 2 * sizeof(milvus::Timestamp)); + + parquet::arrow::FileReaderBuilder reader_builder; + auto s = reader_builder.Open(buffer); + EXPECT_TRUE(s.ok()); + std::unique_ptr arrow_reader; + s = reader_builder.Build(&arrow_reader); + EXPECT_TRUE(s.ok()); + + std::shared_ptr<::arrow::RecordBatchReader> rb_reader; + s = arrow_reader->GetRecordBatchReader(&rb_reader); + EXPECT_TRUE(s.ok()); + + FieldMeta field_meta( + FieldName("a"), milvus::FieldId(1), DataType::INT64, false); + int file_offset = 0; + auto page_size = sysconf(_SC_PAGESIZE); + auto chunk = create_chunk(field_meta, 1, file, file_offset, rb_reader); + EXPECT_TRUE(chunk->Size() % page_size == 0); + file_offset += chunk->Size(); + + std::shared_ptr<::arrow::RecordBatchReader> rb_reader2; + s = arrow_reader->GetRecordBatchReader(&rb_reader2); + EXPECT_TRUE(s.ok()); + auto chunk2 = create_chunk(field_meta, 1, file, file_offset, rb_reader2); + EXPECT_TRUE(chunk2->Size() % page_size == 0); } \ No newline at end of file diff --git a/internal/core/unittest/test_sealed.cpp b/internal/core/unittest/test_sealed.cpp index 5372847808345..68aec85c752a3 100644 --- a/internal/core/unittest/test_sealed.cpp +++ b/internal/core/unittest/test_sealed.cpp @@ -508,7 +508,7 @@ TEST(Sealed, LoadFieldData) { vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); @@ -671,7 +671,7 @@ TEST(Sealed, ClearData) { 
vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); @@ -775,7 +775,7 @@ TEST(Sealed, LoadFieldDataMmap) { vec_info.index_params["metric_type"] = knowhere::metric::L2; segment->LoadIndex(vec_info); - ASSERT_EQ(segment->num_chunk(), 1); + ASSERT_EQ(segment->num_chunk(FieldId(0)), 1); ASSERT_EQ(segment->num_chunk_index(double_id), 0); ASSERT_EQ(segment->num_chunk_index(str_id), 0); auto chunk_span1 = segment->chunk_data(counter_id, 0); diff --git a/internal/core/unittest/test_span.cpp b/internal/core/unittest/test_span.cpp index f0cca40d0b858..0fbc8f566f8e7 100644 --- a/internal/core/unittest/test_span.cpp +++ b/internal/core/unittest/test_span.cpp @@ -46,7 +46,7 @@ TEST(Span, Naive) { auto float_ptr = dataset.get_col(float_vec_fid); auto nullable_data_ptr = dataset.get_col(nullable_fid); auto nullable_valid_data_ptr = dataset.get_col_valid(nullable_fid); - auto num_chunk = segment->num_chunk(); + auto num_chunk = segment->num_chunk(FieldId(0)); ASSERT_EQ(num_chunk, upper_div(N, size_per_chunk)); auto row_count = segment->get_row_count(); ASSERT_EQ(N, row_count); diff --git a/internal/querynodev2/segments/segment.go b/internal/querynodev2/segments/segment.go index 50a9a51ddcc93..78b4624353f46 100644 --- a/internal/querynodev2/segments/segment.go +++ b/internal/querynodev2/segments/segment.go @@ -298,11 +298,16 @@ func NewSegment(ctx context.Context, return nil, err } + multipleChunkEnable := paramtable.Get().QueryNodeCfg.MultipleChunkedEnable.GetAsBool() var cSegType C.SegmentType var locker *state.LoadStateLock switch segmentType { case SegmentTypeSealed: - cSegType = C.Sealed + if multipleChunkEnable { + cSegType = C.ChunkedSealed + } else { + cSegType = C.Sealed + } locker = 
state.NewLoadStateLock(state.LoadStateOnlyMeta) case SegmentTypeGrowing: locker = state.NewLoadStateLock(state.LoadStateDataLoaded) diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index 371f242d6785a..d122694753577 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2336,6 +2336,7 @@ type queryNodeConfig struct { InterimIndexNProbe ParamItem `refreshable:"false"` InterimIndexMemExpandRate ParamItem `refreshable:"false"` InterimIndexBuildParallelRate ParamItem `refreshable:"false"` + MultipleChunkedEnable ParamItem `refreshable:"false"` KnowhereScoreConsistency ParamItem `refreshable:"false"` @@ -2546,6 +2547,15 @@ This defaults to true, indicating that Milvus creates temporary index for growin } p.InterimIndexBuildParallelRate.Init(base.mgr) + p.MultipleChunkedEnable = ParamItem{ + Key: "queryNode.segcore.multipleChunkedEnable", + Version: "2.0.0", + DefaultValue: "false", + Doc: "Enable multiple chunked search", + Export: true, + } + p.MultipleChunkedEnable.Init(base.mgr) + p.InterimIndexNProbe = ParamItem{ Key: "queryNode.segcore.interimIndex.nprobe", Version: "2.0.0",