Skip to content

Commit

Permalink
fix: search-group-by failed to get data from multi-chunked-segment(##… (
Browse files Browse the repository at this point in the history
#38383)

related: #38343

Signed-off-by: MrPresent-Han <[email protected]>
Co-authored-by: MrPresent-Han <[email protected]>
  • Loading branch information
MrPresent-Han and MrPresent-Han authored Dec 13, 2024
1 parent 3038383 commit c1f9158
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 34 deletions.
67 changes: 35 additions & 32 deletions internal/core/src/exec/operator/groupby/SearchGroupByOperator.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,50 +61,53 @@ class GrowingDataGetter : public DataGetter<T> {
template <typename T>
class SealedDataGetter : public DataGetter<T> {
private:
std::shared_ptr<Span<T>> field_data_;
std::shared_ptr<std::vector<std::string_view>> str_field_data_;
const index::ScalarIndex<T>* field_index_;
const segcore::SegmentSealed& segment_;
const FieldId field_id_;
bool from_data_;

mutable std::unordered_map<int64_t, std::vector<std::string_view>>
str_view_map_;
// Getting a str_view from the segment is CPU-costly, so this map caches the view of each chunk (keyed by chunk id) for performance.
public:
// Builds a getter bound to `segment` for the field `field_id`.
// Records in from_data_ whether the segment holds raw data for the field;
// when the segment has neither raw data nor an index for the field,
// the failure is reported through PanicInfo with UnexpectedError and the
// segment id.
SealedDataGetter(const segcore::SegmentSealed& segment, FieldId& field_id)
    : segment_(segment), field_id_(field_id) {
    from_data_ = segment_.HasFieldData(field_id_);
    if (!from_data_ && !segment_.HasIndex(field_id_)) {
        // Adjacent string literals are concatenated by the compiler, so each
        // fragment must carry its own trailing space.
        PanicInfo(
            UnexpectedError,
            "The segment:{} used to init data getter has no effective "
            "data source, neither "
            "index or data",
            segment_.get_segment_id());
    }
}

SealedDataGetter(const SealedDataGetter<T>& other)
: field_data_(other.field_data_),
str_field_data_(other.str_field_data_),
field_index_(other.field_index_) {
}

T
Get(int64_t idx) const {
if (field_data_ || str_field_data_) {
if (from_data_) {
auto id_offset_pair = segment_.get_chunk_by_offset(field_id_, idx);
auto chunk_id = id_offset_pair.first;
auto inner_offset = id_offset_pair.second;
if constexpr (std::is_same_v<T, std::string>) {
if (str_view_map_.find(chunk_id) == str_view_map_.end()) {
// for now, search_group_by does not handle null values
auto [str_chunk_view, _] =
segment_.chunk_view<std::string_view>(field_id_,
chunk_id);
str_view_map_[chunk_id] = std::move(str_chunk_view);
}
auto& str_chunk_view = str_view_map_[chunk_id];
std::string_view str_val_view =
str_field_data_->operator[](idx);
str_chunk_view.operator[](inner_offset);
return std::string(str_val_view.data(), str_val_view.length());
} else {
Span<T> span = segment_.chunk_data<T>(field_id_, chunk_id);
auto raw = span.operator[](inner_offset);
return raw;
}
return field_data_->operator[](idx);
} else {
auto raw = (*field_index_).Reverse_Lookup(idx);
auto& chunk_index = segment_.chunk_scalar_index<T>(field_id_, 0);
auto raw = chunk_index.Reverse_Lookup(idx);
AssertInfo(raw.has_value(), "field data not found");
return raw.value();
}
Expand Down
2 changes: 1 addition & 1 deletion internal/core/src/index/ScalarIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class ScalarIndex : public IndexBase {
}

virtual bool
IsMmapSupported() const {
IsMmapSupported() const override {
return index_type_ == milvus::index::BITMAP_INDEX_TYPE ||
index_type_ == milvus::index::HYBRID_INDEX_TYPE;
}
Expand Down
9 changes: 8 additions & 1 deletion internal/core/src/segcore/SegmentSealedImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,14 @@ class SegmentSealedImpl : public SegmentSealed {

// Translates a row offset of `field_id` into a (chunk id, offset inside that
// chunk) pair.
// Reports ErrorCode::FieldIDInvalid through PanicInfo when `field_id` is not
// present in fields_.
std::pair<int64_t, int64_t>
get_chunk_by_offset(FieldId field_id, int64_t offset) const override {
    if (fields_.find(field_id) == fields_.end()) {
        PanicInfo(
            ErrorCode::FieldIDInvalid,
            "Failed to get chunk offset towards a non-existing field:{}",
            field_id.get());
    }
    // for sealed segment, chunk id is always zero and input offset is the target offset
    return std::make_pair(0, offset);
}

int64_t
Expand Down

0 comments on commit c1f9158

Please sign in to comment.