Skip to content

Commit

Permalink
[Fix](SchemaChange) refactor variant root column iterator to make row…
Browse files Browse the repository at this point in the history
… read correct (apache#41700)
  • Loading branch information
eldenmoon committed Oct 16, 2024
1 parent 7a445ff commit 679e0ae
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 44 deletions.
85 changes: 41 additions & 44 deletions be/src/olap/rowset/segment_v2/column_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1581,28 +1581,15 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP
}
}

// Merges a freshly read root column into the destination variant column.
//
// dst              - destination column: a ColumnObject, or a ColumnNullable whose
//                    nested column is a ColumnObject.
// root_column      - the rows just read for the variant root; it is moved from and
//                    must not be used by the caller afterwards.
// most_common_type - data type registered for the root sub-column.
//
// Always returns Status::OK().
Status VariantRootColumnIterator::_process_root_column(
        vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column,
        const vectorized::DataTypePtr& most_common_type) {
    auto& obj =
            dst->is_nullable()
                    ? assert_cast<vectorized::ColumnObject&>(
                              assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
                    : assert_cast<vectorized::ColumnObject&>(*dst);

    // fill nullmap
    // This must happen before root_column is moved into the temporary object below.
    if (root_column->is_nullable() && dst->is_nullable()) {
        vectorized::ColumnUInt8& dst_null_map =
                assert_cast<vectorized::ColumnNullable&>(*dst).get_null_map_column();
        vectorized::ColumnUInt8& src_null_map =
                assert_cast<vectorized::ColumnNullable&>(*root_column).get_null_map_column();
        // root_column only holds the rows of this read, so the whole null map is appended.
        dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size());
    }

    // add root column to a tmp object column
    auto tmp = vectorized::ColumnObject::create(true, false);
    auto& tmp_obj = assert_cast<vectorized::ColumnObject&>(*tmp);
    tmp_obj.add_sub_column({}, std::move(root_column), most_common_type);

    // merge tmp object column to dst
    obj.insert_range_from(*tmp, 0, tmp->size());

    // finalize object if needed
    if (!obj.is_finalized()) {
        obj.finalize();
    }

#ifndef NDEBUG
    obj.check_consistency();
#endif

    return Status::OK();
}

// Reads the next batch of the variant root column through the inner iterator and
// merges it into dst. `n` and `has_null` are forwarded unchanged to the inner iterator.
// Returns the inner iterator's error if the read fails, otherwise the result of
// _process_root_column().
Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst,
                                             bool* has_null) {
    // Locate the variant column: dst itself, or the column nested inside a nullable dst.
    auto* variant_holder = &*dst;
    if (dst->is_nullable()) {
        variant_holder = &assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column();
    }
    auto& variant = assert_cast<vectorized::ColumnObject&>(*variant_holder);

    // Read the root values into a fresh column of the variant's most common type.
    auto root_type = variant.get_most_common_type();
    auto fresh_root = root_type->create_column();
    RETURN_IF_ERROR(_inner_iter->next_batch(n, fresh_root, has_null));

    // Hand the freshly read rows over to be merged into dst.
    return _process_root_column(dst, fresh_root, root_type);
}

// Reads the variant root values for the given row ids through the inner iterator
// and merges them into dst.
//
// rowids / count - forwarded unchanged to the inner iterator's read_by_rowids().
// dst            - destination column: a ColumnObject, or a ColumnNullable whose
//                  nested column is a ColumnObject.
//
// Returns the inner iterator's error if the read fails, otherwise the result of
// _process_root_column().
Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count,
                                                 vectorized::MutableColumnPtr& dst) {
    // read root column
    auto& obj =
            dst->is_nullable()
                    ? assert_cast<vectorized::ColumnObject&>(
                              assert_cast<vectorized::ColumnNullable&>(*dst).get_nested_column())
                    : assert_cast<vectorized::ColumnObject&>(*dst);

    // Read into a fresh column rather than the object's existing root, so the
    // column handed to _process_root_column() holds only the rows of this call.
    auto most_common_type = obj.get_most_common_type();
    auto root_column = most_common_type->create_column();
    RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column));

    // Null-map filling, merging into dst and finalization all happen in the helper.
    return _process_root_column(dst, root_column, most_common_type);
}

Status DefaultNestedColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) {
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/rowset/segment_v2/column_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,9 @@ class VariantRootColumnIterator : public ColumnIterator {
// Reports the current ordinal of the wrapped inner iterator.
ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); }

private:
// Merges a freshly read root column into the variant column held by dst.
// When both root_column and dst are nullable, root_column's null map is appended to
// dst's null map. root_column is then moved into a temporary ColumnObject as its root
// sub-column (typed most_common_type), which is inserted into dst's ColumnObject;
// the object is finalized if it is not already. root_column is moved from.
Status _process_root_column(vectorized::MutableColumnPtr& dst,
vectorized::MutableColumnPtr& root_column,
const vectorized::DataTypePtr& most_common_type);
std::unique_ptr<FileColumnIterator> _inner_iter;
};

Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1803,6 +1803,12 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum
add_sub_column({}, std::move(column), type);
}

// Builds the MostCommonType data type, wrapped with make_nullable() when this
// column's is_nullable flag is set.
DataTypePtr ColumnObject::get_most_common_type() const {
    DataTypePtr base_type = std::make_shared<MostCommonType>();
    if (is_nullable) {
        return make_nullable(base_type);
    }
    return base_type;
}

bool ColumnObject::is_null_root() const {
auto* root = subcolumns.get_root();
if (root == nullptr) {
Expand Down
2 changes: 2 additions & 0 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,8 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
// create root with type and column if missing
void create_root(const DataTypePtr& type, MutableColumnPtr&& column);

// Returns the MostCommonType data type, wrapped with make_nullable() when this
// column's is_nullable flag is set.
DataTypePtr get_most_common_type() const;

// root is null or type nothing
bool is_null_root() const;

Expand Down

0 comments on commit 679e0ae

Please sign in to comment.