Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove enum string feature #7858

Merged
merged 5 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
-----------

### Internals
* None.
* Ability to enumerate a string column has been removed.

----------------------------------------------

Expand Down
88 changes: 3 additions & 85 deletions src/realm/array_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,24 +54,9 @@ void ArrayString::init_from_mem(MemRef mem) noexcept
else {
auto arr = new (&m_storage) Array(m_alloc);
arr->init_from_mem(mem);
// The context flag is used to indicate interned strings vs old enum strings
// (in conjunction with has_refs() == false)
if (arr->get_context_flag_from_header(arr->get_header())) {
// init for new interned strings (replacing old enum strings)
m_type = Type::interned_strings;
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
}
else {
// init for old enum strings
m_string_enum_values = std::make_unique<ArrayString>(m_alloc);
ArrayParent* p;
REALM_ASSERT(m_spec != nullptr);
REALM_ASSERT(m_col_ndx != realm::npos);
ref_type r = m_spec->get_enumkeys_ref(m_col_ndx, p);
m_string_enum_values->init_from_ref(r);
m_string_enum_values->set_parent(p, m_col_ndx);
m_type = Type::enum_strings;
}
// init for new interned strings
m_type = Type::interned_strings;
// consider if we want this invariant: REALM_ASSERT_DEBUG(m_string_interner);
}
}
else {
Expand Down Expand Up @@ -122,7 +107,6 @@ size_t ArrayString::size() const
return static_cast<ArraySmallBlobs*>(m_arr)->size();
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->size();
case Type::enum_strings:
case Type::interned_strings:
return static_cast<Array*>(m_arr)->size();
}
Expand All @@ -141,7 +125,6 @@ void ArrayString::add(StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->add_string(value);
break;
case Type::enum_strings:
case Type::interned_strings: {
auto a = static_cast<Array*>(m_arr);
size_t ndx = a->size();
Expand Down Expand Up @@ -169,16 +152,6 @@ void ArrayString::set(size_t ndx, StringData value)
static_cast<Array*>(m_arr)->set(ndx, id);
break;
}
case Type::enum_strings: {
size_t sz = m_string_enum_values->size();
size_t res = m_string_enum_values->find_first(value, 0, sz);
if (res == realm::not_found) {
m_string_enum_values->add(value);
res = sz;
}
static_cast<Array*>(m_arr)->set(ndx, res);
break;
}
}
}

Expand All @@ -194,11 +167,6 @@ void ArrayString::insert(size_t ndx, StringData value)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->insert_string(ndx, value);
break;
case Type::enum_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
break;
}
case Type::interned_strings: {
static_cast<Array*>(m_arr)->insert(ndx, 0);
set(ndx, value);
Expand All @@ -216,31 +184,6 @@ StringData ArrayString::get(size_t ndx) const
return static_cast<ArraySmallBlobs*>(m_arr)->get_string(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
case Type::enum_strings: {
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this has been deleted?

Copy link
Contributor Author

@jedelbo jedelbo Jul 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It has not been deleted, but the diff view plays tricks on you. It is the next function that is deleted.

size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
}
}
return {};
}

StringData ArrayString::get_legacy(size_t ndx) const
{
switch (m_type) {
case Type::small_strings:
return static_cast<ArrayStringShort*>(m_arr)->get(ndx);
case Type::medium_strings:
return static_cast<ArraySmallBlobs*>(m_arr)->get_string_legacy(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->get_string(ndx);
case Type::enum_strings: {
size_t index = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->get(index);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_interner->get(id);
Expand All @@ -263,10 +206,6 @@ bool ArrayString::is_null(size_t ndx) const
return static_cast<ArraySmallBlobs*>(m_arr)->is_null(ndx);
case Type::big_strings:
return static_cast<ArrayBigBlobs*>(m_arr)->is_null(ndx);
case Type::enum_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return m_string_enum_values->is_null(id);
}
case Type::interned_strings: {
size_t id = size_t(static_cast<Array*>(m_arr)->get(ndx));
return id == 0;
Expand All @@ -288,7 +227,6 @@ void ArrayString::erase(size_t ndx)
static_cast<ArrayBigBlobs*>(m_arr)->erase(ndx);
break;
case Type::interned_strings:
case Type::enum_strings:
static_cast<Array*>(m_arr)->erase(ndx);
break;
}
Expand All @@ -311,10 +249,6 @@ void ArrayString::move(ArrayString& dst, size_t ndx)
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->truncate(ndx);
break;
case Type::enum_strings:
// this operation will never be called for enumerated columns
REALM_UNREACHABLE();
break;
case Type::interned_strings:
m_arr->truncate(ndx);
break;
Expand All @@ -333,7 +267,6 @@ void ArrayString::clear()
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->clear();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->clear();
break;
Expand All @@ -355,14 +288,6 @@ size_t ArrayString::find_first(StringData value, size_t begin, size_t end) const
return static_cast<ArrayBigBlobs*>(m_arr)->find_first(as_binary, true, begin, end);
break;
}
case Type::enum_strings: {
size_t sz = m_string_enum_values->size();
size_t res = m_string_enum_values->find_first(value, 0, sz);
if (res != realm::not_found) {
return static_cast<Array*>(m_arr)->find_first(res, begin, end);
}
break;
}
case Type::interned_strings: {
// we need a way to avoid this lookup for each leaf array. The lookup must appear
// higher up the call stack and passed down.
Expand Down Expand Up @@ -420,8 +345,6 @@ size_t ArrayString::lower_bound(StringData value)
return lower_bound_string(static_cast<ArraySmallBlobs*>(m_arr), value);
case Type::big_strings:
return lower_bound_string(static_cast<ArrayBigBlobs*>(m_arr), value);
case Type::enum_strings:
break;
case Type::interned_strings:
REALM_UNREACHABLE();
break;
Expand All @@ -434,9 +357,6 @@ ArrayString::Type ArrayString::upgrade_leaf(size_t value_size)
if (m_type == Type::big_strings)
return Type::big_strings;

if (m_type == Type::enum_strings)
return Type::enum_strings;

if (m_type == Type::interned_strings)
return Type::interned_strings;

Expand Down Expand Up @@ -529,7 +449,6 @@ void ArrayString::verify() const
case Type::big_strings:
static_cast<ArrayBigBlobs*>(m_arr)->verify();
break;
case Type::enum_strings:
case Type::interned_strings:
static_cast<Array*>(m_arr)->verify();
break;
Expand Down Expand Up @@ -567,7 +486,6 @@ ref_type ArrayString::typed_write(ref_type ref, _impl::ArrayWriterBase& out, All
leaf.destroy_deep(true);
}
else {
// whether it's the old enum strings or the new interned strings,
// just write out the array using integer leaf compression
ret_val = leaf.write(out, false, out.only_modified, out.compress);
}
Expand Down
14 changes: 1 addition & 13 deletions src/realm/array_string.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,6 @@ class ArrayString : public ArrayPayload {
{
m_string_interner = string_interner;
}
bool need_spec() const override
{
return true;
}
void set_spec(Spec* spec, size_t col_ndx) const override
{
m_spec = spec;
m_col_ndx = col_ndx;
}

void update_parent()
{
Expand All @@ -108,7 +99,6 @@ class ArrayString : public ArrayPayload {
}
void insert(size_t ndx, StringData value);
StringData get(size_t ndx) const;
StringData get_legacy(size_t ndx) const;
Mixed get_any(size_t ndx) const override;
bool is_null(size_t ndx) const;
void erase(size_t ndx);
Expand Down Expand Up @@ -137,16 +127,14 @@ class ArrayString : public ArrayPayload {
static constexpr size_t storage_size =
std::max({sizeof(ArrayStringShort), sizeof(ArraySmallBlobs), sizeof(ArrayBigBlobs), sizeof(Array)});

enum class Type { small_strings, medium_strings, big_strings, enum_strings, interned_strings };
enum class Type { small_strings, medium_strings, big_strings, interned_strings };

Type m_type = Type::small_strings;

Allocator& m_alloc;
alignas(storage_alignment) std::byte m_storage[storage_size];
Array* m_arr;
bool m_nullable = true;
mutable Spec* m_spec = nullptr;
mutable size_t m_col_ndx = realm::npos;
std::unique_ptr<ArrayString> m_string_enum_values;
mutable StringInterner* m_string_interner = nullptr;

Expand Down
53 changes: 3 additions & 50 deletions src/realm/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,12 +154,7 @@ void Cluster::create()
do_create<ArrayDoubleNull>(col_key);
break;
case col_type_String: {
if (m_tree_top.is_string_enum_type(col_ndx)) {
do_create<ArrayInteger>(col_key);
}
else {
do_create<ArrayString>(col_key);
}
do_create<ArrayString>(col_key);
break;
}
case col_type_Binary:
Expand Down Expand Up @@ -267,17 +262,6 @@ inline void Cluster::set_string_interner(ArrayMixed& arr, ColKey col_key) const
m_tree_top.set_string_interner(arr, col_key);
}

template <class T>
inline void Cluster::set_spec(T&, ColKey::Idx) const
{
}

template <>
inline void Cluster::set_spec(ArrayString& arr, ColKey::Idx col_ndx) const
{
m_tree_top.set_spec(arr, col_ndx);
}

template <class T>
inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool nullable)
{
Expand All @@ -286,7 +270,6 @@ inline void Cluster::do_insert_row(size_t ndx, ColKey col, Mixed init_val, bool
T arr(m_alloc);
auto col_ndx = col.get_index();
arr.set_parent(this, col_ndx.val + s_first_col_index);
set_spec<T>(arr, col_ndx);
set_string_interner<T>(arr, col);
arr.init_from_parent();
if (init_val.is_null()) {
Expand Down Expand Up @@ -507,13 +490,9 @@ void Cluster::move(size_t ndx, ClusterNode* new_node, int64_t offset)
case col_type_Double:
do_move<ArrayDouble>(ndx, col_key, new_leaf);
break;
case col_type_String: {
if (m_tree_top.is_string_enum_type(col_key.get_index()))
do_move<ArrayInteger>(ndx, col_key, new_leaf);
else
do_move<ArrayString>(ndx, col_key, new_leaf);
case col_type_String:
do_move<ArrayString>(ndx, col_key, new_leaf);
break;
}
case col_type_Binary:
do_move<ArrayBinary>(ndx, col_key, new_leaf);
break;
Expand Down Expand Up @@ -781,7 +760,6 @@ inline void Cluster::do_erase(size_t ndx, ColKey col_key)
auto col_ndx = col_key.get_index();
T values(m_alloc);
values.set_parent(this, col_ndx.val + s_first_col_index);
set_spec<T>(values, col_ndx);
set_string_interner<T>(values, col_key);
values.init_from_parent();
if constexpr (std::is_same_v<T, ArrayTypedLink>) {
Expand Down Expand Up @@ -1048,26 +1026,6 @@ void Cluster::nullify_incoming_links(RowKey key, CascadeState& state)
m_tree_top.get_owning_table()->for_each_backlink_column(nullify_fwd_links);
}

void Cluster::upgrade_string_to_enum(ColKey col_key, ArrayString& keys)
{
auto col_ndx = col_key.get_index();
Array indexes(m_alloc);
indexes.create(Array::type_Normal, false);
ArrayString values(m_alloc);
ref_type ref = Array::get_as_ref(col_ndx.val + s_first_col_index);
set_string_interner(values, col_key);
values.init_from_ref(ref);
size_t sz = values.size();
for (size_t i = 0; i < sz; i++) {
auto v = values.get(i);
size_t pos = keys.lower_bound(v);
REALM_ASSERT_3(pos, !=, keys.size());
indexes.add(pos);
}
Array::set(col_ndx.val + s_first_col_index, indexes.get_ref());
Array::destroy_deep(ref, m_alloc);
}

void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
{
auto col_ndx = col_key.get_index();
Expand All @@ -1080,9 +1038,6 @@ void Cluster::init_leaf(ColKey col_key, ArrayPayload* leaf) const
if (leaf->need_string_interner()) {
m_tree_top.set_string_interner(*leaf, col_key);
}
if (leaf->need_spec()) {
m_tree_top.set_spec(*leaf, col_ndx);
}
leaf->init_from_ref(ref);
leaf->set_parent(const_cast<Cluster*>(this), col_ndx.val + 1);
}
Expand All @@ -1098,7 +1053,6 @@ template <typename ArrayType>
void Cluster::verify(ref_type ref, size_t index, util::Optional<size_t>& sz) const
{
ArrayType arr(get_alloc());
set_spec(arr, ColKey::Idx{unsigned(index) - 1});
auto table = get_owning_table();
REALM_ASSERT(index <= table->m_leaf_ndx2colkey.size());
auto col_key = table->m_leaf_ndx2colkey[index - 1];
Expand Down Expand Up @@ -1440,7 +1394,6 @@ void Cluster::dump_objects(int64_t key_offset, std::string lead) const
}
case col_type_String: {
ArrayString arr(m_alloc);
set_spec(arr, col.get_index());
set_string_interner(arr, col);
ref_type ref = Array::get_as_ref(j);
arr.init_from_ref(ref);
Expand Down
1 change: 0 additions & 1 deletion src/realm/cluster.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ class Cluster : public ClusterNode {
size_t get_ndx(RowKey key, size_t ndx) const noexcept override;
size_t erase(RowKey k, CascadeState& state) override;
void nullify_incoming_links(RowKey key, CascadeState& state) override;
void upgrade_string_to_enum(ColKey col, ArrayString& keys);

void init_leaf(ColKey col, ArrayPayload* leaf) const;
void add_leaf(ColKey col, ref_type ref);
Expand Down
Loading
Loading