Skip to content

Commit

Permalink
code review
Browse files Browse the repository at this point in the history
  • Loading branch information
nicola-cab committed Aug 1, 2024
1 parent 8b68853 commit 195716c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 76 deletions.
18 changes: 8 additions & 10 deletions test/benchmark-common-tasks/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1632,28 +1632,28 @@ struct BenchmarkSort : BenchmarkWithStrings {

// benchmark for testing compressed string sorting.
// N is the size of the string to generate
// M is the numer of times we want store the string (number of dups)
// M is the number of times we want store the string (number of dups)
template <size_t N, size_t M>
struct BenchmarkSortCompressed : BenchmarkWithStrings {
mutable std::string _name;
struct BenchmarkSortCompressed : BenchmarkWithStringsTable {
std::string compressed_benchmark_name;

BenchmarkSortCompressed()
: BenchmarkWithStrings()
: BenchmarkWithStringsTable()
{
if constexpr (N <= 15) {
_name = util::format("SortCompressedSmall(%1,%2)", std::to_string(N), std::to_string(M));
compressed_benchmark_name = util::format("SortCompressedSmall(%1,%2)", N, M);
}
else if constexpr (N <= 63) {
_name = util::format("SortCompressedMedium(%1,%2)", std::to_string(N), std::to_string(M));
compressed_benchmark_name = util::format("SortCompressedMedium(%1,%2)", N, M);
}
else {
_name = util::format("SortCompressedLarge(%1,%2)", std::to_string(N), std::to_string(M));
compressed_benchmark_name = util::format("SortCompressedLarge(%1,%2)", N, M);
}
}

const char* name() const
{
return _name.c_str();
return compressed_benchmark_name.c_str();
}

void before_all(DBRef group)
Expand All @@ -1678,13 +1678,11 @@ struct BenchmarkSortCompressed : BenchmarkWithStrings {

std::string str = "";
for (size_t i = 0; i < BASE_SIZE; ++i) {

if (i % M == 0)
str = gen_string(N);

Obj obj = t->create_object();
obj.set<StringData>(m_col, str);
m_keys.push_back(obj.get_key());
}
tr.commit();
}
Expand Down
93 changes: 27 additions & 66 deletions test/test_utf8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -142,46 +142,19 @@ TEST(UTF8_Compare_Strings)


TEST(UTF8_Compare_Core_utf8)
{
auto str_compare = [](StringData a, StringData b) {
return a < b;
};
// single utf16 code points (tests mostly Windows)
CHECK_EQUAL(false, str_compare(uae, uae));
CHECK_EQUAL(false, str_compare(uAE, uAE));

CHECK_EQUAL(false, str_compare(uae, ua));
CHECK_EQUAL(true, str_compare(ua, uae));

CHECK_EQUAL(true, str_compare(uAE, uae));

CHECK_EQUAL(false, str_compare(uae, uA));
CHECK_EQUAL(true, str_compare(uA, uAE));

// char needing utf16 surrogate pair (tests mostly windows because *nix uses utf32 as wchar_t). These are symbols
// that are beyond 'Latin Extended 2' (0...591), where 'compare_method 0' will sort them by unicode value instead.
// Test where one char is surrogate, and other is non-surrogate
CHECK_EQUAL(true, str_compare(uA, u16sur));
CHECK_EQUAL(false, str_compare(u16sur, uA));
CHECK_EQUAL(false, str_compare(u16sur, u16sur));

// Test where both are surrogate
CHECK_EQUAL(true, str_compare(u16sur, u16sur2));
CHECK_EQUAL(false, str_compare(u16sur2, u16sur2));
CHECK_EQUAL(false, str_compare(u16sur2, u16sur2));
}

TEST(UTF8_Compare_Core_utf8_interned_strings)
{
Array parent(Allocator::get_default());
parent.create(NodeHeader::type_HasRefs, false, 1, 0);
StringInterner interner(Allocator::get_default(), parent, ColKey(0), true);

auto str_compare = [&](StringData a, StringData b) {
auto str_compare = [&interner, this](StringData a, StringData b) {
auto id1 = interner.lookup(a);
auto id2 = interner.lookup(b);
CHECK(id1 && id2);
return interner.compare(*id1, *id2) < 0;
CHECK_EQUAL((id1 && id2), true);
const auto ret_interner_cmp = interner.compare(*id1, *id2) < 0;
const auto ret_cmp = a < b;
CHECK_EQUAL(ret_cmp, ret_interner_cmp);
return ret_cmp;
};

// intern all the utf8 strings
Expand All @@ -192,18 +165,26 @@ TEST(UTF8_Compare_Core_utf8_interned_strings)
interner.intern(u16sur);
interner.intern(u16sur2);

// re-do the same comparisons as per the test that is not using
// compressed strings.
// single utf16 code points (tests mostly Windows)
CHECK_EQUAL(false, str_compare(uae, uae));
CHECK_EQUAL(false, str_compare(uAE, uAE));

CHECK_EQUAL(false, str_compare(uae, ua));
CHECK_EQUAL(true, str_compare(ua, uae));

CHECK_EQUAL(true, str_compare(uAE, uae));

CHECK_EQUAL(false, str_compare(uae, uA));
CHECK_EQUAL(true, str_compare(uA, uAE));

// char needing utf16 surrogate pair (tests mostly windows because *nix uses utf32 as wchar_t). These are symbols
// that are beyond 'Latin Extended 2' (0...591), where 'compare_method 0' will sort them by unicode value instead.
// Test where one char is surrogate, and other is non-surrogate
CHECK_EQUAL(true, str_compare(uA, u16sur));
CHECK_EQUAL(false, str_compare(u16sur, uA));
CHECK_EQUAL(false, str_compare(u16sur, u16sur));

// Test where both are surrogate
CHECK_EQUAL(true, str_compare(u16sur, u16sur2));
CHECK_EQUAL(false, str_compare(u16sur2, u16sur2));
CHECK_EQUAL(false, str_compare(u16sur2, u16sur2));
Expand Down Expand Up @@ -242,7 +223,7 @@ TEST(UTF8_Compare_Core_utf8_invalid)
StringInterner interner(Allocator::get_default(), parent, ColKey(0), true);
auto id1 = interner.intern(invalid1);
auto id2 = interner.intern(invalid2);
bool ret_interned = interner.compare(id1, id2) == -1;
bool ret_interned = interner.compare(id1, id2) < 0;
CHECK_EQUAL(ret_interned, ret);
parent.destroy_deep();
}
Expand All @@ -269,53 +250,34 @@ TEST(Compare_Core_utf8_invalid_crash)
}

TEST(UTF8_Compare_Core_utf8_zero)
{
auto str_compare = [](StringData a, StringData b) {
return a < b;
};
// Realm must support 0 characters in utf8 strings
CHECK_EQUAL(false, str_compare(StringData("\0", 1), StringData("\0", 1)));
CHECK_EQUAL(true, str_compare(StringData("\0", 1), StringData("a")));
CHECK_EQUAL(false, str_compare("a", StringData("\0", 1)));

// 0 in middle of strings
CHECK_EQUAL(true, str_compare(StringData("a\0a", 3), StringData("a\0b", 3)));
CHECK_EQUAL(false, str_compare(StringData("a\0b", 3), StringData("a\0a", 3)));
CHECK_EQUAL(false, str_compare(StringData("a\0a", 3), StringData("a\0a", 3)));

// Number of trailing 0 makes a difference
CHECK_EQUAL(true, str_compare(StringData("a\0", 2), StringData("a\0\0", 3)));
CHECK_EQUAL(false, str_compare(StringData("a\0\0", 3), StringData("a\0", 2)));
}

TEST(UTF8_Compare_Core_utf8_zero_compressed_string)
{
Array parent(Allocator::get_default());
parent.create(NodeHeader::type_HasRefs, false, 1, 0);
StringInterner interner(Allocator::get_default(), parent, ColKey(0), true);

auto str_compare = [&](StringData a, StringData b) {
auto id1 = interner.lookup(a);
auto id2 = interner.lookup(b);
CHECK(id1 && id2);
return interner.compare(*id1, *id2) < 0;
};

// intern all the utf8 strings
const char* empty = "\0";
const char* a = "a";
const char* aa = "a\0a";
const char* ab = "a\0b";
const char* a0 = "a\0";
const char* a00 = "a\0\0";

// intern all the utf8 strings
interner.intern(StringData(empty, 1));
interner.intern(StringData(a));
interner.intern(StringData(aa, 3));
interner.intern(StringData(ab, 3));
interner.intern(StringData(a0, 2));
interner.intern(StringData(a00, 3));

auto str_compare = [&interner, this](StringData a, StringData b) {
auto id1 = interner.lookup(a);
auto id2 = interner.lookup(b);
CHECK_EQUAL((id1 && id2), true);
const auto ret_interner_cmp = interner.compare(*id1, *id2) < 0;
const auto ret_cmp = a < b;
CHECK_EQUAL(ret_cmp, ret_interner_cmp);
return ret_cmp;
};

// Realm must support 0 characters in utf8 strings
CHECK_EQUAL(false, str_compare(StringData("\0", 1), StringData("\0", 1)));
Expand All @@ -333,7 +295,6 @@ TEST(UTF8_Compare_Core_utf8_zero_compressed_string)

parent.destroy_deep();
}

} // anonymous namespace

#endif // TEST_UTF8

0 comments on commit 195716c

Please sign in to comment.