Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix transient failures in unit_seedable_global_PRNG. #5234

Merged
merged 2 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions tiledb/common/random/random_label.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,17 @@ RandomLabelGenerator::RandomLabelGenerator()
/* API */
/* ********************************* */
/**
 * Generates a label stamped with the current time: reads the clock through
 * `timestamp_now_ms()` and hands that value to the overload that takes an
 * explicit timestamp.
 */
RandomLabelWithTimestamp RandomLabelGenerator::generate() {
  return generate(tiledb::sm::utils::time::timestamp_now_ms());
}

RandomLabelWithTimestamp RandomLabelGenerator::generate(uint64_t timestamp) {
PRNG& prng = PRNG::get();
std::lock_guard<std::mutex> lock(mtx_);
auto now = tiledb::sm::utils::time::timestamp_now_ms();

// If no label has been generated this millisecond, generate a new one.
if (now != prev_time_) {
prev_time_ = now;
if (timestamp != prev_time_) {
prev_time_ = timestamp;
counter_ = static_cast<uint32_t>(prng());
// Clear the top bit of the counter such that a full 2 billion values
// could be generated within a single millisecond.
Expand All @@ -69,7 +73,7 @@ RandomLabelWithTimestamp RandomLabelGenerator::generate() {
ss << std::hex << std::setw(8) << std::setfill('0')
<< static_cast<uint32_t>(prng());
ss << std::hex << std::setw(16) << std::setfill('0') << prng();
return {ss.str(), now};
return {ss.str(), timestamp};
}

RandomLabelWithTimestamp RandomLabelGenerator::generate_random_label() {
Expand Down
3 changes: 3 additions & 0 deletions tiledb/common/random/random_label.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ class RandomLabelGenerator {
/** Generate a random label with a timestamp. */
RandomLabelWithTimestamp generate();

/** Generate a random label at the specified timestamp. */
RandomLabelWithTimestamp generate(uint64_t timestamp);

public:
/** Generate a random label. */
static RandomLabelWithTimestamp generate_random_label();
Expand Down
150 changes: 89 additions & 61 deletions tiledb/common/random/test/unit_random_label_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,86 +28,111 @@
* Tests for the random label generator.
*/

#include <math.h>
#include <ranges>
#include <string>

#include <test/support/tdb_catch.h>
#include "../random_label.h"

using namespace tiledb::common;
using namespace tiledb::sm;

size_t generate_labels(std::vector<std::string>& labels) {
size_t labels_size = labels.size();
auto now = utils::time::timestamp_now_ms();
size_t idx = 0;
while ((utils::time::timestamp_now_ms()) < now + 100 && idx < labels_size) {
labels[idx++] = random_label();
/**
 * Test-only subclass of RandomLabelGenerator. It forwards to the base
 * class's `generate(uint64_t)` so that tests can request a label at a
 * timestamp of their choosing.
 */
class TestRandomLabelGenerator : public RandomLabelGenerator {
 public:
  /** Default constructor. */
  TestRandomLabelGenerator() = default;

  /** Default destructor. */
  ~TestRandomLabelGenerator() = default;

  /**
   * Generates a label at the caller-supplied timestamp.
   *
   * @param timestamp Value passed straight through to `generate`.
   * @return Whatever `generate(timestamp)` returns.
   */
  RandomLabelWithTimestamp generate_at(uint64_t timestamp) {
    return this->generate(timestamp);
  }
};

return idx;
/**
 * Parses the first eight characters of `label` as a hexadecimal number.
 *
 * @param label The label to inspect. Taken by const reference because it is
 *     only read, so const strings and temporaries are accepted too.
 * @return The value of `label.substr(0, 8)` interpreted in base 16.
 * @throws std::invalid_argument (from std::stoul) if no conversion can be
 *     performed.
 */
uint32_t prefix_as_uint32(const std::string& label) {
  // An eight-digit hex value fits in 32 bits; the cast makes the narrowing
  // from std::stoul's unsigned long explicit.
  return static_cast<uint32_t>(std::stoul(label.substr(0, 8), nullptr, 16));
}

void validate_labels(std::vector<std::string>& labels, size_t num_labels) {
// Given the label randomness and the fact that we're racing the processor,
// the best we can do here (for now) is assert that there's 10 ordered groups.
// In this manner, groups are defined as sharing the first 4 bytes.
uint64_t num_groups = 0;
uint64_t this_group = 0;
for (size_t i = 1; i < num_labels; i++) {
bool match = true;
for (size_t j = 0; j < 4; j++) {
if (labels[i - 1][j] != labels[i][j]) {
match = false;
break;
}
}
if (!match) {
if (this_group > 10) {
num_groups += 1;
}
this_group = 0;
continue;
}
TEST_CASE("RandomLabelGenerator: validation", "[RandomLabelGenerator]") {
TestRandomLabelGenerator x;

// We share a prefix so assert that they're ordered.
REQUIRE(labels[i] > labels[i - 1]);
this_group += 1;
}
// Generate a random label to validate initialization.
auto label0 = x.generate_at(0);
REQUIRE(label0.timestamp_ == 0);
REQUIRE(label0.random_label_.size() == 32);

// Generate a second label, at a second timestamp
auto label1 = x.generate_at(1);
REQUIRE(label1.timestamp_ == 1);
REQUIRE(label1.random_label_.size() == 32);
REQUIRE(label1.random_label_ != label0.random_label_);

// Check that prefixes aren't off by one to show that the prefix was
// regenerated after the time change.
auto prefix0 = prefix_as_uint32(label0.random_label_);
auto prefix1 = prefix_as_uint32(label1.random_label_);
REQUIRE(std::max(prefix0, prefix1) - std::min(prefix0, prefix1) > 1);

// Check that the label prefix is random by going backwards in time and
// generating another label at time 0.
auto label0_2 = x.generate_at(0);
REQUIRE(label0_2.timestamp_ == 0);
REQUIRE(label0_2.random_label_.size() == 32);
REQUIRE(label0_2.random_label_ != label1.random_label_);
REQUIRE(label0_2.random_label_ != label0.random_label_);

REQUIRE(num_groups > 10);
// Validate that label0_2 had a different prefix generated.
auto prefix0_2 = prefix_as_uint32(label0_2.random_label_);
REQUIRE(std::max(prefix0_2, prefix0) - std::min(prefix0_2, prefix0) > 1);
}

TEST_CASE(
"RandomLabelGenerator: serial generation",
"[RandomLabelGenerator][serial]") {
// Generate a random label to validate initialization.
auto label = random_label();
REQUIRE(label.size() == 32);
TestRandomLabelGenerator x;

// Generating a large number of labels at the same time results
// in a common prefix for all labels, with all labels sorted, while having
// no duplicates generated.
std::vector<std::string> labels(25000);
for (auto idx : std::views::iota(0, 25000)) {
labels[idx] = x.generate_at(5).random_label_;
}
auto prefix = prefix_as_uint32(labels[0]);
auto prev = labels[0];

// Test one million strings. Let's assume the buffer overflow check works.
std::vector<std::string> labels{1000000};
auto num_labels = generate_labels(labels);
validate_labels(labels, num_labels);
// Every generated label's 8-hex-digit prefix should be exactly one greater
// than the previous label's, and each label should be strictly greater than
// the previous label. Strictly greater assures that there are no duplicates
// given that these strings are strictly ordered.
for (auto idx : std::views::iota(1, 25000)) {
auto curr_prefix = prefix_as_uint32(labels[idx]);
REQUIRE(curr_prefix - prefix == 1);
REQUIRE(labels[idx] > prev);
prefix = curr_prefix;
prev = labels[idx];
}
}

TEST_CASE(
"RandomLabelGenerator: parallel generation",
"[RandomLabelGenerator][parallel]") {
TestRandomLabelGenerator x;
const unsigned nthreads = 20;
const unsigned labels_per_thread = 25000;
std::vector<std::thread> threads;
std::vector<std::vector<std::string>> labels{nthreads};
size_t num_labels[nthreads];

// Pre-allocate our buffers so we're getting as much contention as possible
for (size_t i = 0; i < nthreads; i++) {
labels[i].resize(1000000);
labels[i].resize(labels_per_thread);
}

// Generate labels simultaneously in multiple threads.
for (size_t i = 0; i < nthreads; i++) {
auto num_ptr = &num_labels[i];
auto vec_ptr = &labels[i];
threads.emplace_back([num_ptr, vec_ptr]() {
auto num = generate_labels(*vec_ptr);
*num_ptr = num;
threads.emplace_back([&x, vec_ptr]() {
for (size_t idx = 0; idx < labels_per_thread; idx++) {
(*vec_ptr)[idx] = x.generate_at(3).random_label_;
}
});
}

Expand All @@ -116,23 +141,26 @@ TEST_CASE(
t.join();
}

// Check that we've generated the correct number of random labels.
std::unordered_set<std::string> label_set;
size_t total_labels = 0;
for (size_t i = 0; i < nthreads; i++) {
total_labels += num_labels[i];
for (size_t j = 0; j < num_labels[i]; j++) {
label_set.insert(labels[i][j]);
}
}
REQUIRE(label_set.size() == total_labels);

// Sort and validate the parallel threads as if they were serially generated.
std::vector<std::string> all_labels{total_labels};
std::vector<std::string> all_labels{labels_per_thread * nthreads};
size_t idx = 0;
for (auto label : label_set) {
all_labels[idx++] = label;
for (auto thrlabels : labels) {
for (auto label : thrlabels) {
all_labels[idx++] = label;
}
}
std::sort(all_labels.begin(), all_labels.end());
validate_labels(all_labels, total_labels);

// Verify that, once sorted, each label's 8-hex-digit prefix is exactly one
// greater than the previous label's and that no label is duplicated.
REQUIRE(all_labels[0].size() == 32);
auto prefix = prefix_as_uint32(all_labels[0]);
auto prev = all_labels[0];
for (size_t idx = 1; idx < labels_per_thread * nthreads; idx++) {
auto curr_prefix = prefix_as_uint32(all_labels[idx]);
REQUIRE(curr_prefix - prefix == 1);
REQUIRE(all_labels[idx].size() == 32);
REQUIRE(all_labels[idx] > prev);
prefix = curr_prefix;
prev = all_labels[idx];
}
}
Loading