Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

*: add util functions to generate random data #8481

Merged
merged 9 commits into from
Dec 13, 2023
98 changes: 98 additions & 0 deletions dbms/src/Common/RandomData.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/RandomData.h>
#include <fmt/format.h>

#include <algorithm>
#include <cerrno>
#include <chrono>
#include <cstring>
#include <ctime>
#include <random>
#include <stdexcept>
#include <string>

namespace DB::random
{

/// Builds a random string for tests and benchmarks.
///
/// @param length  Number of characters in the result; 0 yields an empty string.
/// @return A string of exactly `length` characters, each drawn uniformly from
///         `charset` (digits, ASCII letters and a set of punctuation marks).
String randomString(UInt64 length)
{
    static const std::string charset{
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()|[]{}:;',<.>`~"};
    // One engine per thread, seeded once on first use: callers generate strings
    // row by row, so constructing and seeding a fresh std::random_device plus a
    // std::mt19937_64 on every call is needless work.
    thread_local std::mt19937_64 rand_gen(std::random_device{}());
    // A distribution over valid indexes avoids the slight bias of `% size()`.
    std::uniform_int_distribution<size_t> pick_index(0, charset.size() - 1);

    String str(length, 0);
    std::generate_n(str.begin(), str.size(), [&]() { return charset[pick_index(rand_gen)]; });
    return str;
}

/// Returns a random signed offset in seconds.
///
/// @return A value uniformly drawn from the open interval
///         (-max_offset, max_offset), where max_offset is 10000 days.
int randomTimeOffset()
{
    static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test
    // One engine per thread, seeded once on first use instead of on every call.
    thread_local std::mt19937_64 rand_gen(std::random_device{}());
    // Draw the signed value directly. Multiplying an unsigned 64-bit remainder
    // by -1 and narrowing it to int only works through unsigned wrap-around and
    // an implementation-defined conversion.
    std::uniform_int_distribution<int> offset_dist(-(max_offset - 1), max_offset - 1);
    return offset_dist(rand_gen);
}

/// Returns the current std::chrono::system_clock time, truncated to whole
/// seconds since the clock's epoch, shifted by the result of randomTimeOffset().
time_t randomUTCTimestamp()
{
    namespace chrono = std::chrono;
    const auto since_epoch = chrono::system_clock::now().time_since_epoch();
    const auto now_seconds = chrono::duration_cast<chrono::seconds>(since_epoch).count();
    return now_seconds + randomTimeOffset();
}

/// Converts a timestamp from randomUTCTimestamp() into broken-down local time.
///
/// @return The `struct tm` filled in by localtime_r.
/// @throws std::invalid_argument if localtime_r reports failure; the message
///         carries the timestamp and the strerror text for errno.
struct tm randomLocalTime()
{
    const time_t timestamp = randomUTCTimestamp();
    struct tm local_tm{};
    if (localtime_r(&timestamp, &local_tm) != nullptr)
        return local_tm;

    throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", timestamp, strerror(errno)));
}

/// Formats the date part of randomLocalTime() as "year-month-day".
/// Fields are printed as plain integers, i.e. without zero padding.
String randomDate()
{
    const auto local_tm = randomLocalTime();
    const int year = local_tm.tm_year + 1900; // tm_year counts from 1900
    const int month = local_tm.tm_mon + 1; // tm_mon is zero-based
    return fmt::format("{}-{}-{}", year, month, local_tm.tm_mday);
}

/// Formats randomLocalTime() as "year-month-day hour:minute:second".
/// Fields are printed as plain integers, i.e. without zero padding.
String randomDateTime()
{
    const auto local_tm = randomLocalTime();
    const int year = local_tm.tm_year + 1900; // tm_year counts from 1900
    const int month = local_tm.tm_mon + 1; // tm_mon is zero-based
    return fmt::format(
        "{}-{}-{} {}:{}:{}",
        year,
        month,
        local_tm.tm_mday,
        local_tm.tm_hour,
        local_tm.tm_min,
        local_tm.tm_sec);
}

/// Formats the time-of-day part of randomLocalTime() as "hour:minute:second".
/// Fields are printed as plain integers, i.e. without zero padding.
String randomDuration()
{
    const auto local_tm = randomLocalTime();
    const int hours = local_tm.tm_hour;
    const int minutes = local_tm.tm_min;
    const int seconds = local_tm.tm_sec;
    return fmt::format("{}:{}:{}", hours, minutes, seconds);
}

/// Builds the textual form of a random, unsigned decimal number.
///
/// @param prec   Total number of digits in the result.
/// @param scale  Number of digits after the decimal point; must not exceed `prec`.
/// @return `prec - scale` digits, a '.', then `scale` digits. When `scale == prec`
///         the string starts with '.', and when `scale == 0` it ends with '.'.
/// @throws std::out_of_range if `scale > prec`.
String randomDecimal(uint64_t prec, uint64_t scale)
{
    // Reject the invalid combination up front instead of letting `prec - scale`
    // wrap around and relying on substr() to throw.
    if (scale > prec)
        throw std::out_of_range("randomDecimal: scale must not exceed prec");

    // One engine per thread, seeded once on first use instead of on every call.
    thread_local std::mt19937_64 rand_gen(std::random_device{}());
    // Every digit is drawn individually, so precisions beyond the ~20 digits of
    // a single 64-bit value still get random (not zero-padded) digits. The first
    // digit stays non-zero, as with std::to_string of a non-zero value.
    std::uniform_int_distribution<int> leading_digit('1', '9');
    std::uniform_int_distribution<int> any_digit('0', '9');

    String s(prec, '0');
    for (uint64_t i = 0; i < prec; ++i)
        s[i] = static_cast<char>(i == 0 ? leading_digit(rand_gen) : any_digit(rand_gen));

    s.insert(prec - scale, 1, '.');
    return s;
}

} // namespace DB::random
30 changes: 30 additions & 0 deletions dbms/src/Common/RandomData.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/types.h>

namespace DB::random
{

/// Returns a string of exactly `length` characters, each picked at random from
/// a fixed set of digits, ASCII letters and punctuation marks.
String randomString(UInt64 length);
JaySon-Huang marked this conversation as resolved.
Show resolved Hide resolved
/// Returns the current system-clock time in whole seconds since the epoch,
/// shifted by a random offset of less than 10000 days in either direction.
time_t randomUTCTimestamp();
/// Returns randomUTCTimestamp() converted to broken-down local time via
/// localtime_r. Throws std::invalid_argument if that conversion fails.
struct tm randomLocalTime();
/// Returns the date of a random local time as "year-month-day"
/// (fields are not zero-padded).
String randomDate();
/// Returns a random local time as "year-month-day hour:minute:second"
/// (fields are not zero-padded).
String randomDateTime();
/// Returns the time of day of a random local time as "hour:minute:second"
/// (fields are not zero-padded).
String randomDuration();
/// Returns a random decimal as text: `prec - scale` digits, a '.', then `scale`
/// digits. `scale` must not exceed `prec`; otherwise std::out_of_range is thrown.
String randomDecimal(uint64_t prec, uint64_t scale);

} // namespace DB::random
20 changes: 4 additions & 16 deletions dbms/src/Encryption/tests/gtest_encryption_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/RandomData.h>
#include <Encryption/AESCTRCipherStream.h>
#include <Encryption/EncryptedRandomAccessFile.h>
#include <Encryption/EncryptedWritableFile.h>
Expand Down Expand Up @@ -50,19 +51,6 @@ const unsigned char KEY[33] = "\xe4\x3e\x8e\xca\x2a\x83\xe1\x88\xfb\xd8\x02\xdc\
const unsigned char IV_RANDOM[17] = "\x77\x9b\x82\x72\x26\xb5\x76\x50\xf7\x05\xd2\xd6\xb8\xaa\xa9\x2c";
const unsigned char IV_OVERFLOW_LOW[17] = "\x77\x9b\x82\x72\x26\xb5\x76\x50\xff\xff\xff\xff\xff\xff\xff\xff";
const unsigned char IV_OVERFLOW_FULL[17] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";

std::string random_string(size_t length)
{
std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
while (str.length() < length)
{
str += str;
}
std::random_device rd;
std::mt19937 generator(rd());
std::shuffle(str.begin(), str.end(), generator);
return str.substr(0, length);
}
} // namespace test

constexpr size_t MAX_SIZE = 16 * 10;
Expand All @@ -72,13 +60,13 @@ constexpr size_t MAX_SIZE = 16 * 10;
class EncryptionTest : public testing::TestWithParam<std::tuple<bool, EncryptionMethod>>
{
public:
unsigned char plaintext[MAX_SIZE];
unsigned char plaintext[MAX_SIZE]{};
// Reserve a bit more room to make sure OpenSSL have enough buffer.
unsigned char ciphertext[MAX_SIZE + 16 * 2];
unsigned char ciphertext[MAX_SIZE + 16 * 2]{};

void generateCiphertext(const unsigned char * iv)
{
std::string random_string = test::random_string(MAX_SIZE);
std::string random_string = DB::random::randomString(MAX_SIZE);
memcpy(plaintext, random_string.data(), MAX_SIZE);

EVP_CIPHER_CTX * ctx;
Expand Down
18 changes: 2 additions & 16 deletions dbms/src/Server/DTTool/DTToolBench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/RandomData.h>
#include <Common/TiFlashMetrics.h>
#include <Encryption/MockKeyManager.h>
#include <IO/ChecksumBuffer.h>
Expand Down Expand Up @@ -92,21 +93,6 @@ ColumnDefinesPtr createColumnDefines(size_t column_number)
return primitive;
}

String createRandomString(std::size_t limit, std::mt19937_64 & eng, size_t & acc)
{
// libc++-15 forbids instantiate `std::uniform_int_distribution<char>`.
// see https://github.com/llvm/llvm-project/blob/bfcd536a8ef6b1d6e9dd211925be3b078d06fe77/libcxx/include/__random/is_valid.h#L28
// and https://github.com/llvm/llvm-project/blob/bfcd536a8ef6b1d6e9dd211925be3b078d06fe77/libcxx/include/__random/uniform_int_distribution.h#L162
std::uniform_int_distribution<uint8_t> dist('a', 'z');
std::string buffer((eng() % limit) + 1, 0);
for (auto & i : buffer)
{
i = dist(eng);
}
acc += buffer.size();
return buffer;
}

DB::Block createBlock(
size_t column_number,
size_t start,
Expand Down Expand Up @@ -191,7 +177,7 @@ DB::Block createBlock(
IColumn::MutablePtr m_col = str_col.type->createColumn();
for (size_t j = 0; j < row_number; j++)
{
Field field = createRandomString(limit, eng, acc);
Field field = DB::random::randomString(limit);
m_col->insert(field);
}
str_col.column = std::move(m_col);
Expand Down
71 changes: 5 additions & 66 deletions dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Common/RandomData.h>
#include <DataTypes/DataTypeEnum.h>
#include <Storages/DeltaMerge/workload/DataGenerator.h>
#include <Storages/DeltaMerge/workload/KeyGenerator.h>
Expand Down Expand Up @@ -146,7 +147,7 @@ class RandomDataGenerator : public DataGenerator
}
else if (family_name == "String")
{
Field f = randomString();
Field f = DB::random::randomString(128);
mut_col->insert(f);
}
else if (family_name == "Enum8")
Expand All @@ -165,19 +166,19 @@ class RandomDataGenerator : public DataGenerator
}
else if (family_name == "MyDateTime")
{
Field f = parseMyDateTime(randomDateTime());
Field f = parseMyDateTime(DB::random::randomDateTime());
mut_col->insert(f);
}
else if (family_name == "MyDate")
{
Field f = parseMyDateTime(randomDate());
Field f = parseMyDateTime(DB::random::randomDate());
mut_col->insert(f);
}
else if (family_name == "Decimal")
{
auto prec = getDecimalPrecision(*data_type, 0);
auto scale = getDecimalScale(*data_type, 0);
auto s = randomDecimal(prec, scale);
auto s = DB::random::randomDecimal(prec, scale);
bool negative = rand_gen() % 2 == 0;
Field f;
if (parseDecimal(s.data(), s.size(), negative, f))
Expand All @@ -198,68 +199,6 @@ class RandomDataGenerator : public DataGenerator
return col;
}

std::string randomDecimal(uint64_t prec, uint64_t scale)
{
auto s = std::to_string(rand_gen());
if (s.size() < prec)
{
s += std::string(prec - s.size(), '0');
}
else if (s.size() > prec)
{
s = s.substr(0, prec);
}
return s.substr(0, prec - scale) + "." + s.substr(prec - scale);
}

std::string randomDate()
{
auto res = randomLocalTime();
return fmt::format("{}-{}-{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday);
}

std::string randomDateTime()
{
auto res = randomLocalTime();
return fmt::format(
"{}-{}-{} {}:{}:{}",
res.tm_year + 1900,
res.tm_mon + 1,
res.tm_mday,
res.tm_hour,
res.tm_min,
res.tm_sec);
}

time_t randomUTCTimestamp() { return ::time(nullptr) + randomTimeOffset(); }

int randomTimeOffset()
{
static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test
return (rand_gen() % max_offset) * (rand_gen() % 2 == 0 ? 1 : -1);
}

struct tm randomLocalTime()
{
time_t t = randomUTCTimestamp();
struct tm res
{
};
if (localtime_r(&t, &res) == nullptr)
{
throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno)));
}
return res;
}

std::string randomString()
{
constexpr int size = 128;
std::string str(size, 0);
std::generate_n(str.begin(), str.size(), [this]() { return charset[rand_gen() % charset.size()]; });
return str;
}

const TableInfo & table_info;
TimestampGenerator & ts_gen;
std::mt19937_64 rand_gen;
Expand Down
Loading