diff --git a/dbms/src/Common/RandomData.cpp b/dbms/src/Common/RandomData.cpp new file mode 100644 index 00000000000..98dbc77ed52 --- /dev/null +++ b/dbms/src/Common/RandomData.cpp @@ -0,0 +1,98 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +#include + +namespace DB::random +{ + +String randomString(UInt64 length) +{ + static const std::string charset{ + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()|[]{}:;',<.>`~"}; + std::random_device rand_dev; + std::mt19937_64 rand_gen(rand_dev()); + String str(length, '\x00'); + std::generate_n(str.begin(), str.size(), [&]() { return charset[rand_gen() % charset.size()]; }); + return str; +} + +int randomTimeOffset() +{ + std::random_device rand_dev; + std::mt19937_64 rand_gen(rand_dev()); + static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test + return (rand_gen() % max_offset) * (rand_gen() % 2 == 0 ? 1 : -1); +} + +time_t randomUTCTimestamp() +{ + using namespace std::chrono; + return duration_cast(system_clock::now().time_since_epoch()).count() + randomTimeOffset(); +} + +struct tm randomLocalTime() +{ + time_t t = randomUTCTimestamp(); + struct tm res + { + }; + if (localtime_r(&t, &res) == nullptr) + { + throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); + } + return res; +} + +String randomDate() +{ + auto res = randomLocalTime(); + return fmt::format("{}-{}-{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday); +} + +String randomDateTime() +{ + auto res = randomLocalTime(); + return fmt::format( + "{}-{}-{} {}:{}:{}", + res.tm_year + 1900, + res.tm_mon + 1, + res.tm_mday, + res.tm_hour, + res.tm_min, + res.tm_sec); +} + +String randomDuration() +{ + auto res = randomLocalTime(); + return fmt::format("{}:{}:{}", res.tm_hour, res.tm_min, res.tm_sec); +} + +String randomDecimal(uint64_t prec, uint64_t scale) +{ + std::random_device rand_dev; + std::mt19937_64 rand_gen(rand_dev()); + auto s = std::to_string(rand_gen()); + if (s.size() < prec) + s += String(prec - s.size(), '0'); + else if (s.size() > prec) + s = s.substr(0, prec); + return s.substr(0, prec - scale) + "." + s.substr(prec - scale); +} + +} // namespace DB::random diff --git a/dbms/src/Common/RandomData.h b/dbms/src/Common/RandomData.h new file mode 100644 index 00000000000..890bccd93a2 --- /dev/null +++ b/dbms/src/Common/RandomData.h @@ -0,0 +1,30 @@ +// Copyright 2023 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace DB::random +{ + +String randomString(UInt64 length); +time_t randomUTCTimestamp(); +struct tm randomLocalTime(); +String randomDate(); +String randomDateTime(); +String randomDuration(); +String randomDecimal(uint64_t prec, uint64_t scale); + +} // namespace DB::random diff --git a/dbms/src/Encryption/tests/gtest_encryption_test.cpp b/dbms/src/Encryption/tests/gtest_encryption_test.cpp index 087eac2a5b2..51f269b51bc 100644 --- a/dbms/src/Encryption/tests/gtest_encryption_test.cpp +++ b/dbms/src/Encryption/tests/gtest_encryption_test.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -50,19 +51,6 @@ const unsigned char KEY[33] = "\xe4\x3e\x8e\xca\x2a\x83\xe1\x88\xfb\xd8\x02\xdc\ const unsigned char IV_RANDOM[17] = "\x77\x9b\x82\x72\x26\xb5\x76\x50\xf7\x05\xd2\xd6\xb8\xaa\xa9\x2c"; const unsigned char IV_OVERFLOW_LOW[17] = "\x77\x9b\x82\x72\x26\xb5\x76\x50\xff\xff\xff\xff\xff\xff\xff\xff"; const unsigned char IV_OVERFLOW_FULL[17] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; - -std::string random_string(size_t length) -{ - std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"); - while (str.length() < length) - { - str += str; - } - std::random_device rd; - std::mt19937 generator(rd()); - std::shuffle(str.begin(), str.end(), generator); - return str.substr(0, length); -} } // namespace test constexpr size_t MAX_SIZE = 16 * 10; @@ -72,13 +60,13 @@ constexpr size_t MAX_SIZE = 16 * 10; class EncryptionTest : public testing::TestWithParam> { public: - unsigned char plaintext[MAX_SIZE]; + unsigned char plaintext[MAX_SIZE]{}; // Reserve a bit more room to make sure OpenSSL have enough buffer. - unsigned char ciphertext[MAX_SIZE + 16 * 2]; + unsigned char ciphertext[MAX_SIZE + 16 * 2]{}; void generateCiphertext(const unsigned char * iv) { - std::string random_string = test::random_string(MAX_SIZE); + std::string random_string = DB::random::randomString(MAX_SIZE); memcpy(plaintext, random_string.data(), MAX_SIZE); EVP_CIPHER_CTX * ctx; diff --git a/dbms/src/Server/DTTool/DTToolBench.cpp b/dbms/src/Server/DTTool/DTToolBench.cpp index f8c6e8f956f..de965c2b9c6 100644 --- a/dbms/src/Server/DTTool/DTToolBench.cpp +++ b/dbms/src/Server/DTTool/DTToolBench.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -92,21 +93,6 @@ ColumnDefinesPtr createColumnDefines(size_t column_number) return primitive; } -String createRandomString(std::size_t limit, std::mt19937_64 & eng, size_t & acc) -{ - // libc++-15 forbids instantiate `std::uniform_int_distribution`. - // see https://github.com/llvm/llvm-project/blob/bfcd536a8ef6b1d6e9dd211925be3b078d06fe77/libcxx/include/__random/is_valid.h#L28 - // and https://github.com/llvm/llvm-project/blob/bfcd536a8ef6b1d6e9dd211925be3b078d06fe77/libcxx/include/__random/uniform_int_distribution.h#L162 - std::uniform_int_distribution dist('a', 'z'); - std::string buffer((eng() % limit) + 1, 0); - for (auto & i : buffer) - { - i = dist(eng); - } - acc += buffer.size(); - return buffer; -} - DB::Block createBlock( size_t column_number, size_t start, @@ -191,7 +177,7 @@ DB::Block createBlock( IColumn::MutablePtr m_col = str_col.type->createColumn(); for (size_t j = 0; j < row_number; j++) { - Field field = createRandomString(limit, eng, acc); + Field field = DB::random::randomString(limit); m_col->insert(field); } str_col.column = std::move(m_col); diff --git a/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp index f9e97ea9e9f..6a6ee198044 100644 --- a/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp +++ b/dbms/src/Storages/DeltaMerge/workload/DataGenerator.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include @@ -146,7 +147,7 @@ class RandomDataGenerator : public DataGenerator } else if (family_name == "String") { - Field f = randomString(); + Field f = DB::random::randomString(128); mut_col->insert(f); } else if (family_name == "Enum8") @@ -165,19 +166,19 @@ class RandomDataGenerator : public DataGenerator } else if (family_name == "MyDateTime") { - Field f = parseMyDateTime(randomDateTime()); + Field f = parseMyDateTime(DB::random::randomDateTime()); mut_col->insert(f); } else if (family_name == "MyDate") { - Field f = parseMyDateTime(randomDate()); + Field f = parseMyDateTime(DB::random::randomDate()); mut_col->insert(f); } else if (family_name == "Decimal") { auto prec = getDecimalPrecision(*data_type, 0); auto scale = getDecimalScale(*data_type, 0); - auto s = randomDecimal(prec, scale); + auto s = DB::random::randomDecimal(prec, scale); bool negative = rand_gen() % 2 == 0; Field f; if (parseDecimal(s.data(), s.size(), negative, f)) @@ -198,68 +199,6 @@ class RandomDataGenerator : public DataGenerator return col; } - std::string randomDecimal(uint64_t prec, uint64_t scale) - { - auto s = std::to_string(rand_gen()); - if (s.size() < prec) - { - s += std::string(prec - s.size(), '0'); - } - else if (s.size() > prec) - { - s = s.substr(0, prec); - } - return s.substr(0, prec - scale) + "." + s.substr(prec - scale); - } - - std::string randomDate() - { - auto res = randomLocalTime(); - return fmt::format("{}-{}-{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday); - } - - std::string randomDateTime() - { - auto res = randomLocalTime(); - return fmt::format( - "{}-{}-{} {}:{}:{}", - res.tm_year + 1900, - res.tm_mon + 1, - res.tm_mday, - res.tm_hour, - res.tm_min, - res.tm_sec); - } - - time_t randomUTCTimestamp() { return ::time(nullptr) + randomTimeOffset(); } - - int randomTimeOffset() - { - static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test - return (rand_gen() % max_offset) * (rand_gen() % 2 == 0 ? 1 : -1); - } - - struct tm randomLocalTime() - { - time_t t = randomUTCTimestamp(); - struct tm res - { - }; - if (localtime_r(&t, &res) == nullptr) - { - throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); - } - return res; - } - - std::string randomString() - { - constexpr int size = 128; - std::string str(size, 0); - std::generate_n(str.begin(), str.size(), [this]() { return charset[rand_gen() % charset.size()]; }); - return str; - } - const TableInfo & table_info; TimestampGenerator & ts_gen; std::mt19937_64 rand_gen; diff --git a/dbms/src/TestUtils/ColumnGenerator.cpp b/dbms/src/TestUtils/ColumnGenerator.cpp index 92563ed3fef..340770cebe6 100644 --- a/dbms/src/TestUtils/ColumnGenerator.cpp +++ b/dbms/src/TestUtils/ColumnGenerator.cpp @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include +#include #include #include #include @@ -31,7 +32,6 @@ ColumnWithTypeAndName ColumnGenerator::generateNullMapColumn(const ColumnGenerat ColumnWithTypeAndName ColumnGenerator::generate(const ColumnGeneratorOpts & opts) { - int_rand_gen = std::uniform_int_distribution(0, opts.string_max_size); DataTypePtr type; if (opts.type_name == "Decimal") type = createDecimalType(); @@ -97,9 +97,12 @@ ColumnWithTypeAndName ColumnGenerator::generate(const ColumnGeneratorOpts & opts genFloat(col); break; case TypeIndex::String: + { + auto int_rand_gen = std::uniform_int_distribution(0, opts.string_max_size); for (size_t i = 0; i < opts.size; ++i) - genString(col); + genString(col, int_rand_gen(rand_gen)); break; + } case TypeIndex::Decimal32: case TypeIndex::Decimal64: case TypeIndex::Decimal128: @@ -124,8 +127,6 @@ ColumnWithTypeAndName ColumnGenerator::generate(const ColumnGeneratorOpts & opts for (size_t i = 0; i < opts.size; ++i) genEnumValue(col, type); break; - { - } default: throw std::invalid_argument("RandomColumnGenerator invalid type"); } @@ -141,73 +142,6 @@ DataTypePtr ColumnGenerator::createDecimalType() return DB::createDecimal(prec, scale); } -String ColumnGenerator::randomString() -{ - String str(int_rand_gen(rand_gen), 0); - std::generate_n(str.begin(), str.size(), [this]() { return charset[rand_gen() % charset.size()]; }); - return str; -} - -int ColumnGenerator::randomTimeOffset() -{ - static constexpr int max_offset = 24 * 3600 * 10000; // 10000 days for test - return (rand_gen() % max_offset) * (rand_gen() % 2 == 0 ? 1 : -1); -} - -time_t ColumnGenerator::randomUTCTimestamp() -{ - return ::time(nullptr) + randomTimeOffset(); -} - -struct tm ColumnGenerator::randomLocalTime() -{ - time_t t = randomUTCTimestamp(); - struct tm res - { - }; - - if (localtime_r(&t, &res) == nullptr) - { - throw std::invalid_argument(fmt::format("localtime_r({}) ret {}", t, strerror(errno))); - } - return res; -} - -String ColumnGenerator::randomDate() -{ - auto res = randomLocalTime(); - return fmt::format("{}-{}-{}", res.tm_year + 1900, res.tm_mon + 1, res.tm_mday); -} - -String ColumnGenerator::randomDuration() -{ - auto res = randomLocalTime(); - return fmt::format("{}:{}:{}", res.tm_hour, res.tm_min, res.tm_sec); -} - -String ColumnGenerator::randomDateTime() -{ - auto res = randomLocalTime(); - return fmt::format( - "{}-{}-{} {}:{}:{}", - res.tm_year + 1900, - res.tm_mon + 1, - res.tm_mday, - res.tm_hour, - res.tm_min, - res.tm_sec); -} - -String ColumnGenerator::randomDecimal(uint64_t prec, uint64_t scale) -{ - auto s = std::to_string(rand_gen()); - if (s.size() < prec) - s += String(prec - s.size(), '0'); - else if (s.size() > prec) - s = s.substr(0, prec); - return s.substr(0, prec - scale) + "." + s.substr(prec - scale); -} - template void ColumnGenerator::genInt(MutableColumnPtr & col) { @@ -264,27 +198,27 @@ void ColumnGenerator::genFloat(MutableColumnPtr & col) col->insert(f); } -void ColumnGenerator::genString(MutableColumnPtr & col) +void ColumnGenerator::genString(MutableColumnPtr & col, UInt64 max_size) { - Field f = randomString(); + Field f = DB::random::randomString(max_size); col->insert(f); } void ColumnGenerator::genDate(MutableColumnPtr & col) { - Field f = parseMyDateTime(randomDate()); + Field f = parseMyDateTime(DB::random::randomDate()); col->insert(f); } void ColumnGenerator::genDateTime(MutableColumnPtr & col) { - Field f = parseMyDateTime(randomDateTime()); + Field f = parseMyDateTime(DB::random::randomDateTime()); col->insert(f); } void ColumnGenerator::genDuration(MutableColumnPtr & col) { - Field f = parseMyDuration(randomDuration()); + Field f = parseMyDuration(DB::random::randomDuration()); col->insert(f); } @@ -292,7 +226,7 @@ void ColumnGenerator::genDecimal(MutableColumnPtr & col, DataTypePtr & data_type { auto prec = getDecimalPrecision(*data_type, 0); auto scale = getDecimalScale(*data_type, 0); - auto s = randomDecimal(prec, scale); + auto s = DB::random::randomDecimal(prec, scale); bool negative = rand_gen() % 2 == 0; Field f; if (parseDecimal(s.data(), s.size(), negative, f)) diff --git a/dbms/src/TestUtils/ColumnGenerator.h b/dbms/src/TestUtils/ColumnGenerator.h index f1bf33b73a4..1722dee83fe 100644 --- a/dbms/src/TestUtils/ColumnGenerator.h +++ b/dbms/src/TestUtils/ColumnGenerator.h @@ -33,7 +33,7 @@ struct ColumnGeneratorOpts size_t size; String type_name; DataDistribution distribution; - String name = ""; + String name = ""; // NOLINT size_t string_max_size = 128; }; @@ -45,19 +45,7 @@ class ColumnGenerator : public ext::Singleton private: ColumnWithTypeAndName generateNullMapColumn(const ColumnGeneratorOpts & opts); std::mt19937_64 rand_gen; - std::uniform_int_distribution int_rand_gen = std::uniform_int_distribution(0, 128); std::uniform_real_distribution real_rand_gen; - /// todo support multibyte characters - const std::string charset{"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!@#$%^&*()|[]{}:;',<.>`~"}; - - String randomString(); - int randomTimeOffset(); - time_t randomUTCTimestamp(); - struct tm randomLocalTime(); - String randomDate(); - String randomDateTime(); - String randomDuration(); - String randomDecimal(uint64_t prec, uint64_t scale); DataTypePtr createDecimalType(); @@ -67,10 +55,10 @@ class ColumnGenerator : public ext::Singleton template void genUInt(MutableColumnPtr & col); void genFloat(MutableColumnPtr & col); - void genString(MutableColumnPtr & col); - void genDate(MutableColumnPtr & col); - void genDateTime(MutableColumnPtr & col); - void genDuration(MutableColumnPtr & col); + static void genString(MutableColumnPtr & col, UInt64 max_size); + static void genDate(MutableColumnPtr & col); + static void genDateTime(MutableColumnPtr & col); + static void genDuration(MutableColumnPtr & col); void genDecimal(MutableColumnPtr & col, DataTypePtr & data_type); void genEnumValue(MutableColumnPtr & col, DataTypePtr & enum_type); };