Skip to content

Commit

Permalink
Updating Benchmark; #576
Browse files Browse the repository at this point in the history
  • Loading branch information
the-moisrex committed Dec 21, 2024
1 parent 9c4a925 commit eb2e31f
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 14 deletions.
2 changes: 1 addition & 1 deletion benchmarks/common_utils_pch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ static StrType str_generator(
StrType str;
str.reserve(size);
for (std::size_t i = 0; i < size; i++) {
str.append(chars);
str.push_back(chars[i % chars.size()]);
}
std::shuffle(str.begin(), str.end(), std::mt19937(std::random_device()()));
return str.substr(0, size);
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/utf_convertion/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
flags = -std=c++20 -isystem /usr/local/include -L/usr/local/lib -lpthread -lbenchmark_main -lbenchmark
flags = -std=c++23 -isystem /usr/local/include -L/usr/local/lib -lpthread -lbenchmark_main -lbenchmark
optflags = -flto -Ofast -DNDEBUG -march=native -mtune=native
files = utf_conversion_benchmark.cpp

Expand Down
85 changes: 79 additions & 6 deletions benchmarks/utf_convertion/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,85 @@
# UTF conversions benchmark

Version 2 does more work, obviously. The other implementation (which I found elsewhere) has a fast path for
ASCII strings, but it does not seem to gain much from it.

Clang 18.1.8:

```
-----------------------------------------------------
Benchmark Time CPU Iterations
-----------------------------------------------------
UTFConv_v1 19903 ns 19867 ns 34903
UTFConv_v2 24368 ns 24171 ns 29093
clang++ -std=c++23 -isystem /usr/local/include -L/usr/local/lib -lpthread -lbenchmark_main -lbenchmark -flto -Ofast -DNDEBUG -march=native -mtune=native utf_conversion_benchmark.cpp
2024-12-21T03:59:42-10:00
Running ./a.out
Run on (32 X 1370.7 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x16)
L1 Instruction 32 KiB (x16)
L2 Unified 2048 KiB (x16)
L3 Unified 36864 KiB (x1)
Load Average: 1.17, 1.27, 1.28
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
UTFConv_v1 16192 ns 16184 ns 43201
UTFConv_v1_variant 18646 ns 18638 ns 37570
UTFConv_v2 14719 ns 14713 ns 47801
UTFConv_v2_variant 20259 ns 20250 ns 34527
UTFConv_OtherImpl 16196 ns 16159 ns 43395
UTFConv_v1_ascii 3682 ns 3680 ns 189649
UTFConv_v2_ascii 6426 ns 6421 ns 108535
UTFConv_OtherImpl_ascii 6410 ns 6408 ns 108967
```

The version 2 does more things obviously.
g++ (GCC) 14.2.1 20240910:

```
g++ -std=c++23 -isystem /usr/local/include -L/usr/local/lib -lpthread -lbenchmark_main -lbenchmark -flto -Ofast -DNDEBUG -march=native -mtune=native utf_conversion_benchmark.cpp
2024-12-21T03:59:31-10:00
Running ./a.out
Run on (32 X 4602.67 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x16)
L1 Instruction 32 KiB (x16)
L2 Unified 2048 KiB (x16)
L3 Unified 36864 KiB (x1)
Load Average: 1.30, 1.30, 1.29
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
UTFConv_v1 15658 ns 15636 ns 44860
UTFConv_v1_variant 19684 ns 19674 ns 35629
UTFConv_v2 16626 ns 16619 ns 41914
UTFConv_v2_variant 23569 ns 23560 ns 29711
UTFConv_OtherImpl 16651 ns 16642 ns 41946
UTFConv_v1_ascii 3240 ns 3236 ns 217270
UTFConv_v2_ascii 7260 ns 7255 ns 96711
UTFConv_OtherImpl_ascii 7280 ns 7269 ns 99537
```


----

```bash
$ make; ./a.out --benchmark_min_time=3s

g++ -std=c++23 -isystem /usr/local/include -L/usr/local/lib -lpthread -lbenchmark_main -lbenchmark -flto -Ofast -DNDEBUG -march=native -mtune=native utf_conversion_benchmark.cpp
2024-12-21T04:01:59-10:00
Running ./a.out
Run on (32 X 1515.99 MHz CPU s)
CPU Caches:
L1 Data 48 KiB (x16)
L1 Instruction 32 KiB (x16)
L2 Unified 2048 KiB (x16)
L3 Unified 36864 KiB (x1)
Load Average: 0.54, 0.99, 1.17
------------------------------------------------------------------
Benchmark Time CPU Iterations
------------------------------------------------------------------
UTFConv_v1 15361 ns 15353 ns 273867
UTFConv_v1_variant 19581 ns 19573 ns 215039
UTFConv_v2 16988 ns 16978 ns 246694
UTFConv_v2_variant 23369 ns 23350 ns 179951
UTFConv_OtherImpl 16797 ns 16790 ns 248423
UTFConv_v1_ascii 3204 ns 3202 ns 1292673
UTFConv_v2_ascii 7269 ns 7266 ns 578137
UTFConv_OtherImpl_ascii 7241 ns 7235 ns 577496
```
199 changes: 194 additions & 5 deletions benchmarks/utf_convertion/utf_conversion_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "../benchmark.hpp"
#include "../common_utils_pch.hpp"

#include <cstring>

// NOLINTBEGIN(*-magic-numbers)
namespace v1 {

Expand Down Expand Up @@ -83,7 +85,23 @@ namespace v1 {

/// Converts a UTF-8 string into a UTF-32 string.
///
/// Code points are pulled one at a time with `next_code_point`; decoding stops
/// at the end of `src` or at the first code point that comes back as 0.
///
/// @param src  UTF-8 input
/// @return the decoded code points; its length is the number of code points
///         actually written, not the number of UTF-8 code units consumed
std::u32string utf8_to_utf32(std::u8string const& src) {
    std::u32string out;
    // src.size() elements are requested as the working buffer. This assumes
    // next_code_point consumes at least one code unit per code point it
    // returns, so the output can never outgrow the input — TODO confirm.
    // No separate reserve() is needed: resize_and_overwrite allocates itself.
    out.resize_and_overwrite(src.size(), [&src](char32_t* buf, std::size_t /*buf_size*/) {
        char32_t* write_pos = buf;
        auto      pos       = src.begin();
        while (pos != src.end()) {
            auto const code_point = next_code_point(pos, src.end());
            if (code_point == 0) {
                break;
            }
            *write_pos++ = code_point;
        }
        // The callback's result becomes the string's new length, so it must be
        // the count of elements written. Returning the input offset instead
        // would leave indeterminate trailing elements for multi-byte input.
        return static_cast<std::size_t>(write_pos - buf);
    });
    return out;
}

std::u32string utf8_to_utf32_variant(std::u8string const& src) {
std::u32string out;
out.reserve(src.size());

auto pos = src.begin();
while (pos != src.end()) {
Expand Down Expand Up @@ -240,7 +258,23 @@ namespace v2 {

/// Converts a UTF-8 string into a UTF-32 string.
///
/// Code points are pulled one at a time with `next_code_point`; decoding stops
/// at the end of `src` or at the first code point that comes back as 0.
///
/// @param src  UTF-8 input
/// @return the decoded code points; its length is the number of code points
///         actually written, not the number of UTF-8 code units consumed
std::u32string utf8_to_utf32(std::u8string const& src) {
    std::u32string out;
    // src.size() elements are requested as the working buffer. This assumes
    // next_code_point consumes at least one code unit per code point it
    // returns, so the output can never outgrow the input — TODO confirm.
    // No separate reserve() is needed: resize_and_overwrite allocates itself.
    out.resize_and_overwrite(src.size(), [&src](char32_t* buf, std::size_t /*buf_size*/) {
        char32_t* write_pos = buf;
        auto      pos       = src.begin();
        while (pos != src.end()) {
            auto const code_point = next_code_point(pos, src.end());
            if (code_point == 0) {
                break;
            }
            *write_pos++ = code_point;
        }
        // The callback's result becomes the string's new length, so it must be
        // the count of elements written. Returning the input offset instead
        // would leave indeterminate trailing elements for multi-byte input.
        return static_cast<std::size_t>(write_pos - buf);
    });
    return out;
}

std::u32string utf8_to_utf32_variant(std::u8string const& src) {
std::u32string out;
out.reserve(src.size());

auto pos = src.begin();
while (pos != src.end()) {
Expand All @@ -257,24 +291,179 @@ namespace v2 {

} // namespace v2

// NOTE(review): `str` is defined a second time further down in this file (from
// `str_generator<std::u8string>(10'000)`). Two namespace-scope definitions of the
// same name cannot coexist, so this one looks like a leftover — confirm and keep one.
auto const str = str8_generator(10'000);
namespace other_impl {
/// this is not my implementation
/// Decodes `len` UTF-8 code units starting at `buf` into UTF-32, writing one
/// `char32_t` per decoded code point through `out`.
///
/// @param buf  input code units
/// @param len  number of code units readable from `buf`
/// @param out  destination buffer; no bounds are checked here. Each code point
///             consumes at least one input code unit, so `len` elements suffice.
/// @return number of code points written, or 0 as soon as a truncated,
///         malformed, overlong, surrogate or out-of-range sequence is met.
///         An empty input also returns 0, so callers cannot tell the two apart.
size_t utf8_to_utf32(char8_t const* buf, size_t len, char32_t* out) {
uint8_t const* ptr = reinterpret_cast<uint8_t const*>(buf);
size_t pos = 0;
// remembered so the number of written elements can be computed at the end
char32_t* start{out};
while (pos < len) {
// ASCII fast path: when at least 16 code units remain, load them as two 64-bit
// words and test every high bit at once.
if (pos + 16 <= len) {
// memcpy is used for the loads (presumably to avoid misaligned / aliasing-unsafe
// pointer casts — the source does not say).
uint64_t v1;
std::memcpy(&v1, ptr + pos, sizeof(uint64_t));
uint64_t v2;
std::memcpy(&v2, ptr + pos + sizeof(uint64_t), sizeof(uint64_t));
uint64_t v{v1 | v2};
// no high bit set in any of the 16 bytes => all of them are ASCII
if ((v & 0x8080'8080'8080'8080) == 0) {
size_t final_pos = pos + 16;
// widen the 16 ASCII code units one by one
while (pos < final_pos) {
*out++ = char32_t(buf[pos]);
pos++;
}
continue;
}
}
// General path: decode a single sequence, dispatching on its lead byte.
uint8_t cu1 = ptr[pos];
if (cu1 < 0b1000'0000) {
// 0xxxxxxx: single ASCII code unit
*out++ = char32_t(cu1);
pos++;
} else if ((cu1 & 0b1110'0000) == 0b1100'0000) {
// We have a two-byte UTF-8
if (pos + 1 >= len) {
return 0;
} // minimal bound checking
// the trailing byte must have the 10xxxxxx continuation form
if ((ptr[pos + 1] & 0b1100'0000) != 0b1000'0000) {
return 0;
}
// range check
uint32_t cp = (cu1 & 0b0001'1111) << 6 | (ptr[pos + 1] & 0b0011'1111);
// cp < 0x80 is an overlong encoding of an ASCII value
if (cp < 0x80 || 0x7ff < cp) {
return 0;
}
*out++ = char32_t(cp);
pos += 2;
} else if ((cu1 & 0b1111'0000) == 0b1110'0000) {
// 1110xxxx: three-byte sequence; both trailing bytes must exist ...
if (pos + 2 >= len) {
return 0;
}

// ... and both must be continuation bytes
if ((ptr[pos + 1] & 0b1100'0000) != 0b1000'0000) {
return 0;
}
if ((ptr[pos + 2] & 0b1100'0000) != 0b1000'0000) {
return 0;
}
// range check
uint32_t cp = (cu1 & 0b0000'1111) << 12 | (ptr[pos + 1] & 0b0011'1111) << 6 |
(ptr[pos + 2] & 0b0011'1111);
// rejects overlong forms (< 0x800) and the surrogate range 0xD800..0xDFFF
if (cp < 0x800 || 0xffff < cp || (0xd7ff < cp && cp < 0xe000)) {
return 0;
}
*out++ = char32_t(cp);
pos += 3;
} else if ((cu1 & 0b1111'1000) == 0b1111'0000) { // 0b11110000
// four-byte sequence; all three trailing bytes must exist ...
if (pos + 3 >= len) {
return 0;
}
// ... and all three must be continuation bytes
if ((ptr[pos + 1] & 0b1100'0000) != 0b1000'0000) {
return 0;
}
if ((ptr[pos + 2] & 0b1100'0000) != 0b1000'0000) {
return 0;
}
if ((ptr[pos + 3] & 0b1100'0000) != 0b1000'0000) {
return 0;
}

uint32_t cp = (cu1 & 0b0000'0111) << 18 | (ptr[pos + 1] & 0b0011'1111) << 12 |
(ptr[pos + 2] & 0b0011'1111) << 6 | (ptr[pos + 3] & 0b0011'1111);
// rejects overlong forms (<= 0xFFFF) and values above U+10FFFF
if (cp <= 0xffff || 0x10'ffff < cp) {
return 0;
}
*out++ = char32_t(cp);
pos += 4;
} else {
// any other lead byte (a stray 10xxxxxx continuation, or 11111xxx) is invalid
return 0;
}
}
// number of code points written
return out - start;
}

/// String-level wrapper around the pointer-based decoder above: decodes all of
/// `src` into a new UTF-32 string whose length is the value the decoder returns
/// (so the result is empty when the decoder reports 0).
std::u32string utf8_to_utf32(std::u8string const& src) {
    std::u32string decoded;
    auto const     input_len = src.size();
    // one output slot per input code unit is requested as the working buffer
    decoded.resize_and_overwrite(input_len, [&src, input_len](char32_t* dest, std::size_t /*capacity*/) {
        return utf8_to_utf32(src.data(), input_len, dest);
    });
    return decoded;
}

} // namespace other_impl

// Shared benchmark inputs, built once as namespace-scope constants; both
// generators are called with a size argument of 10'000.
// `str8` comes from `str8_generator` and is used by the UTFConv_* benchmarks.
auto const str8 = str8_generator(10'000);
// `str` comes from `str_generator<std::u8string>` and is used by the *_ascii
// benchmarks (presumably ASCII-only content, going by those benchmark names —
// confirm against the generator's default character set).
auto const str = str_generator<std::u8string>(10'000);

/// Benchmark: `v1::utf8_to_utf32` over the shared `str8` input.
/// `str` is the input of the separate UTFConv_v1_ascii benchmark, so only the
/// `str8` call belongs here; `res` is declared exactly once per iteration.
static void UTFConv_v1(benchmark::State& state) {
    for ([[maybe_unused]] auto _ : state) {
        auto res = v1::utf8_to_utf32(str8);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(res);
    }
}

BENCHMARK(UTFConv_v1);

/// Benchmark: `v1::utf8_to_utf32_variant` over the shared `str8` input.
static void UTFConv_v1_variant(benchmark::State& state) {
    for ([[maybe_unused]] auto tick : state) {
        auto converted = v1::utf8_to_utf32_variant(str8);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(converted);
    }
}

BENCHMARK(UTFConv_v1_variant);

/// Benchmark: `v2::utf8_to_utf32` over the shared `str8` input.
/// `str` is the input of the separate UTFConv_v2_ascii benchmark, so only the
/// `str8` call belongs here; `res` is declared exactly once per iteration.
static void UTFConv_v2(benchmark::State& state) {
    for ([[maybe_unused]] auto _ : state) {
        auto res = v2::utf8_to_utf32(str8);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(res);
    }
}

BENCHMARK(UTFConv_v2);

/// Benchmark: `v2::utf8_to_utf32_variant` over the shared `str8` input.
static void UTFConv_v2_variant(benchmark::State& state) {
    for ([[maybe_unused]] auto tick : state) {
        auto converted = v2::utf8_to_utf32_variant(str8);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(converted);
    }
}

BENCHMARK(UTFConv_v2_variant);

/// Benchmark: the third-party decoder `other_impl::utf8_to_utf32` over the
/// shared `str8` input.
static void UTFConv_OtherImpl(benchmark::State& state) {
    for ([[maybe_unused]] auto _ : state) {
        // Must call other_impl:: here; calling v2:: would just time v2 a second
        // time under this benchmark's name.
        auto res = other_impl::utf8_to_utf32(str8);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(res);
    }
}

BENCHMARK(UTFConv_OtherImpl);

//////////////////////////////// ASCII ////////////////////////////

/// Benchmark: `v1::utf8_to_utf32` over the shared `str` input (the `_ascii` case).
static void UTFConv_v1_ascii(benchmark::State& state) {
    for ([[maybe_unused]] auto tick : state) {
        auto converted = v1::utf8_to_utf32(str);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(converted);
    }
}

BENCHMARK(UTFConv_v1_ascii);

/// Benchmark: `v2::utf8_to_utf32` over the shared `str` input (the `_ascii` case).
static void UTFConv_v2_ascii(benchmark::State& state) {
    for ([[maybe_unused]] auto tick : state) {
        auto converted = v2::utf8_to_utf32(str);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(converted);
    }
}

BENCHMARK(UTFConv_v2_ascii);

/// Benchmark: the third-party decoder `other_impl::utf8_to_utf32` over the
/// shared `str` input (the `_ascii` case).
static void UTFConv_OtherImpl_ascii(benchmark::State& state) {
    for ([[maybe_unused]] auto _ : state) {
        // Must call other_impl:: here; calling v2:: would just time v2 a second
        // time under this benchmark's name.
        auto res = other_impl::utf8_to_utf32(str);
        // keep the result observable so the conversion is not optimized away
        benchmark::DoNotOptimize(res);
    }
}

BENCHMARK(UTFConv_OtherImpl_ascii);

// NOLINTEND(*-magic-numbers)
2 changes: 1 addition & 1 deletion webpp/common/meta.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ namespace webpp::details {

} // namespace webpp::details

# if defined(__cpp_if_consteval) && CXX23
# if defined(__cpp_if_consteval) && defined(CXX23)
# define webpp_assume(...) \
do { \
if consteval { \
Expand Down

0 comments on commit eb2e31f

Please sign in to comment.