layout | title |
---|---|
post |
第156期 |
公众号
qq群 点击进入
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等,评论区留言
本期文章由 HNY 赞助
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2024-05-01 第252期
教了一些google测试使用案例,这里直接贴一下吧,还算常用
分段采集
static void sorting(bm::State &state) {
auto count = static_cast<size_t>(state.range(0));
auto include_preprocessing = static_cast<bool>(state.range(1));
std::vector<int32_t> array(count);
std::iota(array.begin(), array.end(), 1);
for (auto _ : state) {
if (!include_preprocessing)
state.PauseTiming();
// Reverse order is the most classical worst case,
// but not the only one.
std::reverse(array.begin(), array.end());
if (!include_preprocessing)
state.ResumeTiming();
std::sort(array.begin(), array.end());
bm::DoNotOptimize(array.size());
}
}
BENCHMARK(sorting)->Args({3, false})->Args({3, true});
BENCHMARK(sorting)->Args({4, false})->Args({4, true});
不同策略压测
template <typename execution_policy_t>
static void super_sort(bm::State &state, execution_policy_t &&policy) {
auto count = static_cast<size_t>(state.range(0));
std::vector<int32_t> array(count);
std::iota(array.begin(), array.end(), 1);
for (auto _ : state) {
std::reverse(policy, array.begin(), array.end());
std::sort(policy, array.begin(), array.end());
bm::DoNotOptimize(array.size());
}
state.SetComplexityN(count);
state.SetItemsProcessed(count * state.iterations());
state.SetBytesProcessed(count * state.iterations() * sizeof(int32_t));
}
BENCHMARK_CAPTURE(super_sort, seq, std::execution::seq)
->RangeMultiplier(8)
->Range(1l << 20, 1l << 32)
->MinTime(10)
->Complexity(bm::oNLogN);
BENCHMARK_CAPTURE(super_sort, par_unseq, std::execution::par_unseq)
->RangeMultiplier(8)
->Range(1l << 20, 1l << 32)
->MinTime(10)
->Complexity(bm::oNLogN);
perf
$ sudo ./build_release/tutorial --benchmark_perf_counters="CYCLES,INSTRUCTIONS"
$ sudo perf stat taskset 0xEFFFEFFFEFFFEFFFEFFFEFFFEFFFEFFF ./build_release/tutorial --benchmark_filter=super_sort
Performance counter stats for 'taskset 0xEFFFEFFFEFFFEFFFEFFFEFFFEFFFEFFF ./build_release/tutorial --benchmark_filter=super_sort':
23048674.55 msec task-clock # 35.901 CPUs utilized
6627669 context-switches # 0.288 K/sec
75843 cpu-migrations # 0.003 K/sec
119085703 page-faults # 0.005 M/sec
91429892293048 cycles # 3.967 GHz (83.33%)
13895432483288 stalled-cycles-frontend # 15.20% frontend cycles idle (83.33%)
3277370121317 stalled-cycles-backend # 3.58% backend cycles idle (83.33%)
16689799241313 instructions # 0.18 insn per cycle
# 0.83 stalled cycles per insn (83.33%)
3413731599819 branches # 148.110 M/sec (83.33%)
11861890556 branch-misses # 0.35% of all branches (83.34%)
642.008618457 seconds time elapsed
21779.611381000 seconds user
1244.984080000 seconds sys
struct Point { int x, y; };
void foo(unsigned char* buf, size_t len) {
assert(len == sizeof(Point));
Point* p = reinterpret_cast<Point*>(buf);
if (p->x == 0) {
// ...
}
}
这段代码是UB,主要问题是buf不能保证活着,所以通常有一种转移大法
void foo(unsigned char* buf, size_t len) {
assert(len == sizeof(Point));
Point point; // a Point is created
std::memcpy(&point, buf, len);
if (point.x == 0) { // Ok
// ...
}
}
怎么避免这种拷贝?就比如说我作为写代码的人我确认这段buf绝对是活的,start_lifetime_as
void foo(unsigned char* buf, size_t len) {
assert(len == sizeof(Point));
Point* p = std::start_lifetime_as<Point>(buf);
if (p->x == 0) {
// ...
}
}
我们再来回顾一下new和malloc的区别
new分配了空间(1)然后构造了对象(2),malloc没有构造
实际上从构造的那一刻开始 new还生成了一个活的对象,开始了一个对象的生命周期 start lifetime(3)
那对于placement new来说,就没有 (1)
对于这种buffer转POD对象的场景,就缺少 (3)
我们不知道他是不是活的,这也就是 start_lifetime_as
我们再回到start_lifetime_as 和 reinterpret_cast
实际上reinterpret_cast就是过于强了
对于只读的这么玩只要作者能保证buffer没问题UB也就UB了对付用
但是reinterpret_cast不能保证对应的buffer是不是活的,而从c来的语法,malloc就直接用了,根本就没有UB这一说
struct X { int a, b; };
X *make_x() {
X *p = (X*)malloc(sizeof(struct X));
p->a = 1; // before P0593: UB, no X lives at p
p->b = 2; // before P0593: UB, no X lives at p
return p;
}
按照start_lifetime_as的设计,得这么写
X *p = std::start_lifetime_as<X>( malloc(sizeof(struct X)) );
说了这么一大堆,也没有什么最佳解法。知道这套东西傻逼就好了,只能修修补补
看一乐
expected的大小?https://godbolt.org/z/xx3eWcP8x
#include <iostream>
#include <typeinfo>
#include <expected>
#include <cxxabi.h>
template <typename T, typename U>
void printSizes() {
int status = 0;
char *realnameT = abi::__cxa_demangle(typeid(T).name(), 0, 0, &status);
char *realnameU = abi::__cxa_demangle(typeid(U).name(), 0, 0, &status);
std::cout << "\nType: " << (realnameT ? realnameT : typeid(T).name()) << '\n';
std::cout << "Size: " << sizeof(T) << '\n';
std::cout << "Type: " << (realnameU ? realnameU : typeid(U).name()) << '\n';
std::cout << "Size: " << sizeof(U) << '\n';
std::cout << "Sizeof std::expected: ";
std::cout << sizeof(std::expected<T, U>) << '\n';
free(realnameT);
free(realnameU);
}
int main() {
printSizes<int, std::string>();
printSizes<int, int>();
printSizes<int, double>();
printSizes<int, std::pair<int, int>>();
}
/*
Type: int
Size: 4
Type: std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >
Size: 32
Sizeof std::expected: 40
Type: int
Size: 4
Type: int
Size: 4
Sizeof std::expected: 8
Type: int
Size: 4
Type: double
Size: 8
Sizeof std::expected: 16
Type: int
Size: 4
Type: std::pair<int, int>
Size: 8
Sizeof std::expected: 12
*/
这个是之前的解析文件挑战,大概就是读到hashmap然后聚合一下
作者从map到hash到多线程优化,比用stl实现快很多倍。看个乐。感觉这就是面试题
直接贴godbolt和代码吧 https://godbolt.org/z/xc5MjTrob
#include <iostream>
#include <sys/stat.h>
namespace detail
{
// Tag classes to get suggestive type names into error messages
struct invalid_permission_character {};
struct invalid_permission_string_length {};
// Check a single character in the string, returning
// the mode_t for that bit-position. Peculiar name means
// we get a suggestive error message
template<int position, char accept>
consteval mode_t expected_character_at_position(const char * const permission_string)
{
const char c = permission_string[position];
if(c == accept) { return 1 << (8-position); }
if(c == '-') { return 0; }
throw invalid_permission_character{};
}
// Worker function, assumes that it is called only by
// from_readable_mode() or operator ""_mode, which have
// done checking beforehand for the validity of s.
//
// It's wordy, but that does mean you get a suggestive
// error message with the bit-position and expected character.
consteval mode_t from_readable_mode_string(const char * const s)
{
return
detail::expected_character_at_position<0, 'r'>(s) |
detail::expected_character_at_position<1, 'w'>(s) |
detail::expected_character_at_position<2, 'x'>(s) |
detail::expected_character_at_position<3, 'r'>(s) |
detail::expected_character_at_position<4, 'w'>(s) |
detail::expected_character_at_position<5, 'x'>(s) |
detail::expected_character_at_position<6, 'r'>(s) |
detail::expected_character_at_position<7, 'w'>(s) |
detail::expected_character_at_position<8, 'x'>(s);
}
}
// Turns a 9-character permission string like you would get
// from `ls -l` into the mode_t that it represents.
//
// (the [10] is because of the trailing NUL byte)
consteval mode_t from_readable_mode(const char (&permission_string)[10])
{
return detail::from_readable_mode_string(permission_string);
}
// Turns a 9-character permission string like you would get
// from `ls -l` into the mode_t that it represents.
consteval mode_t operator""_mode(const char *s, size_t len)
{
if (len != 9)
{
throw detail::invalid_permission_string_length{};
}
return detail::from_readable_mode_string(s);
}
// Shorthand for printing a line with a string literal
// and also getting the consteval value of the mode_t
// represented by that literal.
#define EXAMPLE(x) \
std::cout << x << '\t' << std::oct << from_readable_mode(x) << std::dec << '\n'
int main() {
EXAMPLE("rw-rw----"); // 660
EXAMPLE("rwxr-xr-x"); // 755
EXAMPLE("r--------"); // 400
EXAMPLE("-w-r---wx"); // 243??
// These won't even compile, but show compile-time
// bugs being caught.
//
// EXAMPLE("bug"); // Not a 9-character literal
// EXAMPLE("rwxbadbug");
// Using _mode to indicate what the string means
// might be easier to read.
std::cout << "rw-r--r--" << '\t' << std::oct << "rw-r--r--"_mode << std::dec << '\n';
// These won't even compile, for the same reasons
//
// std::cout << "birb"_mode << "------uwu"_mode;
return 0;
}
- Awaiting a set of handles with a timeout, part 1: Starting with two
- Awaiting a set of handles with a timeout, part 2: Continuing with two
- Awaiting a set of handles with a timeout, part 3: Going beyond two
这里放个预告哈,cppcon视频出完了。后面准备更新cppcon2023了
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群753302367和作者对线
- OffsetAllocator 一个O1内存实现,类似TLSF 这里标记个TODO有时间研究一下
- rusty.cpp 抄rust的特性实现
- 一个function实现 只存指针。对于lambda应该不行,生命周期有问题,另外有五个指针,相当于模拟vtable
时间真快,转眼间假期又结束了
最近有点忙可能更新很不及时。主要还是有内容的文章少,
加上干眼症加重,看屏幕眼睛酸的要死,想把眼珠子抠出来洗洗
最近也发生了很多事情。说不出啥话来
推荐大家都看一看了不起的盖茨比 不要做一个浪漫的傻瓜