layout | title |
---|---|
post |
第114期 |
公众号
RSS https://github.com/wanghenshui/cppweeklynews/releases.atom
欢迎投稿,推荐或自荐文章/软件/资源等
标准委员会动态/ide/编译器信息放在这里
编译器信息最新动态推荐关注hellogcc公众号 本周更新 2023-05-17 第202期
美国印第安纳大学布鲁明顿主校区Dr. Dingwen Tao招收高性能计算方向博士后和全奖博士生
有感兴趣的可以看看,不知道我的读者里有米有学生
内容
- MaskRay: Relocation Overflow and Code Models 这个他写了博客 Relocation overflow and code models
- 王璞:Bluespec:高层次函数式硬件描述语言介绍
- 刘鑫: 我在 openEuler RISC-V SIG 出道的日子
- Revy:作为 RevyOS 之父的首次公开亮相
- 张来: RISC-V助力端侧智能语音AIoT场景落地
确实,学生很需要这些,不然都得从头学,不知道我的读者学生多不多,如果没学,这里补课 https://missing-semester-cn.github.io/
c++知识点的一本书,这里友情推荐一下
一段猥琐的SFINAE
template <typename T> class is_std_string_like {
template <typename U>
static auto check(U* p)
-> decltype((void)p->find('a'), p->length(), (void)p->data(), int());
template <typename> static void check(...);
public:
static constexpr const bool value =
is_string<T>::value ||
std::is_convertible<T, std_string_view<char>>::value ||
!std::is_void<decltype(check<T>(nullptr))>::value;
};
这段代码一搜遍地都是,fmt/range.h
d41d8c手把手带你走近这段代码的演进。。。不过还是直接看c++20的写法吧
template <typename T> constexpr bool is_std_string_like =
is_string<T>::value ||
is_convertible<T, std_string_view<char>>::value ||
requires(T* p) {
p->find('a');
p->length();
p->data();
};
你看懂这个就行了
还是那些东西,循环不变量外提,循环外提等等
#include <memory_resource>
#include <cassert>
int main() {
// Create a memory resource
std::pmr::monotonic_buffer_resource resource(1024);
// Create a polymorphic allocator using the memory resource
std::pmr::polymorphic_allocator<int> allocator(&resource);
// Allocate memory for an array of 10 integers using the polymorphic allocator
int* ptr = allocator.allocate(10);
// Test that the allocation was successful
assert(ptr);
// Deallocate the memory
allocator.deallocate(ptr, 10);
}
用的还是挺多的
有点意思
老生常谈了。这里介绍一下thread_local,直接看代码
#include <iostream>
#include <thread>
#include <mutex>
std::mutex mutPrint;
thread_local int x = 0;
void foo() {
thread_local int y = 0;
std::lock_guard guard(mutPrint);
std::cout << "in thread\t" << std::this_thread::get_id() << " ";
std::cout << "&x " << &x << ", ";
std::cout << "&y " << &y << '\n';
}
int main() {
std::cout << "main\t" << std::this_thread::get_id() << " &x " << &x << '\n';
std::jthread worker1 { foo };
foo();
std::jthread worker2 { foo };
foo();
}
/*
main 4154632640 &x 0xf7a2a9b8
in thread 4154632640 &x 0xf7a2a9b8, &y 0xf7a2a9bc
in thread 4154628928 &x 0xf7a29b38, &y 0xf7a29b3c
in thread 4154632640 &x 0xf7a2a9b8, &y 0xf7a2a9bc
in thread 4146236224 &x 0xf7228b38, &y 0xf7228b3c
*/
注意地址可不一样
直接贴代码吧
普通
size_t scalar_utf8_length(const char * c, size_t len) {
size_t answer = 0;
for(size_t i = 0; i<len; i++) {
if((c[i]>>7)) { answer++;}
}
return answer + len;
}
复杂
uint64_t utf8_length_kvakil(const uint8_t *data, uint32_t length) {
uint64_t result = 0;
const int lanes = sizeof(uint8x16_t);
uint8_t rem = length % lanes;
const uint8_t *simd_end = data + (length / lanes) * lanes;
const uint8x16_t threshold = vdupq_n_u8(0x80);
for (; data < simd_end; data += lanes) {
// load 16 bytes
uint8x16_t input = vld1q_u8(data);
// compare to threshold (0x80)
uint8x16_t withhighbit = vcgeq_u8(input, threshold);
// shift and narrow
uint8x8_t highbits = vshrn_n_u16(vreinterpretq_u16_u8(withhighbit), 4);
// we have 0, 4 or 8 bits per byte
uint8x8_t bitsperbyte = vcnt_u8(highbits);
// sum the bytes vertically to uint16_t
result += vaddlv_u8(bitsperbyte);
}
result /= 4; // we overcounted by a factor of 4
// scalar tail
for (uint8_t j = 0; j < rem; j++) {
result += (simd_end[j] >> 7);
}
return result + length;
}
还是很复杂
uint64_t utf8_length_cawley(const uint8_t *data, uint32_t length) {
uint64_t result = 0;
const int lanes = sizeof(uint8x16_t);
uint8_t rem = length % lanes;
const uint8_t *simd_end = data + (length / lanes) * lanes;
const uint8x16_t threshold = vdupq_n_u8(0x80);
for (; data < simd_end; data += lanes) {
// load 16 bytes
uint8x16_t input = vld1q_u8(data);
// compare to threshold (0x80)
uint8x16_t withhighbit = vcgeq_u8(input, threshold);
// vertical addition
result -= vaddvq_s8(withhighbit);
}
// scalar tail
for (uint8_t j = 0; j < rem; j++) {
result += (simd_end[j] >> 7);
}
return result + length;
}
速度 scalar code ~7 GB/s NEON code (Vakil) ~27 GB/s NEON code (Cawley) ~30 GB/s
上一期讲了无分支的二分查找,这个作者看了也很感兴趣,也写了一版,并写了推导过程。大家感兴趣的可以看看推导过程,这里直接贴代码了
template<typename It, typename T, typename Cmp>
It lower_bound(It begin, It end, const T& value, Cmp comp) {
size_t n = end - begin;
size_t b = 0;
for (size_t bit = std::bit_floor(n); bit != 0; bit >>= 1) {
size_t i = (b | bit) - 1;
if (i < n && comp(begin[i], value)) b |= bit;
}
return begin + b;
}
还是经典的循环中的常量应用生命周期问题,作者建议,用 &限定接口并标记删除,这样就不会用到常量引用了
原来的代码
struct Pixel{
int r = 0;
int g = 0;
int b = 0;
friend auto operator<=>(const Pixel& a, const Pixel& b) = default;
};
struct Metadata { /* some metadata here, width, height, etc. */ };
class MyRawImage{
std::vector<Pixel> m_buffer;
Metadata m_metadata;
public:
MyRawImage(std::vector<Pixel> src) : m_buffer(std::move(src)) {}
const Pixel& operator[](int index) const { return m_buffer[index]; }
Pixel& operator[](int index) { return m_buffer[index]; }
const std::vector<Pixel>& data() const { return m_buffer; }
const Metadata& information() const { return m_metadata; }
};
MyRawImage loadImage(int i){
return std::vector<Pixel>(i * 100, Pixel {i, i, i});
}
MyRawImage problematic(int i){
std::vector<Pixel> filtered;
auto filter = [](Pixel p) {
p.r = std::min(p.r, 0xFF);
p.g = std::min(p.g, 0xFF);
p.b = std::min(p.b, 0xFF);
return p;
};
// oops:
// equivalent of `auto&& ps = loadImage(i).data(); for (Pixel p : ps) { ... }`
// loadImage() returns a temporary, temporary.data() reference is stored, then
// for-loop iterates over a stored reference to a property of deleted temporary
// 这里出大问题
// 最简单的修复方法就是存一下loadImage的返回值
for(Pixel p : loadImage(i).data())
filtered.push_back(filter(p));
return filtered;
}
Pixel fine(int i) {
auto max = [](auto&& range) -> Pixel {
return *std::max_element(std::begin(range), std::end(range));
};
// this one is fine: a temporary will be destroyed after the max() calcualtion
return max(loadImage(i).data());
}
int main(int, char**) {
constexpr static int pattern = 0x12;
constexpr static Pixel pixelPattern = Pixel { pattern, pattern, pattern };
Pixel maxPixel = fine(pattern);
assert(maxPixel == pixelPattern);
MyRawImage img = problematic(pattern);
auto isGood = [](const Pixel& p) { return p == pixelPattern; };
assert(img.data().end() == std::ranges::find_if_not(img.data(), isGood));
}
c++23之后循环就修复这个问题了,作者考虑成员函数限定符来解决这个问题
#include <iostream>
struct S {
void f() & { std::cout << "左值\n"; }
void f() && { std::cout << "右值\n"; }
};
int main() {
S s;
s.f(); // 打印“左值”
std::move(s).f(); // 打印“右值”
S().f(); // 打印“右值”
}
通过这个思路,上面的代码,实现两个data
#include <ranges>
#include <cassert>
#include <vector>
struct Pixel {
int r = 0;
int g = 0;
int b = 0;
friend auto operator<=>(const Pixel& a, const Pixel& b) = default;
};
struct Metadata { /* some metadata here, width, height, etc. */ };
class MyRawImage {
std::vector<Pixel> m_buffer;
Metadata m_metadata;
public:
class UnsafeReference {
std::vector<Pixel>& m_buffer;
public:
UnsafeReference(std::vector<Pixel>& buffer) : m_buffer(buffer) {}
// I would like it to be a free-function rather than a member function,
// to lower the chance that Intellisence will provide a disservice
// to the developer by slipping an unsafe getter by auto-suggestions.
// it's good to require a fair bit of attention here
friend std::vector<Pixel>& allowUnsafe(UnsafeReference&&);
};
MyRawImage(std::vector<Pixel> src) : m_buffer(std::move(src)) {}
const Pixel& operator[](int index) const { return m_buffer[index]; }
Pixel& operator[](int index) { return m_buffer[index]; }
const std::vector<Pixel>& data() const & { return m_buffer; }
// 循环临时对象,走这里,封装一层
UnsafeReference data() && { return m_buffer; }
const Metadata& information() const & { return m_metadata; }
const Metadata& information() && = delete;
};
std::vector<Pixel>& allowUnsafe(MyRawImage::UnsafeReference&& unsafe){
return unsafe.m_buffer;
}
MyRawImage loadImage(int i){
return std::vector<Pixel>(i * 100, Pixel {i, i, i});
}
MyRawImage was_problematic(int i){
std::vector<Pixel> filtered;
auto filter = [](Pixel p) {
p.r = std::min(p.r, 0xFF);
p.g = std::min(p.g, 0xFF);
p.b = std::min(p.b, 0xFF);
return p;
};
const MyRawImage& image = loadImage(i);
for(Pixel p : image.data())
filtered.push_back(filter(p));
return filtered;
}
Pixel fine_again(int i)
{
auto max = [](auto&& range) -> Pixel
{
return *std::max_element(std::begin(range), std::end(range));
};
// this one is fine: a temporary will be destroyed after the max() calcualtion
return max(allowUnsafe(loadImage(i).data()));
}
int main(int, char**){
constexpr static int pattern = 0x12;
constexpr static Pixel pixelPattern = Pixel { pattern, pattern, pattern };
Pixel maxPixel = fine_again(pattern);
assert(maxPixel == pixelPattern);
MyRawImage img = was_problematic(pattern);
auto isGood = [](const Pixel& p) { return p == pixelPattern; };
assert(img.data().end() == std::ranges::find_if_not(img.data(), isGood));
}
我觉得这个解决方法不咋地
协程入门demo
代码在这里 https://github.com/dietmarkuehl/co_await-all-the-things/blob/main/task-using-std.cpp
- asteria 一个脚本语言,可嵌入,长期找人,希望胖友们帮帮忙,也可以加群753302367和作者对线
- Unilang deepin的一个通用编程语言,点子有点意思,也缺人,感兴趣的可以github讨论区或者deepin论坛看一看。这里也挂着长期推荐了
- poac 类似cargo的编译工具
- llama.cpp 有人玩过这个吗?我都想买个显卡折腾折腾了
- A header-only C++20 library of fast delegates and events 感觉就是function和function_ref结合体
- snitch 单元测试库
如果有疑问评论最好在上面链接到评论区里评论,这样方便搜索,微信公众号有点封闭/知乎吞评论