Skip to content

Commit

Permalink
Slightly optimize Utf8CodePointLen
Browse files Browse the repository at this point in the history
A few more operations but the "lookup table" is now an immediate constant.

https://godbolt.org/z/7YG3ohWT6
  • Loading branch information
glebm committed Aug 5, 2024
1 parent 730c26a commit e5e007c
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions Source/utils/utf8.hpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#pragma once

#include <cstdint>
#include <cstddef>
#include <string>
#include <string_view>
#include <utility>

namespace devilution {

Expand Down Expand Up @@ -54,10 +53,19 @@ inline bool IsTrailUtf8CodeUnit(char x)

/**
 * @brief Returns the number of code units for a code point starting at `*src`.
 *
 * The length is derived solely from the top five bits of the first byte:
 *  - `0xxxxxxx` (0x00-0x7F): 1
 *  - `10xxxxxx` (0x80-0xBF, trail byte): 1
 *  - `110xxxxx` (0xC0-0xDF): 2
 *  - `1110xxxx` (0xE0-0xEF): 3
 *  - `11110xxx` (0xF0-0xF7): 4
 *  - `11111xxx` (0xF8-0xFF, never a valid start byte): 1
 *
 * Only the first byte is inspected; the following bytes are not validated.
 *
 * @param src Pointer to the first code unit. Must point to at least one
 *            readable byte (`src` must not be empty).
 * @return A length in the range [1, 4]. Never 0, so callers that advance by
 *         the returned value always make progress, even on invalid input.
 */
inline size_t Utf8CodePointLen(const char *src)
{
	// Packed lookup table: 32 entries of 2 bits each, indexed by the top
	// 5 bits of the byte. Each entry stores `code point length - 1`.
	// Storing `length - 1` guarantees that the function never returns 0,
	// even for invalid bytes (a 0 could lead to infinite loops in callers).
	// Keeping the table in a 64-bit immediate avoids a memory load.
	// Generated with:
	// ruby -e 'p "0000000000000000000000001111223".reverse.to_i(4).to_s(16)'
	constexpr unsigned long long PackedLengthsMinusOne = 0x3a55000000000000ULL;

	const unsigned key = static_cast<unsigned char>(*src) >> 3; // 0..31
	const unsigned shift = 2 * key;                             // 0..62
	return static_cast<size_t>(((PackedLengthsMinusOne >> shift) & 0x3) + 1);
}

/**
Expand Down

0 comments on commit e5e007c

Please sign in to comment.