Skip to content

Commit

Permalink
[Parser] Templatize lexing of integers (WebAssembly#6272)
Browse files Browse the repository at this point in the history
Have a single implementation for lexing each of unsigned, signed, and
uninterpreted integers, each generic over the bit width of the integer. This
reduces duplication in the existing code and it will make it much easier to
support lexing more 8- and 16-bit integers.
  • Loading branch information
tlively authored and radekdoulik committed Jul 12, 2024
1 parent b78f620 commit 1653bfd
Show file tree
Hide file tree
Showing 5 changed files with 216 additions and 274 deletions.
66 changes: 16 additions & 50 deletions src/parser/input-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ inline std::optional<uint64_t> ParseInput::takeOffset() {
if (subLexer == subLexer.end()) {
return {};
}
if (auto o = subLexer->getU64()) {
if (auto o = subLexer->getU<uint64_t>()) {
++subLexer;
if (subLexer == subLexer.end()) {
++lexer;
Expand All @@ -122,7 +122,7 @@ inline std::optional<uint32_t> ParseInput::takeAlign() {
if (subLexer == subLexer.end()) {
return {};
}
if (auto a = subLexer->getU32()) {
if (auto a = subLexer->getU<uint32_t>()) {
++subLexer;
if (subLexer == subLexer.end()) {
++lexer;
Expand All @@ -134,77 +134,43 @@ inline std::optional<uint32_t> ParseInput::takeAlign() {
return {};
}

inline std::optional<uint64_t> ParseInput::takeU64() {
template<typename T> inline std::optional<T> ParseInput::takeU() {
if (auto t = peek()) {
if (auto n = t->getU64()) {
if (auto n = t->getU<T>()) {
++lexer;
return n;
}
}
return std::nullopt;
}

inline std::optional<int64_t> ParseInput::takeS64() {
template<typename T> inline std::optional<T> ParseInput::takeI() {
if (auto t = peek()) {
if (auto n = t->getS64()) {
if (auto n = t->getI<T>()) {
++lexer;
return n;
}
}
return {};
return std::nullopt;
}

inline std::optional<int64_t> ParseInput::takeI64() {
if (auto t = peek()) {
if (auto n = t->getI64()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint64_t> ParseInput::takeU64() {
return takeU<uint64_t>();
}

inline std::optional<uint32_t> ParseInput::takeU32() {
if (auto t = peek()) {
if (auto n = t->getU32()) {
++lexer;
return n;
}
}
return std::nullopt;
inline std::optional<uint64_t> ParseInput::takeI64() {
return takeI<uint64_t>();
}

inline std::optional<int32_t> ParseInput::takeS32() {
if (auto t = peek()) {
if (auto n = t->getS32()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint32_t> ParseInput::takeU32() {
return takeU<uint64_t>();
}

inline std::optional<int32_t> ParseInput::takeI32() {
if (auto t = peek()) {
if (auto n = t->getI32()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint32_t> ParseInput::takeI32() {
return takeI<uint32_t>();
}

inline std::optional<uint8_t> ParseInput::takeU8() {
if (auto t = peek()) {
if (auto n = t->getU32()) {
if (n <= std::numeric_limits<uint8_t>::max()) {
++lexer;
return uint8_t(*n);
}
}
}
return {};
}
inline std::optional<uint8_t> ParseInput::takeU8() { return takeU<uint8_t>(); }

inline std::optional<double> ParseInput::takeF64() {
if (auto t = peek()) {
Expand Down
11 changes: 7 additions & 4 deletions src/parser/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,9 @@ struct ParseInput {
std::optional<uint64_t> takeOffset();
std::optional<uint32_t> takeAlign();
std::optional<uint64_t> takeU64();
std::optional<int64_t> takeS64();
std::optional<int64_t> takeI64();
std::optional<uint64_t> takeI64();
std::optional<uint32_t> takeU32();
std::optional<int32_t> takeS32();
std::optional<int32_t> takeI32();
std::optional<uint32_t> takeI32();
std::optional<uint8_t> takeU8();
std::optional<double> takeF64();
std::optional<float> takeF32();
Expand All @@ -67,6 +65,11 @@ struct ParseInput {
Index getPos();
[[nodiscard]] Err err(Index pos, std::string reason);
[[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); }

private:
template<typename T> std::optional<T> takeU();
template<typename T> std::optional<T> takeS();
template<typename T> std::optional<T> takeI();
};

#include "input-impl.h"
Expand Down
71 changes: 23 additions & 48 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,77 +767,52 @@ std::optional<LexResult> keyword(std::string_view in) {

} // anonymous namespace

std::optional<uint64_t> Token::getU64() const {
template<typename T> std::optional<T> Token::getU() const {
static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == NoSign) {
return tok->n;
}
}
return {};
}

std::optional<int64_t> Token::getS64() const {
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == Neg) {
if (uint64_t(INT64_MIN) <= tok->n || tok->n == 0) {
return int64_t(tok->n);
}
// TODO: Add error production for signed underflow.
} else {
if (tok->n <= uint64_t(INT64_MAX)) {
return int64_t(tok->n);
}
// TODO: Add error production for signed overflow.
}
}
return {};
}

std::optional<uint64_t> Token::getI64() const {
if (auto n = getU64()) {
return *n;
}
if (auto n = getS64()) {
return *n;
}
return {};
}

std::optional<uint32_t> Token::getU32() const {
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == NoSign && tok->n <= UINT32_MAX) {
return int32_t(tok->n);
if (tok->sign == NoSign && tok->n <= std::numeric_limits<T>::max()) {
return T(tok->n);
}
// TODO: Add error production for unsigned overflow.
}
return {};
}

std::optional<int32_t> Token::getS32() const {
template<typename T> std::optional<T> Token::getS() const {
static_assert(std::is_integral_v<T> && std::is_signed_v<T>);
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == Neg) {
if (uint64_t(INT32_MIN) <= tok->n || tok->n == 0) {
return int32_t(tok->n);
if (uint64_t(std::numeric_limits<T>::min()) <= tok->n || tok->n == 0) {
return T(tok->n);
}
} else {
if (tok->n <= uint64_t(INT32_MAX)) {
return int32_t(tok->n);
if (tok->n <= uint64_t(std::numeric_limits<T>::max())) {
return T(tok->n);
}
}
}
return {};
}

std::optional<uint32_t> Token::getI32() const {
if (auto n = getU32()) {
template<typename T> std::optional<T> Token::getI() const {
static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
if (auto n = getU<T>()) {
return *n;
}
if (auto n = getS32()) {
return uint32_t(*n);
if (auto n = getS<std::make_signed_t<T>>()) {
return T(*n);
}
return {};
}

template std::optional<uint64_t> Token::getU<uint64_t>() const;
template std::optional<int64_t> Token::getS<int64_t>() const;
template std::optional<uint64_t> Token::getI<uint64_t>() const;
template std::optional<uint32_t> Token::getU<uint32_t>() const;
template std::optional<int32_t> Token::getS<int32_t>() const;
template std::optional<uint32_t> Token::getI<uint32_t>() const;
template std::optional<uint8_t> Token::getU<uint8_t>() const;

std::optional<double> Token::getF64() const {
constexpr int signif = 52;
constexpr uint64_t payloadMask = (1ull << signif) - 1;
Expand Down
10 changes: 4 additions & 6 deletions src/parser/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,10 @@ struct Token {
}
return {};
}
std::optional<uint64_t> getU64() const;
std::optional<int64_t> getS64() const;
std::optional<uint64_t> getI64() const;
std::optional<uint32_t> getU32() const;
std::optional<int32_t> getS32() const;
std::optional<uint32_t> getI32() const;

template<typename T> std::optional<T> getU() const;
template<typename T> std::optional<T> getS() const;
template<typename T> std::optional<T> getI() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;
std::optional<std::string_view> getString() const;
Expand Down
Loading

0 comments on commit 1653bfd

Please sign in to comment.