Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Parser] Templatize lexing of integers #6272

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 16 additions & 50 deletions src/parser/input-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ inline std::optional<uint64_t> ParseInput::takeOffset() {
if (subLexer == subLexer.end()) {
return {};
}
if (auto o = subLexer->getU64()) {
if (auto o = subLexer->getU<uint64_t>()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getU<uint64_t> mentions "unsigned" twice. Can this be get<uint64_t>? (maybe using https://en.cppreference.com/w/cpp/types/is_unsigned)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

getU and getI both return unsigned integers, so I think it's good to differentiate them in the method name and not favor one over the other (and all the other getXXX methods) by giving it the shorter get name.

One alternative I considered was to make the template parameter just 32 or 64, etc. but that would require more complex machinery in the implementation and the nicer API didn't seem worth the extra complexity. If you're interested, I can add a commit with that design so we can see the difference, though.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, maybe I've forgotten what those functions mean. I don't see comments on

binaryen/src/parser/lexer.h

Lines 128 to 135 in 845e070

std::optional<uint64_t> getU64() const;
std::optional<int64_t> getS64() const;
std::optional<uint64_t> getI64() const;
std::optional<uint32_t> getU32() const;
std::optional<int32_t> getS32() const;
std::optional<uint32_t> getI32() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;

Perhaps it's worth adding some? Reading the code, the meaning seems to be "U is unsigned, S is signed, I accepts both as inputs but returns unsigned" - ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's right. uN, sN, and iN come directly from the grammar in the spec, but I can add comments here as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, sorry, I keep reading this code as normal C++ and I forget it mirrors the spec...

++subLexer;
if (subLexer == subLexer.end()) {
++lexer;
Expand All @@ -122,7 +122,7 @@ inline std::optional<uint32_t> ParseInput::takeAlign() {
if (subLexer == subLexer.end()) {
return {};
}
if (auto a = subLexer->getU32()) {
if (auto a = subLexer->getU<uint32_t>()) {
++subLexer;
if (subLexer == subLexer.end()) {
++lexer;
Expand All @@ -134,77 +134,43 @@ inline std::optional<uint32_t> ParseInput::takeAlign() {
return {};
}

inline std::optional<uint64_t> ParseInput::takeU64() {
template<typename T> inline std::optional<T> ParseInput::takeU() {
if (auto t = peek()) {
if (auto n = t->getU64()) {
if (auto n = t->getU<T>()) {
++lexer;
return n;
}
}
return std::nullopt;
}

inline std::optional<int64_t> ParseInput::takeS64() {
template<typename T> inline std::optional<T> ParseInput::takeI() {
if (auto t = peek()) {
if (auto n = t->getS64()) {
if (auto n = t->getI<T>()) {
++lexer;
return n;
}
}
return {};
return std::nullopt;
}

inline std::optional<int64_t> ParseInput::takeI64() {
if (auto t = peek()) {
if (auto n = t->getI64()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint64_t> ParseInput::takeU64() {
return takeU<uint64_t>();
}

inline std::optional<uint32_t> ParseInput::takeU32() {
if (auto t = peek()) {
if (auto n = t->getU32()) {
++lexer;
return n;
}
}
return std::nullopt;
inline std::optional<uint64_t> ParseInput::takeI64() {
return takeI<uint64_t>();
}

inline std::optional<int32_t> ParseInput::takeS32() {
if (auto t = peek()) {
if (auto n = t->getS32()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint32_t> ParseInput::takeU32() {
return takeU<uint64_t>();
}

inline std::optional<int32_t> ParseInput::takeI32() {
if (auto t = peek()) {
if (auto n = t->getI32()) {
++lexer;
return n;
}
}
return {};
inline std::optional<uint32_t> ParseInput::takeI32() {
return takeI<uint32_t>();
}

inline std::optional<uint8_t> ParseInput::takeU8() {
if (auto t = peek()) {
if (auto n = t->getU32()) {
if (n <= std::numeric_limits<uint8_t>::max()) {
++lexer;
return uint8_t(*n);
}
}
}
return {};
}
inline std::optional<uint8_t> ParseInput::takeU8() { return takeU<uint8_t>(); }

inline std::optional<double> ParseInput::takeF64() {
if (auto t = peek()) {
Expand Down
11 changes: 7 additions & 4 deletions src/parser/input.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,9 @@ struct ParseInput {
std::optional<uint64_t> takeOffset();
std::optional<uint32_t> takeAlign();
std::optional<uint64_t> takeU64();
std::optional<int64_t> takeS64();
std::optional<int64_t> takeI64();
std::optional<uint64_t> takeI64();
std::optional<uint32_t> takeU32();
std::optional<int32_t> takeS32();
std::optional<int32_t> takeI32();
std::optional<uint32_t> takeI32();
std::optional<uint8_t> takeU8();
std::optional<double> takeF64();
std::optional<float> takeF32();
Expand All @@ -67,6 +65,11 @@ struct ParseInput {
Index getPos();
[[nodiscard]] Err err(Index pos, std::string reason);
[[nodiscard]] Err err(std::string reason) { return err(getPos(), reason); }

private:
template<typename T> std::optional<T> takeU();
template<typename T> std::optional<T> takeS();
template<typename T> std::optional<T> takeI();
};

#include "input-impl.h"
Expand Down
71 changes: 23 additions & 48 deletions src/parser/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,77 +767,52 @@ std::optional<LexResult> keyword(std::string_view in) {

} // anonymous namespace

std::optional<uint64_t> Token::getU64() const {
template<typename T> std::optional<T> Token::getU() const {
static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == NoSign) {
return tok->n;
}
}
return {};
}

std::optional<int64_t> Token::getS64() const {
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == Neg) {
if (uint64_t(INT64_MIN) <= tok->n || tok->n == 0) {
return int64_t(tok->n);
}
// TODO: Add error production for signed underflow.
} else {
if (tok->n <= uint64_t(INT64_MAX)) {
return int64_t(tok->n);
}
// TODO: Add error production for signed overflow.
}
}
return {};
}

std::optional<uint64_t> Token::getI64() const {
if (auto n = getU64()) {
return *n;
}
if (auto n = getS64()) {
return *n;
}
return {};
}

std::optional<uint32_t> Token::getU32() const {
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == NoSign && tok->n <= UINT32_MAX) {
return int32_t(tok->n);
if (tok->sign == NoSign && tok->n <= std::numeric_limits<T>::max()) {
return T(tok->n);
}
// TODO: Add error production for unsigned overflow.
}
return {};
}

std::optional<int32_t> Token::getS32() const {
template<typename T> std::optional<T> Token::getS() const {
static_assert(std::is_integral_v<T> && std::is_signed_v<T>);
if (auto* tok = std::get_if<IntTok>(&data)) {
if (tok->sign == Neg) {
if (uint64_t(INT32_MIN) <= tok->n || tok->n == 0) {
return int32_t(tok->n);
if (uint64_t(std::numeric_limits<T>::min()) <= tok->n || tok->n == 0) {
return T(tok->n);
}
} else {
if (tok->n <= uint64_t(INT32_MAX)) {
return int32_t(tok->n);
if (tok->n <= uint64_t(std::numeric_limits<T>::max())) {
return T(tok->n);
}
}
}
return {};
}

std::optional<uint32_t> Token::getI32() const {
if (auto n = getU32()) {
template<typename T> std::optional<T> Token::getI() const {
static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>);
if (auto n = getU<T>()) {
return *n;
}
if (auto n = getS32()) {
return uint32_t(*n);
if (auto n = getS<std::make_signed_t<T>>()) {
return T(*n);
}
return {};
}

template std::optional<uint64_t> Token::getU<uint64_t>() const;
template std::optional<int64_t> Token::getS<int64_t>() const;
template std::optional<uint64_t> Token::getI<uint64_t>() const;
template std::optional<uint32_t> Token::getU<uint32_t>() const;
template std::optional<int32_t> Token::getS<int32_t>() const;
template std::optional<uint32_t> Token::getI<uint32_t>() const;
template std::optional<uint8_t> Token::getU<uint8_t>() const;

std::optional<double> Token::getF64() const {
constexpr int signif = 52;
constexpr uint64_t payloadMask = (1ull << signif) - 1;
Expand Down
10 changes: 4 additions & 6 deletions src/parser/lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,10 @@ struct Token {
}
return {};
}
std::optional<uint64_t> getU64() const;
std::optional<int64_t> getS64() const;
std::optional<uint64_t> getI64() const;
std::optional<uint32_t> getU32() const;
std::optional<int32_t> getS32() const;
std::optional<uint32_t> getI32() const;

template<typename T> std::optional<T> getU() const;
template<typename T> std::optional<T> getS() const;
template<typename T> std::optional<T> getI() const;
std::optional<double> getF64() const;
std::optional<float> getF32() const;
std::optional<std::string_view> getString() const;
Expand Down
Loading
Loading