Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[C++] Modernize Vocabulary using std::string_view #3378

Merged
merged 1 commit into from
Dec 7, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ TokenStartColumnEquals(i) ::= "tokenStartCharPositionInLine == <i>"

ImportListener(X) ::= ""

GetExpectedTokenNames() ::= "getExpectedTokens().toString(_tokenNames)"
GetExpectedTokenNames() ::= "getExpectedTokens().toString(getVocabulary())"

RuleInvocationStack() ::= "Arrays::listToString(getRuleInvocationStack(), \", \")"

Expand Down
14 changes: 0 additions & 14 deletions runtime/Cpp/runtime/src/LexerInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@

using namespace antlr4;

// Overload taking a flat list of token names (tagged "@deprecated" on its declaration
// in LexerInterpreter.h). It does no work of its own: tokenNames is converted to a
// dfa::Vocabulary with dfa::Vocabulary::fromTokenNames and everything is forwarded to
// the Vocabulary-based constructor. All other arguments are passed through unchanged.
// NOTE(review): the Vocabulary handed to the delegate is a temporary — confirm the
// target constructor copies it rather than keeping a reference.
LexerInterpreter::LexerInterpreter(const std::string &grammarFileName, const std::vector<std::string> &tokenNames,
const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, const std::vector<std::string> &modeNames,
const atn::ATN &atn, CharStream *input)
: LexerInterpreter(grammarFileName, dfa::Vocabulary::fromTokenNames(tokenNames), ruleNames, channelNames, modeNames, atn, input) {
}

LexerInterpreter::LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary,
const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames, const std::vector<std::string> &modeNames,
const atn::ATN &atn, CharStream *input)
Expand All @@ -31,10 +25,6 @@ LexerInterpreter::LexerInterpreter(const std::string &grammarFileName, const dfa
throw IllegalArgumentException("The ATN must be a lexer ATN.");
}

for (size_t i = 0; i < atn.maxTokenType; i++) {
_tokenNames.push_back(vocabulary.getDisplayName(i));
}

for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) {
_decisionToDFA.push_back(dfa::DFA(_atn.getDecisionState(i), i));
}
Expand All @@ -54,10 +44,6 @@ std::string LexerInterpreter::getGrammarFileName() const {
return _grammarFileName;
}

// Returns a reference to the _tokenNames member. The Vocabulary-based constructor
// fills that vector with vocabulary.getDisplayName(i) for every i below
// atn.maxTokenType (the bound itself is excluded by the `<` comparison).
const std::vector<std::string>& LexerInterpreter::getTokenNames() const {
return _tokenNames;
}

// Returns the rule names held in _ruleNames. LexerInterpreter.h declares that member
// as a const reference, so this hands back the referenced vector, not a copy.
// NOTE(review): presumably bound to the constructor's ruleNames argument, which would
// then have to outlive this object — confirm against the constructor's initializer list.
const std::vector<std::string>& LexerInterpreter::getRuleNames() const {
return _ruleNames;
}
Expand Down
7 changes: 0 additions & 7 deletions runtime/Cpp/runtime/src/LexerInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,6 @@ namespace antlr4 {

class ANTLR4CPP_PUBLIC LexerInterpreter : public Lexer {
public:
// @deprecated
LexerInterpreter(const std::string &grammarFileName, const std::vector<std::string> &tokenNames,
const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames,
const std::vector<std::string> &modeNames, const atn::ATN &atn, CharStream *input);
LexerInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary,
const std::vector<std::string> &ruleNames, const std::vector<std::string> &channelNames,
const std::vector<std::string> &modeNames, const atn::ATN &atn, CharStream *input);
Expand All @@ -25,7 +21,6 @@ namespace antlr4 {

virtual const atn::ATN& getATN() const override;
virtual std::string getGrammarFileName() const override;
virtual const std::vector<std::string>& getTokenNames() const override;
virtual const std::vector<std::string>& getRuleNames() const override;
virtual const std::vector<std::string>& getChannelNames() const override;
virtual const std::vector<std::string>& getModeNames() const override;
Expand All @@ -36,8 +31,6 @@ namespace antlr4 {
const std::string _grammarFileName;
const atn::ATN &_atn;

// @deprecated
std::vector<std::string> _tokenNames;
const std::vector<std::string> &_ruleNames;
const std::vector<std::string> &_channelNames;
const std::vector<std::string> &_modeNames;
Expand Down
13 changes: 0 additions & 13 deletions runtime/Cpp/runtime/src/ParserInterpreter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,10 @@ using namespace antlr4::misc;

using namespace antlrcpp;

// Overload taking a flat list of token names (tagged "@deprecated" on its declaration
// in ParserInterpreter.h). It converts tokenNames to a dfa::Vocabulary with
// dfa::Vocabulary::fromTokenNames and delegates to the Vocabulary-based constructor,
// which initialises _vocabulary from that argument. The remaining arguments are
// forwarded unchanged.
ParserInterpreter::ParserInterpreter(const std::string &grammarFileName, const std::vector<std::string>& tokenNames,
const std::vector<std::string>& ruleNames, const atn::ATN &atn, TokenStream *input)
: ParserInterpreter(grammarFileName, dfa::Vocabulary::fromTokenNames(tokenNames), ruleNames, atn, input) {
}

ParserInterpreter::ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary,
const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input)
: Parser(input), _grammarFileName(grammarFileName), _atn(atn), _ruleNames(ruleNames), _vocabulary(vocabulary) {

for (size_t i = 0; i < atn.maxTokenType; ++i) {
_tokenNames.push_back(vocabulary.getDisplayName(i));
}

// init decision DFA
for (size_t i = 0; i < atn.getNumberOfDecisions(); ++i) {
atn::DecisionState *decisionState = atn.getDecisionState(i);
Expand All @@ -72,10 +63,6 @@ const atn::ATN& ParserInterpreter::getATN() const {
return _atn;
}

// Returns a reference to the _tokenNames member (tagged "@deprecated" on its
// declaration in ParserInterpreter.h). The Vocabulary-based constructor fills that
// vector with vocabulary.getDisplayName(i) for every i below atn.maxTokenType.
const std::vector<std::string>& ParserInterpreter::getTokenNames() const {
return _tokenNames;
}

// Returns a reference to the _vocabulary member, which the Vocabulary-based
// constructor initialises from its `vocabulary` argument.
const dfa::Vocabulary& ParserInterpreter::getVocabulary() const {
return _vocabulary;
}
Expand Down
7 changes: 0 additions & 7 deletions runtime/Cpp/runtime/src/ParserInterpreter.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ namespace antlr4 {
/// </summary>
class ANTLR4CPP_PUBLIC ParserInterpreter : public Parser {
public:
// @deprecated
ParserInterpreter(const std::string &grammarFileName, const std::vector<std::string>& tokenNames,
const std::vector<std::string>& ruleNames, const atn::ATN &atn, TokenStream *input);
ParserInterpreter(const std::string &grammarFileName, const dfa::Vocabulary &vocabulary,
const std::vector<std::string> &ruleNames, const atn::ATN &atn, TokenStream *input);
~ParserInterpreter();
Expand All @@ -40,9 +37,6 @@ namespace antlr4 {

virtual const atn::ATN& getATN() const override;

// @deprecated
virtual const std::vector<std::string>& getTokenNames() const override;

virtual const dfa::Vocabulary& getVocabulary() const override;

virtual const std::vector<std::string>& getRuleNames() const override;
Expand Down Expand Up @@ -110,7 +104,6 @@ namespace antlr4 {

protected:
const std::string _grammarFileName;
std::vector<std::string> _tokenNames;
const atn::ATN &_atn;

std::vector<std::string> _ruleNames;
Expand Down
21 changes: 8 additions & 13 deletions runtime/Cpp/runtime/src/Recognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
using namespace antlr4;
using namespace antlr4::atn;

std::map<const dfa::Vocabulary*, std::map<std::string, size_t>> Recognizer::_tokenTypeMapCache;
std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> Recognizer::_tokenTypeMapCache;
std::map<std::vector<std::string>, std::map<std::string, size_t>> Recognizer::_ruleIndexMapCache;

Recognizer::Recognizer() {
Expand All @@ -30,31 +30,26 @@ Recognizer::Recognizer() {
// Destructor with an empty body: no explicit teardown is performed here.
Recognizer::~Recognizer() {
}

// Fallback vocabulary accessor: builds a Vocabulary from getTokenNames() through
// dfa::Vocabulary::fromTokenNames and returns a reference to it.
//
// The Vocabulary is a function-local static, so it is constructed exactly once — on
// the first call, from the token names of whichever object makes that call. Every
// later call that reaches this implementation, on any Recognizer, gets that same
// instance back; getTokenNames() is not consulted again.
// NOTE(review): this looks unintended when recognizers with different token names
// coexist in one process — confirm, or have subclasses override getVocabulary().
dfa::Vocabulary const& Recognizer::getVocabulary() const {
static dfa::Vocabulary vocabulary = dfa::Vocabulary::fromTokenNames(getTokenNames());
return vocabulary;
}

std::map<std::string, size_t> Recognizer::getTokenTypeMap() {
std::map<std::string_view, size_t> Recognizer::getTokenTypeMap() {
const dfa::Vocabulary& vocabulary = getVocabulary();

std::lock_guard<std::mutex> lck(_mutex);
std::map<std::string, size_t> result;
std::map<std::string_view, size_t> result;
auto iterator = _tokenTypeMapCache.find(&vocabulary);
if (iterator != _tokenTypeMapCache.end()) {
result = iterator->second;
} else {
for (size_t i = 0; i <= getATN().maxTokenType; ++i) {
std::string literalName = vocabulary.getLiteralName(i);
std::string_view literalName = vocabulary.getLiteralName(i);
if (!literalName.empty()) {
result[literalName] = i;
}

std::string symbolicName = vocabulary.getSymbolicName(i);
std::string_view symbolicName = vocabulary.getSymbolicName(i);
if (!symbolicName.empty()) {
result[symbolicName] = i;
}
}
}
result["EOF"] = EOF;
_tokenTypeMapCache[&vocabulary] = result;
}
Expand All @@ -80,8 +75,8 @@ std::map<std::string, size_t> Recognizer::getRuleIndexMap() {
return result;
}

size_t Recognizer::getTokenType(const std::string &tokenName) {
const std::map<std::string, size_t> &map = getTokenTypeMap();
size_t Recognizer::getTokenType(std::string_view tokenName) {
const std::map<std::string_view, size_t> &map = getTokenTypeMap();
auto iterator = map.find(tokenName);
if (iterator == map.end())
return Token::INVALID_TYPE;
Expand Down
15 changes: 4 additions & 11 deletions runtime/Cpp/runtime/src/Recognizer.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ namespace antlr4 {

Recognizer& operator=(Recognizer const&) = delete;

/** Used to print out token names like ID during debugging and
* error reporting. The generated parsers implement a method
* that overrides this to point to their String[] tokenNames.
*
* @deprecated Use {@link #getVocabulary()} instead.
*/
virtual std::vector<std::string> const& getTokenNames() const = 0;
virtual std::vector<std::string> const& getRuleNames() const = 0;

/**
Expand All @@ -35,14 +28,14 @@ namespace antlr4 {
* @return A {@link Vocabulary} instance providing information about the
* vocabulary used by the grammar.
*/
virtual dfa::Vocabulary const& getVocabulary() const;
virtual dfa::Vocabulary const& getVocabulary() const = 0;

/// <summary>
/// Get a map from token names to token types.
/// <p/>
/// Used for XPath and tree pattern compilation.
/// </summary>
virtual std::map<std::string, size_t> getTokenTypeMap();
virtual std::map<std::string_view, size_t> getTokenTypeMap();

/// <summary>
/// Get a map from rule names to rule indexes.
Expand All @@ -51,7 +44,7 @@ namespace antlr4 {
/// </summary>
virtual std::map<std::string, size_t> getRuleIndexMap();

virtual size_t getTokenType(const std::string &tokenName);
virtual size_t getTokenType(std::string_view tokenName);

/// <summary>
/// If this recognizer was generated, it will have a serialized ATN
Expand Down Expand Up @@ -151,7 +144,7 @@ namespace antlr4 {
std::mutex _mutex;

private:
static std::map<const dfa::Vocabulary*, std::map<std::string, size_t>> _tokenTypeMapCache;
static std::map<const dfa::Vocabulary*, std::map<std::string_view, size_t>> _tokenTypeMapCache;
static std::map<std::vector<std::string>, std::map<std::string, size_t>> _ruleIndexMapCache;

ProxyErrorListener _proxListener; // Manages a collection of listeners.
Expand Down
58 changes: 13 additions & 45 deletions runtime/Cpp/runtime/src/Vocabulary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,58 +11,26 @@ using namespace antlr4::dfa;

const Vocabulary Vocabulary::EMPTY_VOCABULARY;

Vocabulary::Vocabulary(const std::vector<std::string> &literalNames, const std::vector<std::string> &symbolicNames)
: Vocabulary(literalNames, symbolicNames, {}) {
Vocabulary::Vocabulary(std::vector<std::string> literalNames, std::vector<std::string> symbolicNames)
: Vocabulary(std::move(literalNames), std::move(symbolicNames), {}) {
}

Vocabulary::Vocabulary(const std::vector<std::string> &literalNames,
const std::vector<std::string> &symbolicNames, const std::vector<std::string> &displayNames)
: _literalNames(literalNames), _symbolicNames(symbolicNames), _displayNames(displayNames),
Vocabulary::Vocabulary(std::vector<std::string> literalNames,
std::vector<std::string> symbolicNames, std::vector<std::string> displayNames)
: _literalNames(std::move(literalNames)), _symbolicNames(std::move(symbolicNames)), _displayNames(std::move(displayNames)),
_maxTokenType(std::max(_displayNames.size(), std::max(_literalNames.size(), _symbolicNames.size())) - 1) {
// See note here on -1 part: https://github.com/antlr/antlr4/pull/1146
}

// Out-of-line definition of the destructor, using the compiler-generated behaviour.
Vocabulary::~Vocabulary() = default;

// Builds a Vocabulary from a single flat list of token names.
//
// Each entry is classified by its first character:
//   - starts with a single quote      -> kept as a literal name only
//   - starts with an upper-case letter -> kept as a symbolic name only
//     (tested with std::isupper against a default-constructed std::locale)
//   - anything else non-empty          -> neither literal nor symbolic
// Empty entries are left empty in both lists. The unmodified tokenNames list is
// passed on as the display names. An empty input yields a copy of EMPTY_VOCABULARY.
Vocabulary Vocabulary::fromTokenNames(const std::vector<std::string> &tokenNames) {
  if (tokenNames.empty()) {
    return EMPTY_VOCABULARY;
  }

  const std::locale loc;
  // Start from full copies and blank out the slots that do not apply.
  std::vector<std::string> literals(tokenNames);
  std::vector<std::string> symbols(tokenNames);

  for (size_t index = 0; index < tokenNames.size(); ++index) {
    const std::string &name = tokenNames[index];
    if (name.empty()) {
      continue;
    }

    const char first = name.front();
    const bool isLiteral = first == '\'';
    // Only consult the locale when the name is not already known to be a literal.
    const bool isSymbolic = !isLiteral && std::isupper(first, loc);

    if (!isLiteral) {
      literals[index].clear();
    }
    if (!isSymbolic) {
      symbols[index].clear();
    }
  }

  return Vocabulary(literals, symbols, tokenNames);
}

// Returns the stored _maxTokenType. The three-argument constructor computes it as the
// largest of the literal, symbolic and display name vector sizes, minus one.
size_t Vocabulary::getMaxTokenType() const {
return _maxTokenType;
}

std::string Vocabulary::getLiteralName(size_t tokenType) const {
std::string_view Vocabulary::getLiteralName(size_t tokenType) const {
if (tokenType < _literalNames.size()) {
return _literalNames[tokenType];
}

return "";
}

std::string Vocabulary::getSymbolicName(size_t tokenType) const {
std::string_view Vocabulary::getSymbolicName(size_t tokenType) const {
if (tokenType == Token::EOF) {
return "EOF";
}
Expand All @@ -76,20 +44,20 @@ std::string Vocabulary::getSymbolicName(size_t tokenType) const {

std::string Vocabulary::getDisplayName(size_t tokenType) const {
if (tokenType < _displayNames.size()) {
std::string displayName = _displayNames[tokenType];
std::string_view displayName = _displayNames[tokenType];
if (!displayName.empty()) {
return displayName;
return std::string(displayName);
}
}

std::string literalName = getLiteralName(tokenType);
std::string_view literalName = getLiteralName(tokenType);
if (!literalName.empty()) {
return literalName;
return std::string(literalName);
}

std::string symbolicName = getSymbolicName(tokenType);
std::string_view symbolicName = getSymbolicName(tokenType);
if (!symbolicName.empty()) {
return symbolicName;
return std::string(symbolicName);
}

return std::to_string(tokenType);
Expand Down
Loading