Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add API to get table names #46

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions parser/cpp/AstNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class AstNode : public SimpleNode {
AstNode(SqlParser* parser, int id) : SimpleNode(parser, id) {}
int NumChildren() const { return jjtGetNumChildren(); }
AstNode* GetChild(int i) const { return static_cast<AstNode*>(jjtGetChild(i)); }
AstNode* LastChild() const { return static_cast<AstNode*>(jjtGetChild(NumChildren() - 1)); }
int Kind() const { return id; }
JJString GetImage() const { return NumChildren() == 0 ? beginToken->image : ""; }
JJString toString(const JJString& prefix) const {
Expand Down
22 changes: 22 additions & 0 deletions parser/cpp/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@

# Build rules for the C++ parser test binary (target/parser_test).
#
# The generated parser sources under $(GENDIR) are compiled to objects in
# place; main.cc, ParseErrorHandler.cc and ParserClient.cc are compiled and
# linked against those objects in one step.

CC = clang
CXX = clang++
WORKING_DIR=$(PWD)
GENDIR=$(WORKING_DIR)/target/generated-sources/javacc
GENERATED_FILES = $(GENDIR)/*.cc
GENERATED_OBJS = $(GENDIR)/*.o

override CXXFLAGS += -fbracket-depth=1024 -funsigned-char -I$(WORKING_DIR) -I$(GENDIR) -std=c++11

# Each hand-written source is listed once as a prerequisite.
$(WORKING_DIR)/target/parser_test: main.cc ParseErrorHandler.cc ParserClient.cc $(GENERATED_OBJS)
	$(CXX) $(OPT) $(CXXFLAGS) main.cc ParseErrorHandler.cc ParserClient.cc $(GENERATED_OBJS) -o $(WORKING_DIR)/target/parser_test

# Rebuild the generated objects whenever the grammar spec changes.
# NOTE(review): presumably `mvn compile` is what writes the .cc files into
# $(GENDIR) -- confirm against the Maven configuration.
$(GENERATED_OBJS): ../grammar/sql-spec.txt
	mvn compile
	cd $(GENDIR) && $(CXX) $(OPT) $(CXXFLAGS) -Wno-dangling-else -c *.cc

# These targets do not produce files of the same name.
.PHONY: clean real_clean

clean:
	/bin/rm -f target/*.*

real_clean:
	/bin/rm -rf target
105 changes: 105 additions & 0 deletions parser/cpp/ParserClient.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@

#include "ParserClient.h"

#include "SqlParserConstants.h"
#include "SqlParserVisitor.h"

using namespace commonsql::parser;
using namespace std;

// Builds one string from the token chain running from `start` to `end`
// (inclusive), following Token::next. Each token before `end` contributes its
// image followed by "."; `end` contributes its image with nothing appended.
//
// If `start` is null, or the chain runs out before `end` is reached, the text
// gathered so far is returned rather than dereferencing a null token.
static const string GetIdSequence(Token* start, Token* end) {
  string id_sequence;
  for (; start != nullptr && start != end; start = start->next) {
    id_sequence += start->image + ".";
  }

  // `start` is null here when the chain ended without ever meeting `end`.
  if (start != nullptr) {
    id_sequence += start->image;
  }
  return id_sequence;
}

// ErrorHandler whose three callbacks have empty bodies, so nothing is done
// when the parser reports an error through them.
class QuietErrorHandler : public ErrorHandler {
 public:
  void handleUnexpectedToken(int /*expectedKind*/,
                             const JJString& /*expectedToken*/,
                             Token* /*actual*/,
                             SqlParser* /*parser*/) {
  }

  void handleParseError(Token* /*last*/,
                        Token* /*unexpected*/,
                        const JJSimpleString& /*production*/,
                        SqlParser* /*parser*/) {
  }

  void handleOtherError(const JJString& /*message*/,
                        SqlParser* /*parser*/) {
  }
};

class TableNameVisitor : SqlParserDefaultVisitor {

vector<string> tableNames;

public:
virtual void defaultVisit(const SimpleNode* node, void* data) {
const AstNode* astNode = static_cast<const AstNode*>(node);
for (int i = 0; i < astNode->NumChildren(); i++) {
astNode->GetChild(i)->jjtAccept(this, data);
}
}

virtual void visit(const TableName* node, void* data) {
const AstNode* nameNode = node->GetChild(0);
tableNames.push_back(GetIdSequence(nameNode->beginToken, nameNode->endToken));
}

vector<string> GetTableNames() {
return tableNames;
}
};

// Collects table names from `sql` without requiring the whole text to parse.
//
// Tokens are consumed until a FROM token is seen; from_clause() is then run,
// the node it leaves behind is popped and walked with a TableNameVisitor, and
// the parser is Reset() before scanning continues. Scanning stops at _EOF.
// Parse errors are routed to a QuietErrorHandler, whose callbacks do nothing.
//
// Returns the names recorded by the visitor; an empty `sql` yields an empty
// vector.
vector<string> GetTableNamesApprox(const string& sql) {
  // The stream length below is size() - 1, which would wrap around for an
  // empty string.
  if (sql.empty()) {
    return vector<string>();
  }

  // NOTE(review): the length passed is size() - 1, the same form main.cc uses
  // for its input buffer -- confirm that leaving out the final character is
  // intended for arbitrary callers.
  CharStream stream(sql.c_str(), sql.size() - 1, 1, 1);
  SqlParserTokenManager scanner(&stream);
  SqlParser parser(&scanner);
  // NOTE(review): presumably the parser takes ownership of the handler --
  // confirm against the generated setErrorHandler().
  parser.setErrorHandler(new QuietErrorHandler());

  TableNameVisitor tableNameVisitor;
  Token* start = parser.getToken(1);
  while (start != nullptr) {
    // Skip forward to the next FROM (or the end of input).
    while (start != nullptr && start->kind != FROM && start->kind != _EOF) {
      start = parser.getNextToken();
    }

    if (start == nullptr || start->kind == _EOF) break;

    // Now try to parse a from clause.
    parser.from_clause();
    AstNode* fromClause = static_cast<AstNode*>(parser.PopNode());
    if (fromClause != nullptr) {
      tableNameVisitor.defaultVisit(fromClause, nullptr);
    }

    delete fromClause;
    parser.Reset();
    start = parser.getToken(1);
  }

  return tableNameVisitor.GetTableNames();
}

// Returns the table names referenced by `sql`.
//
// Currently this delegates straight to GetTableNamesApprox().
// TODO: attempt a full parse (compilation_unit()) first and fall back to the
// approximate scan only when that fails.
const vector<string> GetTableNames(string sql) {
  return GetTableNamesApprox(sql);
}
19 changes: 19 additions & 0 deletions parser/cpp/ParserClient.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

// Public entry point for extracting table names from SQL text.
#ifndef PARSER_CLIENT_H_
#define PARSER_CLIENT_H_

#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
#include <stdlib.h>

#include "SqlParserConstants.h"
#include "CharStream.h"
#include "Token.h"
#include "SqlParser.h"
#include "SqlParserTokenManager.h"
#include "parser.h"

// NOTE(review): using-directives in a header leak into every includer; they
// are kept because existing includers may rely on the unqualified names.
using namespace commonsql::parser;
using namespace std;

// Returns the table names found in `sql`.
extern const vector<string> GetTableNames(string sql);

#endif  // PARSER_CLIENT_H_
2 changes: 1 addition & 1 deletion parser/cpp/compile.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/sh
# Compiles the listed .cc sources together with every .cc file under $SRC_DIR
# into $TARGET_DIR/sqlparser using clang++.
SRC_DIR=./target/generated-sources/javacc
TARGET_DIR=./target
clang++ -fbracket-depth=1024 -funsigned-char -I. -I./$SRC_DIR -o $TARGET_DIR/sqlparser -std=c++11 ParseErrorHandler.cc main.cc $SRC_DIR/*.cc
clang++ -O4 -fbracket-depth=1024 -funsigned-char -I. -I./$SRC_DIR -o $TARGET_DIR/sqlparser -std=c++11 ParseErrorHandler.cc main.cc ParserClient.cc $SRC_DIR/*.cc
56 changes: 25 additions & 31 deletions parser/cpp/javacc-options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ options {

PARSER_BEGIN(SqlParser)

// Reports the current value of the parser's hasError flag.
inline bool HasError() {
  return hasError;
}
inline bool IsIdNonReservedWord() {
auto kind = getToken(1)->kind;
if (__builtin_expect(kind == regular_identifier, 1) || kind == delimited_identifier || kind == Unicode_delimited_identifier) return true;
Expand Down Expand Up @@ -63,27 +64,28 @@ PARSER_BEGIN(SqlParser)
}

// Skips ahead to the next statement boundary and clears the error flag.
//
// Consumes tokens until the next token is a semicolon or the end of input,
// then consumes any run of semicolons that follows. Always returns true.
inline bool SyncToSemicolon() {
  // Compare against the _EOF token kind, as NotEof() does. A plain `EOF`
  // would name the C stdio macro rather than a token kind, and the loop
  // would then have no way to stop at the end of input.
  while (getToken(1)->kind != _EOF && getToken(1)->kind != semicolon) {
    getNextToken();
  }

  while (getToken(1)->kind == semicolon) {
    getNextToken();
  }

  hasError = false;
  return true;
}


// True when the next token to be consumed is not the _EOF token.
inline bool NotEof() {
  const bool atEnd = (getToken(1)->kind == _EOF);
  return !atEnd;
}

// Pushes `node` onto jjtree.
void PushNode(Node* node) { jjtree.pushNode(node); }
// Pops and returns the top node of jjtree unconditionally.
Node* PopNode() { return jjtree.popNode(); }
// Returns nullptr when jjtree.peekNode() is nullptr; otherwise pops and
// returns the top node of jjtree.
Node* PopNode() { return jjtree.peekNode() != nullptr ? jjtree.popNode() : nullptr; }
// Calls reset() and then clear() on jjtree, and clears the hasError flag.
void Reset() {
jjtree.reset();
jjtree.clear();
hasError = false;
}

void jjtreeOpenNodeScope(Node* node) {
static_cast<AstNode*>(node)->beginToken = getToken(1);
Expand Down Expand Up @@ -171,27 +173,19 @@ Node* compilation_unit() #CompilationUnit:
Token *begin;
}
{
(
LOOKAHEAD({NotEof()})
direct_SQL_statement()
{
if (hasError) {
if (jjtree.peekNode() != nullptr) { jjtree.popNode(); }
SyncToSemicolon();
}
}
( <semicolon> )*
(
LOOKAHEAD({NotEof()})
{ begin = getToken(1); }
statement_list()
{ if (hasError) cout << "Error parsing statement at: " << begin->beginLine; }
{ SyncToSemicolon(); }
)*
<EOF>

{ if (jjtree.peekNode() != nullptr) return jjtree.peekNode(); return null; }
}
)*

void statement_list():
{}
{
(
LOOKAHEAD({NotEof()})
direct_SQL_statement()
( <semicolon> )*
)+
{ SyncToSemicolon(); }
}
<EOF>

{ return jjtThis; }
}
11 changes: 11 additions & 0 deletions parser/cpp/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "SqlParserTokenManager.h"
#include "parser.h"

#include "ParserClient.h"

#include <ctime>

using namespace commonsql::parser;
Expand Down Expand Up @@ -39,6 +41,7 @@ int main(int argc, char **argv) {
double time;
start = clock();

#if 0
for (int i = 0; i < 1; i++) {
CharStream *stream = new CharStream(s.c_str(), s.size() - 1, 1, 1);
SqlParserTokenManager *scanner = new SqlParserTokenManager(stream);
Expand All @@ -52,8 +55,16 @@ int main(int argc, char **argv) {
printf("%s\n", buffer.c_str());
}
}
#endif

vector<string> tableNames = GetTableNames(s);
printf("TableNames: \n");
for (string tableName: tableNames) {
printf(" %s\n", tableName.c_str());
}

finish = clock();
time = (double(finish)-double(start))/CLOCKS_PER_SEC;
printf ("Avg parsing time: %lfms\n", (time*1000)/1);

}
2 changes: 1 addition & 1 deletion parser/cpp/prepare-javacc-grammar.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ pwd
GRAMMAR_DIR='../grammar'
GEN_DIR='target/generated-sources/javacc'
mkdir -p $GEN_DIR
# Concatenate the local JavaCC options file and the grammar fragments from
# $GRAMMAR_DIR, in the order listed, into $GEN_DIR/parser_tmp.jjt.
cat ./javacc-options.txt $GRAMMAR_DIR/nonreservedwords.txt $GRAMMAR_DIR/reservedwords.txt $GRAMMAR_DIR/sql-spec.txt $GRAMMAR_DIR/presto-extensions.txt $GRAMMAR_DIR/lexical-elements.txt > $GEN_DIR/parser_tmp.jjt
cat ./javacc-options.txt $GRAMMAR_DIR/nonreservedwords.txt $GRAMMAR_DIR/reservedwords.txt $GRAMMAR_DIR/sql-spec.txt $GRAMMAR_DIR/presto-extensions.txt $GRAMMAR_DIR/unicode-identifier-start.txt $GRAMMAR_DIR/unicode-identifier-extend.txt $GRAMMAR_DIR/ws.txt $GRAMMAR_DIR/lexical-elements.txt > $GEN_DIR/parser_tmp.jjt
16 changes: 13 additions & 3 deletions parser/grammar/lexical-elements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,15 @@ regular_identifier()

| <#identifier_part: <identifier_start> | <identifier_extend> >

| <#identifier_start: ["a"-"z"] // temp
| <#identifier_start: (<UnicodeIdentifierStart>)
/*!! See the Syntax Rules.*/
/* Unicode char classes: <Ll> | <Lm> | <Lo> | <Lt> | <Lu> | <Nl> */
>

| <#identifier_extend: ["\u00B7", "0"-"9", "_"] // temp
| <#identifier_extend: ["\u00B7"] | <UnicodeIdentifierExtend>
| "_" // TODO(sreeni): Not allowed in spec!
//!! See the Syntax Rules.
/* Unicode char classes: <Mn>, <Mc>, <Nd>, <Pc>, <Cf> */
>

| <large_object_length_token: ( <digit> )+ <multiplier> >
Expand Down Expand Up @@ -150,7 +153,14 @@ delimiter_token:

SPECIAL_TOKEN:
{
<white_space: <newline> | [ " ", "\t" ] // temp
<white_space: (<UnicodeWhiteSpace> |
[ "\u0009" // Horizontal Tabulation
, "\n" // "\\u000A" //, Line Feed
, "\u000B" // Vertical Tabulation
, "\f" // "\\u000C" //, Form Feed
, "\r" // "\\u000D" //, Carriage Return
, "\u0085" // Next Line
])
//!! See the Syntax Rules.
>

Expand Down
2 changes: 1 addition & 1 deletion parser/grammar/prepare-javacc-grammar.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Concatenate all the grammar fragments, in the order listed, into
# $gendir/parser_tmp.jjt (the directory is created first if it is missing).
gendir='../target/generated-sources/javacc'
mkdir -p $gendir
cat javacc-options-java.txt nonreservedwords.txt reservedwords.txt sql-spec.txt presto-extensions.txt lexical-elements.txt > $gendir/parser_tmp.jjt
cat javacc-options-java.txt nonreservedwords.txt reservedwords.txt sql-spec.txt presto-extensions.txt unicode-identifier-start.txt unicode-identifier-extend.txt ws.txt lexical-elements.txt > $gendir/parser_tmp.jjt
2 changes: 1 addition & 1 deletion parser/grammar/presto-extensions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,6 @@ void weird_identifiers():

TOKEN:
{
<identifier_starting_with_underscore: ("_")+ (<regular_identifier>)? > { setKindToIdentifier(matchedToken); }
<identifier_with_underscore: (<regular_identifier>|"_")+ > { setKindToIdentifier(matchedToken); }
| <generic_unicode: "U&'" ( ~["'"] | ("''") )* "'"> { setUnicodeLiteralType(matchedToken); }
}
2 changes: 1 addition & 1 deletion parser/grammar/sql-spec.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6969,7 +6969,7 @@ void direct_SQL_statement() #DirectSqlStatement(true):
void directly_executable_statement():
{}
{
direct_SQL_data_statement()
LOOKAHEAD(1) direct_SQL_data_statement()
| SQL_schema_statement()
| SQL_transaction_statement()
| SQL_connection_statement()
Expand Down
Loading