From 0f7a8bb1d33ff67db724c31ee357dea2799d4a0f Mon Sep 17 00:00:00 2001 From: jievince <38901892+jievince@users.noreply.github.com> Date: Tue, 22 Mar 2022 09:48:08 +0800 Subject: [PATCH] add parser notes --- src/parser/parser.yy | 15 +++++++++++++++ src/parser/scanner.lex | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/parser/parser.yy b/src/parser/parser.yy index 6aaa82523f6..b3232ee6c72 100644 --- a/src/parser/parser.yy +++ b/src/parser/parser.yy @@ -1,9 +1,11 @@ +// Bison options %language "C++" %skeleton "lalr1.cc" %no-lines %locations %define api.namespace { nebula } %define api.parser.class { GraphParser } +// Parameters of scanner and parser %lex-param { nebula::GraphScanner& scanner } %parse-param { nebula::GraphScanner& scanner } %parse-param { std::string &errmsg } @@ -54,6 +56,7 @@ static constexpr size_t kCommentLengthLimit = 256; const nebula::GraphParser::location_type& loc); } +// Define types of semantic values %union { bool boolval; int64_t intval; @@ -402,6 +405,18 @@ static constexpr size_t kCommentLengthLimit = 256; %type opt_with_properties %type opt_ignore_existed_index +/* Define precedence and associativity of tokens. + * Associativity: + * The associativity of an operator op determines how repeated uses of the operator nest: + * whether ‘x op y op z’ is parsed by grouping x with y first or by grouping y with z first. + * %left specifies left-associativity (grouping x with y first) and %right specifies right-associativity (grouping y with z first). + * %nonassoc specifies no associativity, which means that ‘x op y op z’ is considered a syntax error. + * + * Precedence: + * The precedence of an operator determines how it nests with other operators. + * All the tokens declared in a single precedence declaration have equal precedence and nest together according to their associativity. 
+ * When two tokens declared in different precedence declarations associate, the one declared later has the higher precedence and is grouped first. + */ %left QM COLON %left KW_OR KW_XOR %left KW_AND diff --git a/src/parser/scanner.lex b/src/parser/scanner.lex index d4c8e7ac627..7817f906469 100644 --- a/src/parser/scanner.lex +++ b/src/parser/scanner.lex @@ -1,3 +1,5 @@ +/* Flex definitions section */ +/* Flex options */ %option c++ %option yyclass="GraphScanner" %option nodefault noyywrap @@ -11,6 +13,7 @@ #include "GraphParser.hpp" #include "graph/service/GraphFlags.h" +/* YY_USER_ACTION is run each time a pattern is matched, to advance the location. */ #define YY_USER_ACTION \ yylloc->step(); \ yylloc->columns(yyleng); @@ -19,6 +22,13 @@ static constexpr size_t MAX_STRING = 4096; %} +/* Define some exclusive states. + * Each state is referenced within a `<>` in the rules section + * <DQ_STR> double quoted string literal + * <SQ_STR> single quoted string literal + * <LB_STR> accent quoted label, e.g. `v2` + * comment + */ %x DQ_STR %x SQ_STR %x LB_STR @@ -60,6 +70,15 @@ LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}* %% + /* Flex rules section */ + /* How is the input matched? + * When the generated scanner is run, it analyzes its input looking for strings which match any of its patterns. + * 1. If it finds more than one match, it takes the one matching the most text. + * 2. If it finds two or more matches of the same length, the rule listed first in the flex input file is chosen. + * Once the match is determined, the text corresponding to the match is made available in the global character pointer yytext, and its length in the global integer yyleng. + * The action corresponding to the matched pattern is then executed, and then the remaining input is scanned for another match. + * The last rule `.` matches any single character except `\n`. 
+ */ /* Reserved keyword */ "GO" { return TokenType::KW_GO; } "AS" { return TokenType::KW_AS; } @@ -538,3 +557,4 @@ LABEL_FULL_WIDTH {CN_EN_FULL_WIDTH}{CN_EN_NUM_FULL_WIDTH}* } %% +/* Flex user code section */