Skip to content

Commit

Permalink
Fix parser failure for character alias (#201)
Browse files Browse the repository at this point in the history
This PR fixes a parser failure that occurs when an SQL string contains a `character` alias.

Example query:
```
SELECT student.id AS character FROM student;
```

The fault is related to the data type `CHARACTER VARYING`, which is a synonym for `VARCHAR`.
Before this PR, the scanner (Flex) matched the strings `CHARACTER` and `VARYING` as two separate tokens to support `CHARACTER VARYING`. However, because `CHARACTER` is a token of its own, the identifier `character` in the query above is scanned as the `CHARACTER` token. Parsing the query then fails because the corresponding parser rule expects an `IDENTIFIER` token rather than the `CHARACTER` token.

> Most flex programs are quite ambiguous, with multiple patterns that can match the same input. Flex resolves the ambiguity with two simple rules:
> • Match the longest possible string every time the scanner matches input.
> • In the case of a tie, use the pattern that appears first in the program.

Levine, J. (2009). Flex & Bison: Text Processing Tools. "O'Reilly Media, Inc.". Page 22.

For the above query, both the `CHARACTER` token and the `IDENTIFIER` token would match the longest possible string, which is `character`. `CHARACTER` is chosen over `IDENTIFIER` since it appears first in the program.

This PR removes the `CHARACTER` and `VARYING` tokens and adds a `CHARACTER_VARYING` token. This token matches strings with the following pattern:  
`CHARACTER<whitespace>*VARYING`
  • Loading branch information
mweisgut authored Jan 17, 2022
1 parent a621176 commit f497192
Show file tree
Hide file tree
Showing 6 changed files with 2,099 additions and 2,094 deletions.
2,021 changes: 1,012 additions & 1,009 deletions src/parser/bison_parser.cpp

Large diffs are not rendered by default.

329 changes: 164 additions & 165 deletions src/parser/bison_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,170 +84,169 @@ extern int hsql_debug;
#define HSQL_TOKENTYPE
enum hsql_tokentype {
SQL_HSQL_EMPTY = -2,
SQL_YYEOF = 0, /* "end of file" */
SQL_HSQL_error = 256, /* error */
SQL_HSQL_UNDEF = 257, /* "invalid token" */
SQL_IDENTIFIER = 258, /* IDENTIFIER */
SQL_STRING = 259, /* STRING */
SQL_FLOATVAL = 260, /* FLOATVAL */
SQL_INTVAL = 261, /* INTVAL */
SQL_DEALLOCATE = 262, /* DEALLOCATE */
SQL_PARAMETERS = 263, /* PARAMETERS */
SQL_INTERSECT = 264, /* INTERSECT */
SQL_TEMPORARY = 265, /* TEMPORARY */
SQL_TIMESTAMP = 266, /* TIMESTAMP */
SQL_DISTINCT = 267, /* DISTINCT */
SQL_NVARCHAR = 268, /* NVARCHAR */
SQL_RESTRICT = 269, /* RESTRICT */
SQL_TRUNCATE = 270, /* TRUNCATE */
SQL_ANALYZE = 271, /* ANALYZE */
SQL_BETWEEN = 272, /* BETWEEN */
SQL_CASCADE = 273, /* CASCADE */
SQL_COLUMNS = 274, /* COLUMNS */
SQL_CONTROL = 275, /* CONTROL */
SQL_DEFAULT = 276, /* DEFAULT */
SQL_EXECUTE = 277, /* EXECUTE */
SQL_EXPLAIN = 278, /* EXPLAIN */
SQL_INTEGER = 279, /* INTEGER */
SQL_NATURAL = 280, /* NATURAL */
SQL_PREPARE = 281, /* PREPARE */
SQL_PRIMARY = 282, /* PRIMARY */
SQL_SCHEMAS = 283, /* SCHEMAS */
SQL_CHARACTER = 284, /* CHARACTER */
SQL_VARYING = 285, /* VARYING */
SQL_REAL = 286, /* REAL */
SQL_DECIMAL = 287, /* DECIMAL */
SQL_SMALLINT = 288, /* SMALLINT */
SQL_SPATIAL = 289, /* SPATIAL */
SQL_VARCHAR = 290, /* VARCHAR */
SQL_VIRTUAL = 291, /* VIRTUAL */
SQL_DESCRIBE = 292, /* DESCRIBE */
SQL_BEFORE = 293, /* BEFORE */
SQL_COLUMN = 294, /* COLUMN */
SQL_CREATE = 295, /* CREATE */
SQL_DELETE = 296, /* DELETE */
SQL_DIRECT = 297, /* DIRECT */
SQL_DOUBLE = 298, /* DOUBLE */
SQL_ESCAPE = 299, /* ESCAPE */
SQL_EXCEPT = 300, /* EXCEPT */
SQL_EXISTS = 301, /* EXISTS */
SQL_EXTRACT = 302, /* EXTRACT */
SQL_CAST = 303, /* CAST */
SQL_FORMAT = 304, /* FORMAT */
SQL_GLOBAL = 305, /* GLOBAL */
SQL_HAVING = 306, /* HAVING */
SQL_IMPORT = 307, /* IMPORT */
SQL_INSERT = 308, /* INSERT */
SQL_ISNULL = 309, /* ISNULL */
SQL_OFFSET = 310, /* OFFSET */
SQL_RENAME = 311, /* RENAME */
SQL_SCHEMA = 312, /* SCHEMA */
SQL_SELECT = 313, /* SELECT */
SQL_SORTED = 314, /* SORTED */
SQL_TABLES = 315, /* TABLES */
SQL_UNIQUE = 316, /* UNIQUE */
SQL_UNLOAD = 317, /* UNLOAD */
SQL_UPDATE = 318, /* UPDATE */
SQL_VALUES = 319, /* VALUES */
SQL_AFTER = 320, /* AFTER */
SQL_ALTER = 321, /* ALTER */
SQL_CROSS = 322, /* CROSS */
SQL_DELTA = 323, /* DELTA */
SQL_FLOAT = 324, /* FLOAT */
SQL_GROUP = 325, /* GROUP */
SQL_INDEX = 326, /* INDEX */
SQL_INNER = 327, /* INNER */
SQL_LIMIT = 328, /* LIMIT */
SQL_LOCAL = 329, /* LOCAL */
SQL_MERGE = 330, /* MERGE */
SQL_MINUS = 331, /* MINUS */
SQL_ORDER = 332, /* ORDER */
SQL_OUTER = 333, /* OUTER */
SQL_RIGHT = 334, /* RIGHT */
SQL_TABLE = 335, /* TABLE */
SQL_UNION = 336, /* UNION */
SQL_USING = 337, /* USING */
SQL_WHERE = 338, /* WHERE */
SQL_CALL = 339, /* CALL */
SQL_CASE = 340, /* CASE */
SQL_CHAR = 341, /* CHAR */
SQL_COPY = 342, /* COPY */
SQL_DATE = 343, /* DATE */
SQL_DATETIME = 344, /* DATETIME */
SQL_DESC = 345, /* DESC */
SQL_DROP = 346, /* DROP */
SQL_ELSE = 347, /* ELSE */
SQL_FILE = 348, /* FILE */
SQL_FROM = 349, /* FROM */
SQL_FULL = 350, /* FULL */
SQL_HASH = 351, /* HASH */
SQL_HINT = 352, /* HINT */
SQL_INTO = 353, /* INTO */
SQL_JOIN = 354, /* JOIN */
SQL_LEFT = 355, /* LEFT */
SQL_LIKE = 356, /* LIKE */
SQL_LOAD = 357, /* LOAD */
SQL_LONG = 358, /* LONG */
SQL_NULL = 359, /* NULL */
SQL_PLAN = 360, /* PLAN */
SQL_SHOW = 361, /* SHOW */
SQL_TEXT = 362, /* TEXT */
SQL_THEN = 363, /* THEN */
SQL_TIME = 364, /* TIME */
SQL_VIEW = 365, /* VIEW */
SQL_WHEN = 366, /* WHEN */
SQL_WITH = 367, /* WITH */
SQL_ADD = 368, /* ADD */
SQL_ALL = 369, /* ALL */
SQL_AND = 370, /* AND */
SQL_ASC = 371, /* ASC */
SQL_END = 372, /* END */
SQL_FOR = 373, /* FOR */
SQL_INT = 374, /* INT */
SQL_KEY = 375, /* KEY */
SQL_NOT = 376, /* NOT */
SQL_OFF = 377, /* OFF */
SQL_SET = 378, /* SET */
SQL_TOP = 379, /* TOP */
SQL_AS = 380, /* AS */
SQL_BY = 381, /* BY */
SQL_IF = 382, /* IF */
SQL_IN = 383, /* IN */
SQL_IS = 384, /* IS */
SQL_OF = 385, /* OF */
SQL_ON = 386, /* ON */
SQL_OR = 387, /* OR */
SQL_TO = 388, /* TO */
SQL_ARRAY = 389, /* ARRAY */
SQL_CONCAT = 390, /* CONCAT */
SQL_ILIKE = 391, /* ILIKE */
SQL_SECOND = 392, /* SECOND */
SQL_MINUTE = 393, /* MINUTE */
SQL_HOUR = 394, /* HOUR */
SQL_DAY = 395, /* DAY */
SQL_MONTH = 396, /* MONTH */
SQL_YEAR = 397, /* YEAR */
SQL_SECONDS = 398, /* SECONDS */
SQL_MINUTES = 399, /* MINUTES */
SQL_HOURS = 400, /* HOURS */
SQL_DAYS = 401, /* DAYS */
SQL_MONTHS = 402, /* MONTHS */
SQL_YEARS = 403, /* YEARS */
SQL_INTERVAL = 404, /* INTERVAL */
SQL_TRUE = 405, /* TRUE */
SQL_FALSE = 406, /* FALSE */
SQL_TRANSACTION = 407, /* TRANSACTION */
SQL_BEGIN = 408, /* BEGIN */
SQL_COMMIT = 409, /* COMMIT */
SQL_ROLLBACK = 410, /* ROLLBACK */
SQL_EQUALS = 411, /* EQUALS */
SQL_NOTEQUALS = 412, /* NOTEQUALS */
SQL_LESS = 413, /* LESS */
SQL_GREATER = 414, /* GREATER */
SQL_LESSEQ = 415, /* LESSEQ */
SQL_GREATEREQ = 416, /* GREATEREQ */
SQL_NOTNULL = 417, /* NOTNULL */
SQL_UMINUS = 418 /* UMINUS */
SQL_YYEOF = 0, /* "end of file" */
SQL_HSQL_error = 256, /* error */
SQL_HSQL_UNDEF = 257, /* "invalid token" */
SQL_IDENTIFIER = 258, /* IDENTIFIER */
SQL_STRING = 259, /* STRING */
SQL_FLOATVAL = 260, /* FLOATVAL */
SQL_INTVAL = 261, /* INTVAL */
SQL_DEALLOCATE = 262, /* DEALLOCATE */
SQL_PARAMETERS = 263, /* PARAMETERS */
SQL_INTERSECT = 264, /* INTERSECT */
SQL_TEMPORARY = 265, /* TEMPORARY */
SQL_TIMESTAMP = 266, /* TIMESTAMP */
SQL_DISTINCT = 267, /* DISTINCT */
SQL_NVARCHAR = 268, /* NVARCHAR */
SQL_RESTRICT = 269, /* RESTRICT */
SQL_TRUNCATE = 270, /* TRUNCATE */
SQL_ANALYZE = 271, /* ANALYZE */
SQL_BETWEEN = 272, /* BETWEEN */
SQL_CASCADE = 273, /* CASCADE */
SQL_COLUMNS = 274, /* COLUMNS */
SQL_CONTROL = 275, /* CONTROL */
SQL_DEFAULT = 276, /* DEFAULT */
SQL_EXECUTE = 277, /* EXECUTE */
SQL_EXPLAIN = 278, /* EXPLAIN */
SQL_INTEGER = 279, /* INTEGER */
SQL_NATURAL = 280, /* NATURAL */
SQL_PREPARE = 281, /* PREPARE */
SQL_PRIMARY = 282, /* PRIMARY */
SQL_SCHEMAS = 283, /* SCHEMAS */
SQL_CHARACTER_VARYING = 284, /* CHARACTER_VARYING */
SQL_REAL = 285, /* REAL */
SQL_DECIMAL = 286, /* DECIMAL */
SQL_SMALLINT = 287, /* SMALLINT */
SQL_SPATIAL = 288, /* SPATIAL */
SQL_VARCHAR = 289, /* VARCHAR */
SQL_VIRTUAL = 290, /* VIRTUAL */
SQL_DESCRIBE = 291, /* DESCRIBE */
SQL_BEFORE = 292, /* BEFORE */
SQL_COLUMN = 293, /* COLUMN */
SQL_CREATE = 294, /* CREATE */
SQL_DELETE = 295, /* DELETE */
SQL_DIRECT = 296, /* DIRECT */
SQL_DOUBLE = 297, /* DOUBLE */
SQL_ESCAPE = 298, /* ESCAPE */
SQL_EXCEPT = 299, /* EXCEPT */
SQL_EXISTS = 300, /* EXISTS */
SQL_EXTRACT = 301, /* EXTRACT */
SQL_CAST = 302, /* CAST */
SQL_FORMAT = 303, /* FORMAT */
SQL_GLOBAL = 304, /* GLOBAL */
SQL_HAVING = 305, /* HAVING */
SQL_IMPORT = 306, /* IMPORT */
SQL_INSERT = 307, /* INSERT */
SQL_ISNULL = 308, /* ISNULL */
SQL_OFFSET = 309, /* OFFSET */
SQL_RENAME = 310, /* RENAME */
SQL_SCHEMA = 311, /* SCHEMA */
SQL_SELECT = 312, /* SELECT */
SQL_SORTED = 313, /* SORTED */
SQL_TABLES = 314, /* TABLES */
SQL_UNIQUE = 315, /* UNIQUE */
SQL_UNLOAD = 316, /* UNLOAD */
SQL_UPDATE = 317, /* UPDATE */
SQL_VALUES = 318, /* VALUES */
SQL_AFTER = 319, /* AFTER */
SQL_ALTER = 320, /* ALTER */
SQL_CROSS = 321, /* CROSS */
SQL_DELTA = 322, /* DELTA */
SQL_FLOAT = 323, /* FLOAT */
SQL_GROUP = 324, /* GROUP */
SQL_INDEX = 325, /* INDEX */
SQL_INNER = 326, /* INNER */
SQL_LIMIT = 327, /* LIMIT */
SQL_LOCAL = 328, /* LOCAL */
SQL_MERGE = 329, /* MERGE */
SQL_MINUS = 330, /* MINUS */
SQL_ORDER = 331, /* ORDER */
SQL_OUTER = 332, /* OUTER */
SQL_RIGHT = 333, /* RIGHT */
SQL_TABLE = 334, /* TABLE */
SQL_UNION = 335, /* UNION */
SQL_USING = 336, /* USING */
SQL_WHERE = 337, /* WHERE */
SQL_CALL = 338, /* CALL */
SQL_CASE = 339, /* CASE */
SQL_CHAR = 340, /* CHAR */
SQL_COPY = 341, /* COPY */
SQL_DATE = 342, /* DATE */
SQL_DATETIME = 343, /* DATETIME */
SQL_DESC = 344, /* DESC */
SQL_DROP = 345, /* DROP */
SQL_ELSE = 346, /* ELSE */
SQL_FILE = 347, /* FILE */
SQL_FROM = 348, /* FROM */
SQL_FULL = 349, /* FULL */
SQL_HASH = 350, /* HASH */
SQL_HINT = 351, /* HINT */
SQL_INTO = 352, /* INTO */
SQL_JOIN = 353, /* JOIN */
SQL_LEFT = 354, /* LEFT */
SQL_LIKE = 355, /* LIKE */
SQL_LOAD = 356, /* LOAD */
SQL_LONG = 357, /* LONG */
SQL_NULL = 358, /* NULL */
SQL_PLAN = 359, /* PLAN */
SQL_SHOW = 360, /* SHOW */
SQL_TEXT = 361, /* TEXT */
SQL_THEN = 362, /* THEN */
SQL_TIME = 363, /* TIME */
SQL_VIEW = 364, /* VIEW */
SQL_WHEN = 365, /* WHEN */
SQL_WITH = 366, /* WITH */
SQL_ADD = 367, /* ADD */
SQL_ALL = 368, /* ALL */
SQL_AND = 369, /* AND */
SQL_ASC = 370, /* ASC */
SQL_END = 371, /* END */
SQL_FOR = 372, /* FOR */
SQL_INT = 373, /* INT */
SQL_KEY = 374, /* KEY */
SQL_NOT = 375, /* NOT */
SQL_OFF = 376, /* OFF */
SQL_SET = 377, /* SET */
SQL_TOP = 378, /* TOP */
SQL_AS = 379, /* AS */
SQL_BY = 380, /* BY */
SQL_IF = 381, /* IF */
SQL_IN = 382, /* IN */
SQL_IS = 383, /* IS */
SQL_OF = 384, /* OF */
SQL_ON = 385, /* ON */
SQL_OR = 386, /* OR */
SQL_TO = 387, /* TO */
SQL_ARRAY = 388, /* ARRAY */
SQL_CONCAT = 389, /* CONCAT */
SQL_ILIKE = 390, /* ILIKE */
SQL_SECOND = 391, /* SECOND */
SQL_MINUTE = 392, /* MINUTE */
SQL_HOUR = 393, /* HOUR */
SQL_DAY = 394, /* DAY */
SQL_MONTH = 395, /* MONTH */
SQL_YEAR = 396, /* YEAR */
SQL_SECONDS = 397, /* SECONDS */
SQL_MINUTES = 398, /* MINUTES */
SQL_HOURS = 399, /* HOURS */
SQL_DAYS = 400, /* DAYS */
SQL_MONTHS = 401, /* MONTHS */
SQL_YEARS = 402, /* YEARS */
SQL_INTERVAL = 403, /* INTERVAL */
SQL_TRUE = 404, /* TRUE */
SQL_FALSE = 405, /* FALSE */
SQL_TRANSACTION = 406, /* TRANSACTION */
SQL_BEGIN = 407, /* BEGIN */
SQL_COMMIT = 408, /* COMMIT */
SQL_ROLLBACK = 409, /* ROLLBACK */
SQL_EQUALS = 410, /* EQUALS */
SQL_NOTEQUALS = 411, /* NOTEQUALS */
SQL_LESS = 412, /* LESS */
SQL_GREATER = 413, /* GREATER */
SQL_LESSEQ = 414, /* LESSEQ */
SQL_GREATEREQ = 415, /* GREATEREQ */
SQL_NOTNULL = 416, /* NOTNULL */
SQL_UMINUS = 417 /* UMINUS */
};
typedef enum hsql_tokentype hsql_token_kind_t;
#endif
Expand Down Expand Up @@ -314,7 +313,7 @@ union HSQL_STYPE {

std::pair<int64_t, int64_t>* ival_pair;

#line 320 "bison_parser.h"
#line 319 "bison_parser.h"
};
typedef union HSQL_STYPE HSQL_STYPE;
#define HSQL_STYPE_IS_TRIVIAL 1
Expand Down
4 changes: 2 additions & 2 deletions src/parser/bison_parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@
%token DEALLOCATE PARAMETERS INTERSECT TEMPORARY TIMESTAMP
%token DISTINCT NVARCHAR RESTRICT TRUNCATE ANALYZE BETWEEN
%token CASCADE COLUMNS CONTROL DEFAULT EXECUTE EXPLAIN
%token INTEGER NATURAL PREPARE PRIMARY SCHEMAS CHARACTER VARYING REAL DECIMAL SMALLINT
%token INTEGER NATURAL PREPARE PRIMARY SCHEMAS CHARACTER_VARYING REAL DECIMAL SMALLINT
%token SPATIAL VARCHAR VIRTUAL DESCRIBE BEFORE COLUMN CREATE DELETE DIRECT
%token DOUBLE ESCAPE EXCEPT EXISTS EXTRACT CAST FORMAT GLOBAL HAVING IMPORT
%token INSERT ISNULL OFFSET RENAME SCHEMA SELECT SORTED
Expand Down Expand Up @@ -544,7 +544,7 @@ column_def : IDENTIFIER column_type opt_column_constraints {

column_type : INT { $$ = ColumnType{DataType::INT}; }
| CHAR '(' INTVAL ')' { $$ = ColumnType{DataType::CHAR, $3}; }
| CHARACTER VARYING '(' INTVAL ')' { $$ = ColumnType{DataType::VARCHAR, $4}; }
| CHARACTER_VARYING '(' INTVAL ')' { $$ = ColumnType{DataType::VARCHAR, $3}; }
| DATE { $$ = ColumnType{DataType::DATE}; };
| DATETIME { $$ = ColumnType{DataType::DATETIME}; }
| DECIMAL opt_decimal_specification {
Expand Down
Loading

0 comments on commit f497192

Please sign in to comment.