Skip to content

Commit

Permalink
Merge pull request #363 from iifawzi/support-with-statements
Browse files Browse the repository at this point in the history
Completing, refactoring and improving the `WITH` statements parser
  • Loading branch information
MauricioFauth authored Dec 29, 2021
2 parents dbc80de + c8f72ec commit 403e38d
Show file tree
Hide file tree
Showing 40 changed files with 33,623 additions and 719 deletions.
53 changes: 45 additions & 8 deletions src/Statements/CreateStatement.php
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,17 @@ class CreateStatement extends Statement
*/
public $fields;

/**
* If `CREATE TABLE WITH`.
* If `CREATE TABLE AS WITH`.
* If `CREATE VIEW AS WITH`.
*
* Used by `CREATE TABLE`, `CREATE VIEW`
*
* @var WithStatement|null
*/
public $with;

/**
* If `CREATE TABLE ... SELECT`.
* If `CREATE VIEW AS ` ... SELECT`.
Expand Down Expand Up @@ -436,6 +447,13 @@ public function build()
. Expression::build($this->like);
}

if ($this->with !== null) {
return 'CREATE '
. OptionsArray::build($this->options) . ' '
. Expression::build($this->name) . ' '
. $this->with->build();
}

$partition = '';

if (! empty($this->partitionBy)) {
Expand Down Expand Up @@ -465,10 +483,17 @@ public function build()
. OptionsArray::build($this->entityOptions)
. $partition;
} elseif ($this->options->has('VIEW')) {
$builtStatement = '';
if ($this->select !== null) {
$builtStatement = $this->select->build();
} elseif ($this->with !== null) {
$builtStatement = $this->with->build();
}

return 'CREATE '
. OptionsArray::build($this->options) . ' '
. Expression::build($this->name) . ' '
. $fields . ' AS ' . ($this->select ? $this->select->build() : '')
. $fields . ' AS ' . $builtStatement
. (! empty($this->body) ? TokensList::build($this->body) : '') . ' '
. OptionsArray::build($this->entityOptions);
} elseif ($this->options->has('TRIGGER')) {
Expand Down Expand Up @@ -544,14 +569,22 @@ public function parse(Parser $parser, TokensList $list)
if (($token->type === Token::TYPE_KEYWORD) && ($token->keyword === 'SELECT')) {
/* CREATE TABLE ... SELECT */
$this->select = new SelectStatement($parser, $list);
} elseif ($token->type === Token::TYPE_KEYWORD && ($token->keyword === 'WITH')) {
/* CREATE TABLE WITH */
$this->with = new WithStatement($parser, $list);
} elseif (
($token->type === Token::TYPE_KEYWORD) && ($token->keyword === 'AS')
&& ($list->tokens[$nextidx]->type === Token::TYPE_KEYWORD)
&& ($list->tokens[$nextidx]->value === 'SELECT')
) {
/* CREATE TABLE ... AS SELECT */
$list->idx = $nextidx;
$this->select = new SelectStatement($parser, $list);
if ($list->tokens[$nextidx]->value === 'SELECT') {
/* CREATE TABLE ... AS SELECT */
$list->idx = $nextidx;
$this->select = new SelectStatement($parser, $list);
} elseif ($list->tokens[$nextidx]->value === 'WITH') {
/* CREATE TABLE WITH */
$list->idx = $nextidx;
$this->with = new WithStatement($parser, $list);
}
} elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'LIKE') {
/* CREATE TABLE `new_tbl` LIKE 'orig_tbl' */
$list->idx = $nextidx;
Expand Down Expand Up @@ -707,10 +740,14 @@ public function parse(Parser $parser, TokensList $list)
$token->type === Token::TYPE_KEYWORD
&& $token->keyword === 'AS'
&& $list->tokens[$nextidx]->type === Token::TYPE_KEYWORD
&& $list->tokens[$nextidx]->value === 'SELECT'
) {
$list->idx = $nextidx;
$this->select = new SelectStatement($parser, $list);
if ($list->tokens[$nextidx]->value === 'SELECT') {
$list->idx = $nextidx;
$this->select = new SelectStatement($parser, $list);
} elseif ($list->tokens[$nextidx]->value === 'WITH') {
++$list->idx;
$this->with = new WithStatement($parser, $list);
}
}

// Parsing all other tokens
Expand Down
10 changes: 10 additions & 0 deletions src/Statements/InsertStatement.php
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ class InsertStatement extends Statement
*/
public $select;

/**
* If WITH CTE is present
* holds the WithStatement.
*
* @var WithStatement|null
*/
public $with;

/**
* If ON DUPLICATE KEY UPDATE clause is present
* holds the SetOperation.
Expand Down Expand Up @@ -208,6 +216,8 @@ public function parse(Parser $parser, TokensList $list)
$this->set = SetOperation::parse($parser, $list);
} elseif ($token->keyword === 'SELECT') {
$this->select = new SelectStatement($parser, $list);
} elseif ($token->keyword === 'WITH') {
$this->with = new WithStatement($parser, $list);
} else {
$parser->error('Unexpected keyword.', $token);
break;
Expand Down
205 changes: 168 additions & 37 deletions src/Statements/WithStatement.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ final class WithStatement extends Statement
/** @var WithKeyword[] */
public $withers = [];

/**
* holds the CTE parser.
*
* @var Parser|null
*/
public $cteStatementParser;

/**
* @param Parser $parser the instance that requests parsing
* @param TokensList $list the list of tokens to be parsed
Expand All @@ -68,26 +75,34 @@ final class WithStatement extends Statement
*/
public function parse(Parser $parser, TokensList $list)
{
++$list->idx; // Skipping `WITH`.

// parse any options if provided
$this->options = OptionsArray::parse($parser, $list, static::$OPTIONS);
++$list->idx;

/**
* The state of the parser.
*
* Below are the states of the parser.
*
* 0 ---------------- [ name ] -----------------> 1
* 1 -------------- [( columns )] AS ----------------> 2
* 2 ------------------ [ , ] --------------------> 0
*
* 1 ------------------ [ ( ] ------------------> 2
*
* 2 ------------------ [ AS ] -----------------> 3
*
* 3 ------------------ [ ( ] ------------------> 4
*
* 4 ------------------ [ , ] ------------------> 1
*
* 4 ----- [ SELECT/UPDATE/DELETE/INSERT ] -----> 5
*
* @var int
*/
$state = 0;
$wither = null;

++$list->idx; // Skipping `WITH`.

// parse any options if provided
$this->options = OptionsArray::parse($parser, $list, static::$OPTIONS);
++$list->idx;

for (; $list->idx < $list->count; ++$list->idx) {
/**
* Token parsed at this moment.
Expand All @@ -99,57 +114,165 @@ public function parse(Parser $parser, TokensList $list)
continue;
}

if ($token->type === Token::TYPE_NONE) {
if ($state === 0) {
if ($token->type !== Token::TYPE_NONE) {
$parser->error('The name of the CTE was expected.', $token);
break;
}

$wither = $token->value;
$this->withers[$wither] = new WithKeyword($wither);
$state = 1;
continue;
}

if ($state === 1) {
if ($token->value === '(') {
} elseif ($state === 1) {
if ($token->type === Token::TYPE_OPERATOR && $token->value === '(') {
$this->withers[$wither]->columns = Array2d::parse($parser, $list);
continue;
}

if ($token->keyword === 'AS') {
++$list->idx;
$state = 2;
continue;
} elseif ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS') {
$state = 3;
} else {
$parser->error('Unexpected token.', $token);
break;
}
} elseif ($state === 2) {
if ($token->value === '(') {
++$list->idx;
$subList = $this->getSubTokenList($list);
if ($subList instanceof ParserException) {
$parser->errors[] = $subList;
continue;
}
if (! ($token->type === Token::TYPE_KEYWORD && $token->keyword === 'AS')) {
$parser->error('AS keyword was expected.', $token);
break;
}

$state = 3;
} elseif ($state === 3) {
$idxBeforeGetNext = $list->idx;

$subParser = new Parser($subList);
// We want to get the next non-comment and non-space token after $token
// therefore, the first getNext call will start with the current $idx which's $token,
// will return it and increase $idx by 1, which's not guaranteed to be non-comment
// and non-space, that's why we're calling getNext again.
$list->getNext();
$nextKeyword = $list->getNext();

if (count($subParser->errors)) {
foreach ($subParser->errors as $error) {
$parser->errors[] = $error;
}
if (! ($token->value === '(' && ($nextKeyword && $nextKeyword->value === 'SELECT'))) {
$parser->error('Subquery of the CTE was expected.', $token);
$list->idx = $idxBeforeGetNext;
break;
}

// Restore the index
$list->idx = $idxBeforeGetNext;

++$list->idx;
$subList = $this->getSubTokenList($list);
if ($subList instanceof ParserException) {
$parser->errors[] = $subList;
break;
}

$subParser = new Parser($subList);

if (count($subParser->errors)) {
foreach ($subParser->errors as $error) {
$parser->errors[] = $error;
}

$this->withers[$wither]->statement = $subParser;
continue;
break;
}

// There's another WITH expression to parse, go back to state=0
$this->withers[$wither]->statement = $subParser;

$state = 4;
} elseif ($state === 4) {
if ($token->value === ',') {
$list->idx++;
// There's another WITH expression to parse, go back to state=0
$state = 0;
continue;
}

// No more WITH expressions, we're done with this statement
if (
$token->type === Token::TYPE_KEYWORD && (
$token->value === 'SELECT'
|| $token->value === 'INSERT'
|| $token->value === 'UPDATE'
|| $token->value === 'DELETE'
)
) {
$state = 5;
--$list->idx;
continue;
}

$parser->error('An expression was expected.', $token);
break;
} elseif ($state === 5) {
/**
* We need to parse all of the remaining tokens becuase mostly, they are only the CTE expression
* which's mostly is SELECT, or INSERT, UPDATE, or delete statement.
* e.g: INSERT .. ( SELECT 1 ) SELECT col1 FROM cte ON DUPLICATE KEY UPDATE col_name = 3.
* The issue is that, `ON DUPLICATE KEY UPDATE col_name = 3` is related to the main INSERT query
* not the cte expression (SELECT col1 FROM cte) we need to determine the end of the expression
* to parse `ON DUPLICATE KEY UPDATE` from the InsertStatement parser instead.
*/

// Index of the last parsed token by default would be the last token in the $list, because we're
// assuming that all remaining tokens at state 4, are related to the expression.
$idxOfLastParsedToken = $list->count - 1;
// Index before search to be able to restore the index.
$idxBeforeSearch = $list->idx;
// Length of expression tokens is null by default, in order for the $subList to start
// from $list->idx to the end of the $list.
$lengthOfExpressionTokens = null;

if ($list->getNextOfTypeAndValue(Token::TYPE_KEYWORD, 'ON')) {
// (-1) because getNextOfTypeAndValue returned ON and increased the index.
$idxOfOn = $list->idx - 1;
// We want to make sure that it's `ON DUPLICATE KEY UPDATE`
$dubplicateToken = $list->getNext();
$keyToken = $list->getNext();
$updateToken = $list->getNext();
if (
$dubplicateToken && $dubplicateToken->keyword === 'DUPLICATE'
&& ($keyToken && $keyToken->keyword === 'KEY')
&& ($updateToken && $updateToken->keyword === 'UPDATE')
) {
// Index of the last parsed token will be the token before the ON Keyword
$idxOfLastParsedToken = $idxOfOn - 1;
// The length of the expression tokens would be the difference
// between the first unrelated token `ON` and the idx
// before skipping the CTE tokens.
$lengthOfExpressionTokens = $idxOfOn - $idxBeforeSearch;
}
}

// Restore the index
$list->idx = $idxBeforeSearch;

$subList = new TokensList(array_slice($list->tokens, $list->idx, $lengthOfExpressionTokens));
$subParser = new Parser($subList);
if (count($subParser->errors)) {
foreach ($subParser->errors as $error) {
$parser->errors[] = $error;
}

break;
}

$this->cteStatementParser = $subParser;

$list->idx = $idxOfLastParsedToken;
break;
}
}

// 5 is the only valid end state
if ($state !== 5) {
/**
* Token parsed at this moment.
*
* @var Token
*/
$token = $list->tokens[$list->idx];

$parser->error('Unexpected end of the WITH CTE.', $token);
}

--$list->idx;
}

Expand All @@ -165,6 +288,14 @@ public function build()
$str .= WithKeyword::build($wither);
}

$str .= ' ';

if ($this->cteStatementParser) {
foreach ($this->cteStatementParser->statements as $statement) {
$str .= $statement->build();
}
}

return $str;
}

Expand Down
Loading

0 comments on commit 403e38d

Please sign in to comment.