From 6168d3fd5a2a64c89d1f1896a1537cc6276a738c Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 6 Jun 2017 00:16:56 +0800 Subject: [PATCH 1/3] Add Function Alias For MOD/POSITION. --- .../spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 ++ .../sql/catalyst/expressions/stringExpressions.scala | 2 ++ .../src/test/resources/sql-tests/inputs/operators.sql | 3 +++ .../resources/sql-tests/inputs/string-functions.sql | 3 +++ .../test/resources/sql-tests/results/operators.sql.out | 10 +++++++++- .../sql-tests/results/string-functions.sql.out | 10 +++++++++- 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 116b26f612e02..4bf360f42034b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -215,6 +215,7 @@ object FunctionRegistry { expression[Log1p]("log1p"), expression[Log2]("log2"), expression[Log]("ln"), + expression[Remainder]("mod"), expression[UnaryMinus]("negative"), expression[Pi]("pi"), expression[Pmod]("pmod"), @@ -300,6 +301,7 @@ object FunctionRegistry { expression[StringTrimLeft]("ltrim"), expression[JsonTuple]("json_tuple"), expression[ParseUrl]("parse_url"), + expression[StringLocate]("position"), expression[FormatString]("printf"), expression[RegExpExtract]("regexp_extract"), expression[RegExpReplace]("regexp_replace"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 035a1afe8b782..6eb92f59477e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -654,6 +654,8 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: """, extended = """ Examples: + > SELECT _FUNC_('bar', 'foobarbar'); + 4 > SELECT _FUNC_('bar', 'foobarbar', 5); 7 """) diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index 7e3b86b76a34a..d37bc9be7a2f4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -70,3 +70,6 @@ select ceiling(1234567890123456); select floor(0); select floor(1); select floor(1234567890123456); + +-- mod +select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null); diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index d82df11251c5b..a1f55ba095dab 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -15,3 +15,6 @@ select replace('abc', 'b'); -- uuid select length(uuid()), (uuid() <> uuid()); + +-- position +select position('bar', 'foobarbar'), position('bar', 'foobarbar', 5), position(null, 'foobarbar'), position('aaads', null); diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 28cfb744193ec..f5855de2038d1 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 45 +-- Number of queries: 46 -- !query 0 @@ -372,3 +372,11 @@ select floor(1234567890123456) struct -- !query 44 output 1234567890123456 + + +-- !query 45 +select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null) +-- !query 45 schema +struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double> +-- !query 45 output +1 NULL 0 NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 4093a7b9fc820..7e06fb35683fe 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 7 +-- Number of queries: 8 -- !query 0 @@ -78,3 +78,11 @@ select length(uuid()), (uuid() <> uuid()) struct -- !query 6 output 36 true + + +-- !query 7 +select position('bar', 'foobarbar'), position('bar', 'foobarbar', 5), position(null, 'foobarbar'), position('aaads', null) +-- !query 7 schema +struct +-- !query 7 output +4 7 NULL NULL From 8cce02bd27a034e017c9b9429e8e19716e28da1e Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 13 Jun 2017 23:21:40 +0800 Subject: [PATCH 2/3] Change parser to support POSITION(substr IN str). --- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 4 ++++ .../spark/sql/catalyst/expressions/arithmetic.scala | 2 ++ .../sql/catalyst/expressions/stringExpressions.scala | 2 ++ .../apache/spark/sql/catalyst/parser/AstBuilder.scala | 7 +++++++ .../src/test/resources/sql-tests/inputs/operators.sql | 2 +- .../resources/sql-tests/inputs/string-functions.sql | 2 +- .../test/resources/sql-tests/results/operators.sql.out | 10 +++++++++- .../sql-tests/results/string-functions.sql.out | 6 +++--- 8 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 43f7ff5cb4a36..128f820728ee8 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -574,6 +574,7 @@ primaryExpression | identifier #columnReference | base=primaryExpression '.' fieldName=identifier #dereference | '(' expression ')' #parenthesizedExpression + | POSITION '(' valueExpression IN valueExpression ')' #position ; constant @@ -720,6 +721,7 @@ nonReserved | SET | RESET | VIEW | REPLACE | IF + | POSITION | NO | DATA | START | TRANSACTION | COMMIT | ROLLBACK | IGNORE | SORT | CLUSTER | DISTRIBUTE | UNSET | TBLPROPERTIES | SKEWED | STORED | DIRECTORIES | LOCATION @@ -851,6 +853,8 @@ IGNORE: 'IGNORE'; IF: 'IF'; +POSITION: 'POSITION'; + EQ : '=' | '=='; NSEQ: '<=>'; NEQ : '<>'; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index f2b252259b89d..ec6e6ba0f091b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -320,6 +320,8 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic Examples: > SELECT 2 _FUNC_ 1.8; 0.2 + > SELECT MOD(2, 1.8); + 0.2 """) case class Remainder(left: Expression, right: Expression) extends BinaryArithmetic { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 6eb92f59477e0..3e3d2fcb44163 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -658,6 +658,8 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: 4 > SELECT _FUNC_('bar', 'foobarbar', 5); 7 + > SELECT POSITION('bar' in 'foobarbar'); + 4 """) // scalastyle:on line.size.limit case class StringLocate(substr: Expression, str: Expression, start: Expression) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index a16611af28a7d..758df980c4d99 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1076,6 +1076,13 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging Last(expression(ctx.expression), Literal(ignoreNullsExpr)).toAggregateExpression() } + /** + * Create a Position expression. + */ + override def visitPosition(ctx: PositionContext): Expression = withOrigin(ctx) { + StringLocate(expression(ctx.valueExpression(0)), expression(ctx.valueExpression(1)), Literal(1)) + } + /** * Create a (windowed) Function expression. */ diff --git a/sql/core/src/test/resources/sql-tests/inputs/operators.sql b/sql/core/src/test/resources/sql-tests/inputs/operators.sql index d2f80c7a1ac79..3934620577e99 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/operators.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/operators.sql @@ -76,7 +76,7 @@ select floor(0.01); select floor(-0.10); -- comparison operator -select 1 > 0.00001 +select 1 > 0.00001; -- mod select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null); diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index a1f55ba095dab..20c0390664037 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -17,4 +17,4 @@ select replace('abc', 'b'); select length(uuid()), (uuid() <> uuid()); -- position -select position('bar', 'foobarbar'), position('bar', 'foobarbar', 5), position(null, 'foobarbar'), position('aaads', null); +select position('bar' in 'foobarbar'), position(null, 'foobarbar'), position('aaads', null); diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index 57e8a612fab44..51ccf764d952f 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 50 +-- Number of queries: 51 -- !query 0 @@ -412,3 +412,11 @@ select 1 > 0.00001 struct<(CAST(1 AS BIGINT) > 0):boolean> -- !query 49 output true + + +-- !query 50 +select mod(7, 2), mod(7, 0), mod(0, 2), mod(7, null), mod(null, 2), mod(null, null) +-- !query 50 schema +struct<(7 % 2):int,(7 % 0):int,(0 % 2):int,(7 % CAST(NULL AS INT)):int,(CAST(NULL AS INT) % 2):int,(CAST(NULL AS DOUBLE) % CAST(NULL AS DOUBLE)):double> +-- !query 50 output +1 NULL 0 NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 7e06fb35683fe..52eb554edf89e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -81,8 +81,8 @@ struct -- !query 7 -select position('bar', 'foobarbar'), position('bar', 'foobarbar', 5), position(null, 'foobarbar'), position('aaads', null) +select position('bar' in 'foobarbar'), position(null, 'foobarbar'), position('aaads', null) -- !query 7 schema -struct +struct -- !query 7 output -4 7 NULL NULL +4 NULL NULL From d0686047427fe9d64c3a473742d135078f0b46a2 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 14 Jun 2017 08:02:56 +0800 Subject: [PATCH 3/3] Change to POSITION '(' substr=valueExpression IN str=valueExpression ')' --- .../antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 3 +-- .../spark/sql/catalyst/expressions/stringExpressions.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../spark/sql/catalyst/parser/TableIdentifierParserSuite.scala | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index 128f820728ee8..ef5648c6dbe47 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -563,6 +563,7 @@ primaryExpression | CAST '(' expression AS dataType ')' #cast | FIRST '(' expression (IGNORE NULLS)? ')' #first | LAST '(' expression (IGNORE NULLS)? ')' #last + | POSITION '(' substr=valueExpression IN str=valueExpression ')' #position | constant #constantDefault | ASTERISK #star | qualifiedName '.' ASTERISK #star @@ -574,7 +575,6 @@ primaryExpression | identifier #columnReference | base=primaryExpression '.' fieldName=identifier #dereference | '(' expression ')' #parenthesizedExpression - | POSITION '(' valueExpression IN valueExpression ')' #position ; constant @@ -852,7 +852,6 @@ MACRO: 'MACRO'; IGNORE: 'IGNORE'; IF: 'IF'; - POSITION: 'POSITION'; EQ : '=' | '=='; diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 3e3d2fcb44163..717ada225a4f1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -658,7 +658,7 @@ case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: 4 > SELECT _FUNC_('bar', 'foobarbar', 5); 7 - > SELECT POSITION('bar' in 'foobarbar'); + > SELECT POSITION('bar' IN 'foobarbar'); 4 """) // scalastyle:on line.size.limit diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 758df980c4d99..500d999c30da7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1080,7 +1080,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging * Create a Position expression. */ override def visitPosition(ctx: PositionContext): Expression = withOrigin(ctx) { - StringLocate(expression(ctx.valueExpression(0)), expression(ctx.valueExpression(1)), Literal(1)) + new StringLocate(expression(ctx.substr), expression(ctx.str)) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index f33abc5b2e049..76be6ee3f50bc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -51,7 +51,7 @@ class TableIdentifierParserSuite extends SparkFunSuite { "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", - "int", "smallint", "timestamp", "at") + "int", "smallint", "timestamp", "at", "position") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",