diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
index f2b9b2c1a3ad5..a404e7441a1bd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala
@@ -124,6 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val OVERWRITE = Keyword("OVERWRITE")
   protected val LIKE = Keyword("LIKE")
   protected val RLIKE = Keyword("RLIKE")
+  protected val UPPER = Keyword("UPPER")
+  protected val LOWER = Keyword("LOWER")
   protected val REGEXP = Keyword("REGEXP")
   protected val ORDER = Keyword("ORDER")
   protected val OUTER = Keyword("OUTER")
@@ -329,6 +331,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } |
     MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } |
     MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } |
+    UPPER ~> "(" ~> expression <~ ")" ^^ { case exp => Upper(exp) } |
+    LOWER ~> "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } |
     IF ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
       case c ~ "," ~ t ~ "," ~ f => If(c,t,f)
     } |
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index ddc16ce87b895..dcded0774180e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -70,6 +70,35 @@ trait StringRegexExpression {
   }
 }
 
+/**
+ * Shared evaluation logic for string functions that change letter case.
+ * Implementations supply `convert`; null handling lives in `eval`.
+ */
+trait CaseConversionExpression {
+  self: UnaryExpression =>
+
+  type EvaluatedType = Any
+
+  /** Applies the case conversion to a non-null input string. */
+  def convert(v: String): String
+
+  def nullable: Boolean = child.nullable
+  def dataType: DataType = StringType
+
+  /**
+   * Evaluates the child and converts its string form. A null child value
+   * yields null instead of failing on `toString`.
+   */
+  override def eval(input: Row): Any = {
+    val evaluated = child.eval(input)
+    if (evaluated == null) {
+      null
+    } else {
+      convert(evaluated.toString)
+    }
+  }
+}
+
 /**
  * Simple RegEx pattern matching function
  */
@@ -115,3 +144,19 @@ case class RLike(left: Expression, right: Expression)
   override def escape(v: String): String = v
   override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0)
 }
+
+/**
+ * A function that converts the characters of a string to uppercase.
+ */
+case class Upper(child: Expression) extends UnaryExpression with CaseConversionExpression {
+
+  override def convert(v: String): String = v.toUpperCase()
+}
+
+/**
+ * A function that converts the characters of a string to lowercase.
+ */
+case class Lower(child: Expression) extends UnaryExpression with CaseConversionExpression {
+
+  override def convert(v: String): String = v.toLowerCase()
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 189dccd5253e5..95860e6683f67 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -313,4 +313,27 @@ class SQLQuerySuite extends QueryTest {
         (3, "C"),
         (4, "D")))
   }
+
+  test("system function upper()") {
+    checkAnswer(
+      sql("SELECT n,UPPER(l) FROM lowerCaseData"),
+      Seq(
+        (1, "A"),
+        (2, "B"),
+        (3, "C"),
+        (4, "D")))
+  }
+
+  test("system function lower()") {
+    checkAnswer(
+      sql("SELECT N,LOWER(L) FROM upperCaseData"),
+      Seq(
+        (1, "a"),
+        (2, "b"),
+        (3, "c"),
+        (4, "d"),
+        (5, "e"),
+        (6, "f")))
+  }
+
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 93b9057a23816..e8a3ee5535b6e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -781,6 +781,10 @@ private[hive] object HiveQl {
   val COUNT = "(?i)COUNT".r
   val AVG = "(?i)AVG".r
   val SUM = "(?i)SUM".r
+  val MAX = "(?i)MAX".r
+  val MIN = "(?i)MIN".r
+  val UPPER = "(?i)UPPER".r
+  val LOWER = "(?i)LOWER".r
   val RAND = "(?i)RAND".r
   val AND = "(?i)AND".r
   val OR = "(?i)OR".r
@@ -817,7 +821,13 @@ private[hive] object HiveQl {
     case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) => CountDistinct(args.map(nodeToExpr))
     case Token("TOK_FUNCTION", Token(SUM(), Nil) :: arg :: Nil) => Sum(nodeToExpr(arg))
     case Token("TOK_FUNCTIONDI", Token(SUM(), Nil) :: arg :: Nil) => SumDistinct(nodeToExpr(arg))
-
+    case Token("TOK_FUNCTION", Token(MAX(), Nil) :: arg :: Nil) => Max(nodeToExpr(arg))
+    case Token("TOK_FUNCTION", Token(MIN(), Nil) :: arg :: Nil) => Min(nodeToExpr(arg))
+
+    /* String case-conversion functions */
+    case Token("TOK_FUNCTION", Token(UPPER(), Nil) :: arg :: Nil) => Upper(nodeToExpr(arg))
+    case Token("TOK_FUNCTION", Token(LOWER(), Nil) :: arg :: Nil) => Lower(nodeToExpr(arg))
+
     /* Casts */
     case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
       Cast(nodeToExpr(arg), StringType)