Skip to content

Commit

Permalink
[SPARK-1995][SQL] system function upper and lower can be supported
Browse files Browse the repository at this point in the history
I'm not sure whether now is the right time to implement string system functions in Spark SQL.

Author: egraldlo <[email protected]>

Closes #936 from egraldlo/stringoperator and squashes the following commits:

3c6c60a [egraldlo] Add UPPER, LOWER, MAX and MIN into hive parser
ea76d0a [egraldlo] modify the formatting issues
b49f25e [egraldlo] modify the formatting issues
1f0bbb5 [egraldlo] system function upper and lower supported
13d3267 [egraldlo] system function upper and lower supported
  • Loading branch information
egraldlo authored and marmbrus committed Jun 3, 2014
1 parent d000ca9 commit ec8be27
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
protected val OVERWRITE = Keyword("OVERWRITE")
protected val LIKE = Keyword("LIKE")
protected val RLIKE = Keyword("RLIKE")
// Keywords for the string case-conversion functions, parsed as UPPER(expr) and LOWER(expr).
protected val UPPER = Keyword("UPPER")
protected val LOWER = Keyword("LOWER")
protected val REGEXP = Keyword("REGEXP")
protected val ORDER = Keyword("ORDER")
protected val OUTER = Keyword("OUTER")
Expand Down Expand Up @@ -329,6 +331,8 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } |
MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } |
MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } |
UPPER ~> "(" ~> expression <~ ")" ^^ { case exp => Upper(exp) } |
LOWER ~> "(" ~> expression <~ ")" ^^ { case exp => Lower(exp) } |
IF ~> "(" ~> expression ~ "," ~ expression ~ "," ~ expression <~ ")" ^^ {
case c ~ "," ~ t ~ "," ~ f => If(c,t,f)
} |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,22 @@ trait StringRegexExpression {
}
}

/**
 * Shared behaviour for unary expressions that change the case of a string.
 *
 * Implementations only supply [[convert]]; evaluation, nullability and the
 * result type are defined here.
 */
trait CaseConversionExpression {
  self: UnaryExpression =>

  type EvaluatedType = Any

  /**
   * Converts the case of a non-null string.
   *
   * @param v the string form of the child's value; never null when called from [[eval]]
   * @return the case-converted string
   */
  def convert(v: String): String

  /** The result can be null exactly when the child expression can be null. */
  def nullable: Boolean = child.nullable

  /** The result of a case conversion is always a string. */
  def dataType: DataType = StringType

  /**
   * Evaluates the child against `input` and converts the case of its string form.
   *
   * @param input the row the child expression is evaluated against
   * @return null when the child evaluates to null, otherwise the converted string
   */
  override def eval(input: Row): Any = {
    val evaluated = child.eval(input)
    // A null child value must propagate as null (consistent with `nullable` above)
    // rather than failing with a NullPointerException on `toString`.
    if (evaluated == null) {
      null
    } else {
      convert(evaluated.toString)
    }
  }
}

/**
* Simple RegEx pattern matching function
*/
Expand Down Expand Up @@ -115,3 +131,19 @@ case class RLike(left: Expression, right: Expression)
override def escape(v: String): String = v
override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0)
}

/**
 * Expression that upper-cases the string form of its child's value.
 *
 * The conversion uses `String.toUpperCase()` without an explicit locale, so it
 * follows the JVM's default locale.
 */
case class Upper(child: Expression) extends UnaryExpression with CaseConversionExpression {

  override def convert(v: String): String = {
    v.toUpperCase()
  }
}

/**
 * Expression that lower-cases the string form of its child's value.
 *
 * The conversion uses `String.toLowerCase()` without an explicit locale, so it
 * follows the JVM's default locale.
 */
case class Lower(child: Expression) extends UnaryExpression with CaseConversionExpression {

  override def convert(v: String): String = {
    v.toLowerCase()
  }
}
23 changes: 23 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -313,4 +313,27 @@ class SQLQuerySuite extends QueryTest {
(3, "C"),
(4, "D")))
}

// UPPER(...) applied to a column should return the upper-cased value for every row.
// NOTE(review): presumably lowerCaseData holds rows (1,"a")..(4,"d") — the fixture is
// defined outside this view; confirm against the test data.
test("system function upper()") {
  val upperCased = sql("SELECT n,UPPER(l) FROM lowerCaseData")
  val expectedRows = Seq(
    (1, "A"),
    (2, "B"),
    (3, "C"),
    (4, "D"))

  checkAnswer(upperCased, expectedRows)
}

// LOWER(...) applied to a column should return the lower-cased value for every row.
// NOTE(review): presumably upperCaseData holds rows (1,"A")..(6,"F") — the fixture is
// defined outside this view; confirm against the test data.
test("system function lower()") {
  val lowerCased = sql("SELECT N,LOWER(L) FROM upperCaseData")
  val expectedRows = Seq(
    (1, "a"),
    (2, "b"),
    (3, "c"),
    (4, "d"),
    (5, "e"),
    (6, "f"))

  checkAnswer(lowerCased, expectedRows)
}

}
12 changes: 11 additions & 1 deletion sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,10 @@ private[hive] object HiveQl {
// Case-insensitive regexes (the "(?i)" flag). The function-name ones are used as
// extractors when matching function tokens, e.g. Token(UPPER(), Nil).
val COUNT = "(?i)COUNT".r
val AVG = "(?i)AVG".r
val SUM = "(?i)SUM".r
val MAX = "(?i)MAX".r
val MIN = "(?i)MIN".r
// String case-conversion functions.
val UPPER = "(?i)UPPER".r
val LOWER = "(?i)LOWER".r
val RAND = "(?i)RAND".r
val AND = "(?i)AND".r
val OR = "(?i)OR".r
Expand Down Expand Up @@ -817,7 +821,13 @@ private[hive] object HiveQl {
case Token("TOK_FUNCTIONDI", Token(COUNT(), Nil) :: args) => CountDistinct(args.map(nodeToExpr))
case Token("TOK_FUNCTION", Token(SUM(), Nil) :: arg :: Nil) => Sum(nodeToExpr(arg))
case Token("TOK_FUNCTIONDI", Token(SUM(), Nil) :: arg :: Nil) => SumDistinct(nodeToExpr(arg))

case Token("TOK_FUNCTION", Token(MAX(), Nil) :: arg :: Nil) => Max(nodeToExpr(arg))
case Token("TOK_FUNCTION", Token(MIN(), Nil) :: arg :: Nil) => Min(nodeToExpr(arg))

/* String system functions */
case Token("TOK_FUNCTION", Token(UPPER(), Nil) :: arg :: Nil) => Upper(nodeToExpr(arg))
case Token("TOK_FUNCTION", Token(LOWER(), Nil) :: arg :: Nil) => Lower(nodeToExpr(arg))

/* Casts */
case Token("TOK_FUNCTION", Token("TOK_STRING", Nil) :: arg :: Nil) =>
Cast(nodeToExpr(arg), StringType)
Expand Down

0 comments on commit ec8be27

Please sign in to comment.