Skip to content

Commit

Permalink
support soundex
Browse files Browse the repository at this point in the history
  • Loading branch information
hujy committed Jul 23, 2015
1 parent 7ce416b commit c79482d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -605,10 +605,9 @@ case class InitCap(child: Expression) extends UnaryExpression
override def inputTypes: Seq[DataType] = Seq(StringType)

override def nullSafeEval(string: Any): Any = {
if (string.asInstanceOf[UTF8String].getBytes.length == 0) {
if (string.asInstanceOf[UTF8String].numBytes() == 0) {
return string
}
else {
} else {
val sb = new StringBuffer()
sb.append(string)
sb.setCharAt(0, sb.charAt(0).toUpper)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,19 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Decode(b, Literal.create(null, StringType)), null, create_row(null))
}

test("initcap unit test") {
  // InitCap declares a StringType input, so the null case uses an explicitly typed
  // null literal (same form as the neighbouring Decode/Levenshtein tests) rather
  // than an untyped Literal(null).
  checkEvaluation(InitCap(Literal.create(null, StringType)), null, create_row("s1"))
  // Empty input is returned unchanged.
  checkEvaluation(InitCap(Literal("")), "", create_row("s2"))
  // The first letter of every whitespace-separated word is upper-cased.
  checkEvaluation(InitCap(Literal("a b")), "A B", create_row("s3"))
  // Leading whitespace is preserved.
  checkEvaluation(InitCap(Literal(" a")), " A", create_row("s4"))
  checkEvaluation(InitCap(Literal("the test")), "The Test", create_row("s5"))
  // scalastyle:off
  // Non-ASCII characters are not allowed in the code, so scalastyle is disabled here.
  // Characters without an upper-case form must come back unchanged.
  checkEvaluation(InitCap(Literal("世界")), "世界", create_row("s6"))
  // scalastyle:on
}


test("Levenshtein distance") {
checkEvaluation(Levenshtein(Literal.create(null, StringType), Literal("")), null)
checkEvaluation(Levenshtein(Literal(""), Literal.create(null, StringType)), null)
Expand Down
8 changes: 0 additions & 8 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1773,14 +1773,6 @@ object functions {
*/
def initcap(e: Column): Column = InitCap(e.expr)

/**
 * Column-name variant of `initcap`: looks the column up by name and delegates to
 * the `Column`-based overload.
 *
 * Returns string, with the first letter of each word in uppercase,
 * all other letters in lowercase. Words are delimited by whitespace.
 *
 * @group string_funcs
 * @since 1.5.0
 */
def initcap(columnName: String): Column = {
  val namedColumn = Column(columnName)
  initcap(namedColumn)
}

/**
* Locate the position of the first occurrence of substr column in the given string.
Expand Down

0 comments on commit c79482d

Please sign in to comment.