Commit a0bb4eb
Initial commit
jovanm-db committed Jan 8, 2025
1 parent 82ef90c commit a0bb4eb
Showing 4 changed files with 33 additions and 1 deletion.

Changed: AbstractStringType

@@ -17,7 +17,7 @@

package org.apache.spark.sql.internal.types

-import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType}
+import org.apache.spark.sql.types.{AbstractDataType, CharType, DataType, StringType, VarcharType}

/**
* AbstractStringType is an abstract class for StringType with collation support.
@@ -29,6 +29,8 @@ abstract class AbstractStringType(supportsTrimCollation: Boolean = false)
override private[sql] def simpleString: String = "string"

override private[sql] def acceptsType(other: DataType): Boolean = other match {
+case CharType(_) | VarcharType(_) =>
+  false
case st: StringType =>
canUseTrimCollation(st) && acceptsStringType(st)
case _ =>

Changed: AnsiTypeCoercion

@@ -177,6 +177,7 @@ object AnsiTypeCoercion extends TypeCoercionBase {
} else {
None
}
+case (CharType(_) | VarcharType(_), _: AbstractStringType) => Some(StringType)
case (_: StringType, _: AbstractStringType) => None

// If a function expects integral type, fractional input is not allowed.

Changed: TypeCoercion

@@ -202,6 +202,7 @@ object TypeCoercion extends TypeCoercionBase {
} else {
null
}
+case (CharType(_) | VarcharType(_), _: AbstractStringType) => StringType
// If the function accepts any numeric type and the input is a string, we follow the hive
// convention and cast that input into a double
case (_: StringType, NumericType) => NumericType.defaultConcreteType
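
Taken together with the AbstractStringType change above, these two rules give CHAR(n)/VARCHAR(n) arguments an explicit cast target whenever a function parameter expects a string-like (AbstractStringType) argument. The standalone Scala sketch below mirrors that dispatch for illustration only; it is not code from this commit, and the helper name stringArgCastTarget is made up here. It assumes only the public org.apache.spark.sql.types classes of this code base, where StringType is a collation-aware class.

    import org.apache.spark.sql.types.{CharType, DataType, StringType, VarcharType}

    // Illustration of the new dispatch for an argument whose expected type is a
    // string-like AbstractStringType:
    //  - CHAR(n)/VARCHAR(n) are now rejected by AbstractStringType.acceptsType,
    //    so both coercion rule sets map them to StringType explicitly;
    //  - plain strings and all other inputs remain covered by the pre-existing rules.
    def stringArgCastTarget(input: DataType): Option[DataType] = input match {
      case CharType(_) | VarcharType(_) => Some(StringType) // added by this commit
      case _ => None                                        // handled elsewhere, as before
    }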

Changed: BasicCharVarcharTestSuite

@@ -920,6 +920,34 @@ class BasicCharVarcharTestSuite extends QueryTest with SharedSparkSession {
}
}
}

test("string expressions") {
def run(typ: String, expected: Row): Unit = {
val df = sql(
s"""
|SELECT
|'X' || '5 '::$typ(5) || 'X',
|LENGTH('5 '::$typ(5)),
|UPPER('a '::$typ(5)),
|LENGTH(UPPER('a '::$typ(5))),
|'"' || UPPER('a '::$typ(5)) || '"',
|REPLACE('abc', 'ab '::$typ(5), 'x'),
|TRANSLATE('abc', 'abc', 'xy '::$typ(5))
""".stripMargin)
assert(df.schema.map(_.dataType) == Seq(StringType, IntegerType, StringType, IntegerType,
StringType, StringType, StringType))
assert(df.collect() === Array(expected))
}

val expected = Row("X5 X", 2, "A ", 2, "\"A \"", "abc", "xy ")
Seq(("char", expected, Row("X5 X", 5, "A ", 5, "\"A \"", "abc", "xy ")),
("varchar", expected, expected)).foreach { case (typ, a, b) =>
run(typ, a)
withSQLConf((SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key, "true")) {
run(typ, b)
}
}
}
}

class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSparkSession {
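
For orientation, the observable behavior pinned down by the new test can be reproduced roughly as follows in spark-shell. This is a sketch, assuming a build that contains this change; the expected numbers are the LENGTH values asserted in the test above.

    import org.apache.spark.sql.internal.SQLConf

    // Default: char/varchar arguments to string functions behave like plain strings.
    spark.sql("SELECT LENGTH('5 '::CHAR(5)), LENGTH('5 '::VARCHAR(5))").show()
    // per the test: 2 and 2

    // With char/varchar type info preserved, a CHAR(5) value is padded to its
    // declared length before the cast to StringType; VARCHAR keeps its own length.
    spark.conf.set(SQLConf.PRESERVE_CHAR_VARCHAR_TYPE_INFO.key, "true")
    spark.sql("SELECT LENGTH('5 '::CHAR(5)), LENGTH('5 '::VARCHAR(5))").show()
    // per the test: 5 and 2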
