Skip to content

Commit

Permalink
[SPARK-50478][SQL] Fix StringType matching
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
In the `canUpCast` method in `UpCastRule.scala`, we matched against the `StringType` object, which does not match collated string types; this PR matches `_: StringType` instead. Similarly, the `needsTimeZone` method in `Cast.scala` matched only the non-collated `StringType`, and now matches `_: StringType` as well.

### Why are the changes needed?
Upcasting to collated strings was disallowed even in cases where upcasting to non-collated strings was allowed. Likewise, casting from timestamp to a collated string was not recognized as needing a time zone.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Tests added in `DataTypeWriteCompatibilitySuite.scala` and `CastSuiteBase.scala`.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#49043 from jovanm-db/upcast_fix.

Lead-authored-by: Jovan Markovic <[email protected]>
Co-authored-by: Maxim Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
jovanm-db and MaxGekk committed Dec 6, 2024
1 parent ede9cfc commit 1d6932c
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ private[sql] object UpCastRule {
case (DateType, TimestampNTZType) => true
case (TimestampNTZType, TimestampType) => true
case (TimestampType, TimestampNTZType) => true
case (_: AtomicType, StringType) => true
case (_: CalendarIntervalType, StringType) => true
case (_: AtomicType, _: StringType) => true
case (_: CalendarIntervalType, _: StringType) => true
case (NullType, _) => true

// Spark supports casting between long and timestamp, please see `longToTimestamp` and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ object Cast extends QueryErrorsBase {
def needsTimeZone(from: DataType, to: DataType): Boolean = (from, to) match {
case (VariantType, _) => true
case (_: StringType, TimestampType) => true
case (TimestampType, StringType) => true
case (TimestampType, _: StringType) => true
case (DateType, TimestampType) => true
case (TimestampType, DateType) => true
case (TimestampType, TimestampNTZType) => true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,8 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
assert(Cast.canUpCast(DateType, TimestampNTZType))
assert(Cast.canUpCast(TimestampType, TimestampNTZType))
assert(Cast.canUpCast(TimestampNTZType, TimestampType))
assert(Cast.canUpCast(IntegerType, StringType("UTF8_LCASE")))
assert(Cast.canUpCast(CalendarIntervalType, StringType("UTF8_LCASE")))
assert(!Cast.canUpCast(TimestampType, DateType))
assert(!Cast.canUpCast(TimestampNTZType, DateType))
}
Expand Down Expand Up @@ -1409,4 +1411,10 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
assert(!Cast(timestampLiteral, TimestampNTZType).resolved)
assert(!Cast(timestampNTZLiteral, TimestampType).resolved)
}

test("Casting between TimestampType and StringType requires timezone") {
  // No time zone argument is passed to Cast here, so the expression is expected to stay
  // unresolved for both the default and the UTF8_LCASE-collated string target.
  val tsLit = Literal.create(1L, TimestampType)
  Seq(StringType, StringType("UTF8_LCASE")).foreach { target =>
    assert(!Cast(tsLit, target).resolved)
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,11 @@ abstract class DataTypeWriteCompatibilityBaseSuite extends SparkFunSuite {
)
}

test("Check string types: cast allowed regardless of collation") {
  // Writing a default-collation string to a UTF8_LCASE-collated string type must be allowed.
  // The label argument says "string types" so it agrees with this test's title and description.
  // NOTE(review): presumably the label only appears in diagnostics; confirm against assertAllowed.
  assertAllowed(StringType, StringType("UTF8_LCASE"),
    "string types", "Should allow writing string to collated string")
}

// Helper functions

def assertAllowed(
Expand Down

0 comments on commit 1d6932c

Please sign in to comment.