diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampFamily.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampFamily.scala
new file mode 100644
index 0000000000000..c10670b14c3b0
--- /dev/null
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/TimestampFamily.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.types
+
+/**
+ * Classification helpers shared by every member of the LTZ/NTZ timestamp family: the
+ * microsecond types ([[TimestampType]] / [[TimestampNTZType]]) and the nanosecond-precision
+ * types ([[TimestampLTZNanosType]] / [[TimestampNTZNanosType]]). Keeping the definitions of
+ * "effective fractional-second precision" and "time-zone family" in one place lets up-cast
+ * resolution ([[UpCastRule]]), ANSI store assignment, and common-type resolution share them.
+ */
+private[sql] object TimestampFamily {
+
+  /**
+   * Returns the effective fractional-second precision of `dt`.
+   *
+   * @param dt the data type to classify
+   * @return `Some(6)` for the microsecond types [[TimestampType]] / [[TimestampNTZType]],
+   *         `Some(p)` with the type's own `precision` (in [7, 9]) for the nanosecond types,
+   *         and [[None]] for every other type (for example DATE and TIME)
+   */
+  def fractionalPrecision(dt: DataType): Option[Int] = dt match {
+    case TimestampType | TimestampNTZType => Some(6)
+    case ltzNanos: TimestampLTZNanosType => Some(ltzNanos.precision)
+    case ntzNanos: TimestampNTZNanosType => Some(ntzNanos.precision)
+    case _ => None
+  }
+
+  /**
+   * Tells whether `dt` belongs to the local-time-zone (instant) family, i.e. it is either the
+   * microsecond [[TimestampType]] or a [[TimestampLTZNanosType]].
+   */
+  def isLtz(dt: DataType): Boolean = dt match {
+    case _: TimestampType | _: TimestampLTZNanosType => true
+    case _ => false
+  }
+
+  /**
+   * Tells whether `dt` belongs to the no-time-zone (local) family, i.e. it is either the
+   * microsecond [[TimestampNTZType]] or a [[TimestampNTZNanosType]].
+   */
+  def isNtz(dt: DataType): Boolean = dt match {
+    case _: TimestampNTZType | _: TimestampNTZNanosType => true
+    case _ => false
+  }
+}
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala
index 6272cb03bd797..54de45f6eb8cc 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala
@@ -36,10 +36,20 @@ private[sql] object UpCastRule {
     case (from: NumericType, to: DecimalType) if to.isWiderThan(from) => true
     case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true
     case (f, t) if legalNumericPrecedence(f, t) => true
-    case (DateType, TimestampType) => true
-    case (DateType, TimestampNTZType) => true
-    case (TimestampNTZType, TimestampType) => true
-    case (TimestampType, TimestampNTZType) => true
+    // Widening DATE -> timestamp family (micro or nanos, LTZ or NTZ) is lossless; the reverse
+    // (timestamp -> DATE) drops the time-of-day and is not matched here, so it stays a non-up-cast.
+    case (DateType, t) if TimestampFamily.fractionalPrecision(t).isDefined => true
+    // Lossless widening within the timestamp family: target fractional-second precision >= source.
+ // Covers micros <-> nanos and the cross-family LTZ <-> NTZ pairs (mirroring how the micro + // TimestampType <-> TimestampNTZType pair is a mutual up-cast). Same-type equal precision is + // short-circuited by `from == to` above; cross-family equal precision (e.g. LTZ(7) <-> NTZ(7)) + // is admitted here by the `<=`. The guard keeps non-timestamp pairs falling through to the + // cases below; lossy narrowing falls through to `case _ => false`. + case (f, t) + if TimestampFamily + .fractionalPrecision(f) + .exists(fp => TimestampFamily.fractionalPrecision(t).exists(fp <= _)) => + true case (s1: StringType, s2: StringType) => StringHelper.isMoreConstrained(s1, s2) // TODO: allow upcast from int/double/decimal to char/varchar of sufficient length diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala index e6329e465e004..942a6be948d8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionHelper.scala @@ -83,6 +83,7 @@ import org.apache.spark.sql.types.{ StringType, StringTypeExpression, StructType, + TimestampFamily, TimestampLTZNanosType, TimestampNTZNanosType, TimestampNTZType, @@ -264,27 +265,21 @@ abstract class TypeCoercionHelper { // The (family, precision) pair then maps back to a concrete type: precision 6 yields the // micro type, precision in [7, 9] yields the nanos type. // - // Note: this common-type resolution is intentionally more permissive than the nanosecond - // conversion rules in Cast.canUpCast / Cast.canANSIStoreAssign, which keep cross-family and - // DATE <-> nanos casts explicit-CAST-only while the nanos types are unreleased (SPARK-57323 - // etc.). 
Coercion here mirrors the microsecond precedent so that UNION / CASE / coalesce / - // IN / comparison resolve a common type the same way they do for the micro families; the - // stricter explicit-only stance is deliberately scoped to up-cast and store assignment, not - // to common-type resolution. + // Note: common-type resolution here is symmetric and widens to the maximum precision, while + // Cast.canUpCast / Cast.canANSIStoreAssign are directional (they block lossy narrowing). Both + // now agree on admissibility across the timestamp family -- including the cross-family + // LTZ <-> NTZ pairs and DATE <-> nanos (SPARK-57303) -- mirroring the microsecond precedent + // so that UNION / CASE / coalesce / IN / comparison resolve a common type the same way they + // do for the micro families. case _ => - // Fractional-seconds precision of the microsecond timestamp types; the nanos types carry - // 7-9. DATE has no time component and is treated as the micro precision so that - // DATE <-> micro widens to the micro type and DATE <-> nanos to the nanos type. + // Fractional-seconds precision of the timestamp family (micros: 6, nanos: 7-9). DATE has no + // time component and is treated as the micro precision (getOrElse) so that DATE <-> micro + // widens to the micro type and DATE <-> nanos to the nanos type. 
val MicrosPrecision = 6 - def isLtz(d: DatetimeType): Boolean = - d.isInstanceOf[TimestampType] || d.isInstanceOf[TimestampLTZNanosType] - def isNtz(d: DatetimeType): Boolean = - d.isInstanceOf[TimestampNTZType] || d.isInstanceOf[TimestampNTZNanosType] - def precisionOf(d: DatetimeType): Int = d match { - case t: TimestampLTZNanosType => t.precision - case t: TimestampNTZNanosType => t.precision - case _ => MicrosPrecision // DateType / TimestampType / TimestampNTZType - } + def isLtz(d: DatetimeType): Boolean = TimestampFamily.isLtz(d) + def isNtz(d: DatetimeType): Boolean = TimestampFamily.isNtz(d) + def precisionOf(d: DatetimeType): Int = + TimestampFamily.fractionalPrecision(d).getOrElse(MicrosPrecision) // Beyond TimeType (handled above), the only datetime types are DATE and the micro/nanos // timestamp families. Guard so that a future DatetimeType subtype fails fast here instead // of being silently mis-widened (treated as a family-neutral precision-6 type and folded diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 346047a8ba823..96ebe62b76ad8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -500,30 +500,13 @@ object Cast extends QueryErrorsBase { case (_: NumericType, _: NumericType) => true case (_: AtomicType, _: StringType) => true case (_: CalendarIntervalType, _: StringType) => true - // SPARK-57490: same-family cross-precision nanosecond casts: widening (e.g. TIMESTAMP_NTZ(7) -> - // TIMESTAMP_NTZ(9)) is lossless and allowed as a silent store assignment, while narrowing - // (e.g. (9) -> (7)) drops sub-microsecond digits and stays explicit-only. Equal precision is - // handled by the `from == to` short-circuit above; micros -> nanos widening (e.g. 
TIMESTAMP_NTZ - // -> TIMESTAMP_NTZ(9)) is lossless and falls to the catch-all below. - case (f: TimestampNTZNanosType, t: TimestampNTZNanosType) => f.precision <= t.precision - case (f: TimestampLTZNanosType, t: TimestampLTZNanosType) => f.precision <= t.precision - // SPARK-57323: DATE <-> nanosecond-precision timestamp requires an explicit CAST in both - // directions (nanos -> DATE drops fields; DATE -> nanos is lossless but kept explicit-only - // while the nanos types are unreleased). Stricter than micro DATE <-> TIMESTAMP[_NTZ], which - // the catch-all below allows. - case (DateType, _: AnyTimestampNanoType) => false - case (_: AnyTimestampNanoType, DateType) => false - // SPARK-57293/57511: narrowing any nanosecond timestamp to a microsecond timestamp drops the - // sub-microsecond digits, and cross-family casts additionally reinterpret the value against the - // session time zone; both stay explicit-only rather than silent store assignments while the - // nanos types are unreleased. This covers same-family narrowing (nanos -> micro), cross-family - // nanos <-> nanos, and the mixed micro/nanos pairs at the precision-6 boundary; everything - // matched here is explicit-only. The all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair and micros -> - // nanos same-family widening stay store-assignable via the catch-all below. - case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => false - case (TimestampType, _: TimestampNTZNanosType) => false - case (TimestampNTZType, _: TimestampLTZNanosType) => false - case (_: AnyTimestampNanoType, _: AnyTimestampNanoType) => false + // SPARK-57303: block lossy narrowing across the whole timestamp family (LTZ/NTZ, micros and + // nanos, including the cross-family LTZ <-> NTZ pairs) so store assignment never silently drops + // sub-microsecond digits. 
Lossless widening, equal precision, and DATE <-> timestamp (DATE has + // no fractional precision, so it never matches here) all fall through to the DatetimeType arm + // below, mirroring the micro TIMESTAMP <-> TIMESTAMP_NTZ behavior. + case (f, t) if TimestampFamily.fractionalPrecision(f) + .exists(fp => TimestampFamily.fractionalPrecision(t).exists(fp > _)) => false // SPARK-57585: widening a TIME(p) to a larger precision is lossless and allowed as a silent // store assignment, while narrowing (e.g. TIME(6) -> TIME(3)) drops fractional-seconds digits // and stays explicit-CAST-only. Equal precision is handled by the `from == to` short-circuit. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index 23d5783155fda..8f37f17c1069c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -754,12 +754,14 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } - test("SPARK-57293: nanos<->micros store-assignment and up-cast contract") { + test("SPARK-57303: nanos<->micros store-assignment and up-cast contract") { foreachNanosPrecision { p => - // Explicit-only: neither direction is an up-cast, so STRICT store assignment rejects both. - assert(!Cast.canUpCast(TimestampNTZType, TimestampNTZNanosType(p))) + // Lossless widening micros -> nanos(p) is an up-cast, mirroring the micro precedent where a + // lower-precision timestamp widens to a higher-precision one. + assert(Cast.canUpCast(TimestampNTZType, TimestampNTZNanosType(p))) + assert(Cast.canUpCast(TimestampType, TimestampLTZNanosType(p))) + // Lossy narrowing nanos(p) -> micros drops sub-microsecond digits, so it is not an up-cast. 
assert(!Cast.canUpCast(TimestampNTZNanosType(p), TimestampNTZType)) - assert(!Cast.canUpCast(TimestampType, TimestampLTZNanosType(p))) assert(!Cast.canUpCast(TimestampLTZNanosType(p), TimestampType)) // ANSI store assignment allows the lossless widening micros -> nanos(p) ... @@ -769,18 +771,17 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(!Cast.canANSIStoreAssign(TimestampNTZNanosType(p), TimestampNTZType)) assert(!Cast.canANSIStoreAssign(TimestampLTZNanosType(p), TimestampType)) - // SPARK-57323: DATE <-> nanos requires an explicit CAST in both directions, so STRICT - // store assignment and ANSI store assignment both reject it. STRICT goes through - // Cast.canUpCast, so the assertions below also guard against a future blanket datetime arm - // in UpCastRule silently turning this into a safe store assignment. - assert(!Cast.canUpCast(DateType, TimestampNTZNanosType(p))) + // SPARK-57303: DATE <-> nanos mirrors micro DATE <-> TIMESTAMP[_NTZ]. The lossless widening + // DATE -> nanos is an up-cast and ANSI-store-assignable; the lossy nanos -> DATE drops the + // time-of-day, so it is not an up-cast but is still ANSI-store-assignable. 
+ assert(Cast.canUpCast(DateType, TimestampNTZNanosType(p))) assert(!Cast.canUpCast(TimestampNTZNanosType(p), DateType)) - assert(!Cast.canUpCast(DateType, TimestampLTZNanosType(p))) + assert(Cast.canUpCast(DateType, TimestampLTZNanosType(p))) assert(!Cast.canUpCast(TimestampLTZNanosType(p), DateType)) - assert(!Cast.canANSIStoreAssign(DateType, TimestampNTZNanosType(p))) - assert(!Cast.canANSIStoreAssign(TimestampNTZNanosType(p), DateType)) - assert(!Cast.canANSIStoreAssign(DateType, TimestampLTZNanosType(p))) - assert(!Cast.canANSIStoreAssign(TimestampLTZNanosType(p), DateType)) + assert(Cast.canANSIStoreAssign(DateType, TimestampNTZNanosType(p))) + assert(Cast.canANSIStoreAssign(TimestampNTZNanosType(p), DateType)) + assert(Cast.canANSIStoreAssign(DateType, TimestampLTZNanosType(p))) + assert(Cast.canANSIStoreAssign(TimestampLTZNanosType(p), DateType)) } } @@ -789,10 +790,11 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { p1 <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION p2 <- TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION } { - // Cross-precision nanos casts are never up-casts (only equal precision is, via from == to), - // matching the micros <-> nanos precedent above; STRICT store assignment rejects them. - assert(Cast.canUpCast(TimestampNTZNanosType(p1), TimestampNTZNanosType(p2)) == (p1 == p2)) - assert(Cast.canUpCast(TimestampLTZNanosType(p1), TimestampLTZNanosType(p2)) == (p1 == p2)) + // Lossless widening (p1 <= p2) is an up-cast; lossy narrowing (p1 > p2) is not, matching the + // micros <-> nanos precedent above. STRICT store assignment accepts widening, rejects + // narrowing. 
+ assert(Cast.canUpCast(TimestampNTZNanosType(p1), TimestampNTZNanosType(p2)) == (p1 <= p2)) + assert(Cast.canUpCast(TimestampLTZNanosType(p1), TimestampLTZNanosType(p2)) == (p1 <= p2)) // ANSI store assignment allows lossless widening (p1 <= p2) and equal precision, but blocks // lossy narrowing (p1 > p2) to avoid silently dropping sub-microsecond digits. assert(Cast.canANSIStoreAssign(TimestampNTZNanosType(p1), TimestampNTZNanosType(p2)) == @@ -829,12 +831,13 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { assert(Cast.canCast(ntz, ltz)) assert(Cast.canAnsiCast(ltz, ntz)) assert(Cast.canAnsiCast(ntz, ltz)) - // The cross-family reinterpretation against the session zone is never a safe up-cast. - assert(!Cast.canUpCast(ltz, ntz)) - assert(!Cast.canUpCast(ntz, ltz)) - // They stay explicit-only: never silent store assignments (mirroring the other nanos casts). - assert(!Cast.canANSIStoreAssign(ltz, ntz)) - assert(!Cast.canANSIStoreAssign(ntz, ltz)) + // SPARK-57303: the cross-family LTZ <-> NTZ pair is treated on the precision axis like the + // micro TIMESTAMP <-> TIMESTAMP_NTZ pair: widening (target precision >= source) is an up-cast + // and ANSI-store-assignable, while lossy narrowing is neither. + assert(Cast.canUpCast(ltz, ntz) == (p <= q)) + assert(Cast.canUpCast(ntz, ltz) == (q <= p)) + assert(Cast.canANSIStoreAssign(ltz, ntz) == (p <= q)) + assert(Cast.canANSIStoreAssign(ntz, ltz) == (q <= p)) // The conversion depends on the session time zone in both directions. assert(Cast.needsTimeZone(ltz, ntz)) assert(Cast.needsTimeZone(ntz, ltz)) @@ -847,31 +850,99 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { test("cross-family nanos cast: micro boundary (precision 6) admissibility and store contract") { // TIMESTAMP_LTZ(6) = TIMESTAMP and TIMESTAMP_NTZ(6) = TIMESTAMP_NTZ, so the precision-6 - // cross-family casts are the mixed micro/nanos pairs covered here. 
+ // cross-family casts are the mixed micro/nanos pairs covered here. Each entry pairs a cast with + // whether it is a lossless widening (target precision >= source); p in [7, 9] always widens + // from the micro side (6) and always narrows to it. foreachNanosPrecision { p => val pairs = Seq( - (TimestampType: DataType, TimestampNTZNanosType(p): DataType), // LTZ(6) -> NTZ(p) - (TimestampNTZNanosType(p): DataType, TimestampType: DataType), // NTZ(p) -> LTZ(6) - (TimestampNTZType: DataType, TimestampLTZNanosType(p): DataType),// NTZ(6) -> LTZ(p) - (TimestampLTZNanosType(p): DataType, TimestampNTZType: DataType))// LTZ(p) -> NTZ(6) - pairs.foreach { case (from, to) => - // Explicit casts are allowed (ANSI and non-ANSI), but are never safe up-casts and never - // silent store assignments, and they depend on the session time zone. + (TimestampType: DataType, TimestampNTZNanosType(p): DataType, true), // LTZ(6) -> NTZ(p) + (TimestampNTZNanosType(p): DataType, TimestampType: DataType, false), // NTZ(p) -> LTZ(6) + (TimestampNTZType: DataType, TimestampLTZNanosType(p): DataType, true),// NTZ(6) -> LTZ(p) + (TimestampLTZNanosType(p): DataType, TimestampNTZType: DataType, false))// LTZ(p) -> NTZ(6) + pairs.foreach { case (from, to, widening) => + // Explicit casts are allowed (ANSI and non-ANSI) and depend on the session time zone. assert(Cast.canCast(from, to)) assert(Cast.canAnsiCast(from, to)) - assert(!Cast.canUpCast(from, to)) - assert(!Cast.canANSIStoreAssign(from, to)) + // SPARK-57303: widening is an up-cast and store-assignable; narrowing is neither. + assert(Cast.canUpCast(from, to) == widening) + assert(Cast.canANSIStoreAssign(from, to) == widening) assert(Cast.needsTimeZone(from, to)) // Null-safe like the micro TIMESTAMP <-> TIMESTAMP_NTZ pair. assert(!Cast.forceNullable(from, to)) } } // Sanity: the all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair (precision 6 <-> 6) stays a silent - // store assignment, unlike the mixed micro/nanos pairs above. 
+ // store assignment (equal precision). assert(Cast.canANSIStoreAssign(TimestampType, TimestampNTZType)) assert(Cast.canANSIStoreAssign(TimestampNTZType, TimestampType)) } + test("SPARK-57303: full timestamp-family up-cast and store-assignment precision matrix") { + // The micro/nanos LTZ/NTZ timestamp types with their effective fractional-second precision + // (micros: 6, nanos: 7-9), across both time-zone families. + val tsTypes: Seq[DataType] = + Seq(TimestampType, TimestampNTZType) ++ + (TimestampLTZNanosType.MIN_PRECISION to TimestampLTZNanosType.MAX_PRECISION).flatMap { p => + Seq(TimestampLTZNanosType(p), TimestampNTZNanosType(p)) + } + def precisionOf(dt: DataType): Int = dt match { + case t: TimestampLTZNanosType => t.precision + case t: TimestampNTZNanosType => t.precision + case _ => 6 + } + // For every ordered pair, canUpCast and canANSIStoreAssign are true iff the target precision is + // >= the source precision (lossless widening or equal precision), false for lossy narrowing. + for { + from <- tsTypes + to <- tsTypes + } { + val widening = precisionOf(from) <= precisionOf(to) + withClue(s"$from -> $to: ") { + assert(Cast.canUpCast(from, to) == widening) + assert(Cast.canANSIStoreAssign(from, to) == widening) + } + } + + // DATE anchors (micros and nanos): DATE -> ts is a lossless widening (up-cast + store-assign); + // ts -> DATE drops the time-of-day (not an up-cast) but stays ANSI-store-assignable. 
+ tsTypes.foreach { ts => + assert(Cast.canUpCast(DateType, ts), s"DATE -> $ts should be an up-cast") + assert(Cast.canANSIStoreAssign(DateType, ts), s"DATE -> $ts should be store-assignable") + assert(!Cast.canUpCast(ts, DateType), s"$ts -> DATE should not be an up-cast") + assert(Cast.canANSIStoreAssign(ts, DateType), s"$ts -> DATE should be store-assignable") + } + + // TIME anchors: TIME is intentionally outside the timestamp family, so TIME <-> ts matches the + // micro TIME <-> TIMESTAMP behavior - never an up-cast, but ANSI-store-assignable both ways. + for { + tq <- TimeType.MIN_PRECISION to TimeType.MAX_PRECISION + ts <- tsTypes + } { + val time = TimeType(tq) + assert(!Cast.canUpCast(time, ts), s"$time -> $ts should not be an up-cast") + assert(!Cast.canUpCast(ts, time), s"$ts -> $time should not be an up-cast") + assert(Cast.canANSIStoreAssign(time, ts), s"$time -> $ts should be store-assignable") + assert(Cast.canANSIStoreAssign(ts, time), s"$ts -> $time should be store-assignable") + } + } + + test("SPARK-57303: try-cast nullability follows up-cast admissibility for the timestamp family") { + // `Cast.nullable`'s try-cast branch keys on `Cast.canUpCast`: an up-cast (lossless widening + // within the timestamp family, or DATE -> ts) never fails, so a non-null child stays non-null; + // a lossy narrowing is not an up-cast, so the try-cast is conservatively nullable. + def tryCast(from: DataType, to: DataType): Cast = + Cast(AttributeReference("c", from, nullable = false)(), to, evalMode = EvalMode.TRY) + foreachNanosPrecision { p => + // Lossless widening micros -> nanos(p) and DATE -> nanos(p): non-null child stays non-null. + assert(!tryCast(TimestampNTZType, TimestampNTZNanosType(p)).nullable) + assert(!tryCast(TimestampType, TimestampLTZNanosType(p)).nullable) + assert(!tryCast(DateType, TimestampNTZNanosType(p)).nullable) + // Lossy narrowing nanos(p) -> micros is not an up-cast, so the try-cast is nullable. 
+      assert(tryCast(TimestampNTZNanosType(p), TimestampNTZType).nullable)
+      assert(tryCast(TimestampLTZNanosType(p), TimestampType).nullable)
+    }
+  }
+
   test("SPARK-40389: canUpCast: return false if casting decimal to integral types can cause" +
     " overflow") {
     Seq(ByteType, ShortType, IntegerType, LongType).foreach { integralType =>
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/GeneratedColumnExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/GeneratedColumnExpressionSuite.scala
new file mode 100644
index 0000000000000..de11b78452cce
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/GeneratedColumnExpressionSuite.scala
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.plans.logical
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.AnalysisException
+import org.apache.spark.sql.catalyst.expressions.Literal
+import org.apache.spark.sql.types.{DataType, TimestampLTZNanosType, TimestampNTZNanosType}
+import org.apache.spark.sql.types.{TimestampNTZType, TimestampType}
+
+/**
+ * Exercises `GeneratedColumnExpression.validate` with generation expressions typed as
+ * microsecond or nanosecond timestamps, checked against the declared column type.
+ */
+class GeneratedColumnExpressionSuite extends SparkFunSuite {
+
+  /** Precisions from MIN_PRECISION to MAX_PRECISION of the NTZ nanos type; reused for LTZ. */
+  private val nanosPrecisions =
+    TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION
+
+  /** Wraps a null literal of `childType` in a generated-column expression. */
+  private def genExpr(childType: DataType): GeneratedColumnExpression =
+    GeneratedColumnExpression(Literal.create(null, childType), "")
+
+  /**
+   * Asserts that validate() rejects a generation expression of type `exprType` for a column of
+   * type `columnType`, reporting the type incompatibility.
+   */
+  private def assertRejected(exprType: DataType, columnType: DataType): Unit = {
+    val ex = intercept[AnalysisException] {
+      genExpr(exprType).validate("c", columnType, allColumns = Seq.empty)
+    }
+    assert(ex.getCondition == "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN")
+    assert(ex.getMessageParameters.get("reason").contains("incompatible with column data type"))
+  }
+
+  test("SPARK-57303: validate accepts a lossless widening to a nanosecond timestamp column") {
+    // micros -> nanos is a lossless widening up-cast (Cast.canUpCast), so the generation
+    // expression's type is up-castable to the column type and validate() succeeds.
+    nanosPrecisions.foreach { p =>
+      genExpr(TimestampNTZType).validate("c", TimestampNTZNanosType(p), allColumns = Seq.empty)
+      genExpr(TimestampType).validate("c", TimestampLTZNanosType(p), allColumns = Seq.empty)
+    }
+  }
+
+  test("SPARK-57303: validate rejects a lossy narrowing from a nanosecond timestamp column") {
+    // nanos -> micros drops sub-microsecond digits and is not an up-cast, so validate() rejects
+    // it. Both time-zone families are covered, matching the acceptance test above.
+    nanosPrecisions.foreach { p =>
+      assertRejected(TimestampNTZNanosType(p), TimestampNTZType)
+      assertRejected(TimestampLTZNanosType(p), TimestampType)
+    }
+  }
+}