Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.types

/**
 * Classification helpers shared by the whole LTZ/NTZ timestamp family, i.e. the microsecond
 * types ([[TimestampType]] / [[TimestampNTZType]]) together with their nanosecond-precision
 * counterparts ([[TimestampLTZNanosType]] / [[TimestampNTZNanosType]]). Keeping the notions of
 * effective fractional-second precision and of time-zone family in a single place lets up-cast
 * resolution ([[UpCastRule]]), ANSI store assignment, and common-type resolution stay in
 * agreement.
 */
private[sql] object TimestampFamily {

  /** Fractional-second precision reported for the microsecond timestamp types. */
  private final val MicrosPrecision = 6

  /**
   * Reports the effective fractional-second precision of a data type.
   *
   * @param dt the data type to classify
   * @return `Some(6)` for the microsecond types [[TimestampType]] / [[TimestampNTZType]];
   *         `Some(p)` carrying the type's own `precision` for the nanosecond types (documented
   *         as lying in [7, 9]); [[None]] for any type that is not on the timestamp
   *         fractional-precision axis (DATE, TIME, and everything else).
   */
  def fractionalPrecision(dt: DataType): Option[Int] = dt match {
    case TimestampType | TimestampNTZType => Some(MicrosPrecision)
    case ltzNanos: TimestampLTZNanosType => Some(ltzNanos.precision)
    case ntzNanos: TimestampNTZNanosType => Some(ntzNanos.precision)
    case _ => None
  }

  /**
   * Tells whether `dt` belongs to the local-time-zone (instant) timestamp family, i.e. it is
   * either the microsecond [[TimestampType]] or a [[TimestampLTZNanosType]].
   */
  def isLtz(dt: DataType): Boolean = dt match {
    case _: TimestampType | _: TimestampLTZNanosType => true
    case _ => false
  }

  /**
   * Tells whether `dt` belongs to the no-time-zone (local) timestamp family, i.e. it is either
   * the microsecond [[TimestampNTZType]] or a [[TimestampNTZNanosType]].
   */
  def isNtz(dt: DataType): Boolean = dt match {
    case _: TimestampNTZType | _: TimestampNTZNanosType => true
    case _ => false
  }
}
18 changes: 14 additions & 4 deletions sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,20 @@ private[sql] object UpCastRule {
case (from: NumericType, to: DecimalType) if to.isWiderThan(from) => true
case (from: DecimalType, to: NumericType) if from.isTighterThan(to) => true
case (f, t) if legalNumericPrecedence(f, t) => true
case (DateType, TimestampType) => true
case (DateType, TimestampNTZType) => true
case (TimestampNTZType, TimestampType) => true
case (TimestampType, TimestampNTZType) => true
// Widening DATE -> timestamp family (micro or nanos, LTZ or NTZ) is lossless; the reverse
// (timestamp -> DATE) drops the time-of-day and is not matched here, so it stays a non-up-cast.
case (DateType, t) if TimestampFamily.fractionalPrecision(t).isDefined => true
// Lossless widening within the timestamp family: target fractional-second precision >= source.
// Covers micros <-> nanos and the cross-family LTZ <-> NTZ pairs (mirroring how the micro
// TimestampType <-> TimestampNTZType pair is a mutual up-cast). Same-type equal precision is
// short-circuited by `from == to` above; cross-family equal precision (e.g. LTZ(7) <-> NTZ(7))
// is admitted here by the `<=`. The guard keeps non-timestamp pairs falling through to the
// cases below; lossy narrowing falls through to `case _ => false`.
case (f, t)
if TimestampFamily
.fractionalPrecision(f)
.exists(fp => TimestampFamily.fractionalPrecision(t).exists(fp <= _)) =>
true

case (s1: StringType, s2: StringType) => StringHelper.isMoreConstrained(s1, s2)
// TODO: allow upcast from int/double/decimal to char/varchar of sufficient length
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ import org.apache.spark.sql.types.{
StringType,
StringTypeExpression,
StructType,
TimestampFamily,
TimestampLTZNanosType,
TimestampNTZNanosType,
TimestampNTZType,
Expand Down Expand Up @@ -264,27 +265,21 @@ abstract class TypeCoercionHelper {
// The (family, precision) pair then maps back to a concrete type: precision 6 yields the
// micro type, precision in [7, 9] yields the nanos type.
//
// Note: this common-type resolution is intentionally more permissive than the nanosecond
// conversion rules in Cast.canUpCast / Cast.canANSIStoreAssign, which keep cross-family and
// DATE <-> nanos casts explicit-CAST-only while the nanos types are unreleased (SPARK-57323
// etc.). Coercion here mirrors the microsecond precedent so that UNION / CASE / coalesce /
// IN / comparison resolve a common type the same way they do for the micro families; the
// stricter explicit-only stance is deliberately scoped to up-cast and store assignment, not
// to common-type resolution.
// Note: common-type resolution here is symmetric and widens to the maximum precision, while
// Cast.canUpCast / Cast.canANSIStoreAssign are directional (they block lossy narrowing). Both
// now agree on admissibility across the timestamp family -- including the cross-family
// LTZ <-> NTZ pairs and DATE <-> nanos (SPARK-57303) -- mirroring the microsecond precedent
// so that UNION / CASE / coalesce / IN / comparison resolve a common type the same way they
// do for the micro families.
case _ =>
// Fractional-seconds precision of the microsecond timestamp types; the nanos types carry
// 7-9. DATE has no time component and is treated as the micro precision so that
// DATE <-> micro widens to the micro type and DATE <-> nanos to the nanos type.
// Fractional-seconds precision of the timestamp family (micros: 6, nanos: 7-9). DATE has no
// time component and is treated as the micro precision (getOrElse) so that DATE <-> micro
// widens to the micro type and DATE <-> nanos to the nanos type.
val MicrosPrecision = 6
def isLtz(d: DatetimeType): Boolean =
d.isInstanceOf[TimestampType] || d.isInstanceOf[TimestampLTZNanosType]
def isNtz(d: DatetimeType): Boolean =
d.isInstanceOf[TimestampNTZType] || d.isInstanceOf[TimestampNTZNanosType]
def precisionOf(d: DatetimeType): Int = d match {
case t: TimestampLTZNanosType => t.precision
case t: TimestampNTZNanosType => t.precision
case _ => MicrosPrecision // DateType / TimestampType / TimestampNTZType
}
def isLtz(d: DatetimeType): Boolean = TimestampFamily.isLtz(d)
def isNtz(d: DatetimeType): Boolean = TimestampFamily.isNtz(d)
def precisionOf(d: DatetimeType): Int =
TimestampFamily.fractionalPrecision(d).getOrElse(MicrosPrecision)
// Beyond TimeType (handled above), the only datetime types are DATE and the micro/nanos
// timestamp families. Guard so that a future DatetimeType subtype fails fast here instead
// of being silently mis-widened (treated as a family-neutral precision-6 type and folded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -500,30 +500,13 @@ object Cast extends QueryErrorsBase {
case (_: NumericType, _: NumericType) => true
case (_: AtomicType, _: StringType) => true
case (_: CalendarIntervalType, _: StringType) => true
// SPARK-57490: same-family cross-precision nanosecond casts: widening (e.g. TIMESTAMP_NTZ(7) ->
// TIMESTAMP_NTZ(9)) is lossless and allowed as a silent store assignment, while narrowing
// (e.g. (9) -> (7)) drops sub-microsecond digits and stays explicit-only. Equal precision is
// handled by the `from == to` short-circuit above; micros -> nanos widening (e.g. TIMESTAMP_NTZ
// -> TIMESTAMP_NTZ(9)) is lossless and falls to the catch-all below.
case (f: TimestampNTZNanosType, t: TimestampNTZNanosType) => f.precision <= t.precision
case (f: TimestampLTZNanosType, t: TimestampLTZNanosType) => f.precision <= t.precision
// SPARK-57323: DATE <-> nanosecond-precision timestamp requires an explicit CAST in both
// directions (nanos -> DATE drops fields; DATE -> nanos is lossless but kept explicit-only
// while the nanos types are unreleased). Stricter than micro DATE <-> TIMESTAMP[_NTZ], which
// the catch-all below allows.
case (DateType, _: AnyTimestampNanoType) => false
case (_: AnyTimestampNanoType, DateType) => false
// SPARK-57293/57511: narrowing any nanosecond timestamp to a microsecond timestamp drops the
// sub-microsecond digits, and cross-family casts additionally reinterpret the value against the
// session time zone; both stay explicit-only rather than silent store assignments while the
// nanos types are unreleased. This covers same-family narrowing (nanos -> micro), cross-family
// nanos <-> nanos, and the mixed micro/nanos pairs at the precision-6 boundary; everything
// matched here is explicit-only. The all-micro TIMESTAMP <-> TIMESTAMP_NTZ pair and micros ->
// nanos same-family widening stay store-assignable via the catch-all below.
case (_: AnyTimestampNanoType, t) if AnyTimestampType.acceptsType(t) => false
case (TimestampType, _: TimestampNTZNanosType) => false
case (TimestampNTZType, _: TimestampLTZNanosType) => false
case (_: AnyTimestampNanoType, _: AnyTimestampNanoType) => false
// SPARK-57303: block lossy narrowing across the whole timestamp family (LTZ/NTZ, micros and
// nanos, including the cross-family LTZ <-> NTZ pairs) so store assignment never silently drops
// sub-microsecond digits. Lossless widening, equal precision, and DATE <-> timestamp (DATE has
// no fractional precision, so it never matches here) all fall through to the DatetimeType arm
// below, mirroring the micro TIMESTAMP <-> TIMESTAMP_NTZ behavior.
case (f, t) if TimestampFamily.fractionalPrecision(f)
.exists(fp => TimestampFamily.fractionalPrecision(t).exists(fp > _)) => false
// SPARK-57585: widening a TIME(p) to a larger precision is lossless and allowed as a silent
// store assignment, while narrowing (e.g. TIME(6) -> TIME(3)) drops fractional-seconds digits
// and stays explicit-CAST-only. Equal precision is handled by the `from == to` short-circuit.
Expand Down
Loading