diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index d2160215778cf..f4c5d440ab875 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -8561,6 +8561,12 @@
     ],
     "sqlState" : "0A000"
   },
+  "UNSUPPORTED_HIVE_TYPE" : {
+    "message" : [
+      "Cannot read the Hive type <fieldType> of the column <fieldName> because Spark SQL does not support this data type."
+    ],
+    "sqlState" : "0A000"
+  },
   "UNSUPPORTED_INSERT" : {
     "message" : [
       "Can't insert into the target."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 5eb1174651758..4495befe2f59e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1707,6 +1707,23 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       cause = e)
   }
 
+  /**
+   * Builds the error reported when a Hive column uses a type that Spark SQL does not support.
+   *
+   * @param fieldType the Hive type string of the column; rendered with `toSQLType`
+   * @param fieldName the name of the column; rendered with `toSQLId`
+   * @return an `UNSUPPORTED_HIVE_TYPE` exception for the caller to throw
+   */
+  def unsupportedHiveTypeError(
+      fieldType: String,
+      fieldName: String): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "UNSUPPORTED_HIVE_TYPE",
+      messageParameters = Map(
+        "fieldType" -> toSQLType(fieldType),
+        "fieldName" -> toSQLId(fieldName)))
+  }
+
   def getTablesByTypeUnsupportedByHiveVersionError(): SparkUnsupportedOperationException = {
     new SparkUnsupportedOperationException(
       errorClass = "GET_TABLES_BY_TYPE_UNSUPPORTED_BY_HIVE_VERSION")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index ed9bcf74b2f57..d1c6b7c20e379 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -1144,6 +1144,11 @@ 
private[hive] object HiveClientImpl extends Logging {
       CatalystSqlParser.parseDataType(typeStr)
     } catch {
       case e: ParseException =>
+        // Hive's union type (uniontype<...>) is not supported by Spark SQL and makes the parser
+        // fail with a generic message. Detect it and report a clearer error (SPARK-21529).
+        if (hc.getType.toLowerCase(Locale.ROOT).contains("uniontype<")) {
+          throw QueryExecutionErrors.unsupportedHiveTypeError(hc.getType, hc.getName)
+        }
         throw QueryExecutionErrors.cannotRecognizeHiveTypeError(e, typeStr, hc.getName)
     }
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientImplSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientImplSuite.scala
new file mode 100644
index 0000000000000..9bee44f8f57e9
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientImplSuite.scala
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive.client
+
+import org.apache.hadoop.hive.metastore.api.FieldSchema
+
+import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException}
+
+/**
+ * Checks that converting a Hive column whose type contains a union type fails with the
+ * `UNSUPPORTED_HIVE_TYPE` error condition.
+ */
+class HiveClientImplSuite extends SparkFunSuite {
+
+  test("SPARK-21529: a clear error is raised for an unsupported Hive union type") {
+    // The type string must contain "uniontype<" for the dedicated error to be selected.
+    val column = new FieldSchema("c", "uniontype<int,string>", null)
+    checkError(
+      exception = intercept[SparkUnsupportedOperationException] {
+        HiveClientImpl.fromHiveColumn(column)
+      },
+      condition = "UNSUPPORTED_HIVE_TYPE",
+      parameters = Map(
+        "fieldType" -> "\"UNIONTYPE<INT,STRING>\"",
+        "fieldName" -> "`c`"))
+  }
+
+  test("SPARK-21529: a Hive union type nested in a struct is detected") {
+    // The union type sits inside a struct; the whole Hive type string is reported back.
+    val column = new FieldSchema("c", "struct<a:uniontype<int,string>>", null)
+    checkError(
+      exception = intercept[SparkUnsupportedOperationException] {
+        HiveClientImpl.fromHiveColumn(column)
+      },
+      condition = "UNSUPPORTED_HIVE_TYPE",
+      parameters = Map(
+        "fieldType" -> "\"STRUCT<A:UNIONTYPE<INT,STRING>>\"",
+        "fieldName" -> "`c`"))
+  }
+}