From 731a104df30751ca0c333e621129564f34b81994 Mon Sep 17 00:00:00 2001 From: junyuc25 Date: Fri, 9 Aug 2024 11:54:27 +0200 Subject: [PATCH] [SPARK-47261][SQL] Assign better name for errors _LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174 ### What changes were proposed in this pull request? Assign better names for the errors _LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174. ### Why are the changes needed? Proper names improve the user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? Added new tests and ran all the tests in the suites: ``` org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite org.apache.spark.SparkThrowableSuite ``` ### Was this patch authored or co-authored using generative AI tooling? No Closes #47421 from junyuc25/SPARK-47261. Authored-by: junyuc25 Signed-off-by: Max Gekk --- .../resources/error/error-conditions.json | 33 +++++++++-------- .../sql/errors/QueryCompilationErrors.scala | 6 +-- ...ld-with-enum-as-logical-annotation.parquet | Bin 0 -> 409 bytes ...nterval-using-fixed-len-byte-array.parquet | Bin 0 -> 369 bytes .../parquet/ParquetSchemaSuite.scala | 35 ++++++++++++++++-- 5 files changed, 52 insertions(+), 22 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet create mode 100644 sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 26bda26fef289..4766c77909158 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3543,6 +3543,24 @@ ], "sqlState" : "42805" }, + "PARQUET_TYPE_ILLEGAL" : { + "message" : [ + "Illegal Parquet type: <parquetType>." 
+ ], + "sqlState" : "42846" + }, + "PARQUET_TYPE_NOT_RECOGNIZED" : { + "message" : [ + "Unrecognized Parquet type: <field>." + ], + "sqlState" : "42846" + }, + "PARQUET_TYPE_NOT_SUPPORTED" : { + "message" : [ + "Parquet type not yet supported: <parquetType>." + ], + "sqlState" : "42846" + }, "PARSE_EMPTY_STATEMENT" : { "message" : [ "Syntax error, unexpected empty statement." @@ -5881,21 +5899,6 @@ "createTableColumnTypes option column not found in schema ." ] }, - "_LEGACY_ERROR_TEMP_1172" : { - "message" : [ - "Parquet type not yet supported: <parquetType>." - ] - }, - "_LEGACY_ERROR_TEMP_1173" : { - "message" : [ - "Illegal Parquet type: <parquetType>." - ] - }, - "_LEGACY_ERROR_TEMP_1174" : { - "message" : [ - "Unrecognized Parquet type: <field>." - ] - }, "_LEGACY_ERROR_TEMP_1181" : { "message" : [ "Stream-stream join without equality predicate is not supported." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index cf801e3caacb2..09dfa6b3b603b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1995,19 +1995,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat def parquetTypeUnsupportedYetError(parquetType: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1172", + errorClass = "PARQUET_TYPE_NOT_SUPPORTED", messageParameters = Map("parquetType" -> parquetType)) } def illegalParquetTypeError(parquetType: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1173", + errorClass = "PARQUET_TYPE_ILLEGAL", messageParameters = Map("parquetType" -> parquetType)) } def unrecognizedParquetTypeError(field: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1174", + errorClass = "PARQUET_TYPE_NOT_RECOGNIZED", messageParameters = 
Map("field" -> field)) } diff --git a/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d315eb467a02b1a698b5140d0e412377a4493733 GIT binary patch literal 409 zcmYLGO-sW-5S^}V8cHt(cge~ga%rfbu`zAZKrey^ksOM6@=!_L5d%rnrl}uie}#X+ zgTKX}F4TLu6Jw5T#@5jq z)!J=q;?WV?T0xNOx%gNV@_*VH3%5&+}51zQAF|u zQ_=Z14dS!GkHjOt1d?HnDdvZiKv}()GOe(5^Fm|hE|VO|;A@hvG6d^-m6Yo(R;PJs zKV!K`i>2+k-pKXr!D5?c+Kxih7zGnQ3`fJE_H-Db??pl22aCkl=tt+o7Rj+xzhTcB I05kfOAFU`=NdN!< literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1504c6e4b4c844b6d2194cb6d49fe73e077c6b5d GIT binary patch literal 369 zcmaKo!AiqG5Qe8Kv^hvY=`6djha5Ddf}xvD69c^n9z;A8=~0l>-9sR0>o%?WZa#u% zAH*kcE{a|}el!2fKmW`x?=%9QV2qz1?^+=v>SF+Ke8m9Zpr^G?)6^J~Wtmhwzy+r| zGC8~Md-u5Z78y)I4BFR+rrvf$I7ER=1a{}Eu3oQB&u-snhBK+8iUNg0Iqe4^bKQCC!@cucN)hTrXNeBu<*sgQSQgH@GQTiIkxeF6+oE(v amw9gTWo4{0WlFTH#!@K?uX_M "INT64 (TIMESTAMP(NANOS,true))") + ) + } + + test("SPARK-47261: parquet file with unsupported type") { + val testDataPath = testFile("test-data/interval-using-fixed-len-byte-array.parquet") + checkError( + exception = intercept[AnalysisException] { + spark.read.parquet(testDataPath).collect() + }, + errorClass = "PARQUET_TYPE_NOT_SUPPORTED", + parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)") + ) + } + + test("SPARK-47261: parquet file with unrecognized parquet type") { + val testDataPath = testFile("test-data/group-field-with-enum-as-logical-annotation.parquet") + val expectedParameter = "required group my_list (ENUM) {\n repeated group list {\n" + + " optional binary element (STRING);\n }\n}" + checkError( + exception = 
intercept[AnalysisException] { + spark.read.parquet(testDataPath).collect() + }, + errorClass = "PARQUET_TYPE_NOT_RECOGNIZED", + parameters = Map("field" -> expectedParameter) + ) } // =======================================================