diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index 54605455353..78537eb5fb3 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -599,7 +599,7 @@ def test_from_json_map_fallback(): @allow_non_gpu(*non_utc_allow) def test_from_json_struct(schema): # note that column 'a' does not use leading zeroes due to https://github.com/NVIDIA/spark-rapids/issues/9588 - json_string_gen = StringGen(r'{"a": [1-9]{0,5}, "b": "[A-Z]{0,5}", "c": 1\d\d\d}') \ + json_string_gen = StringGen(r'{\'a\': [1-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ .with_special_pattern('', weight=50) \ .with_special_pattern('null', weight=50) assert_gpu_and_cpu_are_equal_collect( diff --git a/integration_tests/src/test/resources/dates.json b/integration_tests/src/test/resources/dates.json index 1fdfc3b4320..e32ff381dd4 100644 --- a/integration_tests/src/test/resources/dates.json +++ b/integration_tests/src/test/resources/dates.json @@ -1,5 +1,5 @@ -{ "number": "2020-09-16" } -{ "number": " 2020-09-16" } -{ "number": "2020-09-16 " } -{ "number": "1581-01-01" } -{ "number": "1583-01-01" } \ No newline at end of file +{ 'number': '2020-09-16' } +{ 'number': ' 2020-09-16' } +{ 'number': '2020-09-16 ' } +{ 'number': '1581-01-01' } +{ 'number': '1583-01-01' } \ No newline at end of file diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala index 138f99b0c72..c32838d903d 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala @@ -359,6 +359,7 @@ class JsonPartitionReader( cudf.JSONOptions.builder() .withRecoverWithNull(true) .withMixedTypesAsStrings(enableMixedTypesAsString) + .withNormalizeSingleQuotes(true) .build } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala index a6dcb9d8edf..77d7956c2e7 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala @@ -179,6 +179,7 @@ case class GpuJsonToStructs( val jsonOptions = cudf.JSONOptions.builder() .withRecoverWithNull(true) .withMixedTypesAsStrings(enableMixedTypesAsString) + .withNormalizeSingleQuotes(true) .build() withResource(cudf.Table.readJSON(jsonOptions, data, start, length)) { tableWithMeta => val names = tableWithMeta.getColumnNames