From 6325f5b01a686011d691d617c6708c7521d13568 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Wed, 16 Dec 2020 10:05:43 -0600 Subject: [PATCH] Update docs and tests for null CSV support Signed-off-by: Robert (Bobby) Evans --- docs/compatibility.md | 4 ---- integration_tests/src/main/python/csv_test.py | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/compatibility.md b/docs/compatibility.md index 61fd050ae14..41dd79107af 100644 --- a/docs/compatibility.md +++ b/docs/compatibility.md @@ -105,10 +105,6 @@ of issues surrounding it and they should be avoided. Escaped quote characters `'\"'` are not supported well as described by this [issue](https://github.com/NVIDIA/spark-rapids/issues/129). -Null values are not respected as described -[here](https://github.com/NVIDIA/spark-rapids/issues/127) even though they are -supported for other types. - ### CSV Dates Parsing a `timestamp` as a `date` does not work. The details are documented in this [issue](https://github.com/NVIDIA/spark-rapids/issues/869). diff --git a/integration_tests/src/main/python/csv_test.py b/integration_tests/src/main/python/csv_test.py index 609a4d449bd..c0af1aecfe6 100644 --- a/integration_tests/src/main/python/csv_test.py +++ b/integration_tests/src/main/python/csv_test.py @@ -130,7 +130,7 @@ def test_basic_read(std_input_path, name, schema, sep, header, read_func, v1_ena # Spark does not escape '\r' or '\n' even though it uses it to mark end of record # This would require multiLine reads to work correctly so we avoid these chars StringGen('(\\w| |\t|\ud720){0,10}', nullable=False), - pytest.param(StringGen('[aAbB ]{0,10}'), marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/127')), + StringGen('[aAbB ]{0,10}'), byte_gen, short_gen, int_gen, long_gen, boolean_gen, date_gen, DoubleGen(no_nans=True), # NaN, Inf, and -Inf are not supported # Once https://github.com/NVIDIA/spark-rapids/issues/125 and https://github.com/NVIDIA/spark-rapids/issues/124