Try to fix some tests

apache · Aug 23, 2020 · 87e8b65 · 87e8b65
1 parent 6358727
commit 87e8b65
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 10 deletions.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala
@@ -28,8 +28,7 @@ object CSVExprUtils {
     if (options.isCommentSet) {
       val commentPrefix = options.comment.toString
       iter.filter { line =>
-        val trimmed = line.trim
-        trimmed.nonEmpty && !trimmed.startsWith(commentPrefix)
+        line.trim.nonEmpty && !line.startsWith(commentPrefix)
       }
     } else {
       iter.filter(_.trim.nonEmpty)
@@ -40,8 +39,7 @@ object CSVExprUtils {
     if (options.isCommentSet) {
       val commentPrefix = options.comment.toString
       iter.dropWhile { line =>
-        val trimmed = line.trim
-        trimmed.isEmpty || trimmed.trim.startsWith(commentPrefix)
+        line.trim.isEmpty || line.startsWith(commentPrefix)
       }
     } else {
       iter.dropWhile(_.trim.isEmpty)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala
@@ -1902,25 +1902,26 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa
 
   test("SPARK-25387: bad input should not cause NPE") {
     val schema = StructType(StructField("a", IntegerType) :: Nil)
-    val input = spark.createDataset(Seq("\u0000\u0000\u0001234"))
+    val input = spark.createDataset(Seq("\u0001\u0000\u0001234"))
 
     checkAnswer(spark.read.schema(schema).csv(input), Row(null))
     checkAnswer(spark.read.option("multiLine", true).schema(schema).csv(input), Row(null))
-    assert(spark.read.csv(input).collect().toSet == Set(Row()))
+    assert(spark.read.schema(schema).csv(input).collect().toSet == Set(Row(null)))
   }
 
   test("SPARK-31261: bad csv input with `columnNameCorruptRecord` should not cause NPE") {
     val schema = StructType(
       StructField("a", IntegerType) :: StructField("_corrupt_record", StringType) :: Nil)
-    val input = spark.createDataset(Seq("\u0000\u0000\u0001234"))
+    val input = spark.createDataset(Seq("\u0001\u0000\u0001234"))
 
     checkAnswer(
       spark.read
         .option("columnNameOfCorruptRecord", "_corrupt_record")
         .schema(schema)
         .csv(input),
-      Row(null, null))
-    assert(spark.read.csv(input).collect().toSet == Set(Row()))
+      Row(null, "\u0001\u0000\u0001234"))
+    assert(spark.read.schema(schema).csv(input).collect().toSet ==
+      Set(Row(null, "\u0001\u0000\u0001234")))
   }
 
   test("field names of inferred schema shouldn't compare to the first row") {
@@ -2368,8 +2369,9 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa
 
   test("SPARK-32614: don't treat rows starting with null char as comment") {
     withTempPath { path =>
-      Seq("\u0000foo", "bar", "baz").toDS.write.text(path.getCanonicalPath)
+      Seq("value", "\u0000foo", "bar", "baz").toDS.write.text(path.getCanonicalPath)
       val df = spark.read.format("csv")
+        .option("header", "true")
         .option("inferSchema", "true")
         .load(path.getCanonicalPath)