Skip to content

Commit

Permalink
Try to fix some tests
Browse files Browse the repository at this point in the history
  • Loading branch information
srowen committed Aug 23, 2020
1 parent 6358727 commit 87e8b65
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ object CSVExprUtils {
if (options.isCommentSet) {
val commentPrefix = options.comment.toString
iter.filter { line =>
val trimmed = line.trim
trimmed.nonEmpty && !trimmed.startsWith(commentPrefix)
line.trim.nonEmpty && !line.startsWith(commentPrefix)
}
} else {
iter.filter(_.trim.nonEmpty)
Expand All @@ -40,8 +39,7 @@ object CSVExprUtils {
if (options.isCommentSet) {
val commentPrefix = options.comment.toString
iter.dropWhile { line =>
val trimmed = line.trim
trimmed.isEmpty || trimmed.trim.startsWith(commentPrefix)
line.trim.isEmpty || line.startsWith(commentPrefix)
}
} else {
iter.dropWhile(_.trim.isEmpty)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1902,25 +1902,26 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa

test("SPARK-25387: bad input should not cause NPE") {
val schema = StructType(StructField("a", IntegerType) :: Nil)
val input = spark.createDataset(Seq("\u0000\u0000\u0001234"))
val input = spark.createDataset(Seq("\u0001\u0000\u0001234"))

checkAnswer(spark.read.schema(schema).csv(input), Row(null))
checkAnswer(spark.read.option("multiLine", true).schema(schema).csv(input), Row(null))
assert(spark.read.csv(input).collect().toSet == Set(Row()))
assert(spark.read.schema(schema).csv(input).collect().toSet == Set(Row(null)))
}

test("SPARK-31261: bad csv input with `columnNameCorruptRecord` should not cause NPE") {
val schema = StructType(
StructField("a", IntegerType) :: StructField("_corrupt_record", StringType) :: Nil)
val input = spark.createDataset(Seq("\u0000\u0000\u0001234"))
val input = spark.createDataset(Seq("\u0001\u0000\u0001234"))

checkAnswer(
spark.read
.option("columnNameOfCorruptRecord", "_corrupt_record")
.schema(schema)
.csv(input),
Row(null, null))
assert(spark.read.csv(input).collect().toSet == Set(Row()))
Row(null, "\u0001\u0000\u0001234"))
assert(spark.read.schema(schema).csv(input).collect().toSet ==
Set(Row(null, "\u0001\u0000\u0001234")))
}

test("field names of inferred schema shouldn't compare to the first row") {
Expand Down Expand Up @@ -2368,8 +2369,9 @@ abstract class CSVSuite extends QueryTest with SharedSparkSession with TestCsvDa

test("SPARK-32614: don't treat rows starting with null char as comment") {
withTempPath { path =>
Seq("\u0000foo", "bar", "baz").toDS.write.text(path.getCanonicalPath)
Seq("value", "\u0000foo", "bar", "baz").toDS.write.text(path.getCanonicalPath)
val df = spark.read.format("csv")
.option("header", "true")
.option("inferSchema", "true")
.load(path.getCanonicalPath)

Expand Down

0 comments on commit 87e8b65

Please sign in to comment.