From 53ae39174b73b4ea7cf981b97f2bb24168752c5d Mon Sep 17 00:00:00 2001 From: prasha2 Date: Tue, 15 Oct 2019 10:23:25 +0530 Subject: [PATCH 1/3] Fix for SPARK-27259 For compressed files, the size of a split can be set to -1. This was allowed until Spark 2.1 but regressed starting with Spark 2.2.x. This commit removes the validation that length be at least 0. --- .../main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index bfe8152d4dee2..b9e99ae4f64e2 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -76,7 +76,6 @@ private[spark] object InputFileBlockHolder { def set(filePath: String, startOffset: Long, length: Long): Unit = { require(filePath != null, "filePath cannot be null") require(startOffset >= 0, s"startOffset ($startOffset) cannot be negative") - require(length >= 0, s"length ($length) cannot be negative") inputBlock.get().set(new FileBlock(UTF8String.fromString(filePath), startOffset, length)) } From c67bcf303f0764bae900ac243121523bdcd35274 Mon Sep 17 00:00:00 2001 From: prasha2 Date: Wed, 16 Oct 2019 07:38:51 +0530 Subject: [PATCH 2/3] SPARK-27259 Fix Incorporate the review comment to change the validation to `length >= -1` --- .../main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index b9e99ae4f64e2..52b2b4035bcdc 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -76,6 +76,7 @@ private[spark] object InputFileBlockHolder { def set(filePath: String, startOffset: Long, 
length: Long): Unit = { require(filePath != null, "filePath cannot be null") require(startOffset >= 0, s"startOffset ($startOffset) cannot be negative") + require(length >= -1, s"length ($length) cannot be smaller than -1") inputBlock.get().set(new FileBlock(UTF8String.fromString(filePath), startOffset, length)) } From 840f1ce4bd15c58fe2d6b2a5aa9980127b2291b8 Mon Sep 17 00:00:00 2001 From: prasha2 Date: Wed, 16 Oct 2019 07:42:00 +0530 Subject: [PATCH 3/3] Formatting fix Formatting fix --- .../main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala index 52b2b4035bcdc..1beb085db27d9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala +++ b/core/src/main/scala/org/apache/spark/rdd/InputFileBlockHolder.scala @@ -76,7 +76,7 @@ private[spark] object InputFileBlockHolder { def set(filePath: String, startOffset: Long, length: Long): Unit = { require(filePath != null, "filePath cannot be null") require(startOffset >= 0, s"startOffset ($startOffset) cannot be negative") - require(length >= -1, s"length ($length) cannot be smaller than -1") + require(length >= -1, s"length ($length) cannot be smaller than -1") inputBlock.get().set(new FileBlock(UTF8String.fromString(filePath), startOffset, length)) }