From ea86bf547c46d68ab449cec586c88051d16fa66a Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 14 Apr 2022 16:22:10 +0800 Subject: [PATCH] [NSE-843] ArrowDataSouce: Arrow dataset inspect() is called every time a file is read (#844) Closes #843 --- .../spark/sql/execution/datasources/arrow/ArrowFileFormat.scala | 2 +- .../datasources/v2/arrow/ArrowPartitionReaderFactory.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala index f0426258c..21551f1ac 100644 --- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala +++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/arrow/ArrowFileFormat.scala @@ -125,7 +125,7 @@ class ArrowFileFormat extends FileFormat with DataSourceRegister with Serializab options.asJava).asScala.toMap)) // todo predicate validation / pushdown - val dataset = factory.finish(); + val dataset = factory.finish(ArrowUtils.toArrowSchema(requiredSchema)); val filter = if (enableFilterPushDown) { // disable filter pushdown by arrow 7.0.0 diff --git a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala index b661cca97..d627eac81 100644 --- a/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala +++ b/arrow-data-source/standard/src/main/scala/com/intel/oap/spark/sql/execution/datasources/v2/arrow/ArrowPartitionReaderFactory.scala @@ -59,7 +59,7 @@ case class ArrowPartitionReaderFactory( val path = partitionedFile.filePath val factory = ArrowUtils.makeArrowDiscovery(URLDecoder.decode(path, "UTF-8"), partitionedFile.start, partitionedFile.length, options) - val dataset = factory.finish() + val dataset = factory.finish(ArrowUtils.toArrowSchema(readDataSchema)) val filter = if (enableFilterPushDown) { // disable filter pushdown by arrow 7.0.0 org.apache.arrow.dataset.filter.Filter.EMPTY