From 7ed2870710f7526c2c9631809c8d4187d08d8a05 Mon Sep 17 00:00:00 2001 From: Jimmy Lu Date: Thu, 12 Sep 2024 07:18:31 -0700 Subject: [PATCH] Add config to use selective Nimble reader Summary: As the initial step of rolling out selective Nimble reader, we add in a temporary Velox session property `selective_nimble_reader_enabled` to use the new selective Nimble reader if set to true. This session property will be removed once selective Nimble reader is fully rolled out. Differential Revision: D62534557 --- velox/connectors/Connector.h | 9 +++++++++ velox/connectors/hive/HiveConnectorUtil.cpp | 4 +++- velox/core/QueryConfig.h | 10 ++++++++++ velox/dwio/common/Options.h | 9 +++++++++ velox/exec/Operator.cpp | 5 ++++- 5 files changed, 35 insertions(+), 2 deletions(-) diff --git a/velox/connectors/Connector.h b/velox/connectors/Connector.h index 1dc77823e436..6aa1b55c9e2c 100644 --- a/velox/connectors/Connector.h +++ b/velox/connectors/Connector.h @@ -356,6 +356,14 @@ class ConnectorQueryCtx { return cancellationToken_; } + bool selectiveNimbleReaderEnabled() const { + return selectiveNimbleReaderEnabled_; + } + + void setSelectiveNimbleReaderEnabled(bool value) { + selectiveNimbleReaderEnabled_ = value; + } + private: memory::MemoryPool* const operatorPool_; memory::MemoryPool* const connectorPool_; @@ -371,6 +379,7 @@ class ConnectorQueryCtx { const std::string planNodeId_; const std::string sessionTimezone_; const folly::CancellationToken cancellationToken_; + bool selectiveNimbleReaderEnabled_{false}; }; class Connector { diff --git a/velox/connectors/hive/HiveConnectorUtil.cpp b/velox/connectors/hive/HiveConnectorUtil.cpp index ac7289c5b261..d7193699336c 100644 --- a/velox/connectors/hive/HiveConnectorUtil.cpp +++ b/velox/connectors/hive/HiveConnectorUtil.cpp @@ -28,7 +28,7 @@ #include "velox/dwio/dwrf/writer/Writer.h" #ifdef VELOX_ENABLE_PARQUET -#include "velox/dwio/parquet/writer/Writer.h" +#include "velox/dwio/parquet/writer/Writer.h" // @manual #endif #include "velox/expression/Expr.h" @@ -564,6 +564,8 @@ void configureReaderOptions( const auto timezone = tz::locateZone(sessionTzName); readerOptions.setSessionTimezone(timezone); } + readerOptions.setSelectiveNimbleReaderEnabled( + connectorQueryCtx->selectiveNimbleReaderEnabled()); if (readerOptions.fileFormat() != dwio::common::FileFormat::UNKNOWN) { VELOX_CHECK( diff --git a/velox/core/QueryConfig.h b/velox/core/QueryConfig.h index def08a9de314..304e1fd370bb 100644 --- a/velox/core/QueryConfig.h +++ b/velox/core/QueryConfig.h @@ -378,6 +378,16 @@ class QueryConfig { static constexpr const char* kDebugDisableExpressionWithLazyInputs = "debug_disable_expression_with_lazy_inputs"; + /// Temporary flag to control whether selective Nimble reader should be used + /// in this query or not. Will be removed after the selective Nimble reader + /// is fully rolled out. + static constexpr const char* kSelectiveNimbleReaderEnabled = + "selective_nimble_reader_enabled"; + + bool selectiveNimbleReaderEnabled() const { + return get(kSelectiveNimbleReaderEnabled, false); + } + bool debugDisableExpressionsWithPeeling() const { return get(kDebugDisableExpressionWithPeeling, false); } diff --git a/velox/dwio/common/Options.h b/velox/dwio/common/Options.h index 90c482230240..2fc07156f83b 100644 --- a/velox/dwio/common/Options.h +++ b/velox/dwio/common/Options.h @@ -577,6 +577,14 @@ class ReaderOptions : public io::ReaderOptions { scanSpec_ = std::move(scanSpec); } + bool selectiveNimbleReaderEnabled() const { + return selectiveNimbleReaderEnabled_; + } + + void setSelectiveNimbleReaderEnabled(bool value) { + selectiveNimbleReaderEnabled_ = value; + } + private: uint64_t tailLocation_; FileFormat fileFormat_; @@ -591,6 +599,7 @@ class ReaderOptions : public io::ReaderOptions { std::shared_ptr randomSkip_; std::shared_ptr scanSpec_; const tz::TimeZone* sessionTimezone_{nullptr}; + bool selectiveNimbleReaderEnabled_{false}; }; struct WriterOptions { diff --git a/velox/exec/Operator.cpp b/velox/exec/Operator.cpp index 67afed0eafa3..014d16a314a0 100644 --- a/velox/exec/Operator.cpp +++ b/velox/exec/Operator.cpp @@ -54,7 +54,7 @@ OperatorCtx::createConnectorQueryCtx( memory::MemoryPool* connectorPool, const common::SpillConfig* spillConfig) const { const auto& task = driverCtx_->task; - return std::make_shared( + auto connectorQueryCtx = std::make_shared( pool_, connectorPool, task->queryCtx()->connectorSessionProperties(connectorId), @@ -69,6 +69,9 @@ OperatorCtx::createConnectorQueryCtx( driverCtx_->driverId, driverCtx_->queryConfig().sessionTimezone(), task->getCancellationToken()); + connectorQueryCtx->setSelectiveNimbleReaderEnabled( + driverCtx_->queryConfig().selectiveNimbleReaderEnabled()); + return connectorQueryCtx; } Operator::Operator(