This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-1161] Format sql config string key #1169

Merged
merged 1 commit into from Nov 14, 2022
@@ -24,7 +24,7 @@ object ParquetSQLConf {
   // arrow-datasource-parquet.jar file is supposed to be placed into Spark's lib folder. Which
   // means it's user's intention to use the replaced ParquetDataSource.
   val OVERWRITE_PARQUET_DATASOURCE_READ =
-    SQLConf.buildConf("spark.sql.arrow.overwrite.parquet.read")
+    SQLConf.buildConf("spark.oap.sql.arrow.overwrite.parquet.read")
       .doc("Overwrite Parquet datasource v1 with reader of Arrow datasource.")
       .booleanConf
       .createWithDefault(true)
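
For context on the renamed key above: a user who wants to fall back to Spark's built-in Parquet v1 reader would now set spark.oap.sql.arrow.overwrite.parquet.read to false. A minimal sketch, not part of this PR, assuming a plain SparkSession (the application name is only a placeholder):

// Minimal sketch, not part of this PR: disabling the Arrow-backed replacement of
// the Parquet v1 reader via the renamed key. "arrow-config-example" is a placeholder.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("arrow-config-example")
  .config("spark.oap.sql.arrow.overwrite.parquet.read", "false")
  .getOrCreate()

// The key can also be changed on the running session; whether an already-planned
// read picks this up depends on how the datasource is wired, so treat it as illustrative.
spark.conf.set("spark.oap.sql.arrow.overwrite.parquet.read", "true")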
@@ -20,20 +20,20 @@ package com.intel.oap.spark.sql.execution.datasources.v2.arrow
 import org.apache.spark.sql.internal.SQLConf

 object ArrowSQLConf {
-  val ARROW_FILTER_PUSHDOWN_ENABLED = SQLConf.buildConf("spark.sql.arrow.filterPushdown")
+  val ARROW_FILTER_PUSHDOWN_ENABLED = SQLConf.buildConf("spark.oap.sql.arrow.filterPushdown")
     .doc("Enables Arrow filter push-down optimization when set to true.")
     .booleanConf
     .createWithDefault(true)

-  val FILES_DYNAMIC_MERGE_ENABLED = SQLConf.buildConf("spark.sql.files.dynamicMergeEnabled")
+  val FILES_DYNAMIC_MERGE_ENABLED = SQLConf.buildConf("spark.oap.sql.files.dynamicMergeEnabled")
     .doc("Whether to merge file partition dynamically. If true, It will use the total size, " +
       "file count and expectPartitionNum to dynamic merge filePartition. This is better to set " +
       "true if there are many small files in the read path. This configuration is effective " +
       "only when using file-based sources such as Parquet, JSON and ORC.")
     .booleanConf
     .createWithDefault(false)

-  val FILES_EXPECTED_PARTITION_NUM = SQLConf.buildConf("spark.sql.files.expectedPartitionNum")
+  val FILES_EXPECTED_PARTITION_NUM = SQLConf.buildConf("spark.oap.sql.files.expectedPartitionNum")
     .doc("The expected number of File partitions. It will automatically merge file splits to " +
       "provide the best concurrency when the file partitions after split exceed the " +
       "expected num and the size of file partition is less than maxSplitSize. If not set, " +
@@ -44,7 +44,7 @@ object ArrowSQLConf {
     .checkValue(v => v > 0, "The expected partition number must be a positive integer.")
     .createOptional

-  val FILES_MAX_NUM_IN_PARTITION = SQLConf.buildConf("spark.sql.files.maxNumInPartition")
+  val FILES_MAX_NUM_IN_PARTITION = SQLConf.buildConf("spark.oap.sql.files.maxNumInPartition")
     .doc("The max number of files in one filePartition. If set, it will limit the max file num " +
       "in FilePartition while merging files. This can avoid too many little io in one task. " +
       "This configuration is effective only when using file-based sources such as Parquet, " +
docs/Configuration.md: 2 changes (1 addition & 1 deletion)
@@ -34,7 +34,7 @@ You can add these configuration into spark-defaults.conf to enable or disable th
 | spark.oap.sql.columnar.preferColumnar | Enable or Disable Columnar Operators, default is false.<br /> This parameter could impact the performance in different case. In some cases, to set false can get some performance boost. | false |
 | spark.oap.sql.columnar.joinOptimizationLevel | Fallback to row operators if there are several continous joins | 18 |
 | spark.sql.execution.arrow.maxRecordsPerBatch | Set up the Max Records per Batch | 10000 |
-| spark.sql.execution.sort.spillThreshold | Set up the Max sort in memory threshold in bytes, default is disabled | -1 |
+| spark.oap.sql.execution.sort.spillThreshold | Set up the Max sort in memory threshold in bytes, default is disabled | -1 |
 | spark.oap.sql.columnar.wholestagecodegen.breakdownTime | Enable or Disable metrics in Columnar WholeStageCodeGen | false |
 | spark.oap.sql.columnar.tmp_dir | Set up a folder to store the codegen files, default is disabled | "" |
 | spark.oap.sql.columnar.shuffle.customizedCompression.codec | Set up the codec to be used for Columnar Shuffle, default is lz4. The other option is fastpfor which can bring better perf on compressing fixed-size based contents like int| lz4 |
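
The documentation above says these keys normally go into spark-defaults.conf; the renamed spill-threshold key can equally be passed when the session is built. A sketch, assuming a 64 MB threshold purely for illustration:

// Illustrative only: spill in-memory sort buffers once they reach ~64 MB, using the
// renamed key from this PR. The default of -1 keeps spilling disabled.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config("spark.oap.sql.execution.sort.spillThreshold", (64L * 1024 * 1024).toString)
  .config("spark.sql.execution.arrow.maxRecordsPerBatch", "10000") // key unchanged by this PR
  .getOrCreate()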
@@ -191,7 +191,7 @@ class GazellePluginConfig(conf: SQLConf) extends Logging {
     conf.getConfString("spark.sql.execution.arrow.maxRecordsPerBatch", "10000").toInt

   val sortSpillThreshold: Long =
-    conf.getConfString("spark.sql.execution.sort.spillThreshold", "-1").toLong
+    conf.getConfString("spark.oap.sql.execution.sort.spillThreshold", "-1").toLong

   // enable or disable metrics in columnar wholestagecodegen operator
   val enableMetricsTime: Boolean =
@@ -204,7 +204,7 @@ class GazellePluginConfig(conf: SQLConf) extends Logging {
     conf.getConfString("spark.oap.sql.columnar.tmp_dir", null)

   @deprecated val broadcastCacheTimeout: Int =
-    conf.getConfString("spark.sql.columnar.sort.broadcast.cache.timeout", "-1").toInt
+    conf.getConfString("spark.oap.sql.columnar.sort.broadcast.cache.timeout", "-1").toInt

   // Whether to spill the partition buffers when buffers are full.
   // If false, the partition buffers will be cached in memory first,
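
For readers unfamiliar with the lookup pattern in GazellePluginConfig: getConfString returns the raw string value (or the supplied default), and the caller converts it to the wanted type. A small self-contained sketch with the renamed keys; the standalone SQLConf instance and the threshold value are for illustration only:

// Illustrative sketch of the getConfString pattern shown above; not part of the PR.
import org.apache.spark.sql.internal.SQLConf

val conf = new SQLConf()
conf.setConfString("spark.oap.sql.execution.sort.spillThreshold", "268435456") // example value

// The key was set above, so "268435456" is returned and parsed to Long.
val sortSpillThreshold: Long =
  conf.getConfString("spark.oap.sql.execution.sort.spillThreshold", "-1").toLong

// This key was never set, so the supplied default "-1" is returned and parsed.
val broadcastCacheTimeout: Int =
  conf.getConfString("spark.oap.sql.columnar.sort.broadcast.cache.timeout", "-1").toInt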