Skip to content

Commit

Permalink
[VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer authored Aug 16, 2024
1 parent 68651e7 commit a038e93
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ class VeloxTPCHDistinctSpillSuite extends VeloxTPCHTableSupport {
super.sparkConf
.set("spark.memory.offHeap.size", "50m")
.set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger distinct spill early
.set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY, "8k")
.set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key, "8k")
}

test("distinct spill") {
Expand Down
1 change: 0 additions & 1 deletion cpp/core/config/GlutenConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ const std::string kGzipWindowSize4k = "4096";
const std::string kParquetCompressionCodec = "spark.sql.parquet.compression.codec";

const std::string kColumnarToRowMemoryThreshold = "spark.gluten.sql.columnarToRowMemoryThreshold";
const std::string kColumnarToRowMemoryDefaultThreshold = "67108864"; // 64MB

const std::string kUGIUserName = "spark.gluten.ugi.username";
const std::string kUGITokens = "spark.gluten.ugi.tokens";
Expand Down
15 changes: 2 additions & 13 deletions cpp/core/jni/JniWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,19 +534,8 @@ Java_org_apache_gluten_vectorized_NativeColumnarToRowJniWrapper_nativeColumnarTo
auto& conf = ctx->getConfMap();
int64_t column2RowMemThreshold;
auto it = conf.find(kColumnarToRowMemoryThreshold);
bool confIsLegal =
((it == conf.end()) ? false : std::all_of(it->second.begin(), it->second.end(), [](unsigned char c) {
return std::isdigit(c);
}));
if (confIsLegal) {
column2RowMemThreshold = std::stoll(it->second);
} else {
LOG(INFO)
<< "Because the spark.gluten.sql.columnarToRowMemoryThreshold configuration item is invalid, the kColumnarToRowMemoryDefaultThreshold default value is used, which is "
<< kColumnarToRowMemoryDefaultThreshold << " byte";
column2RowMemThreshold = std::stoll(kColumnarToRowMemoryDefaultThreshold);
}

GLUTEN_CHECK(!(it == conf.end()), "Required key not found in runtime config: " + kColumnarToRowMemoryThreshold);
column2RowMemThreshold = std::stoll(it->second);
// Convert the native batch to Spark unsafe row.
return ctx->saveObject(ctx->createColumnar2RowConverter(column2RowMemThreshold));
JNI_METHOD_END(kInvalidObjectHandle)
Expand Down
11 changes: 5 additions & 6 deletions shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -587,9 +587,6 @@ object GlutenConfig {

val GLUTEN_SHUFFLE_WRITER_MERGE_THRESHOLD = "spark.gluten.sql.columnar.shuffle.merge.threshold"

// Columnar to row memory threshold.
val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY = "spark.gluten.sql.columnarToRowMemoryThreshold"

// Controls whether to load DLL from jars. User can get dependent native libs packed into a jar
// by executing dev/package.sh. Then, with that jar configured, Gluten can load the native libs
// at runtime. This config is just for velox backend. And it is NOT applicable to the situation
Expand Down Expand Up @@ -654,7 +651,6 @@ object GlutenConfig {
GLUTEN_SAVE_DIR,
GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY,
GLUTEN_MAX_BATCH_SIZE_KEY,
GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY,
GLUTEN_SHUFFLE_WRITER_BUFFER_SIZE,
SQLConf.SESSION_LOCAL_TIMEZONE.key,
GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY,
Expand Down Expand Up @@ -690,7 +686,10 @@ object GlutenConfig {
(SQLConf.IGNORE_MISSING_FILES.key, SQLConf.IGNORE_MISSING_FILES.defaultValueString),
(
COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.key,
COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString)
COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString),
(
GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key,
GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.defaultValue.get.toString)
)
keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, e._2)))

Expand Down Expand Up @@ -1123,7 +1122,7 @@ object GlutenConfig {
.createWithDefault(4096)

val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD =
buildConf(GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY)
buildConf("spark.gluten.sql.columnarToRowMemoryThreshold")
.internal()
.bytesConf(ByteUnit.BYTE)
.createWithDefaultString("64MB")
Expand Down

0 comments on commit a038e93

Please sign in to comment.