[NSE-912] Remove extra handleSafe costs (#922)

* Change setSafe to set to remove handleSafe costs * add missed ArrowColumnVectorUtils
oap-project · May 25, 2022 · d263ec5 · d263ec5
1 parent 32e2f70
commit d263ec5
Show file tree

Hide file tree

Showing 3 changed files with 183 additions and 26 deletions.
diff --git a/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowColumnVectorUtils.java b/arrow-data-source/common/src/main/java/com/intel/oap/vectorized/ArrowColumnVectorUtils.java
@@ -0,0 +1,36 @@
+package com.intel.oap.vectorized;
+
+import org.apache.spark.sql.catalyst.InternalRow;
+import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils;
+import org.apache.spark.sql.execution.vectorized.WritableColumnVector;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.unsafe.types.UTF8String;
+
+/**
+ * Utilities to help manipulate data associate with ColumnVectors. These should be used mostly
+ * for debugging or other non-performance critical paths.
+ * These utilities are mostly used to convert ColumnVectors into other formats.
+ */
+public class ArrowColumnVectorUtils {
+    /**
+     * Populates the entire `col` with `row[fieldIdx]`
+     * This is copied from {@link org.apache.spark.sql.execution.vectorized.ColumnVectorUtils#populate}.
+     * We changed the way to putByteArrays.
+     */
+    public static void populate(WritableColumnVector col, InternalRow row, int fieldIdx) {
+        ArrowWritableColumnVector arrowCol = (ArrowWritableColumnVector) col;
+        int capacity = arrowCol.getCapacity();
+
+        if (row.isNullAt(fieldIdx)) {
+            arrowCol.putNulls(0, capacity);
+        } else {
+            if (arrowCol.dataType() == DataTypes.StringType) {
+                UTF8String v = row.getUTF8String(fieldIdx);
+                byte[] bytes = v.getBytes();
+                arrowCol.putByteArrays(0, capacity, bytes, 0, bytes.length);
+            } else {
+                ColumnVectorUtils.populate(col, row, fieldIdx);
+            }
+        }
+    }
+}