Skip to content

Commit

Permalink
Fix allocating VariableWidthVector in Arrow
Browse files Browse the repository at this point in the history
  • Loading branch information
Kopilov authored and Kopilov committed Apr 20, 2023
1 parent 13f3c4b commit c3b9939
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ import org.jetbrains.kotlinx.dataframe.api.map
import org.jetbrains.kotlinx.dataframe.exceptions.CellConversionException
import org.jetbrains.kotlinx.dataframe.exceptions.TypeConverterNotFoundException
import org.jetbrains.kotlinx.dataframe.name
import org.jetbrains.kotlinx.dataframe.values
import java.nio.charset.Charset
import kotlin.reflect.full.isSubtypeOf
import kotlin.reflect.typeOf

/**
* Save [dataFrame] content in Apache Arrow format (can be written to File, ByteArray, OutputStream or raw Channel) with [targetSchema].
Expand All @@ -67,14 +71,25 @@ internal class ArrowWriterImpl(

private val allocator = RootAllocator()

private fun allocateVector(vector: FieldVector, size: Int) {
private fun allocateVector(vector: FieldVector, size: Int, totalBytes: Long? = null) {
when (vector) {
is FixedWidthVector -> vector.allocateNew(size)
is VariableWidthVector -> vector.allocateNew(size)
is VariableWidthVector -> totalBytes?.let { vector.allocateNew(it, size) } ?: vector.allocateNew(size)
else -> throw IllegalArgumentException("Can not allocate ${vector.javaClass.canonicalName}")
}
}

/**
* Calculate buffer size for VariableWidthVector (return null for FixedWidthVector)
*/
private fun countTotalBytes(column: AnyCol): Long? {
val columnType = column.type()
return when {
columnType.isSubtypeOf(typeOf<String?>()) -> column.values.fold(0L) {totalBytes, value -> totalBytes + value.toString().length * 4}
else -> null
}
}

private fun infillWithNulls(vector: FieldVector, size: Int) {
when (vector) {
is BaseFixedWidthVector -> for (i in 0 until size) { vector.setNull(i) }
Expand Down Expand Up @@ -189,11 +204,12 @@ internal class ArrowWriterImpl(
actualField.createVector(allocator)!!
}

allocateVector(vector, dataFrame.rowsCount())
if (convertedColumn == null) {
check(actualField.isNullable)
allocateVector(vector, dataFrame.rowsCount())
infillWithNulls(vector, dataFrame.rowsCount())
} else {
allocateVector(vector, dataFrame.rowsCount(), countTotalBytes(convertedColumn))
infillVector(vector, convertedColumn)
}
return vector
Expand Down
2 changes: 1 addition & 1 deletion gradle/libs.versions.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ openapi = "2.1.13"
junit = "4.13.2"
kotestAsserions = "4.6.3"
jsoup = "1.14.3"
arrow = "10.0.0"
arrow = "11.0.0"
docProcessor = "0.1.6"
simpleGit = "2.0.1"

Expand Down

0 comments on commit c3b9939

Please sign in to comment.