diff --git a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt
index 31ff3a6ce..479070ae6 100644
--- a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt
+++ b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/ArrowKtTest.kt
@@ -266,4 +266,11 @@ internal class ArrowKtTest {
             Locale.setDefault(currentLocale)
         }
     }
+
+    @Test
+    fun testBigStringColumn() {
+        val dataFrame = dataFrameOf(bigStringColumn)
+        val data = dataFrame.saveArrowFeatherToByteArray()
+        DataFrame.readArrowFeather(data) shouldBe dataFrame
+    }
 }
diff --git a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/examplesToWrite.kt b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/examplesToWrite.kt
index 8075d20de..0b09e4fcd 100644
--- a/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/examplesToWrite.kt
+++ b/dataframe-arrow/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/examplesToWrite.kt
@@ -154,3 +154,39 @@ val citiesExampleSchema = """{
   } ]
 }
 """.trimIndent()
+
+/**
+ * String column (variable length vector) with size >1 MiB
+ */
+val bigStringColumn = run {
+    val list = ArrayList<String>()
+    for (i in 0 until 1024) {
+        val row = StringBuilder()
+        for (j in 0 until 64) {
+            row.append("abcd")
+        }
+        list.add(row.toString())
+    }
+    for (i in 0 until 1024) {
+        val row = StringBuilder()
+        for (j in 0 until 64) {
+            row.append("гдёж")
+        }
+        list.add(row.toString())
+    }
+    for (i in 0 until 1024) {
+        val row = StringBuilder()
+        for (j in 0 until 64) {
+            row.append("αβγδ")
+        }
+        list.add(row.toString())
+    }
+    for (i in 0 until 1024) {
+        val row = StringBuilder()
+        for (j in 0 until 64) {
+            row.append("正体字")
+        }
+        list.add(row.toString())
+    }
+    DataColumn.createValueColumn("bigStringColumn", list)
+}