diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt index f556bd119..ba114a510 100644 --- a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt +++ b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt @@ -52,58 +52,100 @@ internal class DefaultReadExcelMethod(path: String?) : AbstractDefaultReadMethod private const val readExcel = "readExcel" +/** + * @param sheetName sheet to read. By default, first sheet in the document + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( url: URL, sheetName: String? = null, + skipRows: Int = 0, columns: String? = null, rowsCount: Int? = null ): AnyFrame { val wb = WorkbookFactory.create(url.openStream()) - return wb.use { readExcel(wb, sheetName, columns, rowsCount) } + return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount) } } +/** + * @param sheetName sheet to read. By default, first sheet in the document + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( file: File, sheetName: String? = null, + skipRows: Int = 0, columns: String? = null, rowsCount: Int? = null ): AnyFrame { val wb = WorkbookFactory.create(file) - return wb.use { readExcel(it, sheetName, columns, rowsCount) } + return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) } } +/** + * @param sheetName sheet to read. By default, first sheet in the document + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( fileOrUrl: String, sheetName: String? = null, + skipRows: Int = 0, columns: String? = null, rowsCount: Int? = null -): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, columns, rowsCount) +): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount) +/** + * @param sheetName sheet to read. By default, first sheet in the document + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( inputStream: InputStream, sheetName: String? = null, + skipRows: Int = 0, columns: String? = null, rowsCount: Int? = null ): AnyFrame { val wb = WorkbookFactory.create(inputStream) - return wb.use { readExcel(it, sheetName, columns, rowsCount) } + return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) } } +/** + * @param sheetName sheet to read. By default, first sheet in the document + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( wb: Workbook, sheetName: String? = null, + skipRows: Int = 0, columns: String? = null, rowsCount: Int? = null ): AnyFrame { val sheet: Sheet = sheetName ?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") } ?: wb.getSheetAt(0) - return readExcel(sheet, columns, rowsCount) + return readExcel(sheet, columns, skipRows, rowsCount) } +/** + * @param sheet sheet to read. + * @param columns comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”) + * @param skipRows number of rows before header + * @param rowsCount number of rows to read. + */ public fun DataFrame.Companion.readExcel( sheet: Sheet, columns: String? = null, + skipRows: Int = 0, rowsCount: Int? = null ): AnyFrame { val columnIndexes = if (columns != null) { @@ -119,8 +161,8 @@ public fun DataFrame.Companion.readExcel( sheet.getRow(0).map { it.columnIndex } } - val headerRow = sheet.getRow(0) - val valueRows = sheet.drop(1).let { if (rowsCount != null) it.take(rowsCount) else it } + val headerRow = sheet.getRow(skipRows) + val valueRows = sheet.drop(1 + skipRows).let { if (rowsCount != null) it.take(rowsCount) else it } val columns = columnIndexes.map { index -> val headerCell = headerRow.getCell(index) val name = if (headerCell?.cellType == CellType.NUMERIC) { @@ -128,7 +170,7 @@ public fun DataFrame.Companion.readExcel( } else { headerCell?.stringCellValue ?: CellReference.convertNumToColString(index) // Use Excel column names if no data } - val values = valueRows.map { + val values: List = valueRows.map { val cell: Cell? = it.getCell(index) when (cell?.cellType) { CellType._NONE -> error("Cell ${cell.address} of sheet ${sheet.sheetName} has a CellType that should only be used internally. This is a bug, please report https://github.com/Kotlin/dataframe/issues") diff --git a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt index b51cd02f7..02a17b56d 100644 --- a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt +++ b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt @@ -36,7 +36,7 @@ class XlsxTest { @Test fun `column with empty header`() { - val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", "A:C") + val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", columns = "A:C") df shouldBe dataFrameOf("col1", "col2", "C")(1.0, null, 3.0) } @@ -78,4 +78,10 @@ class XlsxTest { df.writeExcel(temp) DataFrame.readExcel(temp) shouldBe df } + + @Test + fun `read header on second row`() { + val df = DataFrame.readExcel(testResource("custom_header_position.xlsx"), skipRows = 1) + df.columnNames() shouldBe listOf("header1", "header2") + } } diff --git a/dataframe-excel/src/test/resources/custom_header_position.xlsx b/dataframe-excel/src/test/resources/custom_header_position.xlsx new file mode 100644 index 000000000..9799b0c89 Binary files /dev/null and b/dataframe-excel/src/test/resources/custom_header_position.xlsx differ