Kotlin · koperagen · Aug 2, 2022 · Aug 1, 2022
diff --git a/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt b/dataframe-excel/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/xlsx.kt
@@ -52,58 +52,100 @@ internal class DefaultReadExcelMethod(path: String?) : AbstractDefaultReadMethod
 
 private const val readExcel = "readExcel"
 
+/**
+ * @param sheetName name of the sheet to read; when null, the first sheet of the workbook is used
+ * @param skipRows number of rows to skip before the header row
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`)
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */
 public fun DataFrame.Companion.readExcel(
     url: URL,
     sheetName: String? = null,
+    skipRows: Int = 0,
     columns: String? = null,
     rowsCount: Int? = null
 ): AnyFrame {
     val wb = WorkbookFactory.create(url.openStream())
-    return wb.use { readExcel(wb, sheetName, columns, rowsCount) }
+    return wb.use { readExcel(wb, sheetName, skipRows, columns, rowsCount) }
 }
 
+/**
+ * @param sheetName name of the sheet to read; when null, the first sheet of the workbook is used
+ * @param skipRows number of rows to skip before the header row
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`)
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */
 public fun DataFrame.Companion.readExcel(
     file: File,
     sheetName: String? = null,
+    skipRows: Int = 0,
     columns: String? = null,
     rowsCount: Int? = null
 ): AnyFrame {
     val wb = WorkbookFactory.create(file)
-    return wb.use { readExcel(it, sheetName, columns, rowsCount) }
+    return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) }
 }
 
+/**
+ * @param sheetName name of the sheet to read; when null, the first sheet of the workbook is used
+ * @param skipRows number of rows to skip before the header row
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`)
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */
 public fun DataFrame.Companion.readExcel(
     fileOrUrl: String,
     sheetName: String? = null,
+    skipRows: Int = 0,
     columns: String? = null,
     rowsCount: Int? = null
-): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, columns, rowsCount)
+): AnyFrame = readExcel(asURL(fileOrUrl), sheetName, skipRows, columns, rowsCount)
 
+/**
+ * @param sheetName name of the sheet to read; when null, the first sheet of the workbook is used
+ * @param skipRows number of rows to skip before the header row
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`)
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */
 public fun DataFrame.Companion.readExcel(
     inputStream: InputStream,
     sheetName: String? = null,
+    skipRows: Int = 0,
     columns: String? = null,
     rowsCount: Int? = null
 ): AnyFrame {
     val wb = WorkbookFactory.create(inputStream)
-    return wb.use { readExcel(it, sheetName, columns, rowsCount) }
+    return wb.use { readExcel(it, sheetName, skipRows, columns, rowsCount) }
 }
 
+/**
+ * @param sheetName name of the sheet to read; when null, the first sheet of [wb] is used, and a name that matches no sheet raises an error
+ * @param skipRows number of rows to skip before the header row
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`)
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */
 public fun DataFrame.Companion.readExcel(
     wb: Workbook,
     sheetName: String? = null,
+    skipRows: Int = 0,
     columns: String? = null,
     rowsCount: Int? = null
 ): AnyFrame {
     val sheet: Sheet = sheetName
         ?.let { wb.getSheet(it) ?: error("Sheet with name $sheetName not found") }
         ?: wb.getSheetAt(0)
-    return readExcel(sheet, columns, rowsCount)
+    return readExcel(sheet, columns, skipRows, rowsCount)
 }
 
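+/**
+ * @param sheet sheet to read
+ * @param columns comma-separated list of Excel column letters and column ranges (e.g. `A:E` or `A,C,E:F`); NOTE(review): when null, column indexes appear to be taken from row 0 rather than from the header row at [skipRows] — confirm this is intended
+ * @param skipRows number of rows to skip before the header row
+ * @param rowsCount maximum number of data rows to read after the header; when null, all remaining rows are read
+ */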
 public fun DataFrame.Companion.readExcel(
     sheet: Sheet,
     columns: String? = null,
+    skipRows: Int = 0,
     rowsCount: Int? = null
 ): AnyFrame {
     val columnIndexes = if (columns != null) {
@@ -119,16 +161,16 @@ public fun DataFrame.Companion.readExcel(
         sheet.getRow(0).map { it.columnIndex }
     }
 
-    val headerRow = sheet.getRow(0)
-    val valueRows = sheet.drop(1).let { if (rowsCount != null) it.take(rowsCount) else it }
+    val headerRow = sheet.getRow(skipRows)
+    val valueRows = sheet.drop(1 + skipRows).let { if (rowsCount != null) it.take(rowsCount) else it }
     val columns = columnIndexes.map { index ->
         val headerCell = headerRow.getCell(index)
         val name = if (headerCell?.cellType == CellType.NUMERIC) {
             headerCell.numericCellValue.toString() // Support numeric-named columns
         } else {
             headerCell?.stringCellValue ?: CellReference.convertNumToColString(index) // Use Excel column names if no data
         }
-        val values = valueRows.map {
+        val values: List<Any?> = valueRows.map {
             val cell: Cell? = it.getCell(index)
             when (cell?.cellType) {
                 CellType._NONE -> error("Cell ${cell.address} of sheet ${sheet.sheetName} has a CellType that should only be used internally. This is a bug, please report https://github.com/Kotlin/dataframe/issues")

diff --git a/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt b/dataframe-excel/src/test/kotlin/org/jetbrains/kotlinx/dataframe/io/XlsxTest.kt
@@ -36,7 +36,7 @@ class XlsxTest {
 
     @Test
     fun `column with empty header`() {
-        val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", "A:C")
+        val df = DataFrame.readExcel(testResource("sample2.xlsx"), "Sheet1", columns = "A:C") // columns is passed by name: skipRows is now the third positional parameter
         df shouldBe dataFrameOf("col1", "col2", "C")(1.0, null, 3.0)
     }
 
@@ -78,4 +78,10 @@ class XlsxTest {
         df.writeExcel(temp)
         DataFrame.readExcel(temp) shouldBe df
     }
+
+    @Test
+    fun `read header on second row`() {
+        val df = DataFrame.readExcel(testResource("custom_header_position.xlsx"), skipRows = 1) // skip one row so the header is read from the second row
+        df.columnNames() shouldBe listOf("header1", "header2")
+    }
 }
diff --git a/dataframe-excel/src/test/resources/custom_header_position.xlsx b/dataframe-excel/src/test/resources/custom_header_position.xlsx