From b3b6916f59c29dddb49367dec64ec175430f8efa Mon Sep 17 00:00:00 2001
From: Benjamin Skov Kaas-Hansen
Date: Tue, 23 Jan 2024 15:56:54 +0100
Subject: [PATCH] Gracefully handle tables with only empty rows (#360)

---
 NEWS.md                        |  2 ++
 R/table.R                      |  1 +
 tests/testthat/_snaps/table.md |  8 ++++++++
 tests/testthat/test-table.R    | 12 ++++++++++++
 4 files changed, 23 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 5a5468ba..3f668a76 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,7 @@
 # rvest (development version)
 
+* `html_table()` discards rows without cells (@epiben, #360).
+
 # rvest 1.0.3
 
 * Re-document to fix HTML issues in `.Rd`.
diff --git a/R/table.R b/R/table.R
index eb2f72e0..bee96c2c 100644
--- a/R/table.R
+++ b/R/table.R
@@ -127,6 +127,7 @@ html_table.xml_node <- function(x,
   ns <- xml2::xml_ns(x)
   rows <- xml2::xml_find_all(x, ".//tr", ns = ns)
   cells <- lapply(rows, xml2::xml_find_all, ".//td|.//th", ns = ns)
+  cells <- compact(cells)
 
   if (length(cells) == 0) {
     return(tibble::tibble())
diff --git a/tests/testthat/_snaps/table.md b/tests/testthat/_snaps/table.md
index 607dcdae..5d7cef58 100644
--- a/tests/testthat/_snaps/table.md
+++ b/tests/testthat/_snaps/table.md
@@ -90,3 +90,11 @@
 
     # A tibble: 0 x 0
 
+# can handle tables consisting of a single empty row
+
+    # A tibble: 0 x 0
+
+# can handle tables consisting of only empty rows
+
+    # A tibble: 0 x 0
+
diff --git a/tests/testthat/test-table.R b/tests/testthat/test-table.R
index 079e8a04..fc3f35fd 100644
--- a/tests/testthat/test-table.R
+++ b/tests/testthat/test-table.R
@@ -195,3 +195,15 @@ test_that("can handle empty tables", {
   table <- html_table(html)[[1]]
   expect_snapshot_output(table)
 })
+
+test_that("can handle tables consisting of a single empty row", {
+  html <- minimal_html('<table><tr></tr></table>')
+  table <- html_table(html)[[1]]
+  expect_snapshot_output(table)
+})
+
+test_that("can handle tables consisting of only empty rows", {
+  html <- minimal_html('<table><tr></tr><tr></tr></table>')
+  table <- html_table(html)[[1]]
+  expect_snapshot_output(table)
+})