From 676cd29b18caedf0d22f26b812be1e0bb66cc84a Mon Sep 17 00:00:00 2001 From: Benjamin Skov Kaas-Hansen Date: Mon, 26 Sep 2022 22:09:45 +0200 Subject: [PATCH 1/4] Fix error when parsing tables with only empty rows --- NEWS.md | 2 ++ R/table.R | 1 + tests/testthat/_snaps/table.md | 8 ++++++++ tests/testthat/test-table.R | 12 ++++++++++++ 4 files changed, 23 insertions(+) diff --git a/NEWS.md b/NEWS.md index 5a5468ba..bd03dd96 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # rvest (development version) +* `html_tables()` discards rows without cells (@epiben, #TBD). + # rvest 1.0.3 * Re-document to fix HTML issues in `.Rd`. diff --git a/R/table.R b/R/table.R index eb2f72e0..2c13c5eb 100644 --- a/R/table.R +++ b/R/table.R @@ -127,6 +127,7 @@ html_table.xml_node <- function(x, ns <- xml2::xml_ns(x) rows <- xml2::xml_find_all(x, ".//tr", ns = ns) cells <- lapply(rows, xml2::xml_find_all, ".//td|.//th", ns = ns) + cells <- cells[!map_lgl(cells, rlang::is_empty)] if (length(cells) == 0) { return(tibble::tibble()) diff --git a/tests/testthat/_snaps/table.md b/tests/testthat/_snaps/table.md index 4ce5a733..007a57ef 100644 --- a/tests/testthat/_snaps/table.md +++ b/tests/testthat/_snaps/table.md @@ -88,3 +88,11 @@ # A tibble: 0 x 0 +# can handle tables consisting of a single empty row + + # A tibble: 0 x 0 + +# can handle tables consisting of only empty rows + + # A tibble: 0 x 0 + diff --git a/tests/testthat/test-table.R b/tests/testthat/test-table.R index 079e8a04..fc3f35fd 100644 --- a/tests/testthat/test-table.R +++ b/tests/testthat/test-table.R @@ -195,3 +195,15 @@ test_that("can handle empty tables", { table <- html_table(html)[[1]] expect_snapshot_output(table) }) + +test_that("can handle tables consisting of a single empty row", { + html <- minimal_html('
<table><tr></tr></table>') + table <- html_table(html)[[1]] + expect_snapshot_output(table) +}) + +test_that("can handle tables consisting of only empty rows", { + html <- minimal_html('
<table><tr></tr><tr></tr></table>') + table <- html_table(html)[[1]] + expect_snapshot_output(table) +}) From 10155944e2bb9ef077bb193d8309827b6b78e755 Mon Sep 17 00:00:00 2001 From: Benjamin Skov Kaas-Hansen Date: Mon, 26 Sep 2022 22:12:55 +0200 Subject: [PATCH 2/4] Fix typo in NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index bd03dd96..0a9de71a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # rvest (development version) -* `html_tables()` discards rows without cells (@epiben, #TBD). +* `html_table()` discards rows without cells (@epiben, #TBD). # rvest 1.0.3 From 256f0d146e9ebc17b4cb8793174c2643f9b660be Mon Sep 17 00:00:00 2001 From: Benjamin Skov Kaas-Hansen Date: Mon, 26 Sep 2022 22:18:05 +0200 Subject: [PATCH 3/4] Add PR hook to NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 0a9de71a..3f668a76 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # rvest (development version) -* `html_table()` discards rows without cells (@epiben, #TBD). +* `html_table()` discards rows without cells (@epiben, #360). # rvest 1.0.3 From d58ebdf1b14575999ec5e16d297fce777f364889 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Tue, 23 Jan 2024 08:43:04 -0600 Subject: [PATCH 4/4] Use purrr shim helper --- R/table.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/table.R b/R/table.R index 2c13c5eb..bee96c2c 100644 --- a/R/table.R +++ b/R/table.R @@ -127,7 +127,7 @@ html_table.xml_node <- function(x, ns <- xml2::xml_ns(x) rows <- xml2::xml_find_all(x, ".//tr", ns = ns) cells <- lapply(rows, xml2::xml_find_all, ".//td|.//th", ns = ns) - cells <- cells[!map_lgl(cells, rlang::is_empty)] + cells <- compact(cells) if (length(cells) == 0) { return(tibble::tibble())