Skip to content

Commit

Permalink
Add/improve print methods for Array, ChunkedArray, Table, RecordBatch
Browse files Browse the repository at this point in the history
  • Loading branch information
nealrichardson committed Sep 24, 2019
1 parent b780c56 commit 2d4e744
Show file tree
Hide file tree
Showing 14 changed files with 145 additions and 33 deletions.
5 changes: 4 additions & 1 deletion r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ Array <- R6Class("Array",
ApproxEquals = function(other) Array__ApproxEquals(self, other),
data = function() shared_ptr(ArrayData, Array__data(self)),
as_vector = function() Array__as_vector(self),
ToString = function() Array__ToString(self),
ToString = function() {
typ <- paste0("<", self$type$ToString(), ">")
paste(typ, Array__ToString(self), sep = "\n")
},
Slice = function(offset, length = NULL){
if (is.null(length)) {
shared_ptr(Array, Array__Slice1(self, offset))
Expand Down
4 changes: 2 additions & 2 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ Object <- R6Class("Object",
self$`.:xp:.` <- xp
},
print = function(...){
cat(class(self)[[1]], "\n")
cat(class(self)[[1]], "\n", sep = "")
if (!is.null(self$ToString)){
cat(self$ToString(), "\n")
cat(self$ToString(), "\n", sep = "")
}
invisible(self)
}
Expand Down
14 changes: 14 additions & 0 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object,
},
Validate = function() {
ChunkedArray__Validate(self)
},
ToString = function() {
out <- self$chunk(0)$ToString()
if (self$num_chunks > 1) {
# Regardless of whether the first array prints with ellipsis, we need
# to ellipsize because there's more data than is contained in this
# chunk
if (grepl("...\n", out, fixed = TRUE)) {
out <- sub("\\.\\.\\..*$", "...\n]", out)
} else {
out <- sub("\\n\\]$", ",\n ...\n]", out)
}
}
out
}
),
active = list(
Expand Down
21 changes: 11 additions & 10 deletions r/R/dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,21 @@ DictionaryType <- R6Class("DictionaryType",
ordered = function() DictionaryType__ordered(self)
)
)
DictionaryType$create <- function(index_type = int32(),
value_type = utf8(),
ordered = FALSE) {
assert_is(index_type, "DataType")
assert_is(value_type, "DataType")
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
}

#' Create a dictionary type
#'
#' @param index_type index type, e.g. [int32()]
#' @param value_type value type, probably [utf8()]
#' @param ordered Is this an ordered dictionary ?
#' @param index_type A DataType for the indexes (default [int32()])
#' @param value_type A DataType for the values (default [utf8()])
#' @param ordered Is this an ordered dictionary (default `FALSE`)?
#'
#' @return A [DictionaryType]
#' @seealso [Other Arrow data types][data-type]
#' @export
dictionary <- function(index_type, value_type, ordered = FALSE) {
assert_that(
inherits(index_type, "DataType"),
inherits(index_type, "DataType")
)
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
}
dictionary <- DictionaryType$create
9 changes: 9 additions & 0 deletions r/R/record-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object,
},

serialize = function() ipc___SerializeRecordBatch__Raw(self),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down Expand Up @@ -246,3 +247,11 @@ tail.RecordBatch <- function(x, n = 6L, ...) {
}
x$Slice(n)
}

ToString_tabular <- function(x, ...) {
# Generic to work with both RecordBatch and Table
sch <- unlist(strsplit(x$schema$ToString(), "\n"))
sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch)
dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x))
paste(c(dims, sch), collapse = "\n")
}
1 change: 1 addition & 0 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Table <- R6Class("Table", inherit = Object,
field = function(i) shared_ptr(Field, Table__field(self, i)),

serialize = function(output_stream, ...) write_table(self, output_stream, ...),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down
1 change: 0 additions & 1 deletion r/README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ library(arrow)
set.seed(24)
tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
tab
as.data.frame(tab)
```
Expand Down
7 changes: 3 additions & 4 deletions r/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,11 @@ library(arrow)
set.seed(24)

tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
#> Schema
#> x: int32
#> y: double
tab
#> Table
#> 10 rows x 2 columns
#> $x <int32>
#> $y <double>
as.data.frame(tab)
#> x y
#> 1 1 -0.545880758
Expand Down
9 changes: 5 additions & 4 deletions r/man/dictionary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ test_that("Array", {
expect_equal(z_dbl$as_vector(), as.numeric(4:5))
})

test_that("Array print method includes type", {
x <- Array$create(c(1:10, 1:10, 1:5))
expect_output(print(x), "Array\n<int32>\n[\n", fixed = TRUE)
})

test_that("Array supports NA", {
x_int <- Array$create(as.integer(c(1:10, NA)))
x_dbl <- Array$create(as.numeric(c(1:10, NA)))
Expand Down Expand Up @@ -257,7 +262,7 @@ test_that("array supports integer64", {
expect_true(a$IsNull(3L))
})

test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", {
test_that("array$as_vector() correctly handles all NA int64 (ARROW-3795)", {
x <- bit64::as.integer64(NA)
a <- Array$create(x)
expect_true(is.na(a$as_vector()))
Expand Down
25 changes: 21 additions & 4 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("RecordBatch", {
schema(
int = int32(), dbl = float64(),
lgl = boolean(), chr = utf8(),
fct = dictionary(int32(), Array$create(letters[1:10]))
fct = dictionary()
)
)
expect_equal(batch$num_columns, 5L)
Expand Down Expand Up @@ -69,12 +69,12 @@ test_that("RecordBatch", {
col_fct <- batch$column(4)
expect_true(inherits(col_fct, 'Array'))
expect_equal(col_fct$as_vector(), tbl$fct)
expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10])))
expect_equal(col_fct$type, dictionary())

batch2 <- batch$RemoveColumn(0)
expect_equal(
batch2$schema,
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10])))
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary())
)
expect_equal(batch2$column(0), batch$column(1))
expect_identical(as.data.frame(batch2), tbl[,-1])
Expand Down Expand Up @@ -120,6 +120,23 @@ test_that("head and tail on RecordBatch", {
expect_identical(as.data.frame(tail(batch, -4)), tail(tbl, -4))
})

test_that("RecordBatch print method", {
expect_output(
print(batch),
paste(
"RecordBatch",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8, ordered=0>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("RecordBatch with 0 rows are supported", {
tbl <- tibble::tibble(
int = integer(),
Expand All @@ -139,7 +156,7 @@ test_that("RecordBatch with 0 rows are supported", {
dbl = float64(),
lgl = boolean(),
chr = utf8(),
fct = dictionary(int32(), Array$create(c("a", "b")))
fct = dictionary()
)
)
})
Expand Down
17 changes: 17 additions & 0 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,23 @@ test_that("head and tail on Table", {
expect_identical(as.data.frame(tail(tab, -4)), tail(tbl, -4))
})

test_that("Table print method", {
expect_output(
print(tab),
paste(
"Table",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8, ordered=0>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("table active bindings", {
expect_identical(dim(tbl), dim(tab))
expect_is(tab$columns, "list")
Expand Down
53 changes: 51 additions & 2 deletions r/tests/testthat/test-chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ test_that("ChunkedArray", {
y <- x$Slice(8)
expect_equal(y$type, int32())
expect_equal(y$num_chunks, 3L)
expect_equal(y$length(), 17L)
expect_equal(y$as_vector(), c(9:10, 1:10, 1:5))
expect_equal(length(y), 17L)
expect_equal(as.vector(y), c(9:10, 1:10, 1:5))

z <- x$Slice(8, 5)
expect_equal(z$type, int32())
Expand All @@ -55,6 +55,55 @@ test_that("ChunkedArray", {
expect_equal(z_dbl$as_vector(), as.numeric(3:4))
})

test_that("print ChunkedArray", {
x1 <- chunked_array(c(1,2,3), c(4,5,6))
expect_output(
print(x1),
paste(
"ChunkedArray",
"<double>",
"[",
" 1,",
" 2,",
" 3,",
" ...",
"]",
sep = "\n"
),
fixed = TRUE
)
x2 <- chunked_array(1:30, c(4,5,6))
expect_output(
print(x2),
paste(
"ChunkedArray",
"<int32>",
"[",
" 1,",
" 2,",
" 3,",
" 4,",
" 5,",
" 6,",
" 7,",
" 8,",
" 9,",
" 10,",
" ...",
"]",
sep = "\n"
),
fixed = TRUE
)
# If there's only one chunk, it should look like a regular Array
x3 <- chunked_array(1:30)
expect_output(
print(x3),
paste0("Chunked", paste(capture.output(print(Array$create(1:30))), collapse = "\n")),
fixed = TRUE
)
})

test_that("ChunkedArray handles !!! splicing", {
data <- list(1, 2, 3)
x <- chunked_array(!!!data)
Expand Down
5 changes: 1 addition & 4 deletions r/tests/testthat/test-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,7 @@ test_that("type() infers from R type", {
expect_equal(type(TRUE), boolean())
expect_equal(type(raw()), int8())
expect_equal(type(""), utf8())
expect_equal(
type(iris$Species),
dictionary(int8(), Array$create(levels(iris$Species)), FALSE)
)
expect_equal(type(iris$Species), dictionary())
expect_equal(
type(lubridate::ymd_hms("2019-02-14 13:55:05")),
timestamp(TimeUnit$MICRO, "GMT")
Expand Down

0 comments on commit 2d4e744

Please sign in to comment.