Skip to content

Commit

Permalink
ARROW-6649: [R] print methods for Array, ChunkedArray, Table, RecordB…
Browse files Browse the repository at this point in the history
…atch

See the readme and the new tests for example output.

This patch also fixes a validation bug in `dictionary()`, aliases that to `DictionaryType$create`, and adds default arguments.

Closes #5492 from nealrichardson/print-methods and squashes the following commits:

c092d30 <Neal Richardson> Merge branch 'print-methods' of github.com:nealrichardson/arrow into print-methods
02afb89 <Neal Richardson> Prettier printing of dictionary type's ordered attribute
5750100 <Neal Richardson> indices in the docs too
6be0328 <Neal Richardson> indices
2d4e744 <Neal Richardson> Add/improve print methods for Array, ChunkedArray, Table, RecordBatch

Authored-by: Neal Richardson <[email protected]>
Signed-off-by: Neal Richardson <[email protected]>
  • Loading branch information
nealrichardson committed Sep 25, 2019
1 parent 2c7fb24 commit a89c803
Show file tree
Hide file tree
Showing 19 changed files with 209 additions and 55 deletions.
5 changes: 4 additions & 1 deletion r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ Array <- R6Class("Array",
ApproxEquals = function(other) Array__ApproxEquals(self, other),
data = function() shared_ptr(ArrayData, Array__data(self)),
as_vector = function() Array__as_vector(self),
ToString = function() Array__ToString(self),
ToString = function() {
typ <- paste0("<", self$type$ToString(), ">")
paste(typ, Array__ToString(self), sep = "\n")
},
Slice = function(offset, length = NULL){
if (is.null(length)) {
shared_ptr(Array, Array__Slice1(self, offset))
Expand Down
7 changes: 5 additions & 2 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,18 @@ Object <- R6Class("Object",
self$`.:xp:.` <- xp
},
print = function(...){
cat(class(self)[[1]], "\n")
cat(class(self)[[1]], "\n", sep = "")
if (!is.null(self$ToString)){
cat(self$ToString(), "\n")
cat(self$ToString(), "\n", sep = "")
}
invisible(self)
}
)
)

#' @export
`!=.Object` <- function(lhs, rhs) !(lhs == rhs)

shared_ptr <- function(class, xp) {
if (!shared_ptr_is_null(xp)) class$new(xp)
}
Expand Down
14 changes: 14 additions & 0 deletions r/R/chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,20 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = Object,
},
Validate = function() {
ChunkedArray__Validate(self)
},
ToString = function() {
out <- self$chunk(0)$ToString()
if (self$num_chunks > 1) {
# Regardless of whether the first array prints with ellipsis, we need
# to ellipsize because there's more data than is contained in this
# chunk
if (grepl("...\n", out, fixed = TRUE)) {
out <- sub("\\.\\.\\..*$", "...\n]", out)
} else {
out <- sub("\\n\\]$", ",\n ...\n]", out)
}
}
out
}
),
active = list(
Expand Down
31 changes: 21 additions & 10 deletions r/R/dictionary.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,28 +31,39 @@
#' @name DictionaryType
DictionaryType <- R6Class("DictionaryType",
inherit = FixedWidthType,

public = list(
ToString = function() {
prettier_dictionary_type(DataType__ToString(self))
}
),
active = list(
index_type = function() DataType$create(DictionaryType__index_type(self)),
value_type = function() DataType$create(DictionaryType__value_type(self)),
name = function() DictionaryType__name(self),
ordered = function() DictionaryType__ordered(self)
)
)
DictionaryType$create <- function(index_type = int32(),
value_type = utf8(),
ordered = FALSE) {
assert_is(index_type, "DataType")
assert_is(value_type, "DataType")
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
}

#' Create a dictionary type
#'
#' @param index_type index type, e.g. [int32()]
#' @param value_type value type, probably [utf8()]
#' @param ordered Is this an ordered dictionary ?
#' @param index_type A DataType for the indices (default [int32()])
#' @param value_type A DataType for the values (default [utf8()])
#' @param ordered Is this an ordered dictionary (default `FALSE`)?
#'
#' @return A [DictionaryType]
#' @seealso [Other Arrow data types][data-type]
#' @export
dictionary <- function(index_type, value_type, ordered = FALSE) {
assert_that(
inherits(index_type, "DataType"),
inherits(index_type, "DataType")
)
shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered))
dictionary <- DictionaryType$create

prettier_dictionary_type <- function(x) {
# Prettier format the "ordered" attribute
x <- sub(", ordered=0", "", x)
sub("ordered=1", "ordered", x)
}
2 changes: 1 addition & 1 deletion r/R/field.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
Field <- R6Class("Field", inherit = Object,
public = list(
ToString = function() {
Field__ToString(self)
prettier_dictionary_type(Field__ToString(self))
},
Equals = function(other) {
inherits(other, "Field") && Field__Equals(self, other)
Expand Down
9 changes: 9 additions & 0 deletions r/R/record-batch.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ RecordBatch <- R6Class("RecordBatch", inherit = Object,
},

serialize = function() ipc___SerializeRecordBatch__Raw(self),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down Expand Up @@ -246,3 +247,11 @@ tail.RecordBatch <- function(x, n = 6L, ...) {
}
x$Slice(n)
}

ToString_tabular <- function(x, ...) {
# Generic to work with both RecordBatch and Table
sch <- unlist(strsplit(x$schema$ToString(), "\n"))
sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch)
dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x))
paste(c(dims, sch), collapse = "\n")
}
2 changes: 1 addition & 1 deletion r/R/schema.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
Schema <- R6Class("Schema",
inherit = Object,
public = list(
ToString = function() Schema__ToString(self),
ToString = function() prettier_dictionary_type(Schema__ToString(self)),
num_fields = function() Schema__num_fields(self),
field = function(i) shared_ptr(Field, Schema__field(self, i)),
serialize = function() Schema__serialize(self),
Expand Down
1 change: 1 addition & 0 deletions r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Table <- R6Class("Table", inherit = Object,
field = function(i) shared_ptr(Field, Table__field(self, i)),

serialize = function(output_stream, ...) write_table(self, output_stream, ...),
ToString = function() ToString_tabular(self),

cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
assert_is(target_schema, "Schema")
Expand Down
6 changes: 0 additions & 6 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
# under the License.

#' @include arrow-package.R

#' @export
`!=.Object` <- function(lhs, rhs){
!(lhs == rhs)
}

#' @title class arrow::DataType
#'
#' @usage NULL
Expand Down
8 changes: 6 additions & 2 deletions r/README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,13 @@ When installing from source, if the R and C++ library versions do not match, ins
library(arrow)
set.seed(24)
tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
tab <- Table$create(
x = 1:10,
y = rnorm(10),
z = as.factor(rep(c("b", "c"), 5))
)
tab
tab$x
as.data.frame(tab)
```

Expand Down
51 changes: 35 additions & 16 deletions r/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,25 +69,44 @@ Arrow C++ library first.
library(arrow)
set.seed(24)

tab <- Table$create(x = 1:10, y = rnorm(10))
tab$schema
#> Schema
#> x: int32
#> y: double
tab <- Table$create(
x = 1:10,
y = rnorm(10),
z = as.factor(rep(c("b", "c"), 5))
)
tab
#> Table
#> 10 rows x 3 columns
#> $x <int32>
#> $y <double>
#> $z <dictionary<values=string, indices=int8>>
tab$x
#> ChunkedArray
#> <int32>
#> [
#> 1,
#> 2,
#> 3,
#> 4,
#> 5,
#> 6,
#> 7,
#> 8,
#> 9,
#> 10
#> ]
as.data.frame(tab)
#> x y
#> 1 1 -0.545880758
#> 2 2 0.536585304
#> 3 3 0.419623149
#> 4 4 -0.583627199
#> 5 5 0.847460017
#> 6 6 0.266021979
#> 7 7 0.444585270
#> 8 8 -0.466495124
#> 9 9 -0.848370044
#> 10 10 0.002311942
#> x y z
#> 1 1 -0.545880758 b
#> 2 2 0.536585304 c
#> 3 3 0.419623149 b
#> 4 4 -0.583627199 c
#> 5 5 0.847460017 b
#> 6 6 0.266021979 c
#> 7 7 0.444585270 b
#> 8 8 -0.466495124 c
#> 9 9 -0.848370044 b
#> 10 10 0.002311942 c
```

## Installing a development version
Expand Down
9 changes: 5 additions & 4 deletions r/man/dictionary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ test_that("Array", {
expect_equal(z_dbl$as_vector(), as.numeric(4:5))
})

test_that("Array print method includes type", {
x <- Array$create(c(1:10, 1:10, 1:5))
expect_output(print(x), "Array\n<int32>\n[\n", fixed = TRUE)
})

test_that("Array supports NA", {
x_int <- Array$create(as.integer(c(1:10, NA)))
x_dbl <- Array$create(as.numeric(c(1:10, NA)))
Expand Down Expand Up @@ -257,7 +262,7 @@ test_that("array supports integer64", {
expect_true(a$IsNull(3L))
})

test_that("array$as_vector() correctly handles all NA inte64 (ARROW-3795)", {
test_that("array$as_vector() correctly handles all NA int64 (ARROW-3795)", {
x <- bit64::as.integer64(NA)
a <- Array$create(x)
expect_true(is.na(a$as_vector()))
Expand Down
25 changes: 21 additions & 4 deletions r/tests/testthat/test-RecordBatch.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ test_that("RecordBatch", {
schema(
int = int32(), dbl = float64(),
lgl = boolean(), chr = utf8(),
fct = dictionary(int32(), Array$create(letters[1:10]))
fct = dictionary()
)
)
expect_equal(batch$num_columns, 5L)
Expand Down Expand Up @@ -69,12 +69,12 @@ test_that("RecordBatch", {
col_fct <- batch$column(4)
expect_true(inherits(col_fct, 'Array'))
expect_equal(col_fct$as_vector(), tbl$fct)
expect_equal(col_fct$type, dictionary(int32(), Array$create(letters[1:10])))
expect_equal(col_fct$type, dictionary())

batch2 <- batch$RemoveColumn(0)
expect_equal(
batch2$schema,
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int32(), Array$create(letters[1:10])))
schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary())
)
expect_equal(batch2$column(0), batch$column(1))
expect_identical(as.data.frame(batch2), tbl[,-1])
Expand Down Expand Up @@ -120,6 +120,23 @@ test_that("head and tail on RecordBatch", {
expect_identical(as.data.frame(tail(batch, -4)), tail(tbl, -4))
})

test_that("RecordBatch print method", {
expect_output(
print(batch),
paste(
"RecordBatch",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("RecordBatch with 0 rows are supported", {
tbl <- tibble::tibble(
int = integer(),
Expand All @@ -139,7 +156,7 @@ test_that("RecordBatch with 0 rows are supported", {
dbl = float64(),
lgl = boolean(),
chr = utf8(),
fct = dictionary(int32(), Array$create(c("a", "b")))
fct = dictionary()
)
)
})
Expand Down
17 changes: 17 additions & 0 deletions r/tests/testthat/test-Table.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,23 @@ test_that("head and tail on Table", {
expect_identical(as.data.frame(tail(tab, -4)), tail(tbl, -4))
})

test_that("Table print method", {
expect_output(
print(tab),
paste(
"Table",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8>>",
sep = "\n"
),
fixed = TRUE
)
})

test_that("table active bindings", {
expect_identical(dim(tbl), dim(tab))
expect_is(tab$columns, "list")
Expand Down
Loading

0 comments on commit a89c803

Please sign in to comment.