Skip to content

Commit

Permalink
ARROW-13326: [R] [Archery] Add linting to dev CI
Browse files Browse the repository at this point in the history
This does three things:

  * Adds a linting step to the dev workflow that uses {lintr} to check that the R package lints correctly
  * Adds an autotune step that uses {styler} to autostyle R code such that it should then lint fine
  * Makes the slew of changes needed for the package to lint cleanly — there's a huge delta here, but it is almost all whitespace changes

Given the wide-reaching nature of these changes, it does already conflict with changes we've merged in. I am happy to go through, rebase, fix all that needs to be fixed and then merge quickly so we don't continue conflicting once we decide this is a good way to go. Though it would be helpful to do this once and only after we all agree this is a good step forward.

Closes #10805 from jonkeane/ARROW-13326-r-linting

Authored-by: Jonathan Keane <[email protected]>
Signed-off-by: Jonathan Keane <[email protected]>
  • Loading branch information
jonkeane committed Aug 3, 2021
1 parent 6cacfff commit a52050a
Show file tree
Hide file tree
Showing 129 changed files with 2,003 additions and 2,138 deletions.
16 changes: 15 additions & 1 deletion .github/workflows/comment_bot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ jobs:
}
if changed '^r/.*\.R$'; then
echo "R_DOCS=true" >> $GITHUB_ENV
echo "R_CODE=true" >> $GITHUB_ENV
fi
if changed 'cmake' || changed 'CMake'; then
echo "CMAKE_FORMAT=true" >> $GITHUB_ENV
Expand Down Expand Up @@ -113,7 +114,7 @@ jobs:
--exclude_glob=cpp/build-support/lint_exclusions.txt \
--source_dir=r/src --quiet --fix
- uses: r-lib/actions/setup-r@v1
if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything')
if: env.R_DOCS == 'true' || env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything')
- name: Update R docs
if: env.R_DOCS == 'true' || endsWith(github.event.comment.body, 'everything')
shell: Rscript {0}
Expand All @@ -124,6 +125,19 @@ jobs:
remotes::install_github("r-lib/roxygen2")
remotes::install_deps("r")
roxygen2::roxygenize("r")
- name: Style R code
if: env.R_CODE == 'true' || endsWith(github.event.comment.body, 'everything')
shell: Rscript {0}
run: |
changed_files <- system("git diff --name-only HEAD..upstream/master 2>&1", intern = TRUE)
# only grab the .R files under r/
changed_files <- grep('^r/.*\\.R$', changed_files, value = TRUE)
# remove latin1 which is unstylable due to encoding and codegen.R which is unique
changed_files <- changed_files[!changed_files %in% c("r/tests/testthat/latin1.R", "r/data-raw/codegen.R")]
source("ci/etc/rprofile")
install.packages(c("remotes", "styler"))
remotes::install_deps("r")
styler::style_file(changed_files)
- name: Commit results
run: |
git config user.name "$(git log -1 --pretty=format:%an)"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
run: |
sudo sysctl -w kernel.core_pattern="core.%e.%p"
ulimit -c unlimited
archery docker run ubuntu-lint
archery docker run -e GITHUB_ACTIONS=true ubuntu-lint
- name: Docker Push
if: success() && github.event_name == 'push' && github.repository == 'apache/arrow'
continue-on-error: true
Expand Down
39 changes: 39 additions & 0 deletions ci/docker/linux-apt-lint.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,48 @@ RUN apt-get update && \
python3-dev \
python3-pip \
ruby \
apt-transport-https \
software-properties-common \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*

ARG r=4.1
RUN apt-key adv \
--keyserver keyserver.ubuntu.com \
--recv-keys E298A3A825C0D65DFD57CBB651716619E084DAB9 && \
# NOTE: R 3.5 and 3.6 are available in the repos with -cran35 suffix
# for trusty, xenial, bionic, and eoan (as of May 2020)
# -cran40 has 4.0 versions for bionic and focal
# R 3.2, 3.3, 3.4 are available without the suffix but only for trusty and xenial
# TODO: make sure OS version and R version are valid together and conditionally set repo suffix
# This is a hack to turn 3.6 into 35, and 4.0/4.1 into 40:
add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran'$(echo "${r}" | tr -d . | tr 6 5 | tr 1 0)'/' && \
apt-get install -y \
r-base=${r}* \
r-recommended=${r}* \
libxml2-dev

# Ensure parallel R package installation, set CRAN repo mirror,
# and use pre-built binaries where possible
COPY ci/etc/rprofile /arrow/ci/etc/
RUN cat /arrow/ci/etc/rprofile >> $(R RHOME)/etc/Rprofile.site
# Also ensure parallel compilation of C/C++ code
RUN echo "MAKEFLAGS=-j$(R -s -e 'cat(parallel::detectCores())')" >> $(R RHOME)/etc/Makeconf


COPY ci/scripts/r_deps.sh /arrow/ci/scripts/
COPY r/DESCRIPTION /arrow/r/
# We need to install Arrow's dependencies in order for lintr's namespace searching to work.
# This could be removed if lintr no longer loads the dependency namespaces (see issues/PRs below)
RUN /arrow/ci/scripts/r_deps.sh /arrow
# This fork has a number of changes that have PRs and Issues to resolve upstream:
# https://github.com/jimhester/lintr/pull/843
# https://github.com/jimhester/lintr/pull/841
# https://github.com/jimhester/lintr/pull/845
# https://github.com/jimhester/lintr/issues/842
# https://github.com/jimhester/lintr/issues/846
RUN R -e "remotes::install_github('jonkeane/lintr@arrow-branch')"

# Docker linter
COPY --from=hadolint /bin/hadolint /usr/bin/hadolint

Expand Down
1 change: 1 addition & 0 deletions r/.Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ clang_format.sh
^apache-arrow.rb$
^.*\.Rhistory$
^extra-tests
^.lintr
31 changes: 31 additions & 0 deletions r/.lintr
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
license: # Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
linters: with_defaults(
line_length_linter = line_length_linter(120),
object_name_linter = NULL,
# Even with a liberal definition of name styles, some of our names cause issues due to `.`s for s3 classes or NA in the name
# TODO: figure out if we con contribute to lintr to make these work
# object_name_linter = object_name_linter(styles = c("snake_case", "camelCase", "CamelCase", "symbols", "dotted.case", "UPPERCASE", "SNAKE_CASE")),
object_length_linter = object_length_linter(40),
object_usage_linter = NULL, # R6 methods are flagged,
cyclocomp_linter = cyclocomp_linter(26) # TODO: reduce to default of 15
)
exclusions: list(
"tests/testthat/latin1.R",
"R/arrowExports.R",
"data-raw/codegen.R"
)
20 changes: 12 additions & 8 deletions r/R/array.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,9 @@
#' new_array$offset
#'
#' # Compare 2 arrays
#' na_array2 = na_array
#' na_array2 <- na_array
#' na_array2 == na_array # element-wise comparison
#' na_array2$Equals(na_array) # overall comparison
#'
#' @export
Array <- R6Class("Array",
inherit = ArrowDatum,
Expand Down Expand Up @@ -196,7 +195,8 @@ Array$import_from_c <- ImportArray
#' @usage NULL
#' @format NULL
#' @export
DictionaryArray <- R6Class("DictionaryArray", inherit = Array,
DictionaryArray <- R6Class("DictionaryArray",
inherit = Array,
public = list(
indices = function() DictionaryArray__indices(self),
dictionary = function() DictionaryArray__dictionary(self)
Expand Down Expand Up @@ -227,7 +227,8 @@ DictionaryArray$create <- function(x, dict = NULL) {
#' @usage NULL
#' @format NULL
#' @export
StructArray <- R6Class("StructArray", inherit = Array,
StructArray <- R6Class("StructArray",
inherit = Array,
public = list(
field = function(i) StructArray__field(self, i),
GetFieldByName = function(name) StructArray__GetFieldByName(self, name),
Expand Down Expand Up @@ -272,7 +273,8 @@ as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...
#' @usage NULL
#' @format NULL
#' @export
ListArray <- R6Class("ListArray", inherit = Array,
ListArray <- R6Class("ListArray",
inherit = Array,
public = list(
values = function() ListArray__values(self),
value_length = function(i) ListArray__value_length(self, i),
Expand All @@ -288,7 +290,8 @@ ListArray <- R6Class("ListArray", inherit = Array,
#' @usage NULL
#' @format NULL
#' @export
LargeListArray <- R6Class("LargeListArray", inherit = Array,
LargeListArray <- R6Class("LargeListArray",
inherit = Array,
public = list(
values = function() LargeListArray__values(self),
value_length = function(i) LargeListArray__value_length(self, i),
Expand All @@ -304,7 +307,8 @@ LargeListArray <- R6Class("LargeListArray", inherit = Array,
#' @usage NULL
#' @format NULL
#' @export
FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
FixedSizeListArray <- R6Class("FixedSizeListArray",
inherit = Array,
public = list(
values = function() FixedSizeListArray__values(self),
value_length = function(i) FixedSizeListArray__value_length(self, i),
Expand All @@ -316,7 +320,7 @@ FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
)
)

is.Array <- function(x, type = NULL) {
is.Array <- function(x, type = NULL) { # nolint
is_it <- inherits(x, c("Array", "ChunkedArray"))
if (is_it && !is.null(type)) {
is_it <- x$type$ToString() %in% type
Expand Down
12 changes: 6 additions & 6 deletions r/R/arrow-datum.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

# Base class for Array, ChunkedArray, and Scalar, for S3 method dispatch only.
# Does not exist in C++ class hierarchy
ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
ArrowDatum <- R6Class("ArrowDatum",
inherit = ArrowObject,
public = list(
cast = function(target_type, safe = TRUE, ...) {
opts <- cast_options(safe, ...)
Expand Down Expand Up @@ -107,15 +108,14 @@ eval_array_expression <- function(FUN,
# integer inputs and floating-point division on floats
if (FUN == "/") {
# TODO: omg so many ways it's wrong to assume these types
args <- map(args, ~.$cast(float64()))
args <- map(args, ~ .$cast(float64()))
} else if (FUN == "%/%") {
# In R, integer division works like floor(float division)
out <- eval_array_expression("/", args = args, options = options)
return(out$cast(int32(), allow_float_truncate = TRUE))
} else if (FUN == "%%") {
# {e1 - e2 * ( e1 %/% e2 )}
# ^^^ form doesn't work because Ops.Array evaluates eagerly,
# but we can build that up
# We can't simply do {e1 - e2 * ( e1 %/% e2 )} since Ops.Array evaluates
# eagerly, but we can build that up
quotient <- eval_array_expression("%/%", args = args)
base <- eval_array_expression("*", quotient, args[[2]])
# this cast is to ensure that the result of this and e1 are the same
Expand Down Expand Up @@ -193,7 +193,7 @@ filter_rows <- function(x, i, keep_na = TRUE, ...) {
if (is.Array(i)) {
stop("Cannot extract rows with an Array of type ", i$type$ToString(), call. = FALSE)
}
stop("Cannot extract rows with an object of class ", class(i), call.=FALSE)
stop("Cannot extract rows with an object of class ", class(i), call. = FALSE)
}
}

Expand Down
43 changes: 30 additions & 13 deletions r/R/arrow-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dfr map_int map_lgl keep imap_chr
#' @importFrom assertthat assert_that is.string
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs expr caller_env is_character quo_name
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null enquos is_integerish quos
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind as_label set_names exec
#' @importFrom rlang is_bare_character quo_get_expr quo_set_expr .data seq2 is_quosure enexpr enexprs
#' @importFrom rlang expr caller_env is_character quo_name
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
#' @useDynLib arrow, .registration = TRUE
#' @keywords internal
Expand All @@ -42,8 +45,10 @@
}
s3_register("dplyr::tbl_vars", "arrow_dplyr_query")

for (cl in c("Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
"Field", "DataType", "RecordBatchReader")) {
for (cl in c(
"Array", "RecordBatch", "ChunkedArray", "Table", "Schema",
"Field", "DataType", "RecordBatchReader"
)) {
s3_register("reticulate::py_to_r", paste0("pyarrow.lib.", cl))
s3_register("reticulate::r_to_py", cl)
}
Expand Down Expand Up @@ -111,25 +116,33 @@
#' `vignette("install", package = "arrow")` for guidance on reinstalling the
#' package.
arrow_available <- function() {
tryCatch(.Call(`_arrow_available`), error = function(e) return(FALSE))
tryCatch(.Call(`_arrow_available`), error = function(e) {
return(FALSE)
})
}

#' @rdname arrow_available
#' @export
arrow_with_dataset <- function() {
tryCatch(.Call(`_dataset_available`), error = function(e) return(FALSE))
tryCatch(.Call(`_dataset_available`), error = function(e) {
return(FALSE)
})
}

#' @rdname arrow_available
#' @export
arrow_with_parquet <- function() {
tryCatch(.Call(`_parquet_available`), error = function(e) return(FALSE))
tryCatch(.Call(`_parquet_available`), error = function(e) {
return(FALSE)
})
}

#' @rdname arrow_available
#' @export
arrow_with_s3 <- function() {
tryCatch(.Call(`_s3_available`), error = function(e) return(FALSE))
tryCatch(.Call(`_s3_available`), error = function(e) {
return(FALSE)
})
}

option_use_threads <- function() {
Expand Down Expand Up @@ -218,7 +231,10 @@ print.arrow_info <- function(x, ...) {
))
if (some_features_are_off(x$capabilities) && identical(tolower(Sys.info()[["sysname"]]), "linux")) {
# Only on linux because (e.g.) we disable certain features on purpose on rtools35 and solaris
cat("To reinstall with more optional capabilities enabled, see\n https://arrow.apache.org/docs/r/articles/install.html\n\n")
cat(
"To reinstall with more optional capabilities enabled, see\n",
" https://arrow.apache.org/docs/r/articles/install.html\n\n"
)
}

if (length(x$options)) {
Expand All @@ -245,7 +261,10 @@ print.arrow_info <- function(x, ...) {
`Git ID` = x$build_info$git_id
))
} else {
cat("Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html for troubleshooting.\n")
cat(
"Arrow C++ library not available. See https://arrow.apache.org/docs/r/articles/install.html ",
"for troubleshooting.\n"
)
}
invisible(x)
}
Expand All @@ -258,7 +277,6 @@ option_compress_metadata <- function() {
ArrowObject <- R6Class("ArrowObject",
public = list(
initialize = function(xp) self$set_pointer(xp),

pointer = function() get(".:xp:.", envir = self),
`.:xp:.` = NULL,
set_pointer = function(xp) {
Expand All @@ -284,18 +302,17 @@ ArrowObject <- R6Class("ArrowObject",
}
invisible(self)
},

invalidate = function() {
assign(".:xp:.", NULL, envir = self)
}
)
)

#' @export
`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs)
`!=.ArrowObject` <- function(lhs, rhs) !(lhs == rhs) # nolint

#' @export
`==.ArrowObject` <- function(x, y) {
`==.ArrowObject` <- function(x, y) { # nolint
x$Equals(y)
}

Expand Down
5 changes: 3 additions & 2 deletions r/R/arrow-tabular.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@

# Base class for RecordBatch and Table for S3 method dispatch only.
# Does not exist in C++ class hierarchy
ArrowTabular <- R6Class("ArrowTabular", inherit = ArrowObject,
ArrowTabular <- R6Class("ArrowTabular",
inherit = ArrowObject,
public = list(
ToString = function() ToString_tabular(self),
Take = function(i) {
Expand Down Expand Up @@ -223,7 +224,7 @@ na.fail.ArrowTabular <- function(object, ...) {

#' @export
na.omit.ArrowTabular <- function(object, ...) {
not_na <- map(object$columns, ~call_function("is_valid", .x))
not_na <- map(object$columns, ~ call_function("is_valid", .x))
not_na_agg <- Reduce("&", not_na)
object$Filter(not_na_agg)
}
Expand Down
Loading

0 comments on commit a52050a

Please sign in to comment.