This repository has been archived by the owner on Jun 30, 2023. It is now read-only.

Update error capturing #309

Open · wants to merge 23 commits into master
Changes from all commits (23 commits)
7902420
Update utils.R
TimBMK Dec 15, 2021
b0d2095
Create tweets_lookup
TimBMK Dec 15, 2021
38bee24
Merge pull request #1 from TimBMK/TimBMK-error_catcher_&_lookup_tweets
TimBMK Dec 15, 2021
6759c52
Update tweets_lookup
TimBMK Dec 15, 2021
c1da645
Make runnable; pass all old unit tests
chainsawriot Dec 15, 2021
04b463b
Add Tim König a contributor
chainsawriot Dec 16, 2021
ec365ad
Exclude the `errors` part for now (It will be for another PR)
chainsawriot Dec 16, 2021
8a4c80e
Add documentation and rename `tweets_lookup` to `hydrate_tweets`
chainsawriot Dec 16, 2021
53fef74
Add tests for corner cases
chainsawriot Dec 16, 2021
7fedc52
Add basic test cases
chainsawriot Dec 16, 2021
dcac945
Clear check messages / errors
chainsawriot Dec 16, 2021
8159134
Correct documentation of `hydrate_tweets` on `context_annotations`
chainsawriot Dec 16, 2021
4020e2e
Update hydrate_tweets.R
TimBMK Dec 17, 2021
caac272
Reduce some verbosity
chainsawriot Dec 17, 2021
57bc5ec
bring up to date with cjbarrie master
TimBMK Mar 11, 2022
5e8c653
Revert "update to 0.3.1"
TimBMK Mar 11, 2022
cc8c63d
Merge pull request #3 from TimBMK/revert-2-master
TimBMK Mar 11, 2022
67e0b79
hard update to 0.3.1
TimBMK Mar 11, 2022
46776b8
proper 0.3.1 update
TimBMK Mar 11, 2022
1f3d662
Added error capturing for get_all_tweets() and get_user_timeline() an…
TimBMK Mar 11, 2022
903e15d
added newline
TimBMK Mar 11, 2022
b3f7d13
added documentation
TimBMK Mar 14, 2022
50343ca
added test data and testfile for bind_errors()
TimBMK Mar 14, 2022
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(bind_errors)
export(bind_tweet_jsons)
export(bind_tweets)
export(bind_user_jsons)
44 changes: 44 additions & 0 deletions R/bind_errors.R
@@ -0,0 +1,44 @@
#' Bind errors stored as JSON files
#'
#' This function binds errors stored as JSON files into a data frame. Error files are only written if errors were requested in the retrieving function (`errors = TRUE`).
#'
#' By default, `bind_errors` binds all error records (from errors_*id*.json files found in `data_path`) into a single data frame.
#'
#' @param data_path string, file path to the directory of stored error data saved as errors_*id*.json
#' @param verbose If `FALSE`, messages are suppressed
#'
#' @return a data.frame containing error information
#' @export
#'
#' @examples
#' \dontrun{
#' # retrieve data with errors and store them locally as .json
#' get_all_tweets(query = "BLM",
#' start_tweets = "2020-01-01T00:00:00Z",
#' end_tweets = "2020-01-05T00:00:00Z",
#' bearer_token = bearer_token,
#' data_path = "data/",
#' n = 500,
#'                bind_tweets = FALSE,
#'                errors = TRUE)
#'
#' # bind json files in the directory "data" into a data frame containing errors
#' bind_errors(data_path = "data/")
#' }
bind_errors <- function(data_path, verbose = TRUE) {
files <- ls_files(data_path, "^errors_")
if (verbose) {
pb <- utils::txtProgressBar(min = 0, max = length(files), initial = 0)
}
json.df.all <- data.frame()
for (i in seq_along(files)) {
filename <- files[[i]]
json.df <- jsonlite::read_json(filename, simplifyVector = TRUE)
json.df.all <- dplyr::bind_rows(json.df.all, json.df)
if (verbose) {
utils::setTxtProgressBar(pb, i)
}
}
.vcat(verbose, "\n")
return(json.df.all)
}
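
A hedged sketch of working with the bound error data frame follows; the column names used (`title`, `resource_id`) are assumptions about the Twitter API v2 error objects, not something `bind_errors()` itself guarantees:

errors <- bind_errors(data_path = "data/")
# Assumed Twitter API v2 error fields; adjust to the columns actually present
table(errors$title)                  # e.g. tally "Not Found Error" vs "Authorization Error"
missing_ids <- errors$resource_id[errors$title == "Not Found Error"]
head(missing_ids)                    # ids of tweets that could not be retrieved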
4 changes: 3 additions & 1 deletion R/get_all_tweets.R
@@ -26,6 +26,7 @@
#' @param page_n integer, number of tweets to be returned per page
#' @param context_annotations If `TRUE`, context_annotations will be fetched. Note this will limit page_n to 100 due to restrictions of the Twitter API.
#' @param verbose If `FALSE`, query progress messages are suppressed
#' @param errors If `TRUE`, errors will be captured as .json files in the data_path. Only works if a data_path is supplied
#' @param ... arguments will be passed to [build_query()] function. See `?build_query()` for further information.
#'
#' @return When bind_tweets is `TRUE` (default), the function returns a data frame. Nothing otherwise.
@@ -67,6 +68,7 @@ get_all_tweets <-
page_n = 500,
context_annotations = FALSE,
verbose = TRUE,
errors = FALSE,
...) {
if (missing(start_tweets)) {
stop("Start time must be specified.")
@@ -102,5 +104,5 @@ get_all_tweets <-

# Get tweets
get_tweets(params = params, endpoint_url = endpoint_url, n = n, file = file, bearer_token = bearer_token,
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose)
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose, errors = errors)
}
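
For readers skimming the diff, a short hedged sketch of what the new option produces on disk: with a `data_path` supplied and `errors = TRUE`, error responses are written alongside the data_*.json and users_*.json files as errors_*id*.json. The listing below is illustrative and mirrors the file names in tests/testdata/errordata:

list.files("data/", pattern = "^errors_")
# illustrative output:
# "errors_1213569129774366720.json" "errors_1213594137561370630.json"
bind_errors("data/")   # bind them into one data frame of error records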
4 changes: 3 additions & 1 deletion R/get_user_timeline.R
@@ -24,6 +24,7 @@
#' @param bind_tweets If `TRUE`, tweets captured are bound into a data.frame for assignment
#' @param page_n integer, number of tweets to be returned per page
#' @param verbose If `FALSE`, query progress messages are suppressed
#' @param errors If `TRUE`, errors will be captured as .json files in the data_path. Only works if a data_path is supplied
#' @param ... arguments will be passed to `build_query()` function. See `?build_query()` for further information.
#'
#' @return a data.frame
@@ -50,6 +51,7 @@ get_user_timeline <-
bind_tweets = TRUE,
page_n = 100,
verbose = TRUE,
errors = FALSE,
...) {
if (missing(start_tweets)) {
stop("Start time must be specified.")
@@ -81,7 +83,7 @@ get_user_timeline <-

# Get tweets
new_rows <- get_tweets(params = params, endpoint_url = endpoint_url, page_token_name = "pagination_token", n = n, file = file, bearer_token = bearer_token,
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose)
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose, errors = errors)
new_df <- dplyr::bind_rows(new_df, new_rows)
}
new_df
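
A hedged usage sketch combining the new `errors` argument of `get_user_timeline()` with `bind_errors()`; the user id, time window and paths are placeholders, `bearer_token` is assumed to hold a valid token, and the first positional argument is assumed to take the user id(s):

# Placeholder user id, dates and data_path; errors are written to data_path as errors_*id*.json
get_user_timeline("2244994945",
                  start_tweets = "2020-01-01T00:00:00Z",
                  end_tweets = "2020-01-05T00:00:00Z",
                  bearer_token = bearer_token,
                  data_path = "timeline_data/",
                  bind_tweets = FALSE,
                  n = 100,
                  errors = TRUE)
bind_errors("timeline_data/")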
38 changes: 38 additions & 0 deletions man/bind_errors.Rd

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions man/get_all_tweets.Rd

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions man/get_user_timeline.Rd

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/data_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/data_1213594137561370630.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/errors_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/errors_1213594137561370630.json

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions tests/testdata/errordata/query
@@ -0,0 +1,3 @@
BLM
2020-01-01T00:00:00Z
2020-01-05T00:00:00Z
1 change: 1 addition & 0 deletions tests/testdata/errordata/users_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/users_1213594137561370630.json

Large diffs are not rendered by default.

27 changes: 27 additions & 0 deletions tests/testthat/test-bind_errors.R
@@ -0,0 +1,27 @@
test_that("Expect success in binding two jsons", {
empty_dir <- academictwitteR:::.gen_random_dir()
dir.create(empty_dir)
my_cars <- mtcars
my_cars$model <- rownames(my_cars)
jsonlite::write_json(my_cars,
path = file.path(empty_dir, "errors_1.json"))
jsonlite::write_json(my_cars,
path = file.path(empty_dir, "errors_2.json"))
expect_equal(bind_errors(empty_dir),
               dplyr::bind_rows(my_cars, my_cars))
unlink(empty_dir, recursive = TRUE)
## Error on finding no jsons to bind
temp_dir <- academictwitteR:::.gen_random_dir()
dir.create(temp_dir)
expect_error(bind_errors(temp_dir))
unlink(temp_dir)
})

test_that("real data", {
expect_error(bind_errors("../testdata/errordata"), NA)
## Trailing slash
expect_error(bind_errors("../testdata/errordata/"), NA)
## Silence
expect_silent(bind_errors("../testdata/errordata/", verbose = FALSE))
})
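
To run only the new tests locally, a minimal sketch from the package root (assumes `devtools` is installed; the `filter` value matches the test file name):

devtools::load_all()
devtools::test(filter = "bind_errors")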