This repository has been archived by the owner on Jun 30, 2023. It is now read-only.

Update error capturing #309

Open · wants to merge 23 commits into master
Changes from all commits (23 commits)
7902420
Update utils.R
TimBMK Dec 15, 2021
b0d2095
Create tweets_lookup
TimBMK Dec 15, 2021
38bee24
Merge pull request #1 from TimBMK/TimBMK-error_catcher_&_lookup_tweets
TimBMK Dec 15, 2021
6759c52
Update tweets_lookup
TimBMK Dec 15, 2021
c1da645
Make runnable; pass all old unit tests
chainsawriot Dec 15, 2021
04b463b
Add Tim König a contributor
chainsawriot Dec 16, 2021
ec365ad
Exclude the `errors` part for now (It will be for another PR)
chainsawriot Dec 16, 2021
8a4c80e
Add documentation and rename `tweets_lookup` to `hydrate_tweets`
chainsawriot Dec 16, 2021
53fef74
Add tests for corner cases
chainsawriot Dec 16, 2021
7fedc52
Add basic test cases
chainsawriot Dec 16, 2021
dcac945
Clear check messages / errors
chainsawriot Dec 16, 2021
8159134
Correct documentation of `hydrate_tweets` on `context_annotations`
chainsawriot Dec 16, 2021
4020e2e
Update hydrate_tweets.R
TimBMK Dec 17, 2021
caac272
Reduce some verbosity
chainsawriot Dec 17, 2021
57bc5ec
bring up to date with cjbarrie master
TimBMK Mar 11, 2022
5e8c653
Revert "update to 0.3.1"
TimBMK Mar 11, 2022
cc8c63d
Merge pull request #3 from TimBMK/revert-2-master
TimBMK Mar 11, 2022
67e0b79
hard update to 0.3.1
TimBMK Mar 11, 2022
46776b8
proper 0.3.1 update
TimBMK Mar 11, 2022
1f3d662
Added error capturing for get_all_tweets() and get_user_timeline() an…
TimBMK Mar 11, 2022
903e15d
added newline
TimBMK Mar 11, 2022
b3f7d13
added documentation
TimBMK Mar 14, 2022
50343ca
added test data and testfile for bind_errors()
TimBMK Mar 14, 2022
1 change: 1 addition & 0 deletions NAMESPACE
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(bind_errors)
export(bind_tweet_jsons)
export(bind_tweets)
export(bind_user_jsons)
44 changes: 44 additions & 0 deletions R/bind_errors.R
@@ -0,0 +1,44 @@
#' Bind errors stored as JSON files
#'
#' This function binds errors stored as JSON files into a data frame. Error files are only written if errors were requested in the retrieving function (`errors = TRUE`).
#'
#' By default, `bind_errors` binds all error records (from errors_*id*.json files found in `data_path`) into a single data frame.
#'
#' @param data_path string, file path to the directory of stored error data saved as errors_*id*.json
#' @param verbose If `FALSE`, messages are suppressed
#'
#' @return a data.frame containing error information
#' @export
#'
#' @examples
#' \dontrun{
#' # retrieve data with errors and store them locally as .json
#' get_all_tweets(query = "BLM",
#' start_tweets = "2020-01-01T00:00:00Z",
#' end_tweets = "2020-01-05T00:00:00Z",
#' bearer_token = bearer_token,
#' data_path = "data/",
#' n = 500,
#'                bind_tweets = FALSE,
#'                errors = TRUE)
#'
#' # bind json files in the directory "data" into a data frame containing errors
#' bind_errors(data_path = "data/")
#' }
bind_errors <- function(data_path, verbose = TRUE) {
files <- ls_files(data_path, "^errors_")
if (verbose) {
pb <- utils::txtProgressBar(min = 0, max = length(files), initial = 0)
}
json.df.all <- data.frame()
for (i in seq_along(files)) {
filename <- files[[i]]
json.df <- jsonlite::read_json(filename, simplifyVector = TRUE)
json.df.all <- dplyr::bind_rows(json.df.all, json.df)
if (verbose) {
utils::setTxtProgressBar(pb, i)
}
}
.vcat(verbose, "\n")
return(json.df.all)
}
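
A hedged sketch of working with the bound error data frame follows; the column names used (`title`, `resource_id`) are assumptions about the Twitter API v2 error objects, not something `bind_errors()` itself guarantees:

errors <- bind_errors(data_path = "data/")
# Assumed Twitter API v2 error fields; adjust to the columns actually present
table(errors$title)                  # e.g. tally "Not Found Error" vs "Authorization Error"
missing_ids <- errors$resource_id[errors$title == "Not Found Error"]
head(missing_ids)                    # ids of tweets that could not be retrieved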
4 changes: 3 additions & 1 deletion R/get_all_tweets.R
@@ -26,6 +26,7 @@
#' @param page_n integer, number of tweets to be returned per page
#' @param context_annotations If `TRUE`, context_annotations will be fetched. Note this will limit page_n to 100 due to restrictions of the Twitter API.
#' @param verbose If `FALSE`, query progress messages are suppressed
#' @param errors If `TRUE`, errors will be captured as .json files in the data_path. Only works if a data_path is supplied
#' @param ... arguments will be passed to [build_query()] function. See `?build_query()` for further information.
#'
#' @return When bind_tweets is `TRUE` (default), the function returns a data frame. Nothing otherwise.
@@ -67,6 +68,7 @@ get_all_tweets <-
page_n = 500,
context_annotations = FALSE,
verbose = TRUE,
errors = FALSE,
...) {
if (missing(start_tweets)) {
stop("Start time must be specified.")
@@ -102,5 +104,5 @@ get_all_tweets <-

# Get tweets
get_tweets(params = params, endpoint_url = endpoint_url, n = n, file = file, bearer_token = bearer_token,
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose)
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose, errors = errors)
}
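
For readers skimming the diff, a short hedged sketch of what the new option produces on disk: with a `data_path` supplied and `errors = TRUE`, error responses are written alongside the data_*.json and users_*.json files as errors_*id*.json. The listing below is illustrative and mirrors the file names in tests/testdata/errordata:

list.files("data/", pattern = "^errors_")
# illustrative output:
# "errors_1213569129774366720.json" "errors_1213594137561370630.json"
bind_errors("data/")   # bind them into one data frame of error records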
4 changes: 3 additions & 1 deletion R/get_user_timeline.R
@@ -24,6 +24,7 @@
#' @param bind_tweets If `TRUE`, tweets captured are bound into a data.frame for assignment
#' @param page_n integer, number of tweets to be returned per page
#' @param verbose If `FALSE`, query progress messages are suppressed
#' @param errors If `TRUE`, errors will be captured as .json files in the data_path. Only works if a data_path is supplied
#' @param ... arguments will be passed to `build_query()` function. See `?build_query()` for further information.
#'
#' @return a data.frame
@@ -50,6 +51,7 @@ get_user_timeline <-
bind_tweets = TRUE,
page_n = 100,
verbose = TRUE,
errors = FALSE,
...) {
if (missing(start_tweets)) {
stop("Start time must be specified.")
@@ -81,7 +83,7 @@ get_user_timeline <-

# Get tweets
new_rows <- get_tweets(params = params, endpoint_url = endpoint_url, page_token_name = "pagination_token", n = n, file = file, bearer_token = bearer_token,
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose)
export_query = export_query, data_path = data_path, bind_tweets = bind_tweets, verbose = verbose, errors = errors)
new_df <- dplyr::bind_rows(new_df, new_rows)
}
new_df
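
A hedged usage sketch combining the new `errors` argument of `get_user_timeline()` with `bind_errors()`; the user id, time window and paths are placeholders, `bearer_token` is assumed to hold a valid token, and the first positional argument is assumed to take the user id(s):

# Placeholder user id, dates and data_path; errors are written to data_path as errors_*id*.json
get_user_timeline("2244994945",
                  start_tweets = "2020-01-01T00:00:00Z",
                  end_tweets = "2020-01-05T00:00:00Z",
                  bearer_token = bearer_token,
                  data_path = "timeline_data/",
                  bind_tweets = FALSE,
                  n = 100,
                  errors = TRUE)
bind_errors("timeline_data/")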
38 changes: 38 additions & 0 deletions man/bind_errors.Rd

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions man/get_all_tweets.Rd

Some generated files are not rendered by default.

3 changes: 3 additions & 0 deletions man/get_user_timeline.Rd

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/data_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/data_1213594137561370630.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/errors_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/errors_1213594137561370630.json

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions tests/testdata/errordata/query
@@ -0,0 +1,3 @@
BLM
2020-01-01T00:00:00Z
2020-01-05T00:00:00Z
1 change: 1 addition & 0 deletions tests/testdata/errordata/users_1213569129774366720.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions tests/testdata/errordata/users_1213594137561370630.json

Large diffs are not rendered by default.

27 changes: 27 additions & 0 deletions tests/testthat/test-bind_errors.R
@@ -0,0 +1,27 @@
test_that("Expect success in binding two jsons", {
empty_dir <- academictwitteR:::.gen_random_dir()
dir.create(empty_dir)
my_cars <- mtcars
my_cars$model <- rownames(my_cars)
jsonlite::write_json(my_cars,
path = file.path(empty_dir, "errors_1.json"))
jsonlite::write_json(my_cars,
path = file.path(empty_dir, "errors_2.json"))
expect_equal(bind_errors(empty_dir),
               dplyr::bind_rows(my_cars, my_cars))
unlink(empty_dir, recursive = TRUE)
## Error on finding no jsons to bind
temp_dir <- academictwitteR:::.gen_random_dir()
dir.create(temp_dir)
expect_error(bind_errors(temp_dir))
unlink(temp_dir)
})

test_that("real data", {
expect_error(bind_errors("../testdata/errordata"), NA)
## Trailing slash
expect_error(bind_errors("../testdata/errordata/"), NA)
## Silence
expect_silent(bind_errors("../testdata/errordata/", verbose = FALSE))
})
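
To run only the new tests locally, a minimal sketch from the package root (assumes `devtools` is installed; the `filter` value matches the test file name):

devtools::load_all()
devtools::test(filter = "bind_errors")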