diff --git a/R/refresh_coronavirus.R b/R/refresh_coronavirus.R
new file mode 100644
index 0000000..83e5fec
--- /dev/null
+++ b/R/refresh_coronavirus.R
@@ -0,0 +1,58 @@
+#' Refresh the 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset in the Covid19R Project Format
+#'
+#' Daily summary of the Coronavirus (COVID-19) cases by state/province.
+#' @return A data.frame object with the following columns:
+#' * date - The date in YYYY-MM-DD form
+#' * location - The name of the location as provided by the data source.
+#' * location_type - The type of location using the covid19R controlled vocabulary.
+#' * location_code - A standardized location code using a national or international standard. Drawn from \href{https://github.com/olahol/iso-3166-2.js/}{iso-3166-2.js}'s version
+#' * location_code_type - The type of standardized location code being used according to the covid19R controlled vocabulary. Here we use `iso_3166_2`
+#' * data_type - the type of data in that given row using the covid19R controlled vocabulary. Includes cases_new, deaths_new, recovered_new.
+#' * value - number of cases of each data type
+#' @export refresh_coronavirus_jhu
+#' @source coronavirus - Johns Hopkins University Center for Systems Science and Engineering (JHU CSSE) Coronavirus \href{https://systems.jhu.edu/research/public-health/ncov/}{website}
+#'
+#' @examples
+#' \dontrun{
+#' # update the data
+#' jhu_covid19_dat <- refresh_coronavirus_jhu()
+#' }
+#'
+refresh_coronavirus_jhu <- function() {
+  utils::read.csv(
+    "https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus_covid19format.csv",
+    stringsAsFactors = FALSE
+  )
+}
+
+
+
+#' Get information about the datasets provided by the coronavirus package
+#'
+#' @description Returns information about the datasets in this package for covid19R harvesting
+#'
+#' @return a one-row data.frame of information about the datasets in this package
+#' @export get_info_coronavirus
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # get the dataset info from this package
+#' get_info_coronavirus()
+#' }
+#'
+get_info_coronavirus <- function() {
+  data.frame(
+    data_set_name = "coronavirus_jhu",
+    package_name = "coronavirus",
+    function_to_get_data = "refresh_coronavirus_jhu*",
+    data_details = "The 2019 Novel Coronavirus COVID-19 (2019-nCoV) Dataset from the Johns Hopkins University Center for Systems Science and Engineering",
+    data_url = "https://systems.jhu.edu/research/public-health/ncov/",
+    license_url = "https://github.com/CSSEGISandData/COVID-19/",
+    data_types = "cases_new, recovered_new, deaths_new",
+    location_types = "country, state",
+    spatial_extent = "global",
+    # keep every field as character; data.frame() defaults to factors before R 4.0
+    stringsAsFactors = FALSE
+  )
+}
diff --git a/data_raw/data_covid19R.R b/data_raw/data_covid19R.R
new file mode 100644
index 0000000..70f2132
--- /dev/null
+++ b/data_raw/data_covid19R.R
@@ -0,0 +1,61 @@
+#----------------------------------------------------
+# Creating a covid19R compliant JHU coronavirus data set
+# using coronavirus data
+# https://github.com/CSSEGISandData/COVID-19
+
+`%>%` <- magrittr::`%>%`
+setwd(here::here())
+source("data_raw/get_code_table.R") # defines get_code_table()
+
+# the initial data: reuse git_df when a calling script already created it
+if (!exists("git_df")) {
+  git_df <- utils::read.csv("https://raw.githubusercontent.com/RamiKrispin/coronavirus/master/csv/coronavirus.csv", stringsAsFactors = FALSE)
+}
+
+# create valid locations
+git_df_long_location <- git_df %>%
+  dplyr::mutate(
+    country = ifelse(country == "Korea, South", "South Korea", country),
+    province = ifelse(province == "Bonaire, Sint Eustatius and Saba",
+      "Bonaire and Sint Eustatius and Saba", province
+    )
+  ) %>%
+  tidyr::unite(location, province, country, sep = ", ") %>%
+  dplyr::rename(
+    data_type = type,
+    value = cases
+  ) %>%
+
+  # fix some bad location names
+  dplyr::mutate(
+    location = gsub("^\\, ", "", location),
+    location_type = ifelse(grepl("\\,", location), "state", "country")
+  )
+
+code_table <- get_code_table()
+
+# add codes
+coronavirus_covid19 <- dplyr::left_join(git_df_long_location, code_table, by = "location")
+
+# fix data types
+coronavirus_covid19 <- coronavirus_covid19 %>%
+  dplyr::mutate(data_type = dplyr::case_when(
+    data_type == "confirmed" ~ "cases_new",
+    data_type == "recovered" ~ "recovered_new",
+    data_type == "death" ~ "deaths_new"
+  ))
+
+coronavirus_covid19 <- coronavirus_covid19 %>%
+  dplyr::select(
+    date, location, location_type,
+    location_code, location_code_type,
+    data_type, value, lat, long
+  )
+
+# data checks
+sum(is.na(coronavirus_covid19$location_code)) # make sure codes combine a-ok - will be >0 due to cruise ships
+nrow(coronavirus_covid19) - nrow(git_df_long_location) # should be 0, or there was a one to many match
+
+# write out
+# write.csv(coronavirus_covid19, "csv/coronavirus_covid19format.csv", row.names = FALSE)
+print("covid19R compliant data done...")
diff --git a/data_raw/get_code_table.R b/data_raw/get_code_table.R
new file mode 100644
index 0000000..bb71ca6
--- /dev/null
+++ b/data_raw/get_code_table.R
@@ -0,0 +1,113 @@
+# Build the lookup table that maps location names to ISO 3166 codes.
+#
+# Reads the iso-3166-2.js CSV, derives one code per country and one per
+# province ("<name>, <Country>"), adds a few hand-coded entries, and renames
+# a fixed list of locations (see the case_when below).
+#
+# Returns a data frame with columns location, location_code and
+# location_code_type (always "iso_3166_2").
+# Uses `%>%`, which the sourcing script is expected to define.
+get_code_table <- function() {
+  # get iso 3166 2 codes
+  iso_codes <-
+    utils::read.csv(
+      "https://github.com/olahol/iso-3166-2.js/raw/master/data.csv",
+      col.names = c(
+        "Country",
+        "iso_3166_2",
+        "name",
+        "type",
+        "Country_iso_3166_2"
+      ),
+      # NOTE(review): presumably "." keeps a literal "NA" code from being read as missing - confirm
+      na.strings = ".",
+      # on R < 4.0 the default would create factors, which ifelse() below turns into integer codes
+      stringsAsFactors = FALSE
+    ) %>%
+    dplyr::mutate(
+      name = ifelse(
+        iso_3166_2 == "NL-BQ1",
+        "Bonaire and Sint Eustatius and Saba",
+        name
+      ),
+      Country = ifelse(iso_3166_2 == "VG-VG", "British Virgin Islands", Country),
+      Country = ifelse(
+        Country_iso_3166_2 == "CD",
+        "The Democratic Republic Of The Congo",
+        Country
+      ),
+      Country = ifelse(Country_iso_3166_2 == "CZ", "Czechia", Country)
+    )
+
+  country_code <- iso_codes %>%
+    dplyr::mutate(location = Country) %>%
+    dplyr::group_by(location) %>%
+    dplyr::summarize(location_code = Country_iso_3166_2[1]) %>%
+    dplyr::bind_rows(
+      data.frame(
+        location = c("Cabo Verde", "Greenland, Denmark", "Cote d'Ivoire"),
+        location_code = c("CV", "GL", "CI"),
+        stringsAsFactors = FALSE
+      )
+    ) # do not know why these were missing
+
+  province_code <- iso_codes %>%
+    dplyr::mutate(type = ifelse(iso_3166_2 == "NL-AW", "Province", type)) %>% # problem with Aruba
+    dplyr::filter(type != "Country") %>%
+    dplyr::mutate(location = paste(name, Country, sep = ", ")) %>%
+    dplyr::group_by(location) %>%
+    dplyr::summarize(location_code = iso_3166_2[1]) %>%
+    dplyr::bind_rows(
+      data.frame(
+        location = c("Channel Islands, United Kingdom", "Tibet, China", "Inner Mongolia, China"),
+        location_code = c("GB-CHA", "CN-XZ", "CN-NM"),
+        stringsAsFactors = FALSE
+      )
+    )
+
+  code_table <- dplyr::bind_rows(country_code, province_code) %>%
+    dplyr::mutate(
+      location_code_type = "iso_3166_2",
+      location = dplyr::case_when(
+        location == "Cayman Islands" ~ "Cayman Islands, United Kingdom",
+        location == "Anguilla" ~ "Anguilla, United Kingdom",
+        location == "Kinshasa, The Democratic Republic Of The Congo" ~ "Congo (Kinshasa)",
+        location == "Brazzaville, Congo" ~ "Congo (Brazzaville)",
+        location == "Brunei Darussalam" ~ "Brunei",
+        location == "Myanmar" ~ "Burma",
+        location == "Falkland Islands" ~ "Falkland Islands (Malvinas), United Kingdom",
+        location == "Swaziland" ~ "Eswatini",
+        location == "Bermuda" ~ "Bermuda, United Kingdom",
+        location == "Curaçao" ~ "Curacao, Netherlands",
+        location == "French Polynesia" ~ "French Polynesia, France",
+        location == "British Virgin Islands" ~ "British Virgin Islands, United Kingdom",
+        location == "Faroe Islands" ~ "Faroe Islands, Denmark",
+        location == "French Guiana" ~ "French Guiana, France",
+        location == "Gibraltar" ~ "Gibraltar, United Kingdom",
+        location == "Vatican City" ~ "Holy See",
+        location == "Isle of Man" ~ "Isle of Man, United Kingdom",
+        location == "Kosovo-Metohija, Serbia" ~ "Kosovo",
+        location == "Macau" ~ "Macau, China",
+        location == "Montserrat" ~ "Montserrat, United Kingdom",
+        location == "New Caledonia" ~ "New Caledonia, France",
+        location == "Macedonia, the Former Yugoslav Republic Of" ~ "North Macedonia",
+        location == "Reunion" ~ "Reunion, France",
+        location == "Saint-Barthélemy, France" ~ "Saint Barthelemy, France",
+        location == "Saint Kitts And Nevis" ~ "Saint Kitts and Nevis",
+        location == "Saint-Pierre-et-Miquelon, France" ~ "Saint Pierre and Miquelon, France",
+        location == "Saint Vincent And The Grenadines" ~ "Saint Vincent and the Grenadines",
+        location == "St. Maarten" ~ "Sint Maarten, Netherlands",
+        location == "Korea, Republic of" ~ "South Korea",
+        location == "Saint-Martin, France" ~ "St Martin, France",
+        location == "Taiwan" ~ "Taiwan*",
+        location == "East Timor" ~ "Timor-Leste",
+        location == "Turks & Caicos Islands" ~ "Turks and Caicos Islands, United Kingdom",
+        location == "United States" ~ "US",
+        location == "Viet Nam" ~ "Vietnam",
+        location == "Gaza, Palestine" ~ "West Bank and Gaza",
+        TRUE ~ location
+      )
+    )
+
+  code_table
+}