Skip to content

Commit

Permalink
Separate epa certificate function
Browse files Browse the repository at this point in the history
- openssl availability needs to be confirmed in Windows runner
  • Loading branch information
Insang Song committed Feb 8, 2024
1 parent 89c0a2f commit 7ffa63e
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 60 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ export(convert_stobj_to_stdt)
export(download_aqs_data)
export(download_data)
export(download_ecoregion_data)
export(download_epa_certificate)
export(download_geos_cf_data)
export(download_gmted_data)
export(download_koppen_geiger_data)
Expand Down
84 changes: 32 additions & 52 deletions R/download.R
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,15 @@ download_aqs_data <-
#' The \code{download_ecoregion_data()} function accesses and downloads
#' Ecoregions level 3 data, where all pieces of information in the higher
#' levels are included.
#' @note In Linux systems as of December 2023, downloading Ecoregion data from
#' EPA Data Commons will result in certificate errors. This is bypassed by
#' manually identifying .crt file link in your browser by connecting to
#' https://gaftp.epa.gov then clicking a lock icon in the address bar.
#' (TO DEVELOPERS: see the comments in source code)
#' @note
#' For EPA Data Commons certificate errors, follow the steps below:
#' 1. Click Lock icon in the address bar at https://gaftp.epa.gov
#' 2. Click Show Certificate
#' 3. Access Details
#' 4. Find URL with *.crt extension
#' Currently we bundle the pre-downloaded crt and its PEM (which is accepted
#' in wget command) file in ./inst/extdata.
#' in wget command) file in ./inst/extdata. The instruction above is for
#' certificate updates in the future.
#' @param directory_to_download character(1). Directory to download zip file
#' of Ecoregion level 3 shapefiles
#' @param directory_to_save character(1). Directory to decompress zip files.
Expand All @@ -275,6 +277,8 @@ download_aqs_data <-
#' @param epa_certificate_path character(1). Path to the certificate file
#' for EPA DataCommons. Default is
#' 'extdata/cacert_gaftp_epa.pem' under the package installation path.
#' @param certificate_url character(1). URL to certificate file. See notes for
#' details.
#' @author Insang Song
#' @returns NULL;
#' @importFrom utils download.file
Expand All @@ -289,7 +293,9 @@ download_ecoregion_data <- function(
remove_command = TRUE,
epa_certificate_path =
system.file("extdata/cacert_gaftp_epa.pem",
package = "amadeus")
package = "amadeus"),
certificate_url =
"http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"
) {
#### 1. data download acknowledgement
download_permit(data_download_acknowledgement = data_download_acknowledgement)
Expand All @@ -312,29 +318,9 @@ download_ecoregion_data <- function(
return(NULL)
}
#### 5. define download URL
if (startsWith(Sys.info()["sysname"], "Linux")) {
if (!file.exists(epa_certificate_path)) {
message("URL should be identified in web browser
Lock icon in the address bar at https://gaftp.epa.gov
Click Show Certificate
access Details then find URL with *.crt extension
copy and replace the url below.\n"
)
download_crt_target <- gsub("pem", "crt", epa_certificate_path)
certificate_url <-
"http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"
utils::download.file(certificate_url, download_crt_target)
system(paste(
"openssl x509",
"-inform DER",
"-outform PEM",
"-in",
download_crt_target,
"-out",
epa_certificate_path
))
}
}
download_epa_certificate(
epa_certificate_path = epa_certificate_path,

Check warning on line 322 in R/download.R

View workflow job for this annotation

GitHub Actions / lint

file=R/download.R,line=322,col=4,[indentation_linter] Hanging indent should be 27 spaces but is 4 spaces.
certificate_url = certificate_url)

download_url <- paste0(
"https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/",
Expand Down Expand Up @@ -2440,6 +2426,15 @@ download_tri_data <- function(
#' for EPA DataCommons. Default is
#' 'extdata/cacert_gaftp_epa.pem' under the package installation path.
#' @author Ranadeep Daw, Insang Song
#' @note
#' If EPA Data Commons raises certificate errors, find the certificate URL
#' with the steps below:
#' 1. Click the lock icon in the address bar at https://gaftp.epa.gov
#' 2. Click "Show Certificate"
#' 3. Open the "Details" tab
#' 4. Find the URL with the *.crt extension
#'
#' Currently we bundle the pre-downloaded crt file and its PEM conversion
#' (the format accepted by the wget command) in ./inst/extdata. The
#' instructions above are for future certificate updates.
#' @returns NULL; Two comma-separated value (CSV) raw files for 2017 and 2020
#' @export
download_nei_data <- function(
Expand All @@ -2451,7 +2446,9 @@ download_nei_data <- function(
unzip = FALSE,
epa_certificate_path =
system.file("extdata/cacert_gaftp_epa.pem",
package = "amadeus")
package = "amadeus"),
certificate_url =
"http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"

Check warning on line 2451 in R/download.R

View workflow job for this annotation

GitHub Actions / lint

file=R/download.R,line=2451,col=2,[indentation_linter] Indentation should be 4 spaces but is 2 spaces.
) {
#### 1. check for data download acknowledgement
download_permit(data_download_acknowledgement = data_download_acknowledgement)
Expand All @@ -2460,27 +2457,10 @@ download_nei_data <- function(
directory_to_save <- download_sanitize_path(directory_to_save)

#### 5. define download URL
if (startsWith(Sys.info()["sysname"], "Linux")) {
if (!file.exists(epa_certificate_path)) {
message("URL should be identified in web browser
Lock icon in the address bar at https://gaftp.epa.gov
Click Show Certificate
access Details then find URL with *.crt extension
copy and replace the url below.\n"
)
download_crt_target <- gsub("pem", "crt", epa_certificate_path)
certificate_url <-
"http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"
utils::download.file(certificate_url, download_crt_target)
system(paste("openssl x509",
"-inform DER",
"-outform PEM",
"-in",
download_crt_target,
"-out",
epa_certificate_path))
}
}
download_epa_certificate(
epa_certificate_path = epa_certificate_path,
certificate_url = certificate_url
)

#### 3. define measurement data paths
url_download_base <- "https://gaftp.epa.gov/air/nei/%d/data_summaries/"
Expand Down
36 changes: 36 additions & 0 deletions R/download_support.R
Original file line number Diff line number Diff line change
Expand Up @@ -198,3 +198,39 @@ generate_date_sequence <-
return(dates_original)
}
}


#' Check EPA Certificate
#'
#' Makes sure a PEM certificate exists at `epa_certificate_path`. When the
#' file is missing, the certificate at `certificate_url` is downloaded next to
#' it (same path with the `.pem` extension replaced by `.crt`) and converted
#' from DER to PEM with the `openssl` command line tool. When the file already
#' exists nothing is downloaded or converted.
#' @param epa_certificate_path character(1).
#' Full path of a converted certificate of EPA.
#' Should end with `.pem`
#' @param certificate_url character(1).
#' URL of the original certificate.
#' @returns NULL, invisibly. Called for its side effect of creating the file
#' designated in `epa_certificate_path`. A warning is raised when the
#' conversion could not be performed.
#' @author Insang Song
#' @importFrom utils download.file
#' @export
download_epa_certificate <-
  function(
    epa_certificate_path = "cacert_gaftp_epa.pem",
    certificate_url =
      "http://cacerts.digicert.com/DigiCertGlobalG2TLSRSASHA2562020CA1-1.crt"
  ) {
    if (!endsWith(epa_certificate_path, ".pem")) {
      stop("Path should end with .pem .\n")
    }
    if (file.exists(epa_certificate_path)) {
      return(invisible(NULL))
    }

    # Swap only the trailing extension; a plain gsub("pem", "crt", ...) would
    # also rewrite any "pem" occurring in directory or file names.
    download_crt_target <- sub("\\.pem$", ".crt", epa_certificate_path)
    # The certificate is a binary (DER) file; "wb" keeps it intact on Windows.
    utils::download.file(certificate_url, download_crt_target, mode = "wb")

    if (!nzchar(Sys.which("openssl"))) {
      warning(
        "openssl was not found on the PATH; the certificate was downloaded ",
        "to ", download_crt_target, " but was not converted to PEM.",
        call. = FALSE
      )
      return(invisible(NULL))
    }

    # Quote both paths so that spaces or shell metacharacters in them cannot
    # break (or alter) the command.
    conversion_status <- system2(
      "openssl",
      c(
        "x509",
        "-inform", "DER",
        "-outform", "PEM",
        "-in", shQuote(download_crt_target),
        "-out", shQuote(epa_certificate_path)
      )
    )
    # Only report success when openssl exited cleanly and produced the file.
    if (conversion_status != 0L || !file.exists(epa_certificate_path)) {
      warning(
        "openssl failed to convert ", download_crt_target, " to PEM ",
        "(exit status ", conversion_status, ").",
        call. = FALSE
      )
      return(invisible(NULL))
    }
    message("Certificate conversion completed.\n")
    invisible(NULL)
  }
22 changes: 15 additions & 7 deletions man/download_ecoregion_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/download_epa_certificate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 15 additions & 1 deletion man/download_nei_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
wget --ca-certificate=/tmp/Rtmp5cEQ3l/cacert_gaftp_epa.pem https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l3_state_boundaries.zip -O tests/testthat/../testdata//us_eco_l3_state_boundaries.zip
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
wget --ca-certificate=/tmp/RtmpeRLmFs/cacert_gaftp_epa.pem https://gaftp.epa.gov/EPADataCommons/ORD/Ecoregions/us/us_eco_l3_state_boundaries.zip -O tests/testthat/../testdata//us_eco_l3_state_boundaries.zip
17 changes: 17 additions & 0 deletions tests/testthat/test-download_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -1103,6 +1103,8 @@ testthat::test_that("Test error cases in EPA gaftp sources 2", {
epa_certificate_path = certificate
)
)
# unlink dir
unlink(tdir)

# define file path with commands
commands_path <- paste0(
Expand All @@ -1116,3 +1118,18 @@ testthat::test_that("Test error cases in EPA gaftp sources 2", {
testthat::expect_true(file.exists(commands_path))
file.remove(commands_path)
})


testthat::test_that("epa certificate", {
  # A path that does not end with .pem is rejected.
  testthat::expect_error(
    download_epa_certificate("file.txt")
  )

  # Use a path unique to this run: with a fixed name such as
  # file.path(tempdir(), "file.pem") a second run in the same R session finds
  # the file from the first run, nothing is downloaded, and no message is
  # emitted.
  fresh_pem <- tempfile(pattern = "epa_cert_", fileext = ".pem")
  fresh_crt <- sub("\\.pem$", ".crt", fresh_pem)
  testthat::expect_message(
    download_epa_certificate(fresh_pem)
  )
  # Remove the downloaded and converted certificates created above.
  unlink(c(fresh_pem, fresh_crt))

  # The certificate bundled with the package is passed as-is and must not
  # raise an error.
  testthat::expect_no_error(
    download_epa_certificate(
      system.file("extdata/cacert_gaftp_epa.pem", package = "amadeus")
    )
  )
})

0 comments on commit 7ffa63e

Please sign in to comment.