diff --git a/DESCRIPTION b/DESCRIPTION index de7295e7..1a5f65e4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Authors@R: c(person("Eduard", "Szöcs", role = c("aut", "cre"), person("Andreas", "Scharmüller", role = "ctb"), person("Eric R", "Scott", role = "ctb"), person("Jan", "Stanstrup", role = "ctb"), + person("Gordon", "Getzinger", role = "ctb"), person("Tamás", "Stirling", role = "ctb")) Maintainer: Tamás Stirling LazyLoad: yes @@ -36,4 +37,4 @@ Imports: Suggests: testthat, rcdk -RoxygenNote: 7.0.2 +RoxygenNote: 6.1.1 diff --git a/NAMESPACE b/NAMESPACE index a183972e..4126a38c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -71,6 +71,7 @@ export(ppdb) export(ppdb_parse) export(ppdb_query) export(smiles) +export(srs_query) export(wd_ident) import(RCurl) import(dplyr) diff --git a/NEWS b/NEWS index 9359ef3e..45537f40 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,7 @@ NEW FEATURES * Retrieve data from ChEBI (https://www.ebi.ac.uk/chebi/) webservice with chebi_lite_entity() and chebi_comp_entity(). ChEBI comprises a rich data base on chemicals with bilogical interest [contributed by @andreasLD]. * Retrieve retention indices from NIST (https://webbook.nist.gov) with nist_ri() [PR #154, contributed by @Aariq] +* Get record details from US EPA Substance Registry Services (https://cdxnodengn.epa.gov/cdx-srs-rest/) with srs_query() [PR #179] * "first" argument in cts_convert() and cir_query() and "interactive" argument in pc_synonyms() deprecated. Use "choices" instead to return either a list of all results, only the first result, or an interactive menu to choose a result to return. 
[contributed by @Aariq] MINOR IMPROVEMENTS @@ -15,6 +16,7 @@ BUG FIXES * cs_prop() failed with duplicated return values [issue #148, reported and fixed by @stanstrup] * pp_query() failed when compound present, but no properties [issue #151, reported and fixed by @stanstrup] +* ci_query() failed when missing table [issue #196, reported and fixed by @gjgetzinger] * get_csid() failed because of a major change in the ChemSpider API [issue #149, PR #165, contributed by @stitam] * multiple functions failed because of a major change in the ChemSpider API [issue #149, contributed by @stitam] * cir_query() mistook NA for sodium [issue #158, reported and fixed by @Aariq] diff --git a/R/chemid.R b/R/chemid.R index cff02510..d808aa97 100644 --- a/R/chemid.R +++ b/R/chemid.R @@ -149,22 +149,62 @@ ci_query <- function(query, type = c('name', 'rn', 'inchikey'), source_url <- gsub('^(.*)\\?.*', '\\1', qurl) } - name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) - synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) - cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) - inchi <- gsub('\\n|\\t', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] - ) - inchikey <- gsub('\\n|\\t|\\r', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) - ) - smiles <- gsub('\\n|\\t|\\r', '', - xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) - ) - toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] - physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] - physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) - #= same as physprop + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Name of 
Substance')]/following-sibling::div[1]//li"))){ + name <- NA + }else{ + name <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Name of Substance')]/following-sibling::div[1]//li")) + } + + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li"))){ + synonyms <- NA + }else{ + synonyms <- xml_text(xml_find_all(ttt, "//h3[contains(., 'Synonyms')]/following-sibling::div[1]//li")) + } + + if(is.na(xml_find_first(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li"))){ + cas <- NA + } else { + cas <- xml_text(xml_find_all(ttt, "//h3[contains(., 'CAS Registry')]/following-sibling::ul[1]//li")) + } + + if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))){ + inchi <- NA + } else { + inchi <- gsub('\\n|\\t', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'InChI')]/following-sibling::text()[1]"))[1] + ) + } + + if(is.na(xml_find_first(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]"))){ + inchikey <- NA + } else { + inchikey <- gsub('\\n|\\t|\\r', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'InChIKey')]/following-sibling::text()[1]")) + ) + } + + if(is.na(xml_find_first(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]"))){ + smiles <- NA + } else { + smiles <- gsub('\\n|\\t|\\r', '', + xml_text(xml_find_all(ttt, "//h3[contains(., 'Smiles')]/following-sibling::text()[1]")) + ) + } + + if(is.na(xml_find_first(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))){ + toxicity <- NA + } else { + toxicity <- html_table(xml_find_all(ttt, "//h2[contains(., 'Toxicity')]/following-sibling::div//table"))[[1]] + } + + if(is.na(xml_find_first(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))){ + physprop <- NA + } else { + physprop <- html_table(xml_find_all(ttt, "//h2[contains(., 'Physical Prop')]/following-sibling::div//table"))[[1]] + physprop[ , 'Value'] <- as.numeric(physprop[ , 'Value']) + #= same as physprop + } + 
out <- list(name = name, synonyms = synonyms, cas = cas, inchi = inchi, inchikey = inchikey, smiles = smiles, toxicity = toxicity,
diff --git a/R/srs.R b/R/srs.R
new file mode 100644
index 00000000..63ace3c3
--- /dev/null
+++ b/R/srs.R
@@ -0,0 +1,64 @@
+#' Get record details from U.S. EPA Substance Registry Services (SRS)
+#'
+#' Get record details from SRS, see
+#' \url{https://cdxnodengn.epa.gov/cdx-srs-rest/}
+#'
+#'@param query character; query ID(s). One request is sent per element.
+#'@param from character; type of query ID, one of \code{'itn'}, \code{'cas'},
+#'  \code{'epaid'}, \code{'tsn'} or \code{'name'}. Defaults to \code{'itn'}.
+#'
+#'@return a named list with one element per supplied query. An element is
+#'  \code{NA} if the query is \code{NA} or SRS returns an empty result,
+#'  otherwise a list of 22: subsKey,
+#'  internalTrackingNumber, systematicName, epaIdentificationNumber,
+#'  currentCasNumber, currentTaxonomicSerialNumber, epaName, substanceType,
+#'  categoryClass, kingdomCode, iupacName, pubChemId, molecularWeight,
+#'  molecularFormula, inchiNotation, smilesNotation, classifications,
+#'  characteristics, synonyms, casNumbers, taxonomicSerialNumbers, relationships
+#'@author Gordon Getzinger, \email{gjg3@@duke.edu}
+#'@export
+#'
+#' @examples
+#' \donttest{
+#' # might fail if API is not available
+#' srs_query(query = '50-00-0', from = 'cas')
+#'
+#' ### multiple inputs
+#' casrn <- c('50-00-0', '67-64-1')
+#' srs_query(query = casrn, from = 'cas')
+#' }
+srs_query <-
+  function(query,
+           from = c("itn", "cas", "epaid", "tsn", "name")) {
+    # Reduce the vector of choices to one validated value; left as is, the
+    # default would yield five URLs per query instead of one.
+    from <- match.arg(from)
+    # No trailing slash here, so the pasted path contains no "//".
+    entity_url <- "https://cdxnodengn.epa.gov/cdx-srs-rest"
+
+    rst <- lapply(query, function(x) {
+      # Do not send a request for a missing identifier.
+      if (is.na(x)) {
+        return(NA)
+      }
+      # Percent-encode the ID so that names containing spaces or reserved
+      # characters still form a valid URL path segment.
+      entity_query <- paste0(
+        entity_url, "/substance/", from, "/",
+        utils::URLencode(as.character(x), reserved = TRUE)
+      )
+      response <- httr::GET(entity_query)
+
+      if (response$status_code != 200) {
+        stop(httr::http_status(response)$message)
+      }
+      # State the encoding explicitly so httr does not have to guess it.
+      text_content <- httr::content(response, "text", encoding = "UTF-8")
+      if (text_content == "[]") {
+        return(NA)
+      }
+      jsonlite::fromJSON(text_content)
+    })
+    names(rst) <- query
+    rst
+  }
diff --git a/man/srs_query.Rd b/man/srs_query.Rd
new file mode 100644
index 00000000..11f65cf3
---
/dev/null +++ b/man/srs_query.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/srs.R +\name{srs_query} +\alias{srs_query} +\title{Get record details from U.S. EPA Substance Registry Services (SRS)} +\usage{ +srs_query(query, from = c("itn", "cas", "epaid", "tsn", "name")) +} +\arguments{ +\item{query}{character; query ID.} + +\item{from}{character; type of query ID, e.g. \code{'itn'}, \code{'cas'}, +\code{'epaid'}, \code{'tsn'}, \code{'name'}.} +} +\description{ +Get record details from SRS, see \url{https://cdxnodengn.epa.gov/cdx-srs-rest/} +} +\examples{ +\donttest{ +# might fail if API is not available +srs_query(query = '50-00-0', from = 'cas') + +### multiple inputs +casrn <- c('50-00-0', '67-64-1') +srs_query(query = casrn, from = 'cas') +} +} +\author{ +Gordon Getzinger, \email{gjg3@duke.edu} +}