Skip to content

Commit

Permalink
The NCBI recently started adding NA values as FTP file paths in the…
Browse files Browse the repository at this point in the history
…ir `species summary files` for species without reference genomes. As a result, `meta.retrieval()` stopped working because no FTP paths were found for some species. This issue is now fixed by adding the filter rule `!is.na(ftp_path)` to all `get*()` functions (many thanks to Ashok Kumar Sharma #34 and Dominik Merges #72 for making me aware of this issue).
  • Loading branch information
HajkD committed Mar 17, 2021
1 parent 092bf7a commit ac025b9
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 32 deletions.
6 changes: 3 additions & 3 deletions R/getAssemblyStats.R
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,14 @@ getAssemblyStats <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -323,7 +323,7 @@ getAssemblyStats <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

message(
paste0(
Expand Down
10 changes: 5 additions & 5 deletions R/getCDS.R
Original file line number Diff line number Diff line change
Expand Up @@ -119,14 +119,14 @@ getCDS <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -322,7 +322,7 @@ getCDS <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

message(
paste0(
Expand Down Expand Up @@ -504,7 +504,7 @@ getCDS <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down Expand Up @@ -672,7 +672,7 @@ getCDS <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down
10 changes: 5 additions & 5 deletions R/getGFF.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,14 +134,14 @@ getGFF <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -322,7 +322,7 @@ getGFF <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

if (gunzip) {
message(
Expand Down Expand Up @@ -488,7 +488,7 @@ getGFF <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down Expand Up @@ -659,7 +659,7 @@ getGFF <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down
12 changes: 6 additions & 6 deletions R/getGenome.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,14 +161,14 @@ getGenome <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
assembly_accession == organism,
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -339,12 +339,12 @@ getGenome <-

)

readr::write_tsv(doc, path = file.path(path, paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path, paste0("doc_",local.org,"_db_",db,".tsv")))

genome_summary_stats <- summary_genome(file = file.path(path,
paste0(local.org, "_genomic_", db, ".fna.gz")), organism = organism)

readr::write_tsv(genome_summary_stats, path = file.path(path, paste0("doc_",local.org,"_db_",db,"_summary_statistics.tsv")))
readr::write_tsv(genome_summary_stats, file = file.path(path, paste0("doc_",local.org,"_db_",db,"_summary_statistics.tsv")))

if (!gunzip) {
message(
Expand Down Expand Up @@ -529,7 +529,7 @@ getGenome <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down Expand Up @@ -696,7 +696,7 @@ getGenome <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down
10 changes: 5 additions & 5 deletions R/getProteome.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,14 +141,14 @@ getProteome <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -325,7 +325,7 @@ getProteome <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

if (!gunzip) {
message(
Expand Down Expand Up @@ -506,7 +506,7 @@ getProteome <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down Expand Up @@ -673,7 +673,7 @@ getProteome <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down
10 changes: 5 additions & 5 deletions R/getRNA.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,14 +115,14 @@ getRNA <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -319,7 +319,7 @@ getRNA <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

message(
paste0(
Expand Down Expand Up @@ -483,7 +483,7 @@ getRNA <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down Expand Up @@ -628,7 +628,7 @@ getRNA <-

)

readr::write_tsv(doc, file.path(
readr::write_tsv(doc, file = file.path(
path,
paste0("doc_", new.organism, "_db_", db, ".tsv"))
)
Expand Down
6 changes: 3 additions & 3 deletions R/getRepeatMasker.R
Original file line number Diff line number Diff line change
Expand Up @@ -123,14 +123,14 @@ getRepeatMasker <-
AssemblyFilesAllKingdoms,
stringr::str_detect(organism_name, organism) |
stringr::str_detect(assembly_accession, organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
} else {
FoundOrganism <-
dplyr::filter(
AssemblyFilesAllKingdoms,
taxid == as.integer(organism),
(version_status == "latest")
(version_status == "latest"), !is.na(ftp_path)
)
}
}
Expand Down Expand Up @@ -308,7 +308,7 @@ getRepeatMasker <-

)

readr::write_tsv(doc, path = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))
readr::write_tsv(doc, file = file.path(path,paste0("doc_",local.org,"_db_",db,".tsv")))

message(
paste0(
Expand Down

0 comments on commit ac025b9

Please sign in to comment.