diff --git a/src/bioversions/sources/__init__.py b/src/bioversions/sources/__init__.py
index bcb893cf..6ac4bc04 100644
--- a/src/bioversions/sources/__init__.py
+++ b/src/bioversions/sources/__init__.py
@@ -27,6 +27,7 @@
 from .ensembl import EnsemblGetter
 from .expasy import ExPASyGetter
 from .flybase import FlybaseGetter
+from .gtdb import GTDBGetter
 from .guidetopharmacology import GuideToPharmacologyGetter
 from .hgnc import HGNCGetter
 from .homologene import HomoloGeneGetter
@@ -137,6 +138,7 @@ def get_getters() -> list[type[Getter]]:
         ICD10Getter,
         ICD11Getter,
         CiVICGetter,
+        GTDBGetter,
     ]
     getters.extend(iter_obo_getters())
     extend_ols_getters(getters)
diff --git a/src/bioversions/sources/gtdb.py b/src/bioversions/sources/gtdb.py
new file mode 100644
index 00000000..8a4a9a0e
--- /dev/null
+++ b/src/bioversions/sources/gtdb.py
@@ -0,0 +1,45 @@
+"""A getter for GTDB."""
+
+import requests
+
+from bioversions.utils import Getter, VersionType
+
+__all__ = [
+    "GTDBGetter",
+]
+
+URL = "https://data.gtdb.ecogenomic.org/releases/latest/VERSION.txt"
+
+
+class GTDBGetter(Getter):
+    """A getter for the Genome Taxonomy Database (GTDB)."""
+
+    bioregistry_id = "gtdb"
+    name = "Genome Taxonomy Database"
+    version_type = VersionType.sequential
+    date_fmt = "%b %d, %Y"  # Format to match "Apr 24, 2024"
+    homepage_fmt = "https://gtdb.ecogenomic.org/"
+
+    def get(self) -> dict[str, str]:
+        """Get the latest GTDB version number and release date from VERSION.txt.
+
+        :returns: A dictionary with the ``version`` (leading "v" removed) and the
+            ``date`` string (leading "Released " removed, matching ``date_fmt``).
+        :raises requests.HTTPError: If VERSION.txt could not be downloaded.
+        :raises ValueError: If VERSION.txt has fewer than three lines.
+        """
+        res = requests.get(URL, timeout=15)
+        # Fail loudly rather than parsing the body of an error page as a version
+        res.raise_for_status()
+        lines = res.text.strip().split("\n")
+        if len(lines) < 3:
+            raise ValueError(f"unexpected format for GTDB VERSION.txt: {res.text!r}")
+        # First line contains version like "v220"
+        version = lines[0].strip().lstrip("v")
+        # Third line contains date like "Released Apr 24, 2024"
+        date = lines[2].strip().removeprefix("Released ")
+        return {"version": version, "date": date}
+
+
+if __name__ == "__main__":
+    GTDBGetter.print()