From 513bc40177da23119fbd5bdb0c40fce200066985 Mon Sep 17 00:00:00 2001 From: Dennis Piehl Date: Tue, 27 Aug 2024 05:20:05 -0400 Subject: [PATCH 1/2] V1.24 Update tree loading for CARD ontology data --- HISTORY.txt | 1 + rcsb/exdb/cli/__init__.py | 2 +- rcsb/exdb/tree/TreeNodeListWorker.py | 23 ++++++++++++++++------- requirements.txt | 6 +++--- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/HISTORY.txt b/HISTORY.txt index 23006a4..db5a657 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -103,3 +103,4 @@ 13-May-2024 V1.21 Update requirements; fix linting 17-Jul-2024 V1.22 Adjust LigandNeighborMappingExtractor 20-Aug-2024 V1.23 Add support for target cofactor data loading +27-Aug-2024 V1.24 Update tree loading for CARD ontology data diff --git a/rcsb/exdb/cli/__init__.py b/rcsb/exdb/cli/__init__.py index 757aff7..5a5c2b3 100644 --- a/rcsb/exdb/cli/__init__.py +++ b/rcsb/exdb/cli/__init__.py @@ -2,4 +2,4 @@ __author__ = "John Westbrook" __email__ = "john.westbrook@rcsb.org" __license__ = "Apache 2.0" -__version__ = "1.23" +__version__ = "1.24" diff --git a/rcsb/exdb/tree/TreeNodeListWorker.py b/rcsb/exdb/tree/TreeNodeListWorker.py index 20f0d4a..a21f665 100644 --- a/rcsb/exdb/tree/TreeNodeListWorker.py +++ b/rcsb/exdb/tree/TreeNodeListWorker.py @@ -8,6 +8,7 @@ # 9-Sep-2019 jdw add AtcProvider() and ChemrefExtractor() for ATC tree. # 12-Apr-2023 dwp add CARD ontology tree # 8-Aug-2023 dwp Load full (unfiltered) taxonomy tree node list, and stop loading GO tree (will be loaded in DW instead) +# 27-Aug-2024 dwp Update CARD ontology tree loading # ## __docformat__ = "google en" @@ -181,13 +182,21 @@ def load(self, updateId, loadType="full", doLoad=True): ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None) self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp) # ---- CARD - cou = CARDTargetOntologyProvider(cachePath=self.__cachePath, useCache=False) - nL = cou.getTreeNodeList() - logger.info("Starting load of EC node tree length %d", len(nL)) - if doLoad: - collectionName = "tree_card_node_list" - ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None) - self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp) + okCou = True + cou = CARDTargetOntologyProvider(cachePath=self.__cachePath, useCache=useCache) + if not cou.testCache(): + ok = cou.buildOntologyData() + cou.reload() + if not (ok and cou.testCache()): + logger.error("Skipping load of CARD Target Ontology tree data because it is missing.") + okCou = False + if okCou: + nL = cou.getTreeNodeList() + logger.info("Starting load of CARD ontology node tree length %d", len(nL)) + if doLoad: + collectionName = "tree_card_node_list" + ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None) + self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp) # ---- Taxonomy tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache) if self.__useFilteredLists: diff --git a/requirements.txt b/requirements.txt index 2f8b8ce..88aecf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,9 +12,9 @@ rcsb.utils.ec >= 0.25 rcsb.utils.go >= 0.18 rcsb.utils.seq >= 0.82 rcsb.utils.seqalign >= 0.29 -rcsb.utils.targets >= 0.80 +rcsb.utils.targets >= 0.82 rcsb.utils.struct >= 0.47 rcsb.utils.taxonomy >= 0.43 -rcsb.utils.dictionary >= 1.26 -rcsb.workflow >= 0.44 +rcsb.utils.dictionary >= 1.27 +rcsb.workflow >= 0.45 statistics; python_version < "3.0" \ No newline at end of file From 4d1ef24ce459ff45721372488eef49609ee47b05 Mon Sep 17 00:00:00 2001 From: Dennis Piehl Date: Tue, 27 Aug 2024 12:10:14 -0400 Subject: [PATCH 2/2] pylint --- rcsb/exdb/tree/TreeNodeListWorker.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/rcsb/exdb/tree/TreeNodeListWorker.py b/rcsb/exdb/tree/TreeNodeListWorker.py index a21f665..244b289 100644 --- a/rcsb/exdb/tree/TreeNodeListWorker.py +++ b/rcsb/exdb/tree/TreeNodeListWorker.py @@ -195,7 +195,16 @@ def load(self, updateId, loadType="full", doLoad=True): logger.info("Starting load of CARD ontology node tree length %d", len(nL)) if doLoad: collectionName = "tree_card_node_list" - ok = dl.load(databaseName, collectionName, loadType=loadType, documentList=nL, indexAttributeList=["update_id"], keyNames=None, addValues=addValues, schemaLevel=None) + ok = dl.load( + databaseName, + collectionName, + loadType=loadType, + documentList=nL, + indexAttributeList=["update_id"], + keyNames=None, + addValues=addValues, + schemaLevel=None + ) self.__updateStatus(updateId, databaseName, collectionName, ok, statusStartTimestamp) # ---- Taxonomy tU = TaxonomyProvider(cachePath=self.__cachePath, useCache=useCache)