Merging master #22

Merged: 16 commits, Sep 24, 2024. Changes from all commits are shown below, file by file.
8 changes: 7 additions & 1 deletion HISTORY.txt
@@ -34,4 +34,10 @@
 14-Feb-2023 - V0.38 Add support for requesting specific inputIdCodeList/idCodeList for CSMs
 22-Mar-2023 - V0.39 Update references to py-rcsb_exdb_assets master branch
 19-May-2023 - V0.40 Update DNS to PDB archive
-12-Jun-2023 - V0.41 Disable useCache for holdings files to force re-download
+12-Jun-2023 - V0.41 Disable useCache for holdings files to force re-download
+05-Mar-2024 - V0.42 Adjust RepositoryProvider to support BCIF loading and CSM scaling
+19-Mar-2024 - V0.43 Raise exception and return empty list if not all dataContainers are properly read from file in __mergeContainers()
+29-May-2024 - V0.44 Fix pylinting
+ 1-Jul-2024 - V0.45 Update CurrentHoldingsProvider to stop including DSN6 maps
+ 9-Sep-2024 - V0.46 Always defer to loading holdings data from remote (rather than storing it locally);
+                    Add validation coefficients to list of repository_content_types
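
In practice, the V0.46 change inverts the caching model: holdings inventories are always read from their remote locators, and a local copy is written only when the new storeCache keyword is set. A minimal caller-side sketch under the constructor signature shown in the diffs below (the cache path is illustrative, and testCache() is assumed to return a boolean):

    from rcsb.utils.repository.CurrentHoldingsProvider import CurrentHoldingsProvider

    # Remote-first (the new default): the holdings JSON is read directly from
    # its remote locator on each instantiation; nothing is persisted locally.
    chP = CurrentHoldingsProvider("./CACHE", useCache=False)
    ok = chP.testCache(minCount=220000)

    # Opt-in mirroring: storeCache=True additionally downloads the holdings
    # files under <cachePath>/holdings so a later run can reuse them.
    chP = CurrentHoldingsProvider("./CACHE", useCache=True, storeCache=True)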
2 changes: 1 addition & 1 deletion azure-template-publish-job.yml
@@ -15,7 +15,7 @@ jobs:
 - job: ${{ format('publish_{0}_{1}', parameters.tox, parameters.os) }}
   pool:
     ${{ if eq(parameters.os, 'macos') }}:
-      vmImage: 'macOS-11'
+      vmImage: 'macOS-12'
     ${{ if eq(parameters.os, 'linux') }}:
       vmImage: 'ubuntu-20.04'
   dependsOn:
4 changes: 2 additions & 2 deletions azure-template-tox-job.yml
@@ -18,7 +18,7 @@ jobs:
   timeoutInMinutes: 0
   pool:
     ${{ if eq(parameters.os, 'macos') }}:
-      vmImage: 'macOS-11'
+      vmImage: 'macOS-12'
     ${{ if eq(parameters.os, 'linux') }}:
       vmImage: 'ubuntu-20.04'

@@ -41,7 +41,7 @@ jobs:
   - bash: |
       set -e
       ls -la /Applications/Xcode*
-      sudo xcode-select --switch /Applications/Xcode_12.5.1.app/Contents/Developer
+      sudo xcode-select --switch /Applications/Xcode_14.2.app/Contents/Developer
       which g++
       c++ --version
     displayName: "setup Xcode"
112 changes: 70 additions & 42 deletions rcsb/utils/repository/CurrentHoldingsProvider.py
@@ -4,6 +4,10 @@
 #
 # Updates:
 #  12-Jun-2023 dwp Set useCache default to False to force redownloading of holdings files
+#   1-Jul-2024 dwp Stop populating "2fo-fc Map" and "fo-fc Map" content types (DSN6 maps), and
+#                  only include "Map Coefficients" (MTZ map coefficients) in repository holdings
+#   9-Sep-2024 dwp Always defer to loading holdings data from remote (rather than storing it locally);
+#                  Add validation coefficients to list of repository_content_types
 #
 ##
 """Provide inventory of current repository content.
@@ -28,6 +32,7 @@ class CurrentHoldingsProvider(object):
     def __init__(self, cachePath, useCache=False, **kwargs):
         self.__cachePath = cachePath
         self.__dirPath = os.path.join(cachePath, "holdings")
+        self.__storeCache = kwargs.get("storeCache", False)
         #
         edMapsLocator = kwargs.get("edmapsLocator", "https://raw.githubusercontent.com/rcsb/py-rcsb_exdb_assets/master/fall_back/edmaps.json")
         #
@@ -53,7 +58,7 @@ def __init__(self, cachePath, useCache=False, **kwargs):
         if not ok:
             self.__eiP = None

-    def testCache(self, minCount=170000):
+    def testCache(self, minCount=220000):
         logger.info("Inventory length cD (%d) id list (%d)", len(self.__invD), len(self.__idD))
         # JDW - restore consistency checks
         # if len(self.__invD) > minCount and len(self.__idD) > minCount and len(self.__invD) == len(self.__idD):
@@ -171,8 +176,9 @@ def __hasValidationReportData(self, invD, entryId):
     def __assembleEntryContentTypes(self, invD):
         # Mapping between repository content types and those used by RCSB.org
         contentTypeD = {
-            "2fofc Map": "2fo-fc Map",
-            "fofc Map": "fo-fc Map",
+            "mtz_map_coefficients": "Map Coefficients",
+            # "2fofc Map": "2fo-fc Map",
+            # "fofc Map": "fo-fc Map",
             "assembly_mmcif": "assembly mmCIF",
             "assembly_pdb": "assembly PDB",
             "combined_nmr_data_nef": "Combined NMR data (NEF)",
@@ -190,6 +196,8 @@ def __assembleEntryContentTypes(self, invD):
             # Missing mappings
             # "validation report",
             # "validation slider image"
+            # "validation 2fo-fc coefficients"
+            # "validation fo-fc coefficients"
             # "Map Coefficients",
             # "FASTA sequence",
         }
@@ -207,8 +215,6 @@ def __assembleEntryContentTypes(self, invD):
             for contentType, pthL in tD.items():
                 if contentType in contentTypeD:
                     ctD.setdefault(entryId, []).append(contentTypeD[contentType])
-                    if contentType == "2fofc Map":
-                        ctD.setdefault(entryId, []).append("Map Coefficients")
                 if contentType == "validation_report":
                     # "/pdb/validation_reports/01/201l/201l_full_validation.pdf.gz"
                     # "/pdb/validation_reports/01/201l/201l_multipercentile_validation.png.gz"
@@ -225,6 +231,10 @@ def __assembleEntryContentTypes(self, invD):
                         ctD.setdefault(entryId, []).append("validation slider image")
                     elif "validation.cif.gz" in pth:
                         ctD.setdefault(entryId, []).append("validation data mmCIF")
+                    elif "validation_2fo-fc_map_coef.cif.gz" in pth:
+                        ctD.setdefault(entryId, []).append("validation 2fo-fc coefficients")
+                    elif "validation_fo-fc_map_coef.cif.gz" in pth:
+                        ctD.setdefault(entryId, []).append("validation fo-fc coefficients")
                 if contentType == "assembly_mmcif":
                     # "/pdb/data/biounit/mmCIF/divided/a0/7a09-assembly1.cif.gz"
                     for pth in pthL:
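
The new elif arms above classify validation artifacts purely by filename suffix. A condensed, self-contained sketch of that dispatch — classifyValidationPath() is a hypothetical helper, but the suffix strings and content-type labels are taken from the diff:

    def classifyValidationPath(pth):
        """Map a validation-report repository path to an RCSB.org content type,
        mirroring the suffix checks in __assembleEntryContentTypes()."""
        suffixToType = [
            ("_multipercentile_validation.png.gz", "validation slider image"),
            ("validation.cif.gz", "validation data mmCIF"),
            ("validation_2fo-fc_map_coef.cif.gz", "validation 2fo-fc coefficients"),
            ("validation_fo-fc_map_coef.cif.gz", "validation fo-fc coefficients"),
        ]
        for suffix, contentType in suffixToType:
            if suffix in pth:
                return contentType
        return None

    # e.g. (illustrative path):
    # classifyValidationPath("/pdb/validation_reports/01/201l/201l_validation_2fo-fc_map_coef.cif.gz")
    # returns "validation 2fo-fc coefficients"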
@@ -253,23 +263,23 @@ def __addMapContents(self, invD, mapD):
         for entryId in invD:
             if entryId.lower() in mapD:
                 tD = mapD[entryId.lower()]
-                for ky, bV in tD.items():
-                    if bV:
-                        invD[entryId][ky + " Map"] = []
-        #
+                if "2fofc" in tD and tD["2fofc"] == "true":
+                    invD[entryId]["mtz_map_coefficients"] = []
         return invD

     def __reloadEdmapContent(self, edmapsLocator, dirPath):
         invD = {}
         try:
-            fU = FileUtil()
-            fn = fU.getFileName(edmapsLocator)
-            fp = os.path.join(dirPath, fn)
-            self.__mU.mkdir(dirPath)
-            logger.info("Fetch edmaps inventory from %s", edmapsLocator)
-            ok = fU.get(edmapsLocator, fp)
-            if ok:
-                invD = self.__mU.doImport(fp, fmt="json")
+            invD = self.__mU.doImport(edmapsLocator, fmt="json")
+            logger.info("Loaded edmaps inventory from %s (%r)", edmapsLocator, len(invD))
+            #
+            if self.__storeCache:
+                fU = FileUtil()
+                fn = fU.getFileName(edmapsLocator)
+                fp = os.path.join(dirPath, fn)
+                self.__mU.mkdir(dirPath)
+                ok = fU.get(edmapsLocator, fp)
+                logger.info("Fetch edmaps inventory from %s to %s (%r)", edmapsLocator, fp, ok)
         except Exception as e:
             logger.exception("Failing for %r with %s", edmapsLocator, str(e))
         return invD
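
The rewritten __reloadEdmapContent() above establishes the pattern every loader below repeats: doImport() reads JSON straight from the remote locator, an empty result triggers the fallback locator, and the file is fetched to disk only when storeCache is set. Distilled into a standalone sketch — loadRemoteFirst() is a hypothetical helper, assuming a MarshalUtil-style doImport(locator, fmt=...) that accepts URLs and returns an empty container on failure:

    def loadRemoteFirst(mU, urlTarget, urlFallbackTarget):
        # Read JSON directly from the primary remote locator.
        dD = mU.doImport(urlTarget, fmt="json")
        if not dD:
            # An empty result signals failure; retry with the fallback locator.
            dD = mU.doImport(urlFallbackTarget, fmt="json")
        return dD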
@@ -281,43 +291,56 @@ def __reloadEntryContent(self, urlTarget, urlFallbackTarget, edMapsLocator, dirPath, useCache=True):
         fp = os.path.join(dirPath, fn)
         self.__mU.mkdir(dirPath)
         #
-        if useCache and self.__mU.exists(fp):
+        if self.__storeCache and useCache and self.__mU.exists(fp):
             invD = self.__mU.doImport(fp, fmt="json")
-            logger.debug("Reading cached inventory (%d)", len(invD))
+            logger.info("Reading cached inventory (%d)", len(invD))
         else:
-            logger.info("Fetch inventory from %s", urlTarget)
-            ok = fU.get(urlTarget, fp)
-            if not ok:
-                ok = fU.get(urlFallbackTarget, fp)
-            if ok:
-                invD = self.__mU.doImport(fp, fmt="json")
-                mapD = self.__reloadEdmapContent(edMapsLocator, self.__dirPath)
-                invD = self.__addMapContents(invD, mapD)
-                ofp = fp[:-3] if fp.endswith(".gz") else fp
-                ok = self.__mU.doExport(ofp, invD, fmt="json", indent=3)
-                if fp.endswith(".gz"):
-                    logger.info("Updating the current entry contents (%r) in %r", ok, fp)
-                    fU.compress(ofp, fp)
+            invD = self.__mU.doImport(urlTarget, fmt="json")
+            logger.info("Loaded inventory from %s (%r)", urlTarget, len(invD))
+            if len(invD) == 0:
+                invD = self.__mU.doImport(urlFallbackTarget, fmt="json")
+                logger.info("Loaded fallback inventory from %s (%r)", urlFallbackTarget, len(invD))
+            mapD = self.__reloadEdmapContent(edMapsLocator, self.__dirPath)
+            invD = self.__addMapContents(invD, mapD)
+            del mapD
+            #
+            # previous method - save file locally
+            if self.__storeCache:
+                logger.info("Fetch inventory from %s", urlTarget)
+                ok = fU.get(urlTarget, fp)
+                if not ok:
+                    ok = fU.get(urlFallbackTarget, fp)
+                if ok:
+                    ofp = fp[:-3] if fp.endswith(".gz") else fp
+                    ok = self.__mU.doExport(ofp, invD, fmt="json", indent=3)
+                    if fp.endswith(".gz"):
+                        logger.info("Updating the current entry contents (%r) in %r", ok, fp)
+                        fU.compress(ofp, fp)
         return invD

     def __reloadEntryIds(self, urlTarget, urlFallbackTarget, dirPath, useCache=True):
+        tD = {}
         idD = {}
         fU = FileUtil()
         fn = fU.getFileName(urlTarget)
         fp = os.path.join(dirPath, fn)
         self.__mU.mkdir(dirPath)
         #
-        if useCache and self.__mU.exists(fp):
+        if self.__storeCache and useCache and self.__mU.exists(fp):
             tD = self.__mU.doImport(fp, fmt="json")
-            logger.debug("Reading cached IDs list (%d)", len(tD))
+            logger.info("Reading cached IDs list (%d)", len(tD))
         else:
-            logger.info("Fetch ID list from %s", urlTarget)
-            ok = fU.get(urlTarget, fp)
-            if not ok:
-                ok = fU.get(urlFallbackTarget, fp)
-            #
-            if ok:
-                tD = self.__mU.doImport(fp, fmt="json")
+            tD = self.__mU.doImport(urlTarget, fmt="json")
+            logger.info("Loaded ID list from %s (%r)", urlTarget, len(tD))
+            if len(tD) == 0:
+                tD = self.__mU.doImport(urlFallbackTarget, fmt="json")
+                logger.info("Loaded fallback ID list from %s (%r)", urlFallbackTarget, len(tD))
+            #
+            if self.__storeCache:
+                logger.info("Fetch ID list from %s", urlTarget)
+                ok = fU.get(urlTarget, fp)
+                if not ok:
+                    ok = fU.get(urlFallbackTarget, fp)
         #
         for k, v in tD.items():
             try:
@@ -329,23 +352,28 @@ def __reloadEntryIds(self, urlTarget, urlFallbackTarget, dirPath, useCache=True):
         return {k: v for k, v in sTupL}

     def __reloadRefdataIds(self, urlTarget, urlFallbackTarget, dirPath, useCache=True):
+        tD = {}
         idD = {}
         fU = FileUtil()
         fn = fU.getFileName(urlTarget)
         fp = os.path.join(dirPath, fn)
         self.__mU.mkdir(dirPath)
         #
-        if useCache and self.__mU.exists(fp):
+        if self.__storeCache and useCache and self.__mU.exists(fp):
             tD = self.__mU.doImport(fp, fmt="json")
-            logger.debug("Reading cached IDs list (%d)", len(tD))
+            logger.info("Reading cached IDs list (%d)", len(tD))
         else:
-            logger.info("Fetch ID list from %s", urlTarget)
-            ok = fU.get(urlTarget, fp)
-            if not ok:
-                ok = fU.get(urlFallbackTarget, fp)
-            #
-            if ok:
-                tD = self.__mU.doImport(fp, fmt="json")
+            tD = self.__mU.doImport(urlTarget, fmt="json")
+            logger.info("Loaded ID list from %s (%r)", urlTarget, len(tD))
+            if len(tD) == 0:
+                tD = self.__mU.doImport(urlFallbackTarget, fmt="json")
+                logger.info("Loaded fallback ID list from %s (%r)", urlFallbackTarget, len(tD))
+            #
+            if self.__storeCache:
+                logger.info("Fetch ID list from %s", urlTarget)
+                ok = fU.get(urlTarget, fp)
+                if not ok:
+                    ok = fU.get(urlFallbackTarget, fp)
         #
         for k, v in tD.items():
             try:
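
The first CurrentHoldingsProvider code hunk above also shows the knock-on simplification in __addMapContents(): instead of copying every boolean map flag into the inventory, only entries whose edmaps "2fofc" flag is "true" gain an "mtz_map_coefficients" key. A worked example with an illustrative two-entry edmaps inventory (entry IDs are made up; the string-valued "true" flag follows the comparison in the diff):

    mapD = {"1abc": {"2fofc": "true", "fofc": "true"},
            "2xyz": {"2fofc": "false", "fofc": "false"}}
    invD = {"1ABC": {}, "2XYZ": {}}

    for entryId in invD:
        tD = mapD.get(entryId.lower(), {})
        if tD.get("2fofc") == "true":
            # Only MTZ map coefficients are recorded; the DSN6 "2fo-fc Map"
            # and "fo-fc Map" keys are no longer populated.
            invD[entryId]["mtz_map_coefficients"] = []

    # invD is now {"1ABC": {"mtz_map_coefficients": []}, "2XYZ": {}}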
14 changes: 10 additions & 4 deletions rcsb/utils/repository/RemovedHoldingsProvider.py
@@ -5,6 +5,7 @@
 # Updates:
 #  18-Apr-2022 dwp Update getSupersededBy method to recursively return all superseded entries
 #  12-Jun-2023 dwp Set useCache default to False to force redownloading of holdings files
+#   9-Sep-2024 dwp Always defer to loading holdings data from remote (rather than storing it locally)
 ##
 """Provide an inventory of removed repository content.
 """
@@ -24,6 +25,7 @@ class RemovedHoldingsProvider(object):
     def __init__(self, cachePath, useCache=False, **kwargs):
         self.__cachePath = cachePath
         self.__dirPath = os.path.join(self.__cachePath, "holdings")
+        self.__storeCache = kwargs.get("storeCache", False)
         self.__filterType = kwargs.get("filterType", "")
         self.__assignDates = "assign-dates" in self.__filterType
         baseUrl = kwargs.get("holdingsTargetUrl", "https://files.wwpdb.org/pub/pdb/holdings")
@@ -137,17 +139,21 @@ def __reload(self, urlTarget, urlFallbackTarget, dirPath, useCache=True):
         fp = os.path.join(dirPath, fn)
         self.__mU.mkdir(dirPath)
         #
-        if useCache and self.__mU.exists(fp):
+        if self.__storeCache and useCache and self.__mU.exists(fp):
             invD = self.__mU.doImport(fp, fmt="json")
             logger.info("Reading cached inventory (%d) from file %s", len(invD), fp)
         else:
-            logger.info("Fetch inventory from %s", urlTarget)
-            ok = fU.get(urlTarget, fp)
-            if not ok:
-                ok = fU.get(urlFallbackTarget, fp)
-            #
-            if ok:
-                invD = self.__mU.doImport(fp, fmt="json")
+            invD = self.__mU.doImport(urlTarget, fmt="json")
+            logger.info("Loaded inventory from %s (%r)", urlTarget, len(invD))
+            if len(invD) == 0:
+                invD = self.__mU.doImport(urlFallbackTarget, fmt="json")
+                logger.info("Loaded fallback inventory from %s (%r)", urlFallbackTarget, len(invD))
+            #
+            if self.__storeCache:
+                logger.info("Fetch inventory from %s", urlTarget)
+                ok = fU.get(urlTarget, fp)
+                if not ok:
+                    ok = fU.get(urlFallbackTarget, fp)
         #
         return invD
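
RemovedHoldingsProvider now follows the same contract as CurrentHoldingsProvider. A brief usage sketch under that assumption (the cache path is illustrative; storeCache is assumed to mirror the removed-holdings JSON under <cachePath>/holdings as in the diff):

    from rcsb.utils.repository.RemovedHoldingsProvider import RemovedHoldingsProvider

    # Remote-first load of the removed-entry inventory (the new default path).
    rhP = RemovedHoldingsProvider("./CACHE", useCache=False)

    # Opt in to a local mirror of the holdings file for later reuse.
    rhP = RemovedHoldingsProvider("./CACHE", useCache=True, storeCache=True)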