Skip to content

Commit

Permalink
Refactor code.
Browse files Browse the repository at this point in the history
  • Loading branch information
everaldorodrigo committed Oct 14, 2024
1 parent fbd6019 commit 244fa68
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 64 deletions.
68 changes: 4 additions & 64 deletions src/hub/dataload/sources/civic/civic_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,7 @@
from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import HTTPDumper

from hub.dataload.sources.civic.graphql_variants import GraphqlVariants
from hub.dataload.sources.civic.graphql_molecular_profiles import (
GraphqlMolecularProfiles,
)
from hub.dataload.sources.civic.graphql_detail import GraphqlVariantDetail
from hub.dataload.sources.civic.graphql_contributor_avatars import (
GraphqlContributorAvatars,
)
from hub.dataload.sources.civic.graphql_summary import GraphqlVariantSummary
from hub.dataload.sources.civic.graphql_gene import GraphqlGeneVariant
from hub.dataload.sources.civic.graphql_dump import GraphqlDump


class CivicDumper(HTTPDumper):
Expand All @@ -33,24 +24,9 @@ def set_release(self):
self.release = datetime.date.today().strftime("%Y-%m-%d")

def create_todump_list(self, force=False, **kwargs):
ids = []
self.logger.info("Find all available variant IDS")

hasNextPage = True
# previousPageEnd = None
# loop through all the pages
while hasNextPage:
response_data = GraphqlVariants().fetch(api_url=self.API_URL)
print("### response_data")
print(response_data)
if "data" in response_data:
for variant in response_data["data"]["browseVariants"]["edges"]:
ids.append(variant["node"]["id"])
hasNextPage = response_data["data"]["browseVariants"]["pageInfo"][
"hasNextPage"
]
hasNextPage = False # TODO: Remove to get all pages
# previousPageEnd = response_data['data']['browseVariants']['pageInfo']['endCursor']
self.logger.info("Find all available variant IDS")
ids = GraphqlDump().get_variants_list(api_url=self.API_URL)

self.logger.info("Now download files")
for variant_id in ids[:5]: # TODO: Remove the top 5 [:5]
Expand Down Expand Up @@ -78,49 +54,13 @@ def download(self, remoteurl, localfile, headers={}): # noqa: B006
self.prepare_local_folders(localfile)
variant_id = remoteurl

self.logger.debug("Downloading '%s' as '%s'" % (remoteurl, localfile))
res_summary = GraphqlVariantSummary().fetch(
api_url=self.API_URL, variant_id=variant_id
)
res_detail = GraphqlVariantDetail().fetch(
api_url=self.API_URL, variant_id=variant_id
)
res_molecular_profiles = GraphqlMolecularProfiles().fetch(
api_url=self.API_URL, variant_id=variant_id
)
res_contributor_avatars = GraphqlContributorAvatars().fetch(
api_url=self.API_URL, variant_id=variant_id
)
res_gene_variant = GraphqlGeneVariant().fetch(
api_url=self.API_URL, variant_id=variant_id
)

variant_data = {}
variant_data = self.merge_dicts(variant_data, res_molecular_profiles["data"])
variant_data = self.merge_dicts(variant_data, res_contributor_avatars["data"])
variant_data = self.merge_dicts(variant_data, res_gene_variant["data"]["variant"])
variant_data = self.merge_dicts(variant_data, res_detail["data"]["variant"])
variant_data = self.merge_dicts(variant_data, res_summary["data"]["variant"])
variant_data = GraphqlDump().dump_variant(variant_id=variant_id, api_url=self.API_URL)

with open(localfile, "w") as f:
json.dump(variant_data, f)

return variant_data

def merge_dicts(self, d1, d2):
merged = d1.copy()
for key, value in d2.items():
if key in merged:
if isinstance(merged[key], dict) and isinstance(value, dict):
merged[key] = self.merge_dicts(merged[key], value)
elif isinstance(merged[key], list) and isinstance(value, list):
merged[key] = merged[key] + value # Concatenate lists
else:
merged[key] = value # Overwrite value
else:
merged[key] = value
return merged


def __init__(self, *args, **kwargs):
super(CivicDumper, self).__init__(*args, **kwargs)
Expand Down
71 changes: 71 additions & 0 deletions src/hub/dataload/sources/civic/graphql_dump.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from hub.dataload.sources.civic.graphql_variants import GraphqlVariants
from hub.dataload.sources.civic.graphql_molecular_profiles import (
GraphqlMolecularProfiles,
)
from hub.dataload.sources.civic.graphql_detail import GraphqlVariantDetail
from hub.dataload.sources.civic.graphql_contributor_avatars import (
GraphqlContributorAvatars,
)
from hub.dataload.sources.civic.graphql_summary import GraphqlVariantSummary
from hub.dataload.sources.civic.graphql_gene import GraphqlGeneVariant


class GraphqlDump:
    """Collect CIViC variant data by combining several GraphQL queries."""

    def get_variants_list(self, api_url: str) -> list:
        """Return the variant IDs listed by the ``browseVariants`` query.

        Args:
            api_url: URL of the GraphQL endpoint to query.

        Returns:
            The ``id`` of every node found under
            ``data.browseVariants.edges`` in the response. The list is empty
            when the response carries no ``"data"`` key.
        """
        ids = []
        has_next_page = True
        # previousPageEnd = None
        # loop through all the pages
        while has_next_page:
            response_data = GraphqlVariants().fetch(api_url=api_url)
            if "data" not in response_data:
                # Nothing to read and no page info to follow: stop here
                # instead of issuing the very same request again.
                break
            browse = response_data["data"]["browseVariants"]
            ids.extend(edge["node"]["id"] for edge in browse["edges"])
            has_next_page = browse["pageInfo"]["hasNextPage"]
            # The fetch call above is given no cursor, so only the first
            # page is read for now.
            has_next_page = False  # TODO: Remove to get all pages
        return ids

    def dump_variant(self, api_url: str, variant_id: int) -> dict:
        """Fetch every available view of one variant and merge them.

        Args:
            api_url: URL of the GraphQL endpoint to query.
            variant_id: Identifier of the variant to fetch.

        Returns:
            A single dict built by merging, in this order, the molecular
            profiles and contributor avatars payloads (their ``"data"``
            object) and the gene, detail and summary payloads (their
            ``"data"]["variant"`` object). See ``merge_dicts`` for how
            overlapping keys are resolved.

        Raises:
            KeyError: If a response lacks the ``"data"`` (or ``"variant"``)
                key that is read from it.
        """
        # Use the ``api_url`` argument: this class has no ``API_URL``
        # attribute of its own.
        res_summary = GraphqlVariantSummary().fetch(
            api_url=api_url, variant_id=variant_id
        )
        res_detail = GraphqlVariantDetail().fetch(
            api_url=api_url, variant_id=variant_id
        )
        res_molecular_profiles = GraphqlMolecularProfiles().fetch(
            api_url=api_url, variant_id=variant_id
        )
        res_contributor_avatars = GraphqlContributorAvatars().fetch(
            api_url=api_url, variant_id=variant_id
        )
        res_gene_variant = GraphqlGeneVariant().fetch(
            api_url=api_url, variant_id=variant_id
        )

        # Merge order matters: for non-dict, non-list values a later payload
        # overwrites an earlier one.
        payloads = (
            res_molecular_profiles["data"],
            res_contributor_avatars["data"],
            res_gene_variant["data"]["variant"],
            res_detail["data"]["variant"],
            res_summary["data"]["variant"],
        )
        variant_data = {}
        for payload in payloads:
            variant_data = self.merge_dicts(variant_data, payload)

        return variant_data

    def merge_dicts(self, d1: dict, d2: dict) -> dict:
        """Return a new dict holding ``d1`` recursively merged with ``d2``.

        For a key present in both inputs: two dicts are merged recursively,
        two lists are concatenated (``d1`` items first), and any other
        combination takes the value from ``d2``. Keys present in only one
        input are kept as they are. ``d1`` itself is not modified.

        Args:
            d1: Base mapping.
            d2: Mapping whose entries are merged on top of ``d1``.

        Returns:
            The merged dict.
        """
        merged = d1.copy()
        for key, value in d2.items():
            if key in merged:
                if isinstance(merged[key], dict) and isinstance(value, dict):
                    merged[key] = self.merge_dicts(merged[key], value)
                elif isinstance(merged[key], list) and isinstance(value, list):
                    merged[key] = merged[key] + value  # Concatenate lists
                else:
                    merged[key] = value  # Overwrite value
            else:
                merged[key] = value
        return merged

0 comments on commit 244fa68

Please sign in to comment.