Skip to content

Commit

Permalink
Parse molecular profile.
Browse files — browse the repository at this point in the history
  • Loading branch information
everaldorodrigo committed Oct 11, 2024
1 parent 56f4b96 commit 0bc7471
Showing 1 changed file with 15 additions and 20 deletions.
35 changes: 15 additions & 20 deletions src/hub/dataload/sources/civic/civic_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -56,23 +56,23 @@ def load_data(data_folder):
new_doc['_id'] = 'CIVIC_VARIANT:' + str(variant_id)
# for _evidence in doc['evidence_items']:
# print(doc)
for _molecularProfiles in doc['molecularProfiles']['edges']:
for _molecularProfiles in doc['molecularProfiles']['nodes']:
# print(_molecularProfiles)
for _evidence in _molecularProfiles['node']['evidenceItems']['edges']:
for _evidence in _molecularProfiles['evidenceItems']['edges']:
print(_evidence['node'])
if 'disease' in _evidence and 'doid' in (_evidence['disease'] or {}) and _evidence['disease']['doid']:
_evidence['disease']['doid'] = 'DOID:' + _evidence['disease']['doid']
# if 'source' in _evidence and 'citation_id' in _evidence['source']:
# if _evidence['source']['source_type'] == "PubMed":
# _evidence['source']['pubmed'] = to_int(_evidence['source']['citation_id'])
# _evidence['source'].pop('source_type')
# _evidence['source'].pop('citation_id')
# elif _evidence['source']['source_type'] == "ASCO":
# _evidence['source']['asco'] = to_int(_evidence['source']['citation_id'])
# _evidence['source'].pop('source_type')
# _evidence['source'].pop('citation_id')
# else:
# raise ValueError("The value of source_type is not one of PubMed or ASCO, it's {}, need to restructure parser".format(_evidence['source']['source_type']))
if 'disease' in _evidence['node'] and 'doid' in (_evidence['node']['disease'] or {}) and _evidence['node']['disease']['doid']:
_evidence['node']['disease']['doid'] = 'DOID:' + _evidence['node']['disease']['doid']
# if 'source' in _evidence and 'citation_id' in _evidence['source']:
# if _evidence['source']['source_type'] == "PubMed":
# _evidence['source']['pubmed'] = to_int(_evidence['source']['citation_id'])
# _evidence['source'].pop('source_type')
# _evidence['source'].pop('citation_id')
# elif _evidence['source']['source_type'] == "ASCO":
# _evidence['source']['asco'] = to_int(_evidence['source']['citation_id'])
# _evidence['source'].pop('source_type')
# _evidence['source'].pop('citation_id')
# else:
# raise ValueError("The value of source_type is not one of PubMed or ASCO, it's {}, need to restructure parser".format(_evidence['source']['source_type']))
new_doc['civic'] = doc
# yield dict_sweep(unlist(new_doc),['','null', 'N/A', None, [], {}])
# Change doid into its formal representation, which should be something like DOID:1
Expand All @@ -82,8 +82,3 @@ def load_data(data_folder):
logging.info("number of ids with chrom, ref but no alt: {}".format(no_case2))
logging.info("number of ids with chrom, alt but no ref: {}".format(no_case3))
logging.info("number of ids with no ref and alt: {}".format(no_case4))


load_data("/Users/v/dev/scripps/myvariant.info-copy/src/hub/dataload/sources/civic")

# print("##########")

0 comments on commit 0bc7471

Please sign in to comment.