diff --git a/src/hub/dataload/sources/civic/civic_parser.py b/src/hub/dataload/sources/civic/civic_parser.py index 196938b1..20ca37a5 100644 --- a/src/hub/dataload/sources/civic/civic_parser.py +++ b/src/hub/dataload/sources/civic/civic_parser.py @@ -56,23 +56,23 @@ def load_data(data_folder): new_doc['_id'] = 'CIVIC_VARIANT:' + str(variant_id) # for _evidence in doc['evidence_items']: # print(doc) - for _molecularProfiles in doc['molecularProfiles']['edges']: + for _molecularProfiles in doc['molecularProfiles']['nodes']: # print(_molecularProfiles) - for _evidence in _molecularProfiles['node']['evidenceItems']['edges']: + for _evidence in _molecularProfiles['evidenceItems']['edges']: print(_evidence['node']) - if 'disease' in _evidence and 'doid' in (_evidence['disease'] or {}) and _evidence['disease']['doid']: - _evidence['disease']['doid'] = 'DOID:' + _evidence['disease']['doid'] - # if 'source' in _evidence and 'citation_id' in _evidence['source']: - # if _evidence['source']['source_type'] == "PubMed": - # _evidence['source']['pubmed'] = to_int(_evidence['source']['citation_id']) - # _evidence['source'].pop('source_type') - # _evidence['source'].pop('citation_id') - # elif _evidence['source']['source_type'] == "ASCO": - # _evidence['source']['asco'] = to_int(_evidence['source']['citation_id']) - # _evidence['source'].pop('source_type') - # _evidence['source'].pop('citation_id') - # else: - # raise ValueError("The value of source_type is not one of PubMed or ASCO, it's {}, need to restructure parser".format(_evidence['source']['source_type'])) + if 'disease' in _evidence['node'] and 'doid' in (_evidence['node']['disease'] or {}) and _evidence['node']['disease']['doid']: + _evidence['node']['disease']['doid'] = 'DOID:' + _evidence['node']['disease']['doid'] + # if 'source' in _evidence and 'citation_id' in _evidence['source']: + # if _evidence['source']['source_type'] == "PubMed": + # _evidence['source']['pubmed'] = to_int(_evidence['source']['citation_id']) + # _evidence['source'].pop('source_type') + # _evidence['source'].pop('citation_id') + # elif _evidence['source']['source_type'] == "ASCO": + # _evidence['source']['asco'] = to_int(_evidence['source']['citation_id']) + # _evidence['source'].pop('source_type') + # _evidence['source'].pop('citation_id') + # else: + # raise ValueError("The value of source_type is not one of PubMed or ASCO, it's {}, need to restructure parser".format(_evidence['source']['source_type'])) new_doc['civic'] = doc # yield dict_sweep(unlist(new_doc),['','null', 'N/A', None, [], {}]) # change doid into its formal representation, which should be sth like DOID:1 @@ -82,8 +82,3 @@ def load_data(data_folder): logging.info("number of ids with chrom, ref but no alt: {}".format(no_case2)) logging.info("number of ids with chrom, alt but no ref: {}".format(no_case3)) logging.info("number of ids with no ref and alt: {}".format(no_case4)) - - -load_data("/Users/v/dev/scripps/myvariant.info-copy/src/hub/dataload/sources/civic") - -# print("##########") \ No newline at end of file