Skip to content

Commit

Permalink
Draft skeleton pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
matentzn authored and joeflack4 committed Jul 25, 2022
1 parent 58358ae commit fdb88ba
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
11 changes: 11 additions & 0 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -250,3 +250,14 @@ ALL_COMPONENT_SIGNTAURE_REPORTS=$(foreach n,$(ALL_COMPONENT_IDS), reports/mirror
# Aggregate target: brings every file listed in the mirror and component
# signature-report variables up to date, then prints a completion notice.
.PHONY: signature_reports
signature_reports: \
    $(ALL_MIRROR_SIGNTAURE_REPORTS) \
    $(ALL_COMPONENT_SIGNTAURE_REPORTS)
	echo "Finished running signature reports.."

# Output directory for the slurp tables. The slurp/%.tsv rule lists it as an
# order-only prerequisite, so it only has to exist; its timestamp is ignored.
slurp/:
	mkdir -p $@

# Min ID is the next available Mondo ID; it is passed to migrate.py as --min-id.
# Override when needed, e.g. `make slurp SLURP_MIN_ID=130000`.
SLURP_MIN_ID ?= 123000

# Build the migration table for one source component (the stem, e.g. omim).
# Prerequisites as they are referenced in the recipe:
#   $<            components/%.owl                    (-i)
#   $(word 2,$^)  tmp/mondo.sssom.tsv                 (--mapping-file)
#   $(word 3,$^)  reports/mirror-signature-mondo.tsv  (--mondo-terms)
# slurp/ is order-only: it must exist, but its mtime never triggers a rebuild.
# Feel free to change the migrate.py command-line signature.
slurp/%.tsv: components/%.owl tmp/mondo.sssom.tsv reports/mirror-signature-mondo.tsv | slurp/
	python $(SCRIPTSDIR)/migrate.py -i $< \
		--mapping-file $(word 2,$^) \
		--min-id $(SLURP_MIN_ID) \
		--mondo-terms $(word 3,$^) \
		--output $@

# slurp/*.tsv is reached only through a chain of pattern rules, so make would
# treat it as an intermediate file and delete it once slurp-% is done.
# Listing the target pattern under .PRECIOUS keeps the generated table.
# NOTE(review): .PRECIOUS also keeps a partially written file if the recipe
# fails or is interrupted - confirm that is acceptable for this pipeline.
.PRECIOUS: slurp/%.tsv

# Convenience alias: `make slurp-omim` builds slurp/omim.tsv.
# The no-op recipe is required: a pattern rule without a recipe defines no rule
# (it only cancels a matching implicit rule), so slurp-omim would have no rule.
# Do not declare slurp-<id> targets .PHONY: implicit rule search is skipped for
# phony targets, so this pattern rule would never be applied to them.
slurp-%: slurp/%.tsv
	@:

# Run the slurp step for every supported source (currently only omim).
.PHONY: slurp
slurp: slurp-omim
37 changes: 37 additions & 0 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Migration pipeline

#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING

#Inputs:
source_ontology # e.g. omim; iterated below to obtain the candidate source terms
sssom_map # e.g. mondo.sssom.tsv; table with subject_id, predicate_id and object_id columns
min_id # lowest Mondo ID that may be handed out
termlist_mondo # Mondo terms that already exist
fn # path of the TSV file to write (the --output argument in the Makefile rule)

#Outputs:
data = []

# Source terms that already have a mapping. Build a set of the column VALUES:
# `x in series` on a pandas Series tests the index labels, not the values.
# (As used here, object_id holds the source term and subject_id the Mondo term.)
mapped_terms = set(sssom_map['object_id'])

# Mapping predicates under which a mapped parent can be used as the new term's parent.
parent_predicates = ['skos:exactMatch', 'skos:narrowMatch']

# IDs that must not be handed out: the existing Mondo terms plus every ID assigned
# below. Without recording assigned IDs, every call to the helper would receive the
# same arguments and return the same ID.
# NOTE(review): assumes termlist_mondo is an iterable of IDs in the same form the
# helper returns, and that the helper only needs membership tests - confirm.
reserved_ids = set(termlist_mondo)

for t in source_ontology:
    if t in mapped_terms:
        # Already mapped to Mondo, nothing to migrate
        continue
    parents = []
    migrate = True
    for p in oak.get_direct_parents(t):
        if p not in mapped_terms:
            # A parent without any mapping blocks the migration of t
            migrate = False
            break
        parent_rows = sssom_map[sssom_map['object_id'] == p]
        usable_rows = parent_rows[parent_rows['predicate_id'].isin(parent_predicates)]
        # In other words, if the parent is mapped, and the mapping is either exact or narrower,
        # remember its Mondo counterpart(s). If the parent only has other kinds of mappings
        # that is fine: nothing is added and we continue looking at the other parents.
        parents.extend(usable_rows['subject_id'].tolist())
    if migrate and parents:
        # Starting from min_id, then counting up and checking that the ID does not already exist.
        next_mondo_id = determine_next_available_mondo_id(min_id, reserved_ids)
        reserved_ids.add(next_mondo_id)
        label = oak.get_label(t)
        definition = oak.get_definition(t)
        data.append({'mondo_id': next_mondo_id, 'xref': t, 'label': label, 'definition': definition})

# Explicit columns so that the header row is written even when nothing was migrated.
pandas.DataFrame(data, columns=['mondo_id', 'xref', 'label', 'definition']).to_csv(fn, sep="\t")

0 comments on commit fdb88ba

Please sign in to comment.