Skip to content

Commit

Permalink
Feature: Basic slurp pipeline
Browse files Browse the repository at this point in the history
- Update: Basic pseudo code in Python updated
- Update: Makefile: Updating formatting.
  • Loading branch information
joeflack4 committed Jul 26, 2022
1 parent fdb88ba commit 3e35822
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 33 deletions.
17 changes: 14 additions & 3 deletions src/ontology/mondo-ingest.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,12 @@ mappings: sssom $(ALL_MAPPINGS)
#################
# Utils #########
#################
# Documentation for the commands in this section is in: `docs/developer/ordo.md`
# Documentation for `report-mapping-annotations` and `update-jinja-sparql-queries`: `docs/developer/ordo.md`
# TODO: When https://github.com/monarch-initiative/mondo-ingest/issues/43 is fixed, can change back to `requirements.txt`
# Upgrade pip, then install the Python packages listed in
# $(RELEASEDIR)/requirements-unlocked.txt.
# Declared phony because the goal names an action, not a file this rule creates;
# without it, a stray file called `python-install-dependencies` would make the
# goal look up to date and silently skip the install.
.PHONY: python-install-dependencies
python-install-dependencies:
	python3 -m pip install --upgrade pip
	python3 -m pip install -r $(RELEASEDIR)/requirements-unlocked.txt


report-mapping-annotations:
python3 $(SCRIPTSDIR)/ordo_mapping_annotations/report_mapping_annotations.py
Expand Down Expand Up @@ -254,10 +259,16 @@ signature_reports: $(ALL_MIRROR_SIGNTAURE_REPORTS) $(ALL_COMPONENT_SIGNTAURE_REP
# Create the slurp/ output directory ($@ expands to `slurp/`).
# `-p` makes the command a no-op when the directory already exists.
slurp/:
	mkdir -p $@

# Run migrate.py for one source ontology.
#   $<  components/<ontology>.owl (first prerequisite, passed as -i)
#   $@  slurp/<ontology>.tsv      (passed as --output)
# slurp/ is an order-only prerequisite: it has to exist, but its timestamp
# must not trigger a rebuild of the table.
# Feel free to change the signature. Min ID is the next available Mondo ID;
# override it on the command line, e.g. `make slurp/omim.tsv SLURP_MIN_ID=124000`.
SLURP_MIN_ID ?= 123000
slurp/%.tsv: components/%.owl tmp/mondo.sssom.tsv reports/mirror-signature-mondo.tsv | slurp/
	python3 $(SCRIPTSDIR)/migrate.py \
		-i $< \
		--mapping-file tmp/mondo.sssom.tsv \
		--min-id $(SLURP_MIN_ID) \
		--mondo-terms reports/mirror-signature-mondo.tsv \
		--output $@

# Convenience alias: `make slurp-<ontology>` builds slurp/<ontology>.tsv.
# The trailing `;` gives the rule an explicit empty recipe. GNU Make does not
# treat a pattern rule without any recipe as a rule (it only cancels a matching
# implicit rule), so without it `make slurp-omim` fails with
# "No rule to make target".
slurp-%: slurp/%.tsv ;

# slurp/<ontology>.tsv is reached only through chained pattern rules, which
# makes it an intermediate file that make would delete once the goal is done.
# Listing the target pattern under .PRECIOUS keeps the generated table.
.PRECIOUS: slurp/%.tsv

# Aggregate goal: build the slurp table for every listed ontology.
# Declared phony because it names an action, and because this Makefile also
# creates a `slurp/` directory that the goal must not be confused with.
# TODO: add more ontologies, e.g.: doid, icd10cm, icd10who, ncit, ordo
.PHONY: slurp
slurp: slurp-omim
77 changes: 47 additions & 30 deletions src/scripts/migrate.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,54 @@
# Migration pipeline
"""Migration pipeline
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
#### THIS IS PSEUDO CODE NOT PYTHON OR ANYTHING
TODOs:
- add CLI: look to makefile for what to include
"""
import oakliblib
import pandas


#Inputs:
source_ontology #e.g. omim
sssom_map # e.g. mondo.sssom.tsv
min_id
termlist_mondo

#Outputs:
data = []

for t in source_ontology:
if t not in sssom_map['object_id']:
parents = []
migrate = True
for p in oak.get_direct_parents(t):
if p not in sssom_map['object_id']:
migrate = False
break
elif sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:exactMatch' \
or sssom_map[sssom_map['object_id']==p]['predicate_id'] = 'skos:narrowMatch':
# In other words, if the parent is mapped, and the mapping is either exact or narrower
parents.append(sssom_map[sssom_map['object_id']==p]['subject_id'])
else:
# Its fine, just continue looking for other parents in this case
if migrate and parents:
next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo) # satrting from min_id, then counting up and checking if it does not already exist.
label = oak.get_label(t)
definition = oak.get_definition(t)
data.append({'mondo_id':next_mondo_id, 'xref': t, 'label': label, 'definition': definition})

pandas.DataFrame(data).to_csv(fn, sep="\t")
# Placeholder module-level inputs, all empty strings for now.
# run() declares parameters with the same names, so these globals are not
# read inside it.
source_ontology = ""  # e.g. omim
sssom_map = ""  # e.g. mondo.sssom.tsv
min_id = ""
termlist_mondo = ""


def run(source_ontology = '', sssom_map = '', min_id = '', termlist_mondo = '', fn = ''):
    """Tabulate the source-ontology terms that qualify for migration.

    A term qualifies when it has no mapping of its own (it is absent from the
    ``object_id`` column), every one of its direct parents is mapped, and at
    least one parent mapping is ``skos:exactMatch`` or ``skos:narrowMatch``.

    NOTE(review): this is still a sketch. ``oaklib.get_direct_parents``,
    ``oaklib.get_label``, ``oaklib.get_definition`` and
    ``determine_next_available_mondo_id`` are not defined in this module, and
    the module-level import reads ``oakliblib`` rather than ``oaklib`` --
    confirm the intended names before relying on the non-empty code path.

    Args:
        source_ontology: iterable of term identifiers (e.g. the terms of omim).
        sssom_map: mapping table indexed by column name; the code reads the
            ``object_id``, ``subject_id`` and ``predicate_id`` columns.
            Presumably a pandas DataFrame loaded from mondo.sssom.tsv -- confirm.
        min_id: passed through to ``determine_next_available_mondo_id``.
        termlist_mondo: passed through to ``determine_next_available_mondo_id``.
        fn: output path for the tab-separated table. Nothing is written when
            it is empty.

    Returns:
        pandas.DataFrame with the columns ``mondo_id``, ``xref``, ``label`` and
        ``definition``; it has zero rows when no term qualifies.
    """
    columns = ['mondo_id', 'xref', 'label', 'definition']
    accepted_predicates = ('skos:exactMatch', 'skos:narrowMatch')

    #Outputs:
    data = []

    terms = list(source_ontology)
    if terms:
        # Test membership against the column *values*: `x in series` checks the
        # index labels of a pandas Series, not its contents.
        mapped_objects = set(sssom_map['object_id'])

        for t in terms:
            if t in mapped_objects:
                # Already mapped: nothing to migrate.
                continue

            parents = []
            migrate = True
            for p in oaklib.get_direct_parents(t):
                if p not in mapped_objects:
                    # One unmapped parent disqualifies the term.
                    migrate = False
                    break
                parent_rows = sssom_map[sssom_map['object_id'] == p]
                usable = parent_rows[parent_rows['predicate_id'].isin(accepted_predicates)]
                # The parent is mapped and the mapping is exact or narrower.
                # Any other predicate is fine too: keep looking at other parents.
                parents.extend(usable['subject_id'])

            if migrate and parents:
                # Starting from min_id, then counting up and checking that the
                # ID does not already exist.
                # NOTE(review): called with the same arguments for every term;
                # confirm the helper does not hand out the same ID twice.
                next_mondo_id = determine_next_available_mondo_id(min_id, termlist_mondo)
                label = oaklib.get_label(t)
                definition = oaklib.get_definition(t)
                data.append({'mondo_id': next_mondo_id, 'xref': t, 'label': label, 'definition': definition})

    # Fixing the column order keeps the header row even when no term qualifies.
    table = pandas.DataFrame(data, columns=columns)
    if fn:
        table.to_csv(fn, sep="\t")
    return table


# Script entry point: calls run() with its default arguments.
# Command-line parsing is still listed as a TODO in the module docstring.
if __name__ == "__main__":
    run()

0 comments on commit 3e35822

Please sign in to comment.