Skip to content

Commit

Permalink
Adding lexical matches to uPheno pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
matentzn committed Oct 23, 2024
1 parent 60628d1 commit c12d11d
Show file tree
Hide file tree
Showing 6 changed files with 12,350 additions and 18 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,6 @@ src/ontology/upheno.db
src/ontology/semsim/upheno-0.4.semsimian.tsv
src/ontology/imports/all_phenotype_terms.txt
src/ontology/config/prefixes.csv
src/ontology/upheno-test.db
src/ontology/upheno-test.owl
upheno-test.owl
12,215 changes: 12,215 additions & 0 deletions src/mappings/upheno-lexical.sssom.tsv

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions src/ontology/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# More information: https://github.com/INCATools/ontology-development-kit/

# Fingerprint of the configuration file when this Makefile was last generated
CONFIG_HASH= 1e77a49593b43cb269ead22e0f03aa7e85284fb88e0e8b01cb02248f733fccd6
CONFIG_HASH= bd81b2a27db2a8712339e7c75ddfc54af797ede910a207485ad4179a01f54550


# ----------------------------------------
Expand Down Expand Up @@ -65,7 +65,7 @@ PATTERN_RELEASE_FILES= $(PATTERNDIR)/definitions.owl $(PATTERNDIR)/pattern.
MAPPINGDIR= ../mappings
MAPPING_TESTER= sssom validate
SSSOMPY= sssom
MAPPINGS= upheno-oba upheno-species-independent upheno-cross-species nbo-go uberon
MAPPINGS= upheno-oba upheno-species-independent upheno-lexical upheno-cross-species nbo-go uberon
MAPPING_RELEASE_FILES= $(foreach n,$(MAPPINGS), $(MAPPINGDIR)/$(n).sssom.tsv)


Expand Down Expand Up @@ -205,7 +205,7 @@ all_subsets: $(SUBSET_FILES)
# ----------------------------------------


MAPPINGS = upheno-oba upheno-species-independent upheno-cross-species nbo-go uberon
MAPPINGS = upheno-oba upheno-species-independent upheno-lexical upheno-cross-species nbo-go uberon

MAPPING_FILES = $(patsubst %, $(MAPPINGDIR)/%.sssom.tsv, $(MAPPINGS))

Expand Down Expand Up @@ -893,6 +893,10 @@ $(MAPPINGDIR)/upheno-oba.sssom.tsv:
$(MAPPINGDIR)/upheno-species-independent.sssom.tsv:
test -f $@

# This mapping set is declared with "maintenance: manual" in the ODK config, so this
# generated rule only checks that the file actually exists.
# NOTE(review): upheno.Makefile also defines a recipe for this same target (an
# "sssom filter" over the OAK lexmatch output), so the file is not hand-curated in
# practice - confirm which of the two rules takes effect.
$(MAPPINGDIR)/upheno-lexical.sssom.tsv:
test -f $@

# This mappingset is manually curated, so we only check that the file actually exists.
$(MAPPINGDIR)/upheno-cross-species.sssom.tsv:
test -f $@
Expand Down
100 changes: 100 additions & 0 deletions src/ontology/config/upheno-match-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
rules:
- description: default
postconditions:
predicate_id: skos:closeMatch
weight: 0.0

- description: exact to exact
preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
- skos:prefLabel
object_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
- skos:prefLabel
postconditions:
predicate_id: skos:exactMatch
weight: 2.0

- description: >-
label to label; note this is additive with the exact to exact rule,
so the score just represents an additional small boost
preconditions:
subject_match_field_one_of:
- rdfs:label
object_match_field_one_of:
- rdfs:label
postconditions:
predicate_id: skos:exactMatch
weight: 0.5
- description: xref match
preconditions:
subject_match_field_one_of:
# - oio:hasDbXref
- skos:exactMatch
object_match_field_one_of:
# - oio:hasDbXref
- skos:exactMatch
postconditions:
predicate_id: skos:exactMatch
weight: 4.0

# Exact synonym or label on the subject side matched against a broad synonym on the
# object side: the mapping is emitted as skos:broadMatch with weight 2.0.
- preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
object_match_field_one_of:
- oio:hasBroadSynonym
postconditions:
predicate_id: skos:broadMatch
weight: 2.0

# Exact synonym or label on the subject side matched against a narrow synonym on the
# object side: the mapping is emitted as skos:narrowMatch with weight 2.0.
- preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
object_match_field_one_of:
- oio:hasNarrowSynonym
postconditions:
predicate_id: skos:narrowMatch
weight: 2.0

- synonymizer:
    # Strips every "[...]" group from a label before matching.
    # The negated character class must exclude "]" (not ")"): otherwise the greedy
    # match runs from the first "[" to the LAST "]" in the label and also deletes
    # any text that sits between two separate bracketed groups.
    description: Remove box brackets bound info from the label.
    match: '\[[^\]]*\]'
    match_scope: "*"
    replacement: ""

- synonymizer:
    # The pattern must be "variant" to agree with the description; the "aberrant"
    # case is handled by its own synonymizer entry further down in this rule list.
    description: Remove variant from the label (for matching wbphenotype).
    match: 'variant'
    match_scope: "*"
    replacement: ""

- synonymizer:
description: Remove abnormally from the label.
match: 'abnormally'
match_scope: "*"
replacement: ""

- synonymizer:
description: Remove abnormal from the label.
match: 'abnormal'
match_scope: "*"
replacement: ""

- synonymizer:
description: Remove aberrant from the label (for matching dicty).
match: 'aberrant'
match_scope: "*"
replacement: ""

- synonymizer:
description: Replace "'s" by "s" in the label.
match: "[']s"
match_scope: "*"
replacement: "s"

2 changes: 2 additions & 0 deletions src/ontology/upheno-odk.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ sssom_mappingset_group:
maintenance: manual
- id: upheno-species-independent
maintenance: manual
- id: upheno-lexical
maintenance: manual
- id: upheno-cross-species
maintenance: manual
- id: nbo-go
Expand Down
38 changes: 23 additions & 15 deletions src/ontology/upheno.Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,34 @@

SSPOS = mp hp zp dpo wbphenotype xpo planp ddpheno fypo apo mgpo phipo

# Build a SemSQL SQLite database next to the OWL file that shares its stem.
# The previous database and the scratch files (.template.db, .template.db.tmp,
# <stem>-relation-graph.tsv.gz - presumably left behind by the semsql build) are
# removed before the build, and the scratch files again afterwards. The final
# check fails the rule if the database was not actually produced.
%.db: %.owl
	@rm -f $@ .template.db .template.db.tmp $*-relation-graph.tsv.gz
	RUST_BACKTRACE=full semsql make $@ -P config/prefixes.csv
	@rm -f .template.db .template.db.tmp $*-relation-graph.tsv.gz
	@test -f $@ || { echo "Error: File not found!"; exit 1; }

# Never let make delete a database it built as an intermediate file or on interrupt.
.PRECIOUS: %.db

###############################
#### Mappings and reports #####
###############################

# Run the phenotype-classes-labels SPARQL query against upheno.owl with ROBOT and
# write the result as CSV to the target.
$(TMPDIR)/upheno-species-lexical.csv: upheno.owl
$(ROBOT) query -f csv -i $< --query ../sparql/phenotype-classes-labels.sparql $@

# Run OAK lexmatch over the uPheno SQLite database and write the resulting
# mappings to the target SSSOM file.
# The match-rules file is listed as a prerequisite so that editing the rules
# re-triggers the lexical matching; upheno.db stays first so that $< still names
# the database, and the rules file is picked up as the second prerequisite.
$(TMPDIR)/upheno-species-lexical-oak.sssom.tsv: upheno.db config/upheno-match-rules.yaml
	runoak -i sqlite:$< lexmatch -R $(word 2,$^) -o $@

# Derive the uPheno lexical mapping set from the OAK lexmatch output, keeping only
# the mappings whose predicate is skos:exactMatch.
# Currently the OAK lexical match is the only input to upheno-lexical.sssom.tsv.
# Open question: should this instead be a merge of upheno-species-independent.sssom.tsv
# and the OAK lexical match?
$(MAPPINGDIR)/upheno-lexical.sssom.tsv: $(TMPDIR)/upheno-species-lexical-oak.sssom.tsv
sssom filter $< -o $@ --predicate_id skos:exactMatch

$(TMPDIR)/upheno-mapping-logical.csv: upheno.owl
$(ROBOT) query -f csv -i $< --query ../sparql/cross-species-mappings.sparql $@
#echo "SKIP $@"
Expand All @@ -18,8 +39,8 @@ $(REPORTDIR)/upheno-associated-entities.csv: upheno.owl
#$(ROBOT) query -i tmp/mat_upheno.owl -f csv --query ../sparql/phenotype_entity_associations.sparql $@
touch $@

$(MAPPINGDIR)/upheno-oba.sssom.tsv: upheno.owl
robot query -i $< --query ../sparql/pheno_trait.sparql $@
$(MAPPINGDIR)/upheno-oba.sssom.tsv: #upheno.owl
robot query -i upheno.owl --query ../sparql/pheno_trait.sparql $@
sed -i 's/[?]//g' $@
sed -i 's/<http:[/][/]purl[.]obolibrary[.]org[/]obo[/]/obo:/g' $@
sed -i 's/>//g' $@
Expand Down Expand Up @@ -56,19 +77,6 @@ $(MAPPINGDIR)/upheno-cross-species.sssom.tsv: $(TMPDIR)/upheno-species-lexical.c
# Convert any SSSOM TSV mapping set in the mappings directory into an OWL file
# with the same stem.
$(MAPPINGDIR)/%.sssom.owl: $(MAPPINGDIR)/%.sssom.tsv
sssom convert -i $< -O owl -o $@

%.db: %.owl
@rm -f $*.db
@rm -f .template.db
@rm -f .template.db.tmp
@rm -f $*-relation-graph.tsv.gz
RUST_BACKTRACE=full semsql make $*.db -P config/prefixes.csv
@rm -f .template.db
@rm -f .template.db.tmp
@rm -f $*-relation-graph.tsv.gz
@test -f $*.db || (echo "Error: File not found!" && exit 1)

.PRECIOUS: %.db

semsim/upheno-0.4.semsimian.tsv: upheno.db $(IMPORTDIR)/all_phenotype_terms.txt
runoak --stacktrace -vvv -i semsimian:sqlite:upheno.db similarity -p i \
--set1-file $(IMPORTDIR)/all_phenotype_terms.txt \
Expand Down

0 comments on commit c12d11d

Please sign in to comment.