Skip to content

Commit

Permalink
Merge branch 'develop' into fix/mito_nuc_trans
Browse files Browse the repository at this point in the history
  • Loading branch information
haowang-bioinfo committed Oct 19, 2023
2 parents 402ecb6 + 4f9ec7a commit 0526913
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 156 deletions.
18 changes: 18 additions & 0 deletions code/test/sanityCheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pandas as pd
import cobra
from collections import Counter


def get_column_from_tsv(tsv_file, column_id, to_list=True):
Expand Down Expand Up @@ -106,11 +107,28 @@ def checkUnusedEntities(model, entity_type):
assert len(unused_entities) == 0, f"Found unused {entity_type}: {unused_entities}"


def checkDupRxn(model):
    """
    Check for duplicate reactions in the model.

    Two reactions are treated as duplicates when the equation strings
    returned by ``rxn.build_reaction_string(use_metabolite_names=False)``
    are exactly equal.

    Parameters
    ----------
    model
        Object exposing an iterable ``reactions`` collection; every reaction
        must provide ``id`` and ``build_reaction_string``.

    Raises
    ------
    AssertionError
        If at least one equation string is shared by two or more reactions.
        Before raising, the IDs of all affected reactions are printed in
        model order.
    """

    # Build each equation once; the list is reused for counting and for
    # mapping duplicated equations back to reaction IDs.
    reaction_equations = [
        rxn.build_reaction_string(use_metabolite_names=False)
        for rxn in model.reactions
    ]

    # A set gives O(1) membership tests in the ID look-up below.
    duplicate_reactions = {
        equation
        for equation, count in Counter(reaction_equations).items()
        if count > 1
    }

    # Pair reactions with their equations directly instead of re-indexing
    # the model; model order is preserved in the report.
    dup_list = [
        rxn.id
        for rxn, equation in zip(model.reactions, reaction_equations)
        if equation in duplicate_reactions
    ]

    if duplicate_reactions:
        output = f"The following {len(dup_list)} reactions are duplicates, please check: " + ';'.join(dup_list)
        print(output)

    assert len(duplicate_reactions) == 0, "Found duplicated reactions!"


if __name__ == "__main__":
    # Load the model file once; every check below works on these objects.
    rxns, mets, genes, model = load_yml("model/Human-GEM.yml")

    # Annotation checks, run in the fixed order reactions, metabolites, genes.
    annotation_checks = (
        (checkRxnAnnotation, rxns),
        (checkMetAnnotation, mets),
        (checkGeneAnnotation, genes),
    )
    for run_check, entries in annotation_checks:
        run_check(entries)

    # Unused-entity checks on the model, one per entity type.
    for entity_type in ("metabolites", "genes"):
        checkUnusedEntities(model, entity_type)

    checkDupRxn(model)

    # Reached only if none of the checks above raised.
    print("All checks have passed.")
1 change: 1 addition & 0 deletions data/deprecatedIdentifiers/deprecatedMetabolites.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,7 @@ mets metsNoComp metBiGGID metKEGGID metHMDBID metChEBIID metPubChemID metLipidMa
"MAM01445m" "MAM01445" "cholate" "C00695" "HMDB0000619" "CHEBI:16359" "221493" "LMST04010001" "" "HC00502" "cholate" "MNXM450" "m01445m" "m01445m"
"MAM01514m" "MAM01514" "cholcoa" "C01794" "HMDB0001374" "CHEBI:15519" "439573" "" "" "HC00844" "cholcoa" "MNXM162468;MNXM431" "m01514m" "m01514m"
"MAM00759c" "MAM00759" "" "C13712" "" "" "" "LMST02030130" "" "" "M00759" "MNXM3494" "m00759c" "m00759c"
"MAM02397c" "MAM02397" "" "C16239" "" "" "" "" "" "" "M02397" "MNXM19093" "m02397c" "m02397c"
"MAM01268c" "MAM01268" "" "" "" "" "" "PROTEIN" "" "" "M01268" "" "m01268c" "m01268c"
"MAM01268m" "MAM01268" "" "" "" "" "" "PROTEIN" "" "" "M01268" "" "m01268m" "m01268m"
"MAM01268n" "MAM01268" "" "" "" "" "" "PROTEIN" "" "" "M01268" "" "m01268n" "m01268n"
Expand Down
6 changes: 6 additions & 0 deletions data/deprecatedIdentifiers/deprecatedReactions.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,8 @@ rxns rxnKEGGID rxnBiGGID rxnEHMNID rxnHepatoNET1ID rxnREACTOMEID rxnRecon3DID rx
"MAR00096" "" "BUP2" "" "" "" "BUP2" "MNXR96346" "" "" "" 0 "" "" "BUP2"
"MAR02027" "" "r0193" "" "" "" "r0193" "MNXR105313" "" "" "" 0 "" "" "r0193"
"MAR02006" "" "CPS_m" "" "" "" "r0034" "" "" "" "" 0 "" "" "r0034"
"MAR06400" "R07768" "" "R07768C" "" "" "HMR_6400" "MNXR111356" "HMR_6400" "RCR11944" "" 0 "" "" "HMR_6400"
"MAR06401" "R07769" "" "R07769C" "" "" "HMR_6401" "MNXR111357" "HMR_6401" "RCR11945" "" 0 "" "" "HMR_6401"
"MAR03940" "R01795" "PHETHPTOX" "R01795C" "r0399" "" "r0399" "MNXR102632;MNXR107174" "HMR_3940" "RCR11388" "" 0 "" "RHEA:30787" "HMR_3940"
"MAR07785" "" "ATPtx" "" "" "" "ATPtx" "MNXR96140" "HMR_7785" "RCR20040" "" 0 "" "" "HMR_7785"
"MAR07775" "" "NADHtpu" "" "" "" "NADHtpu;NADtpu" "MNXR101881" "HMR_7775" "RCR20024" "" 0 "" "" "HMR_7775"
Expand All @@ -247,3 +249,7 @@ rxns rxnKEGGID rxnBiGGID rxnEHMNID rxnHepatoNET1ID rxnREACTOMEID rxnRecon3DID rx
"MAR07926" "" "DGTPtm" "" "" "" "DGTPtm" "MNXR97327" "" "" "" 0 "" "" "DGTPtm"
"MAR01483" "" "LIPOti" "" "" "" "LIPOti" "MNXR101093" "" "" "" 0 "" "" "LIPOti"
"MAR06406" "" "LIPOti" "RT1089" "" "" "" "MNXR101093" "HMR_6406" "RCR41534" "" 0 "" "" "HMR_6406"
"MAR00478" "R00844" "" "" "" "R-HSA-75889" "HMR_0478" "MNXR99876" "HMR_0478" "RCR14575" "" 0 "RHEA:11098" "RHEA:11096" "HMR_0478"
"MAR00482" "R00848;R00849" "G3PD2m" "R00848C" "r0205" "" "HMR_0482;r0205" "MNXR99875;MNXR106713" "HMR_0482;HMR_0483" "RCR14578;RCR21050" "" 0 "" "RHEA:31283;RHEA:18977" "HMR_0482;HMR_0483;MAR00483"
"MAR20012" "R02123" "HMR_6647;RADH;R_HMR_6647;R_RADH" "" "" "" "" "MNXR146215" "" "" "" 0 "RHEA:42080" "" ""
"MAR20089" "R02701" "" "" "" "" "" "MNXR151808" "" "" "" 0 "RHEA:18533" "" ""
28 changes: 14 additions & 14 deletions data/metabolicTasks/metabolicTasks_CellfieConsensus.txt
Original file line number Diff line number Diff line change
Expand Up @@ -220,12 +220,12 @@
36 Synthesis of lactose glucose[g] 1 1 lactose[g] 1 1
UDP-galactose[g] 1 1 UDP[g] 1 1
H+[g] 1 1
37 Glycogen biosynthesis NA[c] 1 1 glycogenin G4G7[c] 1 1
UDP-glucose[c] 11 H+[c] 11
UDP[c] 11
38 Glycogen degradation glycogenin G4G7[c] 1 1 glucose[c] 8
Pi[c] 3 NA[c] 1 1
H2O[c] 8 glucose-1-phosphate[c] 3
# 37 Glycogen biosynthesis NA[c] 1 1 glycogenin G4G7[c] 1 1 The metabolite NA[c] in this task is conflicting with HumanGEM
# UDP-glucose[c] 11 H+[c] 11
# UDP[c] 11
# 38 Glycogen degradation glycogenin G4G7[c] 1 1 glucose[c] 8 The metabolite NA[c] in this task is conflicting with HumanGEM
# Pi[c] 3 NA[c] 1 1
# H2O[c] 8 glucose-1-phosphate[c] 3
39 Fructose degradation (to glucose-3-phosphate) fructose[c] 1 1 D-glyceraldehyde 3-phosphate[c] 2 2
ATP[c] 2 2 ADP[c] 2 2
H+[c] 2 2
Expand Down Expand Up @@ -1130,14 +1130,14 @@
180 Pyridoxal-phosphate synthesis pyridoxal[c] 1 1 pyridoxal-phosphate[c] 1 1
ATP[c] 1 1 ADP[c] 1 1
H+[c] 1 1
181 Synthesis of bilirubin heme[c] 1 1 bilirubin[c] 1 1
NADPH[c] 0 4 H2O[c] 0 3
H+[c] 0 6 Fe2+[c] 0 1
O2[c] 0 3 Fe3+[c] 0 1
FADH2[c] 0 1 NADP+[c] 0 4
FAD[c] 0 1
CO[c] 1 1
NA[c] 0 1
# 181 Synthesis of bilirubin heme[c] 1 1 bilirubin[c] 1 1 The metabolite NA[c] in this task is conflicting with HumanGEM
# NADPH[c] 0 4 H2O[c] 0 3
# H+[c] 0 6 Fe2+[c] 0 1
# O2[c] 0 3 Fe3+[c] 0 1
# FADH2[c] 0 1 NADP+[c] 0 4
# FAD[c] 0 1
# CO[c] 1 1
# NA[c] 0 1
182 Heme synthesis glycine[m] 16 16 heme[m] 2 2
succinyl-CoA[m] 16 18 H2O[m] 26 26
Fe2+[m] 2 2 CoA[m] 16 18
Expand Down
130 changes: 2 additions & 128 deletions model/Human-GEM.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32657,14 +32657,6 @@
- charge: -1
- inchis: ""
- metFrom: "HMRdatabase"
- !!omap
- id: "MAM02397c"
- name: "lipoyl-[ACP]"
- compartment: "c"
- formula: "C19H33N2O8PRS3"
- charge: -1
- inchis: ""
- metFrom: "HMRdatabase"
- !!omap
- id: "MAM02398c"
- name: "lipoyl-AMP"
Expand Down Expand Up @@ -100713,49 +100705,6 @@
- subsystem:
- "Lipoic acid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR06400"
- name: ""
- metabolites: !!omap
- MAM01098c: 2
- MAM02039c: -5
- MAM02040c: 6
- MAM02397c: 1
- MAM02471c: 2
- MAM02554c: 7
- MAM02555c: -7
- MAM02643c: -1
- MAM02681c: 2
- MAM02682c: -2
- MAM02877c: -2
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000121897 or ENSG00000175536"
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "2.8.1.8;2.3.1.181"
- references: ""
- subsystem:
- "Lipoic acid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR06401"
- name: "lipoyl-[acp]:protein N6-lipoyltransferase"
- metabolites: !!omap
- MAM00184c: 1
- MAM00209c: 1
- MAM01350c: -1
- MAM02397c: -1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000175536"
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "2.3.1.181"
- references: ""
- subsystem:
- "Lipoic acid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR06402"
- name: "octanoyl-[acp]:protein N6-octanoyltransferase"
Expand Down Expand Up @@ -133497,25 +133446,6 @@
- subsystem:
- "Glycerophospholipid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR00478"
- name: "sn-Glycerol-3-phosphate:NADP+ 2-oxidoreductase"
- metabolites: !!omap
- MAM01690c: -1
- MAM02039c: -1
- MAM02554c: 1
- MAM02555c: -1
- MAM02914c: 1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000167588"
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "1.1.1.94"
- references: "PMID:16460752"
- subsystem:
- "Glycerophospholipid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR00479"
- name: "sn-Glycerol-3-phosphate:NAD+ 2-oxidoreductase"
Expand All @@ -133531,7 +133461,7 @@
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "1.1.1.8"
- references: "PMID:17045662;PMID:7772607"
- references: "PMID:16460752;PMID:17045662;PMID:7772607"
- subsystem:
- "Glycerophospholipid metabolism"
- confidence_score: 0
Expand All @@ -133554,24 +133484,6 @@
- subsystem:
- "Glycerophospholipid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR00482"
- name: "sn-Glycerol-3-phosphate:(acceptor) 2-oxidoreductase"
- metabolites: !!omap
- MAM01690c: 1
- MAM01802c: -1
- MAM01803c: 1
- MAM02914c: -1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000115159"
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "1.1.5.3"
- references: "PMID:11955283;PMID:2115809;PMID:2923620;PMID:3338458;PMID:340460;PMID:8401296;PMID:8549872;PMID:8579375;PMID:9171333;PMID:9244403;PMID:9559543"
- subsystem:
- "Glycerophospholipid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR00484"
- name: "CDPglycerol phosphoglycerohydrolase"
Expand Down Expand Up @@ -155269,7 +155181,7 @@
- MAM02833c: 1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000128918 or ENSG00000165092"
- gene_reaction_rule: "ENSG00000128918 or ENSG00000165092 or ENSG00000184254"
- rxnNotes: ""
- rxnFrom: "HMRdatabase"
- eccodes: "1.2.1.36"
Expand Down Expand Up @@ -302678,26 +302590,6 @@
- subsystem:
- "Retinol metabolism"
- confidence_score: 0
- !!omap
- id: "MAR20012"
- name: "MAR20012"
- metabolites: !!omap
- MAM02039c: 2
- MAM02040c: -1
- MAM02552c: -1
- MAM02553c: 1
- MAM02832c: -1
- MAM02833c: 1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000184254"
- rxnNotes: ""
- rxnFrom: ""
- eccodes: "1.2.1.36"
- references: ""
- subsystem:
- "Retinol metabolism"
- confidence_score: 0
- !!omap
- id: "MAR20013"
- name: "MAR20013"
Expand Down Expand Up @@ -304045,24 +303937,6 @@
- subsystem:
- "Fatty acid metabolism"
- confidence_score: 0
- !!omap
- id: "MAR20089"
- name: "MAR20089"
- metabolites: !!omap
- MAM01107c: -1
- MAM01596c: 1
- MAM02039c: -1
- MAM02897c: 1
- lower_bound: 0
- upper_bound: 1000
- gene_reaction_rule: "ENSG00000132437"
- rxnNotes: ""
- rxnFrom: ""
- eccodes: "4.1.1.28"
- references: ""
- subsystem:
- "Tryptophan metabolism"
- confidence_score: 0
- !!omap
- id: "MAR20090"
- name: "MAR20090"
Expand Down
1 change: 0 additions & 1 deletion model/metabolites.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -4080,7 +4080,6 @@ mets metsNoComp metBiGGID metKEGGID metHMDBID metChEBIID metPubChemID metLipidMa
"MAM02396n" "MAM02396" "" "C06315" "HMDB0005082" "CHEBI:6499" "5280915" "LMFA03040002" "" "" "C06315" "MNXM12130" "m02396n" "m02396n"
"MAM02396x" "MAM02396" "" "C06315" "HMDB0005082" "CHEBI:6499" "5280915" "LMFA03040002" "" "" "C06315" "MNXM12130" "m02396p" "m02396p"
"MAM02396r" "MAM02396" "" "C06315" "HMDB0005082" "CHEBI:6499" "5280915" "LMFA03040002" "" "" "C06315" "MNXM12130" "m02396r" "m02396r"
"MAM02397c" "MAM02397" "" "C16239" "" "" "" "" "" "" "M02397" "MNXM19093" "m02397c" "m02397c"
"MAM02398c" "MAM02398" "" "C16238" "" "" "23724672" "" "CE5656" "" "CE5656" "MNXM2392" "m02398c" "m02398c"
"MAM02399c" "MAM02399" "" "" "" "" "53481572" "" "CE2102" "" "CE2102" "MNXM60228" "m02399c" "m02399c"
"MAM02401c" "MAM02401" "" "C03127" "" "CHEBI:29160" "" "" "" "" "M02401" "MNXM89832" "m02401c" "m02401c"
Expand Down
Loading

0 comments on commit 0526913

Please sign in to comment.