Skip to content

Commit

Permalink
Update to work with latest python-modelcif
Browse files Browse the repository at this point in the history
We now need to provide the UniProt sequence
of the target and give an explicit alignment
between that sequence and the model. Doing so
should quiet the warnings from the latest
python-modelcif and populate the struct_ref
table in the mmCIF output.
  • Loading branch information
benmwebb committed Sep 30, 2024
1 parent 195ebb2 commit fec35ba
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 17 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ This should preserve all information in the PDB file.
This utility requires a local mirror of PDB in compressed mmCIF format.
Use the `-r` command line option to point to the location of this mirror.
It also needs the [python-modelcif](https://github.com/ihmwg/python-modelcif)
library, to read the mmCIF files for any template structures and to write
the final mmCIF or BinaryCIF ModBase file.
library, version 1.1 or later, to read the mmCIF files for any template
structures and to write the final mmCIF or BinaryCIF ModBase file.
17 changes: 12 additions & 5 deletions modbase_pdb_to_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,8 @@ def get_system(self, align):
target_e.description = "Target and template"
else:
target_e = modelcif.Entity(tgt_primary, description="Target")
target_e.references.extend(self.get_target_refs(tgtbeg, tgtend))
target_e.references.extend(self.get_target_refs(tgt_primary,
tgtbeg, tgtend))
chain_id = self.chain_id.strip() or 'A'
asym = modelcif.AsymUnit(target_e, details='Model subunit',
id=chain_id, auth_seq_id_map=tgtbeg-1)
Expand Down Expand Up @@ -352,16 +353,22 @@ class OurAlignment(modelcif.alignment.Global,

return s

def get_target_refs(self, tgtbeg, tgtend):
def get_target_refs(self, tgtprimary, tgtbeg, tgtend):
refmap = {'UniProt': modelcif.reference.UniProt,
'RefSeq': RefSeq,
'PlasmoDB': PlasmoDB}
for db in self.seqdb:
cls = refmap.get(db.name)
if cls:
yield cls(code=db.code, accession=db.accession,
align_begin=tgtbeg, align_end=tgtend,
isoform=ihm.unknown)
# We only know the modeled sequence, not the full database
# sequence, so pad it with gaps as necessary
r = cls(code=db.code, accession=db.accession,
isoform=ihm.unknown,
sequence="-" * (tgtbeg - 1) + "".join(tgtprimary))
aln = modelcif.reference.Alignment(db_begin=tgtbeg,
db_end=tgtend)
r.alignments.append(aln)
yield r

def get_model_class(self, asym, atoms):
class MyModel(modelcif.model.HomologyModel):
Expand Down
46 changes: 44 additions & 2 deletions test/input/output.cif
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ _struct.entry_id model_66b8fbc891f519c1ba8d8ad2e62c6caa
_struct.pdbx_model_details .
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Model of S54091 hypothetical protein YPR070w - yeast (Saccha'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/ba728c4/base/mmcif_ma-core.dic
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.5
_audit_conform.dict_version 1.4.6
_database_2.database_code 66b8fbc891f519c1ba8d8ad2e62c6caa
_database_2.database_id MODBASE
#
Expand Down Expand Up @@ -126,6 +126,48 @@ _entity.details
#
#
loop_
_struct_ref.id
_struct_ref.entity_id
_struct_ref.db_name
_struct_ref.db_code
_struct_ref.pdbx_db_accession
_struct_ref.pdbx_align_begin
_struct_ref.pdbx_seq_one_letter_code
_struct_ref.details
1 1 UNP B5VTL7_YEAS6 B5VTL7 9
;ETLDSMIELFKDYKPGSITLENITRLCQTLGLESFTEELSNELSRLSTASKIIVIDVDYNKKQDRIQDVK
LVLASNFDNFDYFNQRDGEHEKSNILLNSLTKYPDLKAFHNNLKFLYLLDAYSHIESDSTSHNNGSSDKS
LDSSNASFNNQGKLDLFKYFTELSHYIRQCFQDNCCDFKVRTNLNDKFGIYILTQGINGKEVPLAKIYLE
ENKSDSQYRFYEYIYSQETKSWINESAENFSNGISLVMEIVANAKESNYTDLIWFPEDFISPELIIDKVT
CSSNSSSSPPIIDLFSNNNYNSRIQLMNDFTTKLINIKKFDISNDNLDLISEILKWVQWSRIVLQNVFKL
VSTPSSNSNSSELEPDYQAPFSTSTKDKNSSTSNTEPIPRSNRHGSVVEASRRRRSSTNKSKRPSITEAM
MLKEEGLQQFNLHEILSEPAIEEE
;
.
2 1 Other NP_015395.1 NP_015395 9
;ETLDSMIELFKDYKPGSITLENITRLCQTLGLESFTEELSNELSRLSTASKIIVIDVDYNKKQDRIQDVK
LVLASNFDNFDYFNQRDGEHEKSNILLNSLTKYPDLKAFHNNLKFLYLLDAYSHIESDSTSHNNGSSDKS
LDSSNASFNNQGKLDLFKYFTELSHYIRQCFQDNCCDFKVRTNLNDKFGIYILTQGINGKEVPLAKIYLE
ENKSDSQYRFYEYIYSQETKSWINESAENFSNGISLVMEIVANAKESNYTDLIWFPEDFISPELIIDKVT
CSSNSSSSPPIIDLFSNNNYNSRIQLMNDFTTKLINIKKFDISNDNLDLISEILKWVQWSRIVLQNVFKL
VSTPSSNSNSSELEPDYQAPFSTSTKDKNSSTSNTEPIPRSNRHGSVVEASRRRRSSTNKSKRPSITEAM
MLKEEGLQQFNLHEILSEPAIEEE
;
.
#
#
loop_
_struct_ref_seq.align_id
_struct_ref_seq.ref_id
_struct_ref_seq.seq_align_beg
_struct_ref_seq.seq_align_end
_struct_ref_seq.db_align_beg
_struct_ref_seq.db_align_end
1 1 1 444 9 452
2 2 1 444 9 452
#
#
loop_
_ma_target_ref_db_details.target_entity_id
_ma_target_ref_db_details.db_name
_ma_target_ref_db_details.db_name_other_details
Expand Down
46 changes: 44 additions & 2 deletions test/input/output_with_align.cif
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ _struct.entry_id model_66b8fbc891f519c1ba8d8ad2e62c6caa
_struct.pdbx_model_details .
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Model of S54091 hypothetical protein YPR070w - yeast (Saccha'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/ba728c4/base/mmcif_ma-core.dic
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.5
_audit_conform.dict_version 1.4.6
_database_2.database_code 66b8fbc891f519c1ba8d8ad2e62c6caa
_database_2.database_id MODBASE
#
Expand Down Expand Up @@ -129,6 +129,48 @@ _entity.details
#
#
loop_
_struct_ref.id
_struct_ref.entity_id
_struct_ref.db_name
_struct_ref.db_code
_struct_ref.pdbx_db_accession
_struct_ref.pdbx_align_begin
_struct_ref.pdbx_seq_one_letter_code
_struct_ref.details
1 1 UNP B5VTL7_YEAS6 B5VTL7 9
;ETLDSMIELFKDYKPGSITLENITRLCQTLGLESFTEELSNELSRLSTASKIIVIDVDYNKKQDRIQDVK
LVLASNFDNFDYFNQRDGEHEKSNILLNSLTKYPDLKAFHNNLKFLYLLDAYSHIESDSTSHNNGSSDKS
LDSSNASFNNQGKLDLFKYFTELSHYIRQCFQDNCCDFKVRTNLNDKFGIYILTQGINGKEVPLAKIYLE
ENKSDSQYRFYEYIYSQETKSWINESAENFSNGISLVMEIVANAKESNYTDLIWFPEDFISPELIIDKVT
CSSNSSSSPPIIDLFSNNNYNSRIQLMNDFTTKLINIKKFDISNDNLDLISEILKWVQWSRIVLQNVFKL
VSTPSSNSNSSELEPDYQAPFSTSTKDKNSSTSNTEPIPRSNRHGSVVEASRRRRSSTNKSKRPSITEAM
MLKEEGLQQFNLHEILSEPAIEEE
;
.
2 1 Other NP_015395.1 NP_015395 9
;ETLDSMIELFKDYKPGSITLENITRLCQTLGLESFTEELSNELSRLSTASKIIVIDVDYNKKQDRIQDVK
LVLASNFDNFDYFNQRDGEHEKSNILLNSLTKYPDLKAFHNNLKFLYLLDAYSHIESDSTSHNNGSSDKS
LDSSNASFNNQGKLDLFKYFTELSHYIRQCFQDNCCDFKVRTNLNDKFGIYILTQGINGKEVPLAKIYLE
ENKSDSQYRFYEYIYSQETKSWINESAENFSNGISLVMEIVANAKESNYTDLIWFPEDFISPELIIDKVT
CSSNSSSSPPIIDLFSNNNYNSRIQLMNDFTTKLINIKKFDISNDNLDLISEILKWVQWSRIVLQNVFKL
VSTPSSNSNSSELEPDYQAPFSTSTKDKNSSTSNTEPIPRSNRHGSVVEASRRRRSSTNKSKRPSITEAM
MLKEEGLQQFNLHEILSEPAIEEE
;
.
#
#
loop_
_struct_ref_seq.align_id
_struct_ref_seq.ref_id
_struct_ref_seq.seq_align_beg
_struct_ref_seq.seq_align_end
_struct_ref_seq.db_align_beg
_struct_ref_seq.db_align_end
1 1 1 444 9 452
2 2 1 444 9 452
#
#
loop_
_ma_target_ref_db_details.target_entity_id
_ma_target_ref_db_details.db_name
_ma_target_ref_db_details.db_name_other_details
Expand Down
28 changes: 26 additions & 2 deletions test/input/test_multi_expdta.cif
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ _struct.entry_id model_66b8fbc891f519c1ba8d8ad2e62c6caa
_struct.pdbx_model_details .
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Model of S54091 hypothetical protein YPR070w - yeast (Saccharomyces cerevisiae)'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/ba728c4/base/mmcif_ma-core.dic
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.5
_audit_conform.dict_version 1.4.6
_database_2.database_code 66b8fbc891f519c1ba8d8ad2e62c6caa
_database_2.database_id MODBASE
#
Expand Down Expand Up @@ -108,6 +108,30 @@ _entity.details
#
#
loop_
_struct_ref.id
_struct_ref.entity_id
_struct_ref.db_name
_struct_ref.db_code
_struct_ref.pdbx_db_accession
_struct_ref.pdbx_align_begin
_struct_ref.pdbx_seq_one_letter_code
_struct_ref.details
1 1 UNP B5VTL7_YEAS6 B5VTL7 9 ET .
2 1 Other NP_015395.1 NP_015395 9 ET .
#
#
loop_
_struct_ref_seq.align_id
_struct_ref_seq.ref_id
_struct_ref_seq.seq_align_beg
_struct_ref_seq.seq_align_end
_struct_ref_seq.db_align_beg
_struct_ref_seq.db_align_end
1 1 1 2 9 10
2 2 1 2 9 10
#
#
loop_
_ma_target_ref_db_details.target_entity_id
_ma_target_ref_db_details.db_name
_ma_target_ref_db_details.db_name_other_details
Expand Down
28 changes: 26 additions & 2 deletions test/input/test_no_chain.cif
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ _struct.entry_id model_66b8fbc891f519c1ba8d8ad2e62c6caa
_struct.pdbx_model_details .
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Model of S54091 hypothetical protein YPR070w - yeast (Saccha'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/ba728c4/base/mmcif_ma-core.dic
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.5
_audit_conform.dict_version 1.4.6
_database_2.database_code 66b8fbc891f519c1ba8d8ad2e62c6caa
_database_2.database_id MODBASE
#
Expand Down Expand Up @@ -108,6 +108,30 @@ _entity.details
#
#
loop_
_struct_ref.id
_struct_ref.entity_id
_struct_ref.db_name
_struct_ref.db_code
_struct_ref.pdbx_db_accession
_struct_ref.pdbx_align_begin
_struct_ref.pdbx_seq_one_letter_code
_struct_ref.details
1 1 UNP B5VTL7_YEAS6 B5VTL7 9 ET .
2 1 Other NP_015395.1 NP_015395 9 ET .
#
#
loop_
_struct_ref_seq.align_id
_struct_ref_seq.ref_id
_struct_ref_seq.seq_align_beg
_struct_ref_seq.seq_align_end
_struct_ref_seq.db_align_beg
_struct_ref_seq.db_align_end
1 1 1 2 9 10
2 2 1 2 9 10
#
#
loop_
_ma_target_ref_db_details.target_entity_id
_ma_target_ref_db_details.db_name
_ma_target_ref_db_details.db_name_other_details
Expand Down
4 changes: 2 additions & 2 deletions test/input/test_old_model.cif
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ _struct.entry_id model_9abae378d2aa2c1a5b21c65c98564dc0
_struct.pdbx_model_details .
_struct.pdbx_structure_determination_methodology computational
_struct.title 'Model of'
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/ba728c4/base/mmcif_ma-core.dic
_audit_conform.dict_location https://raw.githubusercontent.com/ihmwg/ModelCIF/d18ba38/base/mmcif_ma-core.dic
_audit_conform.dict_name mmcif_ma.dic
_audit_conform.dict_version 1.4.5
_audit_conform.dict_version 1.4.6
_database_2.database_code 9abae378d2aa2c1a5b21c65c98564dc0
_database_2.database_id MODBASE
#
Expand Down

0 comments on commit fec35ba

Please sign in to comment.