Skip to content

Commit

Permalink
fix: support alternative atom names within connect_via_residue_names
Browse files Browse the repository at this point in the history
  • Loading branch information
nscorley committed Dec 10, 2024
1 parent 3a7437d commit c6acc78
Show file tree
Hide file tree
Showing 4 changed files with 15,353 additions and 1 deletion.
36 changes: 35 additions & 1 deletion src/biotite/structure/bonds.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1620,20 +1620,22 @@ def connect_via_residue_names(atoms, bint inter_residue=True,
"""
from .info.bonds import bonds_in_residue
from .residues import get_residue_starts
from .info.ccd import get_from_ccd

cdef list bonds = []
cdef int res_i
cdef int i, j
cdef int curr_start_i, next_start_i
cdef np.ndarray atom_names = atoms.atom_name
cdef np.ndarray atom_names_in_res
cdef np.ndarray std_atom_ids
cdef np.ndarray res_names = atoms.res_name
cdef str atom_name1, atom_name2
cdef int64[:] atom_indices1, atom_indices2
cdef dict bond_dict_for_res

residue_starts = get_residue_starts(atoms, add_exclusive_stop=True)
# Omit exclsive stop in 'residue_starts'
# Omit exclusive stop in 'residue_starts'
for res_i in range(len(residue_starts)-1):
curr_start_i = residue_starts[res_i]
next_start_i = residue_starts[res_i+1]
Expand All @@ -1646,6 +1648,38 @@ def connect_via_residue_names(atoms, bint inter_residue=True,
)

atom_names_in_res = atom_names[curr_start_i : next_start_i]

# Check if we should use alternative atom names
std_atom_ids = get_from_ccd(
"chem_comp_atom",
res_names[curr_start_i],
"atom_id"
)
if (atom_names_in_res is not None and \
std_atom_ids is not None and \
not set(atom_names_in_res).issubset(std_atom_ids)):
# We do not assume that the order of atoms within
# atom_names_in_res matches that of the CCD
alt_atom_ids = get_from_ccd(
"chem_comp_atom",
res_names[curr_start_i],
"alt_atom_id"
)
if set(atom_names_in_res).issubset(alt_atom_ids):
# Standardize atom IDs
mapping = dict(zip(alt_atom_ids, std_atom_ids))
mapped_atom_names_in_res = np.vectorize(
mapping.get
)(atom_names_in_res)
atom_names_in_res = mapped_atom_names_in_res

# Uncommenting the line below would overwrite the atom names of the input
# in-place and thus enforce standardized atom names (which may be unexpected behavior).
# TODO: Is that the desired behavior?
# atoms.atom_name[curr_start_i : next_start_i] = atom_names_in_res

# TODO: How to handle cases that do not fit either mapping?

for (atom_name1, atom_name2), bond_type in bond_dict_for_res.items():
atom_indices1 = np.where(atom_names_in_res == atom_name1)[0] \
.astype(np.int64, copy=False)
Expand Down
Loading

0 comments on commit c6acc78

Please sign in to comment.