Skip to content

Commit

Permalink
Moving Fletcher into installable form
Browse files Browse the repository at this point in the history
  • Loading branch information
glycojones committed Oct 7, 2024
1 parent 4dc8cc9 commit 6796cc5
Show file tree
Hide file tree
Showing 13 changed files with 7,165 additions and 0 deletions.
39 changes: 39 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Packaging metadata for the Fletcher distribution.
# The build uses the setuptools backend; setuptools-scm is required at build
# time and is configured further down to write a version file.
[build-system]
requires = ["setuptools", "setuptools-scm"]
build-backend = "setuptools.build_meta"

[project]
name = "fletcher_maximus"
# The version is not hard-coded here; presumably it is supplied by
# setuptools-scm (see [tool.setuptools_scm] below).
dynamic=["version"]
description = "A package for quickly spotting molecular geometric features in protein structural files."
readme = "README.md"
requires-python = ">=3.8"
# NOTE(review): only Python 3.8 is listed as a classifier although
# requires-python accepts any version from 3.8 upwards - confirm intent.
classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3.8",
]
authors = [
{ name = "Jon Agirre", email = "[email protected]" },
{ name = "Federico Sabbadin", email = "[email protected]" }
]
maintainers = [
{ name = "Jon Agirre", email = "[email protected]" },
]
# gemmi is pinned to one exact release.
dependencies = [
"gemmi == 0.6.6"
]

[tool.setuptools]
include-package-data = true

# setuptools-scm writes the resolved version into this file.
# NOTE(review): the path uses the package directory "fletcher", while the
# distribution name above is "fletcher_maximus".
[tool.setuptools_scm]
version_file = "src/fletcher/_version.py"

[project.urls]
Homepage = "https://github.com/glycojones/fletcher"
Issues = "https://github.com/glycojones/fletcher/issues"

# No custom pytest markers are registered yet.
[tool.pytest.ini_options]
markers = [
]
18 changes: 18 additions & 0 deletions src/fletcher/coot_scripting.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@


def create_script_file(filename="", list_of_hits=None):
    """Write a Coot Python script that lists the hits found by Fletcher.

    The script is written next to `filename`, using the part of the file's
    base name before its first dot plus a ``.py`` extension. It contains a
    call that loads `filename` and a call to ``interesting_things_gui`` with
    one entry per hit.

    Args:
        filename: Path of the structure file the hits refer to. It is also
            embedded verbatim in the generated script.
        list_of_hits: List of hits. Each hit is a list of dicts, and only the
            first dict of each hit is used: its 'name' and 'seqid' values
            form the label and the first three values of 'coordinates' give
            the position. ``None`` (the default) is treated as no hits.
    """
    import os  # local import: this module has no top-level import block

    hits = [] if list_of_hits is None else list_of_hits

    # Split only the base name on '.', so that a dot in a directory name
    # (e.g. "./model.pdb" or "run.1/model.pdb") cannot truncate the path.
    directory, base_name = os.path.split(filename)
    script_path = os.path.join(directory, base_name.split('.')[0] + '.py')

    entries = []
    for hit in hits:
        first_residue = hit[0]
        coordinates = first_residue.get('coordinates')
        entries.append('["%s %s", %.3f, %.3f, %.3f, ]'
                       % (first_residue.get('name'),
                          first_residue.get('seqid'),
                          coordinates[0],
                          coordinates[1],
                          coordinates[2]))

    with open(script_path, 'w') as file_out:
        file_out.write("# File programmatically created by Fletcher\n")
        file_out.write('handle_read_draw_molecule_with_recentre ("%s", 1)\n' % filename)
        file_out.write('interesting_things_gui ("Results from Fletcher",[\n')
        # Joining puts a separator between every pair of entries, even when
        # the same hit object appears more than once in the list.
        file_out.write(',\n'.join(entries))
        file_out.write('])\n')
154 changes: 154 additions & 0 deletions src/fletcher/fletcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import gemmi
import argparse
import json
from pathlib import Path
from coot_scripting import create_script_file

def find_structural_motifs(filename="",
                           residue_lists=None,
                           distance=0.0,
                           min_plddt=70.0,
                           n_term=False,
                           c_term=False,
                           ):
    """Find groups of residues located around a seed residue in a structure.

    The structure in `filename` is read with gemmi and a neighbour search
    (hydrogens excluded) is built on its first model. Every residue whose
    name is one of the candidates of the first slot of `residue_lists` is
    used as a seed. The neighbour search is queried around the seed's last
    atom (``find_neighbors(atom, 0, distance)``) and, for each remaining
    slot, the first neighbouring residue that is not already part of the
    hit and whose one-letter code matches one of the slot's candidates is
    added. A hit is kept when every slot has been filled and, if `n_term`
    or `c_term` is set, when at least one of its residues passes the
    terminus test against the seed's chain.

    When at least one hit is found the input file is touched, a JSON report
    is written next to it, and `create_script_file` is called with the hits.
    Otherwise a "no results" message is printed.

    Args:
        filename: Path of the structure file to read.
        residue_lists: One list of residue names per slot of the motif; each
            list holds the alternatives accepted for that slot.
        distance: Distance passed to the neighbour search.
        min_plddt: Residues whose last atom has ``b_iso`` below this value
            are labelled 'LOW PLDDT' in the report (the B-factor column is
            assumed to hold pLDDT, as in AlphaFold models).
        n_term: Require a residue of the hit to be the first residue of the
            seed's chain.
        c_term: Require a residue of the hit to have the same sequence
            number as the last residue of the seed's chain.

    Returns:
        A dict with keys 'filename', 'residue_lists', 'distance', 'plddt'
        and 'hits', or an empty dict when nothing was found.

    Raises:
        ValueError: If `residue_lists` is empty or its first slot is empty.
    """
    if not residue_lists or not residue_lists[0]:
        raise ValueError("residue_lists needs at least one slot holding at least one residue name")

    af_model = gemmi.read_structure(filename)
    first_model = af_model[0]
    neighbour_search = gemmi.NeighborSearch(first_model, af_model.cell, distance).populate(include_h=False)

    # Select on every alternative of the first slot (gemmi accepts a
    # comma-separated list of residue names between the parentheses), not
    # just on the first alternative.
    first_residues = gemmi.Selection('(' + ','.join(residue_lists[0]) + ')')

    result_dict = {}
    result_list = []

    for model in first_residues.models(af_model):
        for chain in first_residues.chains(model):
            for residue in first_residues.residues(chain):
                partial_result = [residue]
                marks = neighbour_search.find_neighbors(residue[-1], 0, distance)
                for candidate_list in residue_lists[1:]:
                    for candidate in candidate_list:
                        # We compare one-letter codes to harness gemmi's translation of modified
                        # residue codes into the unmodified ones, e.g. HIC (methylated histidine)
                        # >> HIS (normal histidine)
                        candidate_code = gemmi.find_tabulated_residue(candidate).one_letter_code.upper()
                        found_in_contacts = False
                        for mark in marks:
                            cra = mark.to_cra(first_model)
                            neighbour_code = gemmi.find_tabulated_residue(cra.residue.name).one_letter_code.upper()
                            if candidate_code == neighbour_code and cra.residue not in partial_result:
                                partial_result.append(cra.residue)
                                found_in_contacts = True
                                break
                        if found_in_contacts:
                            # This slot is filled; move on to the next one.
                            break

                if len(residue_lists) != len(partial_result):
                    continue  # at least one slot could not be filled

                if n_term or c_term:
                    # NOTE(review): the N-terminus test compares residue objects while
                    # the C-terminus test compares sequence numbers; both use the
                    # seed's chain even for neighbours - confirm this is intended.
                    in_terminus = any(
                        (n_term and member == chain[0])
                        or (c_term and member.seqid.num == chain[-1].seqid.num)
                        for member in partial_result)
                    if not in_terminus:
                        continue

                result_list.append(partial_result)

    if result_list:
        Path(filename).touch()  # We want results at the top
        result_dict['filename'] = filename
        result_dict['residue_lists'] = str(residue_lists)
        result_dict['distance'] = distance
        result_dict['plddt'] = min_plddt
        hit_list = []

        for result in result_list:
            hit = []
            for member in result:
                last_atom = member[-1]
                residue_dict = {}
                residue_dict['name'] = member.name
                residue_dict['seqid'] = str(member.seqid)
                if last_atom.b_iso < min_plddt:
                    residue_dict['plddt'] = 'LOW PLDDT: %.2f' % last_atom.b_iso
                else:
                    residue_dict['plddt'] = '%.2f' % last_atom.b_iso
                residue_dict['coordinates'] = last_atom.pos.tolist()
                hit.append(residue_dict)
            hit_list.append(hit)
            print("Hit found:", hit)

        result_dict['hits'] = hit_list

        # Split only the base name on '.', so that a dot in a directory name
        # (e.g. "./model.pdb") cannot truncate the output path.
        source_path = Path(filename)
        json_path = source_path.with_name(source_path.name.split('.')[0] + '.json')
        with open(json_path, 'w') as file_out:
            json.dump(result_dict, file_out, sort_keys=False, indent=4)

        create_script_file(filename, hit_list)

    else:
        print("\nNo results found :-( \n")
    return result_dict

if __name__ == '__main__':
    # Command-line entry point: parse the options, expand each comma-separated
    # slot of one-letter codes into residue names with gemmi, and run the search.
    parser = argparse.ArgumentParser(
        prog='Fletcher',
        description='Fletcher will try to find a list of residues within a fixed distance from the centre of mass.'
                    '\nConcept: Federico Sabbadin & Jon Agirre, University of York, UK.',
        epilog='Please send bug reports to Jon Agirre: [email protected]')

    parser.add_argument('-f', '--filename',
                        help="The name of the file to be processed, in PDB or mmCIF format.",
                        required=True)

    # Required options carry no default: argparse would never use it.
    parser.add_argument('-r', '--residues',
                        help="A list of residues in one-letter code, comma separated, and including alternatives, e.g. L,A,FWY.",
                        required=True)

    # type=float makes argparse report a malformed number as a usage error
    # instead of letting float() raise a traceback later on.
    parser.add_argument('-d', '--distance',
                        help="Specifies how far each of the residues can be from the rest, in Angstroems.",
                        type=float, required=True)

    parser.add_argument('-p', '--plddt',
                        help="Flag up candidate residues with average pLDDT below threshold (Jumper et al., 2020).",
                        type=float, default=70.0, required=False)

    parser.add_argument('-n', '--nterm',
                        help='Require one residue to be at the n-terminus',
                        choices=['yes', 'no'],
                        default='no')

    parser.add_argument('-c', '--cterm',
                        help='Require one residue to be at the c-terminus',
                        choices=['yes', 'no'],
                        default='no')

    args = parser.parse_args()

    # The search needs at least two slots and a positive distance. Report a
    # bad combination explicitly rather than exiting without doing anything.
    input_residues = args.residues.split(',')
    if len(input_residues) < 2 or not all(input_residues):
        parser.error("--residues needs at least two non-empty, comma-separated slots, e.g. L,A,FWY")
    if not args.distance > 0.0:
        parser.error("--distance must be greater than 0")

    print("\nFletcher is a tool that helps spot and document molecular features in AlphaFold models."
          "\nConcept: Federico Sabbadin & Jon Agirre, University of York, UK."
          "\nLatest source code: https://github.com/glycojones/fletcher"
          "\nBug reports to [email protected]\n\n")

    # Each slot of one-letter codes becomes a list of residue names.
    list_of_residues = [gemmi.expand_one_letter_sequence(slot, gemmi.ResidueKind.AA)
                        for slot in input_residues]

    distance = args.distance
    min_plddt = args.plddt
    n_term = args.nterm == 'yes'
    c_term = args.cterm == 'yes'

    print("Running Fletcher with the following parameters:\nFilename: ",
          args.filename, "\nResidue list: ",
          list_of_residues, "\nDistance: ",
          distance, "\npLDDT: ",
          min_plddt,
          "\nN-term: ", n_term,
          "\nC-term: ", c_term,
          "\n")

    find_structural_motifs(args.filename, list_of_residues, distance, min_plddt, n_term, c_term)

9 changes: 9 additions & 0 deletions src/fletcher/plddt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from math import exp

def plddt_to_rmsd(plddt=0.0):
    """Estimate an RMSD from a pLDDT score.

    The score is divided by 100 and fed into ``1.5 * exp(4 * (0.7 - x))``,
    so a pLDDT of 70 gives exactly 1.5 and the estimate shrinks
    exponentially as the score grows.

    Args:
        plddt: pLDDT score; the division by 100 implies a 0-100 scale.

    Returns:
        The estimated RMSD as a float (units are not stated in this module;
        presumably Angstroms).
    """
    exponent = 4.0 * (0.7 - plddt / 100.0)
    return 1.5 * exp(exponent)

def plddt_to_bfact(plddt=0.0):
    """Convert a pLDDT score into a B-factor-like value capped at 999.99.

    The RMSD estimated by `plddt_to_rmsd` is squared and scaled by
    26.318945069571623 (numerically 8 * pi**2 / 3).

    Args:
        plddt: pLDDT score, passed unchanged to `plddt_to_rmsd`.

    Returns:
        The scaled value, or 999.99 when the scaled value is larger.
    """
    rmsd = plddt_to_rmsd(plddt)
    scaled = 26.318945069571623 * rmsd ** 2
    return min(999.99, scaled)
Loading

0 comments on commit 6796cc5

Please sign in to comment.