From 72f99433b58ab34b7afcb0c049702d02e605b199 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann Date: Wed, 14 Dec 2022 13:26:14 +0200 Subject: [PATCH 1/2] adding easyconfigs: AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb and patches: AlphaFold-2.3.0_data-dep-paths.patch --- .../AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb | 168 ++++++++++++++++++ .../AlphaFold-2.3.0_data-dep-paths.patch | 136 ++++++++++++++ 2 files changed, 304 insertions(+) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0_data-dep-paths.patch diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb new file mode 100644 index 00000000000..b3442dbb988 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb @@ -0,0 +1,168 @@ +easyblock = 'PythonBundle' + +name = 'AlphaFold' +version = '2.3.0' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'https://deepmind.com/research/case-studies/alphafold' +description = "AlphaFold can predict protein structures with atomic accuracy even where no similar structure is known" + +toolchain = {'name': 'foss', 'version': '2021b'} + +builddependencies = [ + # required for installing dm-tree + ('Bazel', '3.7.2'), + ('CMake', '3.22.1'), +] + +dependencies = [ + ('Python', '3.9.6'), + ('CUDA', '11.4.1', '', SYSTEM), + ('SciPy-bundle', '2021.10'), + ('PyYAML', '5.4.1'), + ('TensorFlow', '2.7.1', versionsuffix), + ('Biopython', '1.79'), + ('HH-suite', '3.3.0'), + ('HMMER', '3.3.2'), + ('Kalign', '3.3.2'), + ('jax', '0.3.23', versionsuffix), # also provides absl-py + ('UCX-CUDA', '1.11.2', versionsuffix), + ('cuDNN', '8.2.2.26', versionsuffix, SYSTEM), + ('NCCL', '2.10.3', versionsuffix), + ('OpenMM', '7.5.1', '-DeepMind-patch'), +] + +# commit to use for downloading stereo_chemical_props.txt and copy to alphafold/common, 
+# see docker/Dockerfile in AlphaFold repository +local_scp_commit = '7102c6' + +components = [ + (name, version, { + 'easyblock': 'PythonPackage', + 'source_urls': [ + 'https://github.com/deepmind/alphafold/archive/refs/tags/', + 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, + ], + 'sources': [ + { + 'download_filename': 'v%(version)s.tar.gz', + 'filename': SOURCE_TAR_GZ, + }, + { + 'download_filename': 'stereo_chemical_props.txt', + 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, + 'extract_cmd': "cp %s .", + }, + ], + 'patches': [ + 'AlphaFold-2.0.0_fix-packages.patch', + 'AlphaFold-2.3.0_data-dep-paths.patch', + 'AlphaFold-2.0.0_n-cpu.patch', + 'AlphaFold-2.1.0_fix-scp-path.patch', + 'AlphaFold-2.0.1_setup_rm_tfcpu.patch', + ], + 'checksums': [ + '52055a0b4bf194ae0e1960e6391e501490f82274c975e01c1ff0e353a1cd59d9', # v2.3.0.tar.gz + '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt + '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch + '5cff3fc7104e020ef546d23cb4fb1b8d6517562783f055cc55fc65fe2b0248d0', # AlphaFold-2.3.0_data-dep-paths.patch + 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch + '5363d403baf5ab73f4d3ddd72e19af9ff832de4b1d7ba25a5fbcc5846c1c890f', # AlphaFold-2.1.0_fix-scp-path.patch + '1a2e4e843bd9a4d15ee39e6c37cc63ba281311cc7a0a5610f0e43b52ef93faac', # AlphaFold-2.0.1_setup_rm_tfcpu.patch + + ], + 'start_dir': 'alphafold-%(version)s', + 'use_pip': True, + }), +] + +use_pip = True + +exts_list = [ + ('PDBFixer', '1.7', { + 'source_urls': ['https://github.com/openmm/pdbfixer/archive/refs/tags/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}], + 'checksums': ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'], + }), + ('toolz', '0.11.2', { + 'checksums': 
['6b312d5e15138552f1bda8a4e66c30e236c831b612b2bf0005f8a1df10a4bc33'], + }), + ('chex', '0.1.5', { + 'checksums': ['686858320f8f220c82a6c7eeb54dcdcaa4f3d7f66690dacd13a24baa1ee8299e'], + }), + ('tabulate', '0.8.10', { + 'checksums': ['6c57f3f3dd7ac2782770155f3adb2db0b1a269637e42f27599925e64b114f519'], + }), + ('jmp', '0.0.2', { + 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", + 'checksums': ['fdb5cec0d10aab4116c2770f24b2adf4f503fcfbb96ce8ef583e1879bdbf1b9b'], + }), + ('dm-haiku', '0.0.9', { + 'modulename': 'haiku', + 'source_urls': ['https://github.com/deepmind/dm-haiku/archive/refs/tags/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': '%(name)s-%(version)s.tar.gz'}], + 'checksums': ['d550f07f5891ede30ada5faafde98f549ed1b8ceadb7a601cca3d81db7d82414'], + }), + ('dm-tree', '0.1.7', { + 'modulename': 'tree', + 'checksums': ['30fec8aca5b92823c0e796a2f33b875b4dccd470b57e91e6c542405c5f77fd2a'], + }), + ('websocket-client', '1.3.3', { + 'modulename': 'websocket', + 'checksums': ['d58c5f284d6a9bf8379dab423259fe8f85b70d5fa5d2916d5791a84594b122b1'], + }), + ('docker', '5.0.3', { + 'checksums': ['d916a26b62970e7c2f554110ed6af04c7ccff8e9f81ad17d0d40c75637e227fb'], + }), + ('immutabledict', '2.2.1', { + 'checksums': ['1ddb0edf1bb6c70d0197eb90ce1fe2b2d58502334f5fdfde72d7c633d723ec3a'], + }), + ('contextlib2', '21.6.0', { + 'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'], + }), + ('ml_collections', '0.1.1', { + 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", + 'checksums': ['3fefcc72ec433aa1e5d32307a3e474bbb67f405be814ea52a2166bfc9dbe68cc'], + }), +] + +postinstallcmds = [ + "mkdir -p %(installdir)s/bin", + # run_alphafold.py script is missing a shebang... 
+ "echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py", + "cat %(builddir)s/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", + "cp %(builddir)s/alphafold-%(version)s/run_alphafold_{features,resume}.py %(installdir)s/bin", + "chmod a+x %(installdir)s/bin/run_alphafold*.py", + "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", + "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/", + "cp %%(builddir)s/stereo_chemical_props-%s.txt %%(installdir)s/stereo_chemical_props.txt" % local_scp_commit, + # run tests for run_alphafold.py script; + # shouldn't do this in sanity check to avoid breaking use of --module-only + "PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH " + "python %(builddir)s/alphafold-%(version)s/run_alphafold_test.py", +] + +sanity_check_paths = { + 'files': ['bin/alphafold', 'bin/pdbfixer', 'bin/run_alphafold.py', 'stereo_chemical_props.txt'], + 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], +} + +sanity_check_commands = [ + "pdbfixer --help", + "python -m simtk.testInstallation", + "python -c 'import alphafold'", + "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", +] + +sanity_pip_check = True + +# these settings make it possible to run predictions on proteins that would typically be too long to fit into GPU memory; +# see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py +modextravars = { + 'TF_FORCE_UNIFIED_MEMORY': '1', + 'XLA_PYTHON_CLIENT_MEM_FRACTION': '3', + # 'ALPHAFOLD_DATA_DIR': '/path/to/AlphaFold_DBs', # please adapt + 'OPENMM_RELAX': 'CUDA' # unset or set to 'CPU' in order not to run the energy minimization on GPU; PR#189 +} + +moduleclass = 'bio' diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0_data-dep-paths.patch new file mode 100644 index 00000000000..20a40b14f2b --- /dev/null +++
b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0_data-dep-paths.patch @@ -0,0 +1,136 @@ +pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data +(see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py); +pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild +author: Kenneth Hoste (HPC-UGent) +update 2.0.1 -> 2.1.0/2.1.2/2.3.0: Thomas Hoffmann (EMBL); +diff -ru alphafold-2.3.0/run_alphafold.py alphafold-2.3.0_data-dep-paths/run_alphafold.py +--- alphafold-2.3.0/run_alphafold.py 2022-12-11 20:36:44.000000000 +0100 ++++ alphafold-2.3.0_data-dep-paths/run_alphafold.py 2022-12-13 17:36:37.258678676 +0100 +@@ -40,6 +40,46 @@ + import numpy as np + + # Internal import (7716). ++use_reduced_dbs = any("--db_preset=reduced_dbs" in s for s in sys.argv[1:]) ++use_monomer_preset = not any("--model_preset=multimer" in s for s in sys.argv[1:]) ++ ++data_dir = os.getenv('ALPHAFOLD_DATA_DIR') ++use_gpu_relax = os.getenv('OPENMM_RELAX')=='CUDA' ++ ++if data_dir: ++ mgnify_database_path = os.path.join(data_dir, 'mgnify', 'mgy_clusters_2022_05.fa') ++ uniref90_database_path = os.path.join(data_dir, 'uniref90', 'uniref90.fasta') ++ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') ++ obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') ++ if use_monomer_preset: ++ pdb_seqres_database_path = None ++ uniprot_database_path = None ++ pdb70_database_path = os.path.join(data_dir, 'pdb70', 'pdb70') ++ else: ++ pdb_seqres_database_path = os.path.join(data_dir, 'pdb_seqres', 'pdb_seqres.txt') ++ uniprot_database_path = os.path.join(data_dir, 'uniprot', 'uniprot.fasta') ++ pdb70_database_path = None ++ if use_reduced_dbs: ++ small_bfd_database_path = os.path.join(data_dir, 'small_bfd','bfd-first_non_consensus_sequences.fasta') ++ uniref30_database_path = None ++ bfd_database_path = None ++ else: ++ small_bfd_database_path = None ++ bfd_database_path = os.path.join(data_dir, 'bfd', 
'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt') ++ uniref30_database_path = os.path.join(data_dir, 'uniref30', 'UniRef30_2021_03') ++else: ++ sys.stderr.write("$ALPHAFOLD_DATA_DIR is not defined!") ++ uniref90_database_path = None ++ mgnify_database_path = None ++ bfd_database_path = None ++ uniref30_database_path = None ++ pdb70_database_path = None ++ template_mmcif_dir = None ++ obsolete_pdbs_path = None ++ small_bfd_database_path = None ++ uniprot_database_path = None ++ pdb_seqres_database_path = None ++ use_gpu_relax = None + + logging.set_verbosity(logging.INFO) + +@@ -50,7 +90,7 @@ + 'separated by commas. All FASTA paths must have a unique basename as the ' + 'basename is used to name the output directories for each prediction.') + +-flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') ++flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.') + flags.DEFINE_string('output_dir', None, 'Path to a directory that will ' + 'store the results.') + flags.DEFINE_string('jackhmmer_binary_path', shutil.which('jackhmmer'), +@@ -65,27 +105,27 @@ + 'Path to the hmmbuild executable.') + flags.DEFINE_string('kalign_binary_path', shutil.which('kalign'), + 'Path to the Kalign executable.') +-flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 ' ++flags.DEFINE_string('uniref90_database_path', uniref90_database_path, 'Path to the Uniref90 ' + 'database for use by JackHMMER.') +-flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify ' ++flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify ' + 'database for use by JackHMMER.') +-flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD ' ++flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD ' + 'database for use by HHblits.') +-flags.DEFINE_string('small_bfd_database_path', None, 'Path to the small ' ++flags.DEFINE_string('small_bfd_database_path', 
small_bfd_database_path, 'Path to the small ' + 'version of BFD used with the "reduced_dbs" preset.') +-flags.DEFINE_string('uniref30_database_path', None, 'Path to the UniRef30 ' ++flags.DEFINE_string('uniref30_database_path', uniref30_database_path, 'Path to the UniRef30 ' + 'database for use by HHblits.') +-flags.DEFINE_string('uniprot_database_path', None, 'Path to the Uniprot ' ++flags.DEFINE_string('uniprot_database_path', uniprot_database_path, 'Path to the Uniprot ' + 'database for use by JackHMMer.') +-flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 ' ++flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 ' + 'database for use by HHsearch.') +-flags.DEFINE_string('pdb_seqres_database_path', None, 'Path to the PDB ' ++flags.DEFINE_string('pdb_seqres_database_path', pdb_seqres_database_path, 'Path to the PDB ' + 'seqres database for use by hmmsearch.') +-flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with ' ++flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with ' + 'template mmCIF structures, each named .cif') + flags.DEFINE_string('max_template_date', None, 'Maximum template release date ' + 'to consider. Important if folding historical test sets.') +-flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a ' ++flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a ' + 'mapping from obsolete PDB IDs to the PDB IDs of their ' + 'replacements.') + flags.DEFINE_enum('db_preset', 'full_dbs', +@@ -124,7 +164,7 @@ + 'result in predictions with distracting stereochemical ' + 'violations but might help in case you are having issues ' + 'with the relaxation stage.') +-flags.DEFINE_boolean('use_gpu_relax', None, 'Whether to relax on GPU. ' ++flags.DEFINE_boolean('use_gpu_relax', use_gpu_relax, 'Whether to relax on GPU. 
' + 'Relax on GPU can be much faster than CPU, so it is ' + 'recommended to enable if possible. GPUs must be available' + ' if this setting is enabled.') +@@ -296,6 +336,10 @@ + 'sure it is installed on your system.') + + use_small_bfd = FLAGS.db_preset == 'reduced_dbs' ++ if use_small_bfd and data_dir: ++ bfd_database_path = None ++ uniref30_database_path = None ++ + _check_flag('small_bfd_database_path', 'db_preset', + should_be_set=use_small_bfd) + _check_flag('bfd_database_path', 'db_preset', +@@ -420,13 +464,7 @@ + flags.mark_flags_as_required([ + 'fasta_paths', + 'output_dir', +- 'data_dir', +- 'uniref90_database_path', +- 'mgnify_database_path', +- 'template_mmcif_dir', + 'max_template_date', +- 'obsolete_pdbs_path', +- 'use_gpu_relax', + ]) + + app.run(main) From 8600fbbccaf05b61eff4827614ac4e66bfafec51 Mon Sep 17 00:00:00 2001 From: Thomas Hoffmann <81254262+ThomasHoffmann77@users.noreply.github.com> Date: Tue, 28 Feb 2023 13:49:12 +0100 Subject: [PATCH 2/2] delete run_alphafold_{features,resume}.py --- .../a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb | 1 - 1 file changed, 1 deletion(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb index b3442dbb988..348e3a6aaed 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.3.0-foss-2021b-CUDA-11.4.1.eb @@ -131,7 +131,6 @@ postinstallcmds = [ # run_alphafold.py script is missing a shebang... 
"echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py", "cat %(builddir)s/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", - "cp %(builddir)s/alphafold-%(version)s/run_alphafold_{features,resume}.py %(installdir)s/bin", "chmod a+x %(installdir)s/bin/run_alphafold*.py", "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/",