From 33f5fb22b7335458cc100f31d8414f44f99de0d6 Mon Sep 17 00:00:00 2001 From: Pierre Poulain Date: Wed, 21 Dec 2016 14:16:45 +0100 Subject: [PATCH] Remove R clustering in main code, tests and doc --- .travis.yml | 2 - CHANGELOG | 1 + doc/source/PBclust.rst | 121 -------- doc/source/api_cookbook.rst | 3 +- doc/source/index.rst | 1 - doc/source/utilization.rst | 7 +- pbxplore/analysis/__init__.py | 7 - pbxplore/analysis/clustering.py | 146 ---------- pbxplore/demo/psi_md_traj_all.PB.clust | 275 ------------------ pbxplore/scripts/PBclust.py | 178 ------------ .../tests/test_data/psi_md_traj_all.PB.clust | 273 ----------------- .../test_data/psi_md_traj_all_c5.PB.clust | 275 ------------------ pbxplore/tests/test_regression.py | 52 ---- run_demo2_clusters.sh | 65 ----- setup.py | 1 - 15 files changed, 4 insertions(+), 1403 deletions(-) delete mode 100644 doc/source/PBclust.rst delete mode 100644 pbxplore/analysis/clustering.py delete mode 100644 pbxplore/demo/psi_md_traj_all.PB.clust delete mode 100755 pbxplore/scripts/PBclust.py delete mode 100644 pbxplore/tests/test_data/psi_md_traj_all.PB.clust delete mode 100644 pbxplore/tests/test_data/psi_md_traj_all_c5.PB.clust delete mode 100755 run_demo2_clusters.sh diff --git a/.travis.yml b/.travis.yml index bf93f06..4274902 100644 --- a/.travis.yml +++ b/.travis.yml @@ -31,7 +31,6 @@ matrix: addons: apt: packages: - - r-base - ghc - cabal-install @@ -66,7 +65,6 @@ script: - if [[ $SETUP == 'test' ]]; then nosetests -v pbxplore/tests; fi - if [[ $SETUP == 'test' ]]; then yes | ./run_demo1_assignation.sh; fi - if [[ $SETUP == 'test' ]]; then yes | ./run_demo2_statistics.sh; fi - - if [[ $SETUP == 'test' ]]; then yes | ./run_demo2_clusters.sh; fi - if [[ $SETUP == 'doc' ]]; then cd doc; sphinx-build -W -b html source build/html; fi #after_success: diff --git a/CHANGELOG b/CHANGELOG index 0a2bfc6..3827f94 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,7 @@ - Add MDAnalysis as a mandatory module - Add matplotlib as a mandatory module - Fix weblogo weblogolib.SymbolColor/ColorGroup import +- Remove R clustering (now in branch clust_R) **1.3.1** - remove pbxplore import from setup.py and add version number diff --git a/doc/source/PBclust.rst b/doc/source/PBclust.rst deleted file mode 100644 index 5eddb02..0000000 --- a/doc/source/PBclust.rst +++ /dev/null @@ -1,121 +0,0 @@ -PBclust -======= - -.. warning:: NOT UPDATED. - -Once converted to PB sequences, conformations of a same protein can be clustered -based on PB similarities. - - -Example -------- - -.. code-block:: bash - - $ PBclust -f `PBdata`/psi_md_traj_all.PB.fasta -o psi_md_traj_all --clusters 5 - read 270 sequences in demo2/psi_md_traj_all.PB.fasta - read substitution matrix - Building distance matrix - 100% - wrote psi_md_traj_all.PB.dist - R clustering: OK - cluster 1: 90 sequences (33%) - cluster 2: 55 sequences (20%) - cluster 3: 35 sequences (13%) - cluster 4: 35 sequences (13%) - cluster 5: 55 sequences (20%) - wrote psi_md_traj_all.PB.clust - - -Cluster 1 is the biggest cluster with 33% of all conformations. -`psi_md_traj_all.PB.dist` contains the matrix distance between all PB sequences. - -Content of `psi_md_traj_all.PB.clust` (clustering results): :: - - SEQ_CLU "psi_md_traj_1.pdb | model 0" 1 - SEQ_CLU "psi_md_traj_1.pdb | model 1" 1 - SEQ_CLU "psi_md_traj_1.pdb | model 2" 1 - [snip] - ... - [snip] - SEQ_CLU "psi_md_traj_3.pdb | model 31" 4 - SEQ_CLU "psi_md_traj_3.pdb | model 32" 4 - SEQ_CLU "psi_md_traj_3.pdb | model 33" 5 - SEQ_CLU "psi_md_traj_3.pdb | model 34" 5 - [snip] - ... - [snip] - SEQ_CLU "psi_md_traj_3.pdb | model 88" 5 - SEQ_CLU "psi_md_traj_3.pdb | model 89" 5 - MED_CLU "psi_md_traj_1.pdb | model 65" 1 - MED_CLU "psi_md_traj_2.pdb | model 33" 2 - MED_CLU "psi_md_traj_2.pdb | model 74" 3 - MED_CLU "psi_md_traj_3.pdb | model 0" 4 - MED_CLU "psi_md_traj_3.pdb | model 87" 5 - - -Usage ------ - -Here’s the ``PBclust`` help text. :: - - usage: PBclust [-h] -f F -o O (--clusters CLUSTERS | --compare) - - Cluster protein structures based on their PB sequences. - - optional arguments: - -h, --help show this help message and exit - -f F name(s) of the PBs file (in fasta format) - -o O name for results - --clusters CLUSTERS number of wanted clusters - --compare compare the first sequence versus all others - - -`--compare` option -`````````````````` - -compares, position by position, the first sequence found in the fasta file against all others. -The result of the comparison is a score between O (identical) and 9 (different). - -.. code-block:: bash - - $ PBclust -f `PBdata`/psi_md_traj_all.PB.fasta -o psi_md_traj_all --compare - read 270 sequences in demo2/psi_md_traj_all.PB.fasta - read substitution matrix - Normalized substitution matrix (between 0 and 9) - [[0 3 2 3 4 3 3 4 2 3 5 3 5 4 3 3] - [3 0 3 3 3 4 3 2 2 3 3 2 5 3 3 2] - [2 3 0 3 4 3 2 4 3 4 5 5 5 4 3 2] - [3 3 3 0 2 3 4 4 3 3 5 5 9 6 5 4] - [4 3 4 2 0 2 2 2 4 3 3 4 7 4 5 5] - [3 4 3 3 2 0 3 3 4 2 3 3 5 5 4 5] - [3 3 2 4 2 3 0 3 3 3 4 3 3 2 2 1] - [4 2 4 4 2 3 3 0 3 1 2 3 5 4 2 4] - [2 2 3 3 4 4 3 3 0 2 2 2 5 3 3 2] - [3 3 4 3 3 2 3 1 2 0 2 2 4 4 3 3] - [5 3 5 5 3 3 4 2 2 2 0 3 3 3 4 4] - [3 2 5 5 4 3 3 3 2 2 3 0 3 2 2 4] - [5 5 5 9 7 5 3 5 5 4 3 3 0 2 3 3] - [4 3 4 6 4 5 2 4 3 4 3 2 2 0 2 2] - [3 3 3 5 5 4 2 2 3 3 4 2 3 2 0 2] - [3 2 2 4 5 5 1 4 2 3 4 4 3 2 2 0]] - Compare first sequence (psi_md_traj_1.pdb | model 0) with others - wrote psi_md_traj_all.PB.compare.fasta - -Content of `psi_md_traj_all.PB.compare.fasta`: :: - - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_1.pdb | model 1 - 00000002000000000020000000000002000200000000000230002000 - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_1.pdb | model 2 - 00000002000000000005000000000002000243000000055230000000 - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_1.pdb | model 3 - 00000002000000000020000000000002000200000000055230002000 - [snip] - ... - [snip] - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_3.pdb | model 87 - 00302523340000000005000000035032000323300000335220000000 - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_3.pdb | model 88 - 00302523350500000005000000032232000323300000555225000000 - >psi_md_traj_1.pdb | model 0 vs psi_md_traj_3.pdb | model 89 - 00333522250000000025000000035032000323300002035020002000 diff --git a/doc/source/api_cookbook.rst b/doc/source/api_cookbook.rst index 79b9361..6f2db4a 100644 --- a/doc/source/api_cookbook.rst +++ b/doc/source/api_cookbook.rst @@ -30,13 +30,12 @@ Look at the notebook :doc:`Writing PB in file <./notebooks/Assignement>` for fu PBxplore.analysis ----------------- -This module handle all analysis functions, ploting functions and clustering one available with the package. +This module handle all analysis functions and ploting functions. You can: * generate map of the distribution of PBs along protein sequence with `plot_map()`. * compute :ref:`Neq` with `compute_neq()` and generate the plot with `plot_neq()`. * generate WebLogo-like representation of PBs frequency along protein sequence with `generate_weblogo()`. -* cluster conformations of a same protein based on PB similarities. Look at the notebook :doc:`Visualize protein deformability <./notebooks/Deformability>` for further information. diff --git a/doc/source/index.rst b/doc/source/index.rst index 6067b45..d34a2f3 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -47,7 +47,6 @@ Basically, PBxplore can: PBassign PBcount PBstat - PBclust api_cookbook api_reference diff --git a/doc/source/utilization.rst b/doc/source/utilization.rst index 64ba0f1..f4c0d5e 100644 --- a/doc/source/utilization.rst +++ b/doc/source/utilization.rst @@ -17,7 +17,6 @@ Here the list: - :doc:`PBcount ` computes the frequency of PBs at each position along the amino acid sequence. - :doc:`PBstat ` generates frequency and logo plots, and estimates the :ref:`Neq `. -- :doc:`PBclust ` use clustering algorithm (k-means) to re-group similar PBs sequences. API @@ -35,14 +34,12 @@ Demo files PBxplore provides scripts to demonstrate its functionalities. These scripts guide the user through the different command line tools of PBxplore. -3 demonstration scripts are available: +2 demonstration scripts are available: * `run_demo1_assignation.sh` demonstrates the how to use ``PBassign`` to assign Protein Block sequences to protein structures; * `run_demo2_statistics.sh` demonstrates how to analyse protein dynamics using - PBxplore; -* finally, `run_demo2_clusters.sh` demonstrates how to use ``PBclust`` to cluster - Protein Block sequences. + PBxplore. In addition to the scripts, PBxplore bundles input files to test and get accustom with the software and the python library. Call the ``PBdata`` program to diff --git a/pbxplore/analysis/__init__.py b/pbxplore/analysis/__init__.py index a633dc8..adc0baa 100644 --- a/pbxplore/analysis/__init__.py +++ b/pbxplore/analysis/__init__.py @@ -28,12 +28,6 @@ .. autofunction:: generate_weblogo -Cluster protein block sequences -------------------------------- - -.. autofunction: distance_matrix - -.. autofunction: hclust Utils ----- @@ -45,7 +39,6 @@ .. autofunction:: compute_score_by_position """ -from .clustering import hclust, distance_matrix, RError from .compare import compare from .count import count_matrix, read_occurence_file from .neq import compute_neq diff --git a/pbxplore/analysis/clustering.py b/pbxplore/analysis/clustering.py deleted file mode 100644 index 588bc44..0000000 --- a/pbxplore/analysis/clustering.py +++ /dev/null @@ -1,146 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -from __future__ import print_function, absolute_import - -# Standard module -import subprocess -import sys - -# Third-party module -import numpy - -# Local module -from . import utils - - -class RError(RuntimeError): - """ - Exception raised when something fails with a R script. - """ - pass - - -def distance_matrix(sequences, substitution_mat): - """ - Compute distances of all sequences against all the others - - The substitution matrix is expected to be expressed as similarity scores. - """ - distance_mat = numpy.empty((len(sequences), len(sequences)), dtype='float') - - print("Building distance matrix") - # Get similarity score - for i, seqA in enumerate(sequences): - sys.stdout.write("\r%.f%%" % (float(i+1)/len(sequences)*100)) - sys.stdout.flush() - for j, seqB in enumerate(sequences[i:], start=i): - score = utils.substitution_score(substitution_mat, seqA, seqB) - distance_mat[i, j] = score - distance_mat[j, i] = score - print("") - # Set the diagonal equal to its maximum value - diag_maxi = numpy.max(distance_mat.diagonal()) - numpy.fill_diagonal(distance_mat, diag_maxi) - # Convert similarity score into a distance - mini = numpy.min(distance_mat) - maxi = numpy.max(distance_mat) - # Compute distance - distance_mat = 1 - (distance_mat - mini)/(maxi - mini) - # Check distance values are in expected range - assert(numpy.min(distance_mat) >= 0.0) - assert(numpy.max(distance_mat) <= 1.0) - assert(numpy.sum(distance_mat.diagonal()) == 0.0) - return distance_mat - - -def _matrix_to_str(distance_mat): - numpy.set_printoptions(threshold=numpy.inf, precision=3, linewidth=100000) - output_mat_str = numpy.array_str(distance_mat).replace('[', '').replace(']', '') - return output_mat_str - - -def hclust(distance_mat, nclusters, method='ward'): - """ - Hierachical clustering using R - - Parameters - ---------- - distance_mat : 2D numpy array - Distance matrix - nclusters : int - Number of cluster to build - method : str - Aggregation method for the clustering algorithm; must be a value - valid for R hclust function - - Returns - ------- - cluster_id : list of int - Cluster ID for each item; starts at 1 - medoid_id : list of int - Index of the medoid for each cluster - - Exceptions - ---------- - RError : something wrong happened with R - """ - # Convert the distance matrix into a string readable by R - output_mat_str = _matrix_to_str(distance_mat) - # Build the R script - R_script = """ - connector = textConnection("{matrix}") - distances = read.table(connector, header=FALSE) - rownames(distances) = colnames(distances) - - clusters = cutree(hclust(as.dist(distances), method="{method}"), k={clusters}) - distances = as.matrix(distances) - - # function to find medoid in cluster i - # http://www.biostars.org/p/11987/ - clust.medoid = function(i, distmat, clusters) {{ - ind = (clusters == i) - - if(length(distmat[ind, ind]) == 1){{ - names(clusters[ind]) - }} else {{ - names(which.min(rowSums( distmat[ind, ind] ))) - # c(min(rowMeans( distmat[ind, ind] ))) - }} - }} - - medoids = sapply(unique(clusters), clust.medoid, distances, clusters) - - cat("cluster_id", clusters, "\n") - cat("medoid_id", medoids) - """.format(matrix=output_mat_str, clusters=nclusters, method=method) - - # Execute the R script - command = "R --vanilla --slave" - proc = subprocess.Popen(command, shell=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - stdin=subprocess.PIPE) - (out, err) = proc.communicate(R_script.encode('utf-8')) - out = out.decode('utf-8') - err = err.decode('utf-8') - if err: - raise RError("{0}".format(err)) - code = proc.wait() - if code: - raise RError("R returned with code {}".format(code)) - - # Extract the output of the R script - # only 2 lines are expected - if len(out.split("\n")) != 2: - raise RError("unexpected R ouput") - cluster_id, medoid_id = out.split("\n") - # As the input table is provided without headers, the sequences are named - # V1, V2... with indices starting at 1. To get a integer index starting at - # 0 from a sequence name, we need to remove the V prefix and to substract 1 - # from the remaining number. This applies to medoid_id that relies on the - # sequence name, but not to cluster_id that is already a list of integers. - cluster_id = [int(x) for x in cluster_id.split()[1:]] - medoid_id = [int(x[1:]) - 1 for x in medoid_id.split()[1:]] - - return cluster_id, medoid_id diff --git a/pbxplore/demo/psi_md_traj_all.PB.clust b/pbxplore/demo/psi_md_traj_all.PB.clust deleted file mode 100644 index e044ddd..0000000 --- a/pbxplore/demo/psi_md_traj_all.PB.clust +++ /dev/null @@ -1,275 +0,0 @@ -SEQ_CLU "psi_md_traj_1.pdb | model 0" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 1" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 2" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 3" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 4" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 5" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 6" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 7" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 8" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 9" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 10" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 11" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 12" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 13" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 14" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 15" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 16" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 17" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 18" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 19" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 20" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 21" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 22" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 23" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 24" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 25" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 26" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 27" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 28" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 29" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 30" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 31" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 32" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 33" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 34" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 35" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 36" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 37" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 38" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 39" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 40" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 41" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 42" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 43" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 44" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 45" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 46" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 47" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 48" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 49" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 50" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 51" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 52" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 53" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 54" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 55" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 56" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 57" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 58" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 59" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 60" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 61" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 62" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 63" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 64" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 65" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 66" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 67" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 68" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 69" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 70" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 71" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 72" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 73" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 74" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 75" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 76" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 77" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 78" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 79" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 80" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 81" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 82" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 83" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 84" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 85" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 86" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 87" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 88" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 89" 1 -SEQ_CLU "psi_md_traj_2.pdb | model 0" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 1" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 2" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 3" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 4" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 5" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 6" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 7" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 8" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 9" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 10" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 11" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 12" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 13" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 14" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 15" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 16" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 17" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 18" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 19" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 20" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 21" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 22" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 23" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 24" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 25" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 26" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 27" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 28" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 29" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 30" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 31" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 32" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 33" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 34" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 35" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 36" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 37" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 38" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 39" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 40" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 41" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 42" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 43" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 44" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 45" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 46" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 47" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 48" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 49" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 50" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 51" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 52" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 53" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 54" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 55" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 56" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 57" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 58" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 59" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 60" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 61" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 62" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 63" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 64" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 65" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 66" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 67" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 68" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 69" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 70" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 71" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 72" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 73" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 74" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 75" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 76" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 77" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 78" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 79" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 80" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 81" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 82" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 83" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 84" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 85" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 86" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 87" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 88" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 89" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 0" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 1" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 2" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 3" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 4" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 5" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 6" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 7" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 8" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 9" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 10" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 11" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 12" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 13" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 14" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 15" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 16" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 17" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 18" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 19" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 20" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 21" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 22" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 23" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 24" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 25" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 26" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 27" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 28" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 29" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 30" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 31" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 32" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 33" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 34" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 35" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 36" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 37" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 38" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 39" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 40" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 41" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 42" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 43" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 44" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 45" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 46" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 47" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 48" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 49" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 50" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 51" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 52" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 53" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 54" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 55" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 56" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 57" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 58" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 59" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 60" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 61" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 62" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 63" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 64" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 65" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 66" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 67" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 68" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 69" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 70" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 71" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 72" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 73" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 74" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 75" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 76" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 77" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 78" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 79" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 80" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 81" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 82" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 83" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 84" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 85" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 86" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 87" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 88" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 89" 5 -MED_CLU "psi_md_traj_1.pdb | model 65" 1 -MED_CLU "psi_md_traj_2.pdb | model 33" 2 -MED_CLU "psi_md_traj_2.pdb | model 74" 3 -MED_CLU "psi_md_traj_3.pdb | model 0" 4 -MED_CLU "psi_md_traj_3.pdb | model 87" 5 diff --git a/pbxplore/scripts/PBclust.py b/pbxplore/scripts/PBclust.py deleted file mode 100755 index 4d400e5..0000000 --- a/pbxplore/scripts/PBclust.py +++ /dev/null @@ -1,178 +0,0 @@ -#! /usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Cluster protein structures based on their PB sequences. - -2013 - P. Poulain, A. G. de Brevern -""" - - -# Use print as a function for python 3 compatibility -from __future__ import print_function, division - -# Standard modules -import collections -import sys -import os -import argparse - -# Third-party module -import numpy - -# Local module -import pbxplore as pbx -from pbxplore.PB import SUBSTITUTION_MATRIX_NAME - -# Python2/Python3 compatibility -# The range function in python 3 behaves as the range function in python 2 -# and returns a generator rather than a list. To produce a list in python 3, -# one should use list(range). Here we change range to behave the same in -# python 2 and in python 3. In both cases, range will return a generator. -try: - range = xrange -except NameError: - pass - - -def user_input(): - """ - Handle PBclust command line arguments - """ - parser = argparse.ArgumentParser( - description="Cluster protein structures based on their PB sequences.") - - # mandatory arguments - parser.add_argument("-f", action="append", required=True, - help="name(s) of the PBs file (in fasta format)") - parser.add_argument("-o", action="store", required=True, - help="name for results") - - # --clusters or --compare arguments - group = parser.add_mutually_exclusive_group(required=True) - group.add_argument("--clusters", action="store", type=int, - help="number of wanted clusters") - # optional arguments - group.add_argument("--compare", action="store_true", default=False, - help="compare the first sequence versus all others") - - # get all arguments - options = parser.parse_args() - - # test if the number of clusters is valid - if options.clusters is not None and options.clusters <= 0: - parser.error("Number of clusters must be > 0.") - - # check if input files exist - for name in options.f: - if not os.path.isfile(name): - sys.exit("{0}: not a valid file. Bye".format(name)) - - return options - - -def display_clust_report(cluster_id): - """ - Display a quick report on the clustering - - Display the number of structures in each cluster, and the fraction of the - overall sequence set they represent. - """ - nclusters = len(cluster_id) - cluster_count = collections.Counter(cluster_id) - for cluster, count in cluster_count.most_common(): - print("cluster {}: {} sequences ({:>2.0f}%)" - .format(cluster, count, 100*count/nclusters)) - - -def write_clusters(fname, cluster_id, medoid_id, seq_names): - """ - Write the result of a clustering in a file - - The output file contains two types of lines: - - * first, lines that start with SEQ_CLU link each sequence header to a - cluster ID; these lines are written n the same order as the input fasta - file(s) - * then, lines that start with MED_CLU link an input sequence to a cluster - as its medoid; these lines are ordered as the cluster IDs so the first - medoid is the medoid of the first cluster. The sequence index given in - these lines start at 1. - - Parameters - ---------- - fname : str - The path to the file to write in - cluster_id : list of int - The cluster ID for each sequence ordered like the sequences - medoid_id : list of int - The index of the medoid for each group in the list of sequences - seq_names: list of str - The header for each sequence - """ - with open(fname, "w") as outfile: - for name, cluster in zip(seq_names, cluster_id): - outfile.write('SEQ_CLU "{}" {} \n'.format(name, cluster)) - for idx, med in enumerate(medoid_id, start=1): - outfile.write('MED_CLU "{}" {} \n'.format(seq_names[med], idx)) - - -def write_distance_matrix(distance_matrix, fname): - """ - Write a distance matrix in a file - - Parameters - ---------- - distance_matrix : 2D numpy array - The matrix to write - fname : str - The path to the file to write in - """ - numpy.savetxt(fname, distance_matrix) - - -def pbclust_cli(): - """ - Run the PBclust command line - """ - # Read user inputs - options = user_input() - header_lst, seq_lst = pbx.io.read_several_fasta(options.f) - - # Load subtitution matrix - try: - substitution_mat = pbx.PB.load_substitution_matrix(SUBSTITUTION_MATRIX_NAME) - except ValueError: - sys.exit("Substitution matrix is not symetric.") - except IOError: - sys.exit("Error reading the substitution matrix.") - - # --compare option - # compare the first sequence (in the fasta file) versus all others - if options.compare: - compare_file_name = options.o + ".PB.compare.fasta" - pbx.analysis.compare(header_lst, seq_lst, substitution_mat, compare_file_name) - sys.exit(0) - - # Compute the distance matrix for the clustering - try: - distance_mat = pbx.analysis.distance_matrix(seq_lst, substitution_mat) - except pbx.PB.InvalidBlockError as e: - sys.exit('Unexpected PB in the input ({})'.format(e.block)) - distance_fname = options.o + ".PB.dist" - write_distance_matrix(distance_mat, distance_fname) - print("wrote {0}".format(distance_fname)) - - # Carry out the clustering - try: - cluster_id, medoid_id = pbx.analysis.hclust(distance_mat, nclusters=options.clusters) - except pbx.analysis.RError as e: - sys.exit('Error with R:\n' + str(e)) - display_clust_report(cluster_id) - output_fname = options.o + ".PB.clust" - write_clusters(output_fname, cluster_id, medoid_id, header_lst) - print("wrote {0}".format(output_fname)) - - -if __name__ == '__main__': - pbclust_cli() diff --git a/pbxplore/tests/test_data/psi_md_traj_all.PB.clust b/pbxplore/tests/test_data/psi_md_traj_all.PB.clust deleted file mode 100644 index 7e5c92d..0000000 --- a/pbxplore/tests/test_data/psi_md_traj_all.PB.clust +++ /dev/null @@ -1,273 +0,0 @@ -SEQ_CLU "psi_md_traj_1.pdb | model 0" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 1" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 2" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 3" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 4" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 5" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 6" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 7" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 8" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 9" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 10" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 11" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 12" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 13" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 14" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 15" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 16" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 17" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 18" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 19" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 20" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 21" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 22" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 23" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 24" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 25" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 26" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 27" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 28" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 29" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 30" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 31" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 32" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 33" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 34" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 35" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 36" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 37" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 38" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 39" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 40" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 41" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 42" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 43" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 44" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 45" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 46" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 47" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 48" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 49" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 50" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 51" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 52" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 53" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 54" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 55" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 56" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 57" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 58" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 59" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 60" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 61" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 62" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 63" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 64" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 65" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 66" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 67" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 68" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 69" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 70" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 71" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 72" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 73" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 74" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 75" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 76" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 77" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 78" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 79" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 80" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 81" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 82" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 83" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 84" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 85" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 86" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 87" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 88" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 89" 1 -SEQ_CLU "psi_md_traj_2.pdb | model 0" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 1" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 2" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 3" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 4" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 5" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 6" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 7" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 8" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 9" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 10" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 11" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 12" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 13" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 14" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 15" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 16" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 17" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 18" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 19" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 20" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 21" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 22" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 23" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 24" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 25" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 26" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 27" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 28" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 29" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 30" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 31" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 32" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 33" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 34" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 35" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 36" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 37" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 38" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 39" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 40" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 41" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 42" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 43" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 44" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 45" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 46" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 47" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 48" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 49" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 50" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 51" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 52" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 53" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 54" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 55" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 56" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 57" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 58" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 59" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 60" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 61" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 62" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 63" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 64" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 65" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 66" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 67" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 68" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 69" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 70" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 71" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 72" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 73" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 74" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 75" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 76" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 77" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 78" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 79" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 80" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 81" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 82" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 83" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 84" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 85" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 86" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 87" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 88" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 89" 2 -SEQ_CLU "psi_md_traj_3.pdb | model 0" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 1" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 2" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 3" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 4" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 5" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 6" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 7" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 8" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 9" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 10" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 11" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 12" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 13" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 14" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 15" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 16" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 17" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 18" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 19" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 20" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 21" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 22" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 23" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 24" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 25" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 26" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 27" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 28" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 29" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 30" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 31" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 32" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 33" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 34" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 35" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 36" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 37" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 38" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 39" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 40" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 41" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 42" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 43" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 44" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 45" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 46" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 47" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 48" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 49" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 50" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 51" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 52" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 53" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 54" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 55" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 56" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 57" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 58" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 59" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 60" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 61" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 62" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 63" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 64" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 65" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 66" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 67" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 68" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 69" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 70" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 71" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 72" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 73" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 74" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 75" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 76" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 77" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 78" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 79" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 80" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 81" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 82" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 83" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 84" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 85" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 86" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 87" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 88" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 89" 3 -MED_CLU "psi_md_traj_1.pdb | model 65" 1 -MED_CLU "psi_md_traj_2.pdb | model 47" 2 -MED_CLU "psi_md_traj_3.pdb | model 41" 3 diff --git a/pbxplore/tests/test_data/psi_md_traj_all_c5.PB.clust b/pbxplore/tests/test_data/psi_md_traj_all_c5.PB.clust deleted file mode 100644 index e044ddd..0000000 --- a/pbxplore/tests/test_data/psi_md_traj_all_c5.PB.clust +++ /dev/null @@ -1,275 +0,0 @@ -SEQ_CLU "psi_md_traj_1.pdb | model 0" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 1" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 2" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 3" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 4" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 5" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 6" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 7" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 8" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 9" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 10" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 11" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 12" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 13" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 14" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 15" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 16" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 17" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 18" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 19" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 20" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 21" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 22" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 23" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 24" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 25" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 26" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 27" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 28" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 29" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 30" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 31" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 32" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 33" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 34" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 35" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 36" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 37" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 38" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 39" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 40" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 41" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 42" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 43" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 44" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 45" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 46" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 47" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 48" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 49" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 50" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 51" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 52" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 53" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 54" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 55" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 56" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 57" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 58" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 59" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 60" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 61" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 62" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 63" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 64" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 65" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 66" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 67" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 68" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 69" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 70" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 71" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 72" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 73" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 74" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 75" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 76" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 77" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 78" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 79" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 80" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 81" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 82" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 83" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 84" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 85" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 86" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 87" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 88" 1 -SEQ_CLU "psi_md_traj_1.pdb | model 89" 1 -SEQ_CLU "psi_md_traj_2.pdb | model 0" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 1" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 2" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 3" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 4" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 5" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 6" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 7" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 8" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 9" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 10" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 11" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 12" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 13" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 14" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 15" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 16" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 17" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 18" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 19" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 20" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 21" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 22" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 23" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 24" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 25" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 26" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 27" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 28" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 29" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 30" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 31" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 32" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 33" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 34" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 35" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 36" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 37" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 38" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 39" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 40" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 41" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 42" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 43" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 44" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 45" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 46" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 47" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 48" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 49" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 50" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 51" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 52" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 53" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 54" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 55" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 56" 2 -SEQ_CLU "psi_md_traj_2.pdb | model 57" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 58" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 59" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 60" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 61" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 62" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 63" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 64" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 65" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 66" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 67" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 68" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 69" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 70" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 71" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 72" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 73" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 74" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 75" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 76" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 77" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 78" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 79" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 80" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 81" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 82" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 83" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 84" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 85" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 86" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 87" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 88" 3 -SEQ_CLU "psi_md_traj_2.pdb | model 89" 3 -SEQ_CLU "psi_md_traj_3.pdb | model 0" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 1" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 2" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 3" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 4" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 5" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 6" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 7" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 8" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 9" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 10" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 11" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 12" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 13" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 14" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 15" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 16" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 17" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 18" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 19" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 20" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 21" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 22" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 23" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 24" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 25" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 26" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 27" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 28" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 29" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 30" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 31" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 32" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 33" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 34" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 35" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 36" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 37" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 38" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 39" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 40" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 41" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 42" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 43" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 44" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 45" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 46" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 47" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 48" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 49" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 50" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 51" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 52" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 53" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 54" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 55" 4 -SEQ_CLU "psi_md_traj_3.pdb | model 56" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 57" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 58" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 59" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 60" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 61" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 62" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 63" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 64" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 65" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 66" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 67" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 68" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 69" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 70" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 71" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 72" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 73" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 74" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 75" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 76" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 77" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 78" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 79" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 80" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 81" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 82" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 83" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 84" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 85" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 86" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 87" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 88" 5 -SEQ_CLU "psi_md_traj_3.pdb | model 89" 5 -MED_CLU "psi_md_traj_1.pdb | model 65" 1 -MED_CLU "psi_md_traj_2.pdb | model 33" 2 -MED_CLU "psi_md_traj_2.pdb | model 74" 3 -MED_CLU "psi_md_traj_3.pdb | model 0" 4 -MED_CLU "psi_md_traj_3.pdb | model 87" 5 diff --git a/pbxplore/tests/test_regression.py b/pbxplore/tests/test_regression.py index a7d3f42..2d87be7 100644 --- a/pbxplore/tests/test_regression.py +++ b/pbxplore/tests/test_regression.py @@ -348,58 +348,6 @@ def test_first_residue_negative(self): first_residue=-20) -class TestPBclust(TemplateTestCase): - def _build_command_line(self, input_files, output, - clusters=None, compare=False): - output_full_path = os.path.join(self._temp_directory, output) - command = ['PBclust', '-o', output_full_path] - for input_file in input_files: - command += ['-f', os.path.join(REFDIR, input_file)] - if clusters is not None: - command += ['--clusters', str(clusters)] - if compare: - command += ['--compare'] - return command - - def _validate_output(self, reference, input_files, output, - clusters=None, compare=False, **kwargs): - output = os.path.join(self._temp_directory, output) - if compare: - # Asses the validity of the distance file - reference_full_path = os.path.join(REFDIR, - reference + '.PB.compare.fasta') - output_full_path = output + '.PB.compare.fasta' - _assert_identical_files(output_full_path, reference_full_path) - else: - # Asses the validity of the main output od PBclust (the clust file) - reference_full_path = os.path.join(REFDIR, reference + '.PB.clust') - output_full_path = output + '.PB.clust' - _assert_identical_files(output_full_path, reference_full_path) - - def test_default_single_input(self): - self._run_program_and_validate(reference='psi_md_traj_all', - input_files=['psi_md_traj_all.PB.fasta', ], - output='output', clusters=3) - - def test_default_multi_input(self): - self._run_program_and_validate(reference='psi_md_traj_all', - input_files=['psi_md_traj_1.PB.fasta', - 'psi_md_traj_2.PB.fasta', - 'psi_md_traj_3.PB.fasta'], - output='output', clusters=3) - - def test_nclusters(self): - self._run_program_and_validate(reference='psi_md_traj_all_c5', - input_files=['psi_md_traj_all.PB.fasta', ], - output='output', - clusters=5) - - def test_compare(self): - self._run_program_and_validate(reference='psi_md_traj_1', - input_files=['psi_md_traj_1.PB.fasta', ], - output='output', - compare=True) - class TestPBstat(TemplateTestCase): def _build_command_line(self, input_file, output, mapdist=False, neq=False, diff --git a/run_demo2_clusters.sh b/run_demo2_clusters.sh deleted file mode 100755 index 6be2a0a..0000000 --- a/run_demo2_clusters.sh +++ /dev/null @@ -1,65 +0,0 @@ -#! /bin/bash - -# 2013 - P. Poulain, A. G. de Brevern - -# exit script at first error -set -e - -function pause(){ - read -r -s -n1 -p "Press any key to continue." - echo -} - - -echo "#------------------------------------------------------------------------#" -echo "| |" -echo "| Demo script for PBxplore: multiple conformation clustering |" -echo "| |" -echo "#------------------------------------------------------------------------#" - -pause - -# create and move into the demo directory -DATA_PATH=$(PBdata) -DEMO_PATH=demo2_clusters_tmp -INPUT_FILES=(psi_md_traj_all.PB.fasta) -mkdir -p $DEMO_PATH -for input_file in ${INPUT_FILES[@]} -do - cp ${DATA_PATH}/${input_file} $DEMO_PATH -done - -cd $DEMO_PATH - -echo -e "\n" -echo "#------------------------------------------------------------------------#" -echo "| |" -echo "| Cluster structures |" -echo "| |" -echo "#------------------------------------------------------------------------#" - -echo -e "\n" -echo "Produce with 3 clusters (--clusters option)" -echo "PBclust -f psi_md_traj_all.PB.fasta -o psi_md_traj_all_3 --clusters 3" -pause -PBclust -f psi_md_traj_all.PB.fasta -o psi_md_traj_all_3 --clusters 3 - -echo -e "\n" -echo "Compare all sequences against the first one (--compare option)" -echo "PBclust -f psi_md_traj_all.PB.fasta -o psi_md_traj_all --compare" -pause -PBclust -f psi_md_traj_all.PB.fasta -o psi_md_traj_all --compare - - -echo -e "\n" -echo "#------------------------------------------------------------------------#" -echo "| |" -echo "| Demo completed! |" -echo "| |" -echo "#------------------------------------------------------------------------#" -echo -echo "Look at *.PB.* files in the demo2_clusters_tmp directory." -pwd -ls -lh -echo "Do not forget to delete demo2_clusters_tmp directory when you will be done with this demo." - diff --git a/setup.py b/setup.py index 504d5a4..880daac 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,6 @@ entry_points={ 'console_scripts': [ 'PBassign = pbxplore.scripts.PBassign:pbassign_cli', - 'PBclust = pbxplore.scripts.PBclust:pbclust_cli', 'PBcount = pbxplore.scripts.PBcount:pbcount_cli', 'PBstat = pbxplore.scripts.PBstat:pbstat_cli', 'PBdata = pbxplore.scripts.PBdata:pbdata_cli',