Skip to content

Commit

Permalink
Fixes GAP assignment error in fasta2select (#3156)
Browse files Browse the repository at this point in the history
Fixes #3124

## Work done in this PR
- Moves the assignment of the GAP variable in fasta2select in order to avoid cases where it is referred to but not assigned.
  • Loading branch information
sulays authored Apr 10, 2021
1 parent 13a5df0 commit 8a5ef48
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 15 deletions.
1 change: 1 addition & 0 deletions package/AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ Chronological list of authors
- Paarth Thadani
- Henry Kobin
- Kosuke Kudo
- Sulay Shah

External code
-------------
Expand Down
4 changes: 3 additions & 1 deletion package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@ The rules for this file:
calcraven,xiki-tempula, mieczyslaw, manuel.nuno.melo, PicoCentauri,
hanatok, rmeli, aditya-kamath, tirkarthi, LeonardoBarneschi, hejamu,
biogen98, orioncohen, z3y50n, hp115, ojeda-e, thadanipaarth, HenryKobin,
1ut
1ut, sulays

* 2.0.0

Fixes
* Fixed issue with unassigned 'GAP' variable in fasta2select function when
resids are provided in input (Issue #3124)
* Improve diffusionmap coverage (Issue #3208)
* Removed deprecated parameters `n_jobs` and `precompute_distances` of
sklearn.cluster.KMeans (Issue #2986)
Expand Down
2 changes: 1 addition & 1 deletion package/MDAnalysis/analysis/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -1073,12 +1073,12 @@ def fasta2select(fastafilename, is_aligned=False,
# implicit assertion that we only have two sequences in the alignment
orig_resids = [ref_resids, target_resids]
offsets = [ref_offset, target_offset]
GAP = "-"
for iseq, a in enumerate(alignment):
# need iseq index to change orig_resids
if orig_resids[iseq] is None:
# build default: assume consecutive numbering of all
# residues in the alignment
GAP = "-"
length = len(a.seq) - a.seq.count(GAP)
orig_resids[iseq] = np.arange(1, length + 1)
else:
Expand Down
29 changes: 16 additions & 13 deletions testsuite/MDAnalysisTests/analysis/test_align.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,15 +465,14 @@ def test_average_structure_in_memory(self, universe):

class TestAlignmentProcessing(object):
seq = FASTA
error_msg = "selection string has unexpected length"

def test_fasta2select_aligned(self):
"""test align.fasta2select() on aligned FASTA (Issue 112)"""
sel = align.fasta2select(self.seq, is_aligned=True)
# length of the output strings, not residues or anything real...
assert len(sel['reference']) == 30623, ("selection string has"
"unexpected length")
assert len(
sel['mobile']) == 30623, "selection string has unexpected length"
assert len(sel['reference']) == 30623, self.error_msg
assert len(sel['mobile']) == 30623, self.error_msg

@pytest.mark.skipif(executable_not_found("clustalw2"),
reason="Test skipped because clustalw2 executable not found")
Expand All @@ -483,10 +482,8 @@ def test_fasta2select_file(self, tmpdir):
with tmpdir.as_cwd():
sel = align.fasta2select(self.seq, is_aligned=False,
alnfilename=None, treefilename=None)
assert len(sel['reference']) == 23080, ("selection string has"
"unexpected length")
assert len(sel['mobile']) == 23090, ("selection string has"
"unexpected length")
assert len(sel['reference']) == 23080, self.error_msg
assert len(sel['mobile']) == 23090, self.error_msg

@pytest.mark.skipif(executable_not_found("clustalw2"),
reason="Test skipped because clustalw2 executable not found")
Expand All @@ -500,11 +497,17 @@ def test_fasta2select_ClustalW(self, tmpdir):
# numbers computed from alignment with clustalw 2.1 on Mac OS X
# [orbeckst] length of the output strings, not residues or anything
# real...
assert len(sel['reference']) == 23080, ("selection string has"
"unexpected length")
assert len(
sel['mobile']) == 23090, "selection string has unexpected length"

assert len(sel['reference']) == 23080, self.error_msg
assert len(sel['mobile']) == 23090, self.error_msg

def test_fasta2select_resids(self, tmpdir):
    """Regression test for align.fasta2select() with explicit resids.

    Covers Issue #3124: supplying both ``ref_resids`` and ``target_resids``
    must not fail because of an unassigned ``GAP`` variable.
    """
    residue_ids = list(range(705))
    selections = align.fasta2select(self.seq, is_aligned=True,
                                    ref_resids=residue_ids,
                                    target_resids=residue_ids)
    # Only the length of each generated selection string is compared;
    # the numbers carry no structural meaning.
    for key in ('reference', 'mobile'):
        assert len(selections[key]) == 30621, self.error_msg

def test_sequence_alignment():
u = mda.Universe(PSF)
Expand Down

0 comments on commit 8a5ef48

Please sign in to comment.