Skip to content

Commit

Permalink
Merge pull request #529 from padix-key/numpy2
Browse files Browse the repository at this point in the history
Update to NumPy 2.0
  • Loading branch information
padix-key committed Aug 20, 2024
2 parents 0404084 + e1d8300 commit 5133e0c
Show file tree
Hide file tree
Showing 10 changed files with 125 additions and 118 deletions.
1 change: 0 additions & 1 deletion doc/tutorial/structure/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,3 @@ contains functions for structure analysis and manipulation.
measurement
segments
nucleotide
trajectories
4 changes: 2 additions & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ dependencies:
# Biotite dependencies
- msgpack-python >=0.5.6
- networkx >=2.0
- numpy >=1.15, <2.0
- numpy >=2.0
- requests >=2.12
# Testing
- mdtraj >=1.9.3, <1.10
# - mdtraj >=1.9.3, <1.10 # temporarily disabled due to incompatibility with numpy 2.0
- pytest >=7.0
# Interfaced software in biotite.application (can also be installed separately)
- autodock-vina
Expand Down
9 changes: 3 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@ classifiers = [
"Programming Language :: Python :: Implementation :: CPython",
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
# Based on https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
# When updating our minimum supported python version follow minimums set in this setup.cfg
# as of 2022-01 for 3.7 "numpy >= 1.14.5", for 3.8 "numpy >= 1.17.3", for 3.9 "numpy >= 1.19.3"
# this should be manually updated as the minimum python version increases

dependencies = [
"requests >= 2.12",
"numpy >= 1.14.5, < 2.0",
"numpy >= 2.0",
"msgpack >= 0.5.6",
"networkx >= 2.0",
]
Expand Down Expand Up @@ -68,7 +65,7 @@ requires = [
"hatchling",
"hatch-vcs == 0.4",
"hatch-cython == 0.5",
"oldest-supported-numpy",
"numpy >= 2.0",
"cython >= 3.0",
]
build-backend = "hatchling.build"
93 changes: 48 additions & 45 deletions src/biotite/sequence/align/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class Alignment(object):
An :class:`Alignment` object stores information about which symbols
of *n* sequences are aligned to each other and it stores the
corresponding alignment score.
Instead of saving a list of aligned symbols, this class saves the
original *n* sequences that were aligned, and a so-called *trace*,
which indicates the aligned symbols of these sequences.
Expand All @@ -31,16 +31,16 @@ class Alignment(object):
Each element of the trace is the index in the corresponding
sequence.
A gap is represented by the value -1.
Furthermore this class provides multiple utility functions for
conversion into strings in order to make the alignment human
readable.
Unless an :class:`Alignment` object is the result of a multiple
sequence alignment, the object will contain only two sequences.
All attributes of this class are publicly accessible.
Parameters
----------
sequences : list
Expand All @@ -49,7 +49,7 @@ class Alignment(object):
The alignment trace.
score : int, optional
Alignment score.
Attributes
----------
sequences : list
Expand All @@ -58,10 +58,10 @@ class Alignment(object):
The alignment trace.
score : int
Alignment score.
Examples
--------
>>> seq1 = NucleotideSequence("CGTCAT")
>>> seq2 = NucleotideSequence("TCATGC")
>>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
Expand Down Expand Up @@ -107,19 +107,19 @@ def _gapped_str(self, seq_index):
else:
seq_str += "-"
return seq_str

def get_gapped_sequences(self):
"""
Get the string representation of the gapped sequences.
Returns
-------
sequences : list of str
The list of gapped sequence strings. The order is the same
as in `Alignment.sequences`.
"""
return [self._gapped_str(i) for i in range(len(self.sequences))]

def __str__(self):
# Check if any of the sequences
# has a non-single letter alphabet
Expand All @@ -143,7 +143,7 @@ def __str__(self):
return ali_str[:-2]
else:
return super().__str__()

def __getitem__(self, index):
if isinstance(index, tuple):
if len(index) > 2:
Expand All @@ -162,13 +162,13 @@ def __getitem__(self, index):
)
else:
return Alignment(self.sequences, self.trace[index], self.score)

def __iter__(self):
raise TypeError("'Alignment' object is not iterable")

def __len__(self):
return len(self.trace)

def __eq__(self, item):
if not isinstance(item, Alignment):
return False
Expand All @@ -179,7 +179,7 @@ def __eq__(self, item):
if self.score != item.score:
return False
return True

@staticmethod
def _index_sequences(sequences, index):
if isinstance(index, (list, tuple)) or \
Expand All @@ -193,19 +193,19 @@ def _index_sequences(sequences, index):
raise IndexError(
f"Invalid alignment index type '{type(index).__name__}'"
)

@staticmethod
def trace_from_strings(seq_str_list):
"""
Create a trace from strings that represent aligned sequences.
Parameters
----------
seq_str_list : list of str
The strings, where each one represents a sequence
(with gaps) in an alignment.
A ``-`` is interpreted as gap.
Returns
-------
trace : ndarray, dtype=int, shape=(n,2)
Expand Down Expand Up @@ -238,22 +238,22 @@ def get_codes(alignment):
Instead of the indices of the aligned symbols (trace), the return
value contains the corresponding symbol codes for each index.
Gaps are still represented by *-1*.
Parameters
----------
alignment : Alignment
The alignment to get the sequence codes for.
Returns
-------
codes : ndarray, dtype=int, shape=(n,m)
The sequence codes for the alignment.
The shape is *(n,m)* for *n* sequences and *m* alignment columns.
The array uses *-1* values for gaps.
Examples
--------
>>> seq1 = NucleotideSequence("CGTCAT")
>>> seq2 = NucleotideSequence("TCATGC")
>>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
Expand All @@ -267,14 +267,17 @@ def get_codes(alignment):
"""
trace = alignment.trace
sequences = alignment.sequences

# The number of sequences is the first dimension
codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=int)
codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64)
for i in range(len(sequences)):
# Mark -1 explicitly as int64 to avoid that the unsigned dtype
# of the sequence code is used
# (https://numpy.org/neps/nep-0050-scalar-promotion.html)
codes[i] = np.where(
trace[:,i] != -1, sequences[i].code[trace[:,i]], -1
trace[:,i] != -1, sequences[i].code[trace[:,i]], np.int64(-1)
)

return np.stack(codes)


Expand All @@ -283,24 +286,24 @@ def get_symbols(alignment):
Similar to :func:`get_codes()`, but contains the decoded symbols
instead of codes.
Gaps are still represented by *None* values.
Parameters
----------
alignment : Alignment
The alignment to get the symbols for.
Returns
-------
symbols : list of list
The nested list of symbols.
See Also
--------
get_codes
Examples
--------
>>> seq1 = NucleotideSequence("CGTCAT")
>>> seq2 = NucleotideSequence("TCATGC")
>>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
Expand All @@ -317,8 +320,8 @@ def get_symbols(alignment):
alphabet = alignment.sequences[i].get_alphabet()
codes_wo_gaps = codes[i, codes[i] != -1]
symbols_wo_gaps = alphabet.decode_multiple(codes_wo_gaps)
if not isinstance(symbols_wo_gaps, list):
symbols_wo_gaps = list(symbols_wo_gaps)
if isinstance(symbols_wo_gaps, np.ndarray):
symbols_wo_gaps = symbols_wo_gaps.tolist()
symbols_for_seq = np.full(len(codes[i]), None, dtype=object)
symbols_for_seq[codes[i] != -1] = symbols_wo_gaps
symbols[i] = symbols_for_seq.tolist()
Expand All @@ -331,7 +334,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
The identity is equal to the matches divided by a measure for the
length of the alignment that depends on the `mode` parameter.
Parameters
----------
alignment : Alignment
Expand All @@ -348,12 +351,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
length of the shortest sequence.
Default is *not_terminal*.
Returns
-------
identity : float
The sequence identity, ranging between 0 and 1.
See also
--------
get_pairwise_sequence_identity
Expand All @@ -368,7 +371,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
unique_symbols = np.unique(column)
if len(unique_symbols) == 1 and unique_symbols[0] != -1:
matches += 1

# Calculate length
if mode == "all":
length = len(alignment)
Expand All @@ -394,7 +397,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
The identity is equal to the matches divided by a measure for the
length of the alignment that depends on the `mode` parameter.
Parameters
----------
alignment : Alignment, length=n
Expand All @@ -411,12 +414,12 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
length of the shortest one of the two sequences.
Default is *not_terminal*.
Returns
-------
identity : ndarray, dtype=float, shape=(n,n)
The pairwise sequence identity, ranging between 0 and 1.
See also
--------
get_sequence_identity
Expand Down Expand Up @@ -458,7 +461,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
])
else:
raise ValueError(f"'{mode}' is an invalid calculation mode")

return matches / length


Expand All @@ -468,7 +471,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
If the alignment contains more than two sequences,
all pairwise scores are counted.
Parameters
----------
alignment : Alignment
Expand All @@ -485,7 +488,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
terminal_penalty : bool, optional
If true, gap penalties are applied to terminal gaps.
(Default: True)
Returns
-------
score : int
Expand All @@ -509,7 +512,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
# Ignore gaps
if code_i != -1 and code_j != -1:
score += matrix[code_i, code_j]

# Sum gap penalties
if type(gap_penalty) == int:
gap_open = gap_penalty
Expand Down Expand Up @@ -598,7 +601,7 @@ def find_terminal_gaps(alignment):
# The terminal gaps are before all sequences start and after any
# sequence ends
# Use exclusive stop -> -1
return np.max(firsts), np.min(lasts) + 1
return np.max(firsts).item(), np.min(lasts).item() + 1


def remove_terminal_gaps(alignment):
Expand Down
5 changes: 3 additions & 2 deletions src/biotite/sequence/align/kmertable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1352,7 +1352,8 @@ cdef class KmerTable:


def __iter__(self):
return iter(self.get_kmers())
for kmer in self.get_kmers():
yield kmer.item()


def __reversed__(self):
Expand Down Expand Up @@ -3394,7 +3395,7 @@ def _to_string(table):
else:
symbols = str(tuple(symbols))
line = symbols + ": " + ", ".join(
[str(tuple(pos)) for pos in table[kmer]]
[str((ref_id.item(), pos.item())) for ref_id, pos in table[kmer]]
)
lines.append(line)
return "\n".join(lines)
Loading

0 comments on commit 5133e0c

Please sign in to comment.