Merge pull request #529 from padix-key/numpy2

Update to NumPy 2.0
biotite-dev · Jun 30, 2024 · e43eff6 · e43eff6
2 parents 2331695 + 5bbc62b
commit e43eff6
Show file tree

Hide file tree

Showing 10 changed files with 125 additions and 118 deletions.
diff --git a/doc/tutorial/structure/index.rst b/doc/tutorial/structure/index.rst
@@ -54,4 +54,3 @@ contains functions for structure analysis and manipulation.
     measurement
     segments
     nucleotide
-    trajectories
diff --git a/environment.yml b/environment.yml
@@ -19,10 +19,10 @@ dependencies:
   # Biotite dependencies
   - msgpack-python >=0.5.6
   - networkx >=2.0
-  - numpy >=1.15, <2.0
+  - numpy >=2.0
   - requests >=2.12
   # Testing
-  - mdtraj >=1.9.3, <1.10
+  # - mdtraj >=1.9.3, <1.10  # temporarily disabled due to incompatibility with numpy 2.0
   - pytest >=7.0
   # Interfaced software in biotite.application (can also be installed separately)
   - autodock-vina

diff --git a/pyproject.toml b/pyproject.toml
@@ -18,13 +18,10 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
 ]
-# Based on https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
-# When updating our minimum supported python version follow minimums set in this setup.cfg
-# as of 2022-01 for 3.7 "numpy >= 1.14.5", for 3.8 "numpy >= 1.17.3", for 3.9 "numpy >= 1.19.3"
-# this should be manually updated as the minimum python version increases
+
 dependencies = [
   "requests >= 2.12",
-  "numpy >= 1.14.5, < 2.0",
+  "numpy >= 2.0",
   "msgpack >= 0.5.6",
   "networkx >= 2.0",
 ]
@@ -68,7 +65,7 @@ requires = [
     "hatchling",
     "hatch-vcs == 0.4",
     "hatch-cython == 0.5",
-    "oldest-supported-numpy",
+    "numpy >= 2.0",
     "cython >= 3.0",
 ]
 build-backend = "hatchling.build"
diff --git a/src/biotite/sequence/align/alignment.py b/src/biotite/sequence/align/alignment.py
@@ -22,7 +22,7 @@ class Alignment(object):
     An :class:`Alignment` object stores information about which symbols
     of *n* sequences are aligned to each other and it stores the
     corresponding alignment score.
-    
+
     Instead of saving a list of aligned symbols, this class saves the
     original *n* sequences, that were aligned, and a so called *trace*,
     which indicates the aligned symbols of these sequences.
@@ -31,16 +31,16 @@ class Alignment(object):
     Each element of the trace is the index in the corresponding
     sequence.
     A gap is represented by the value -1.
-    
+
     Furthermore this class provides multiple utility functions for
     conversion into strings in order to make the alignment human
     readable.
-    
+
     Unless an :class:`Alignment` object is the result of a multiple
     sequence alignment, the object will contain only two sequences.
-    
+
     All attributes of this class are publicly accessible.
-    
+
     Parameters
     ----------
     sequences : list
@@ -49,7 +49,7 @@ class Alignment(object):
         The alignment trace.
     score : int, optional
         Alignment score.
-    
+
     Attributes
     ----------
     sequences : list
@@ -58,10 +58,10 @@ class Alignment(object):
         The alignment trace.
     score : int
         Alignment score.
-    
+
     Examples
     --------
-    
+
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -107,19 +107,19 @@ def _gapped_str(self, seq_index):
             else:
                 seq_str += "-"
         return seq_str
-    
+
     def get_gapped_sequences(self):
         """
         Get the string representation of the gapped sequences.
-        
+
         Returns
         -------
         sequences : list of str
             The list of gapped sequence strings. The order is the same
             as in `Alignment.sequences`.
         """
         return [self._gapped_str(i) for i in range(len(self.sequences))]
-    
+
     def __str__(self):
         # Check if any of the sequences
         # has a non-single letter alphabet
@@ -143,7 +143,7 @@ def __str__(self):
             return ali_str[:-2]
         else:
             return super().__str__()
-    
+
     def __getitem__(self, index):
         if isinstance(index, tuple):
             if len(index) > 2:
@@ -162,13 +162,13 @@ def __getitem__(self, index):
             )
         else:
             return Alignment(self.sequences, self.trace[index], self.score)
-    
+
     def __iter__(self):
         raise TypeError("'Alignment' object is not iterable")
-    
+
     def __len__(self):
         return len(self.trace)
-    
+
     def __eq__(self, item):
         if not isinstance(item, Alignment):
             return False
@@ -179,7 +179,7 @@ def __eq__(self, item):
         if self.score != item.score:
             return False
         return True
-    
+
     @staticmethod
     def _index_sequences(sequences, index):
         if isinstance(index, (list, tuple)) or \
@@ -193,19 +193,19 @@ def _index_sequences(sequences, index):
             raise IndexError(
                 f"Invalid alignment index type '{type(index).__name__}'"
             )
-    
+
     @staticmethod
     def trace_from_strings(seq_str_list):
         """
         Create a trace from strings that represent aligned sequences.
-        
+
         Parameters
         ----------
         seq_str_list : list of str
             The strings, where each one represents a sequence
             (with gaps) in an alignment.
             A ``-`` is interpreted as gap.
-        
+
         Returns
         -------
         trace : ndarray, dtype=int, shape=(n,2)
@@ -238,22 +238,22 @@ def get_codes(alignment):
     Instead of the indices of the aligned symbols (trace), the return
     value contains the corresponding symbol codes for each index.
     Gaps are still represented by *-1*.
-    
+
     Parameters
     ----------
     alignment : Alignment
         The alignment to get the sequence codes for.
-    
+
     Returns
     -------
     codes : ndarray, dtype=int, shape=(n,m)
         The sequence codes for the alignment.
         The shape is *(n,m)* for *n* sequences and *m* alignment columns.
         The array uses *-1* values for gaps.
-    
+
     Examples
     --------
-    
+
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -267,14 +267,17 @@ def get_codes(alignment):
     """
     trace = alignment.trace
     sequences = alignment.sequences
-    
+
     # The number of sequences is the first dimension
-    codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=int)
+    codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64)
     for i in range(len(sequences)):
+        # Mark -1 explicitly as int64 to avoid that the unsigned dtype
+        # of the sequence code is used
+        # (https://numpy.org/neps/nep-0050-scalar-promotion.html)
         codes[i] = np.where(
-            trace[:,i] != -1, sequences[i].code[trace[:,i]], -1
+            trace[:,i] != -1, sequences[i].code[trace[:,i]], np.int64(-1)
         )
-    
+
     return np.stack(codes)
 
 
@@ -283,24 +286,24 @@ def get_symbols(alignment):
     Similar to :func:`get_codes()`, but contains the decoded symbols
     instead of codes.
     Gaps are still represented by *None* values.
-    
+
     Parameters
     ----------
     alignment : Alignment
         The alignment to get the symbols for.
-    
+
     Returns
     -------
     symbols : list of list
         The nested list of symbols.
-    
+
     See Also
     --------
     get_codes
 
     Examples
     --------
-    
+
     >>> seq1 = NucleotideSequence("CGTCAT")
     >>> seq2 = NucleotideSequence("TCATGC")
     >>> matrix = SubstitutionMatrix.std_nucleotide_matrix()
@@ -317,8 +320,8 @@ def get_symbols(alignment):
         alphabet = alignment.sequences[i].get_alphabet()
         codes_wo_gaps = codes[i, codes[i] != -1]
         symbols_wo_gaps = alphabet.decode_multiple(codes_wo_gaps)
-        if not isinstance(symbols_wo_gaps, list):
-            symbols_wo_gaps = list(symbols_wo_gaps)
+        if isinstance(symbols_wo_gaps, np.ndarray):
+            symbols_wo_gaps = symbols_wo_gaps.tolist()
         symbols_for_seq = np.full(len(codes[i]), None, dtype=object)
         symbols_for_seq[codes[i] != -1] = symbols_wo_gaps
         symbols[i] = symbols_for_seq.tolist()
@@ -331,7 +334,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
 
     The identity is equal to the matches divided by a measure for the
     length of the alignment that depends on the `mode` parameter.
-    
+
     Parameters
     ----------
     alignment : Alignment
@@ -348,12 +351,12 @@ def get_sequence_identity(alignment, mode="not_terminal"):
               length of the shortest sequence.
 
         Default is *not_terminal*.
-    
+
     Returns
     -------
     identity : float
         The sequence identity, ranging between 0 and 1.
-    
+
     See also
     --------
     get_pairwise_sequence_identity
@@ -368,7 +371,7 @@ def get_sequence_identity(alignment, mode="not_terminal"):
         unique_symbols = np.unique(column)
         if len(unique_symbols) == 1 and unique_symbols[0] != -1:
             matches += 1
-    
+
     # Calculate length
     if mode == "all":
         length = len(alignment)
@@ -394,7 +397,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
 
     The identity is equal to the matches divided by a measure for the
     length of the alignment that depends on the `mode` parameter.
-    
+
     Parameters
     ----------
     alignment : Alignment, length=n
@@ -411,12 +414,12 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
               length of the shortest one of the two sequences.
 
         Default is *not_terminal*.
-    
+
     Returns
     -------
     identity : ndarray, dtype=float, shape=(n,n)
         The pairwise sequence identity, ranging between 0 and 1.
-    
+
     See also
     --------
     get_sequence_identity
@@ -458,7 +461,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"):
                 ])
     else:
         raise ValueError(f"'{mode}' is an invalid calculation mode")
-    
+
     return matches / length
 
 
@@ -468,7 +471,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
 
     If the alignment contains more than two sequences,
     all pairwise scores are counted.
-    
+
     Parameters
     ----------
     alignment : Alignment
@@ -485,7 +488,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
     terminal_penalty : bool, optional
         If true, gap penalties are applied to terminal gaps.
         (Default: True)
-    
+
     Returns
     -------
     score : int
@@ -509,7 +512,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True):
                 # Ignore gaps
                 if code_i != -1 and code_j != -1:
                     score += matrix[code_i, code_j]
-    
+
     # Sum gap penalties
     if type(gap_penalty) == int:
         gap_open = gap_penalty
@@ -598,7 +601,7 @@ def find_terminal_gaps(alignment):
     # The terminal gaps are before all sequences start and after any
     # sequence ends
     # Use exclusive stop -> -1
-    return np.max(firsts), np.min(lasts) + 1
+    return np.max(firsts).item(), np.min(lasts).item() + 1
 
 
 def remove_terminal_gaps(alignment):

diff --git a/src/biotite/sequence/align/kmertable.pyx b/src/biotite/sequence/align/kmertable.pyx
@@ -1352,7 +1352,8 @@ cdef class KmerTable:
 
 
     def __iter__(self):
-        return iter(self.get_kmers())
+        for kmer in self.get_kmers():
+            yield kmer.item()
 
 
     def __reversed__(self):
@@ -3394,7 +3395,7 @@ def _to_string(table):
         else:
             symbols = str(tuple(symbols))
         line = symbols + ": " + ", ".join(
-            [str(tuple(pos)) for pos in table[kmer]]
+            [str((ref_id.item(), pos.item())) for ref_id, pos in table[kmer]]
         )
         lines.append(line)
     return "\n".join(lines)