From ae8cf931240207c58349fe0a0b51cb3481e23c5c Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Sat, 3 Feb 2024 15:10:51 +0100 Subject: [PATCH 1/7] Remove deprecated NumPy API usage --- src/biotite/structure/filter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/biotite/structure/filter.py b/src/biotite/structure/filter.py index d32bce085..da4d34cfb 100644 --- a/src/biotite/structure/filter.py +++ b/src/biotite/structure/filter.py @@ -479,7 +479,7 @@ def filter_first_altloc(atoms, altloc_ids): 1 CB 4.000 5.000 6.000 """ # Filter all atoms without altloc code - altloc_filter = np.in1d(altloc_ids, [".", "?", " ", ""]) + altloc_filter = np.isin(altloc_ids, [".", "?", " ", ""]) # And filter all atoms for each residue with the first altloc ID residue_starts = get_residue_starts(atoms, add_exclusive_stop=True) @@ -556,7 +556,7 @@ def filter_highest_occupancy_altloc(atoms, altloc_ids, occupancies): 1 CB 6.000 5.000 4.000 """ # Filter all atoms without altloc code - altloc_filter = np.in1d(altloc_ids, [".", "?", " ", ""]) + altloc_filter = np.isin(altloc_ids, [".", "?", " ", ""]) # And filter all atoms for each residue with the highest sum of # occupancies From d6d7cca760598de550b0f87c33e062faaaaf55d0 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Sat, 3 Feb 2024 15:22:14 +0100 Subject: [PATCH 2/7] Update NumPy version --- environment.yml | 2 +- pyproject.toml | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/environment.yml b/environment.yml index ba7cc77ec..04672cb48 100644 --- a/environment.yml +++ b/environment.yml @@ -19,7 +19,7 @@ dependencies: # Biotite dependencies - msgpack-python >=0.5.6 - networkx >=2.0 - - numpy >=1.15, <2.0 + - numpy >=2.0 - requests >=2.12 # Testing - mdtraj >=1.9.3, <1.10 diff --git a/pyproject.toml b/pyproject.toml index f1fa2e07b..fe580be37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,13 +18,10 @@ classifiers = [ 
"Programming Language :: Python :: Implementation :: CPython", "Topic :: Scientific/Engineering :: Bio-Informatics", ] -# Based on https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg -# When updating our minimum supported python version follow minimums set in this setup.cfg -# as of 2022-01 for 3.7 "numpy >= 1.14.5", for 3.8 "numpy >= 1.17.3", for 3.9 "numpy >= 1.19.3" -# this should be manually updated as the minimum python version increases + dependencies = [ "requests >= 2.12", - "numpy >= 1.14.5, < 2.0", + "numpy >= 2.0", "msgpack >= 0.5.6", "networkx >= 2.0", ] @@ -68,7 +65,7 @@ requires = [ "hatchling", "hatch-vcs == 0.4", "hatch-cython == 0.5", - "oldest-supported-numpy", + "numpy >= 2.0", "cython >= 3.0", ] build-backend = "hatchling.build" From 68f73a0a4786ccc635861982287c5eb273fde584 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Mon, 1 Apr 2024 15:03:56 +0200 Subject: [PATCH 3/7] Remove unused imports --- src/biotite/sequence/align/selector.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/biotite/sequence/align/selector.pyx b/src/biotite/sequence/align/selector.pyx index 1bf68127f..77f2728c3 100644 --- a/src/biotite/sequence/align/selector.pyx +++ b/src/biotite/sequence/align/selector.pyx @@ -10,10 +10,8 @@ __all__ = ["MinimizerSelector", "SyncmerSelector", "CachedSyncmerSelector", cimport cython cimport numpy as np -from numbers import Integral import numpy as np from .kmeralphabet import KmerAlphabet -from ..alphabet import AlphabetError ctypedef np.int64_t int64 From 3629a1e15a5044be46c959ba1c391f6b165a7969 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Mon, 1 Apr 2024 15:05:08 +0200 Subject: [PATCH 4/7] Remove deprecated Cython syntax --- src/biotite/sequence/align/selector.pyx | 104 ++++++++++++------------ 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/src/biotite/sequence/align/selector.pyx b/src/biotite/sequence/align/selector.pyx index 
77f2728c3..8bfff8721 100644 --- a/src/biotite/sequence/align/selector.pyx +++ b/src/biotite/sequence/align/selector.pyx @@ -19,7 +19,7 @@ ctypedef np.uint32_t uint32 # Obtained from 'np.iinfo(np.int64).max' -DEF MAX_INT_64 = 9223372036854775807 +cdef int64 MAX_INT_64 = 9223372036854775807 class MinimizerSelector: @@ -52,7 +52,7 @@ class MinimizerSelector: This standard order is often the lexicographical order, which is known to yield suboptimal *density* in many cases :footcite:`Roberts2004`. - + Attributes ---------- kmer_alphabet : KmerAlphabet @@ -71,7 +71,7 @@ class MinimizerSelector: References ---------- - + .. footbibliography:: Examples @@ -120,12 +120,12 @@ class MinimizerSelector: self._window = window self._kmer_alph = kmer_alphabet self._permutation = permutation - + @property def kmer_alphabet(self): return self._kmer_alph - + @property def window(self): return self._window @@ -133,7 +133,7 @@ class MinimizerSelector: @property def permutation(self): return self._permutation - + def select(self, sequence, bint alphabet_check=True): """ @@ -152,7 +152,7 @@ class MinimizerSelector: of the sequence and the alphabet of the :class:`MinimizerSelector` is not checked to gain additional performance. - + Returns ------- minimizer_indices : ndarray, dtype=np.uint32 @@ -160,7 +160,7 @@ class MinimizerSelector: minimizers : ndarray, dtype=np.int64 The *k-mers* that are the selected minimizers, returned as *k-mer* code. - + Notes ----- Duplicate minimizers are omitted, i.e. if two windows have the @@ -174,7 +174,7 @@ class MinimizerSelector: ) kmers = self._kmer_alph.create_kmers(sequence.code) return self.select_from_kmers(kmers) - + def select_from_kmers(self, kmers): """ @@ -189,7 +189,7 @@ class MinimizerSelector: minimizers in. The *k-mer* codes correspond to the *k-mers* encoded by the given `kmer_alphabet`. - + Returns ------- minimizer_indices : ndarray, dtype=np.uint32 @@ -197,7 +197,7 @@ class MinimizerSelector: appears. 
minimizers : ndarray, dtype=np.int64 The corresponding *k-mers* codes of the minimizers. - + Notes ----- Duplicate minimizers are omitted, i.e. if two windows have the @@ -265,7 +265,7 @@ class SyncmerSelector: *k-mer*. By default, the minimum position needs to be at the start of the *k-mer*, which is termed *open syncmer*. - + Attributes ---------- alphabet : Alphabet @@ -274,7 +274,7 @@ class SyncmerSelector: The :class:`KmerAlphabet` for *k* and *s*, respectively. permutation : Permutation The permutation. - + See also -------- CachedSyncmerSelector @@ -289,7 +289,7 @@ class SyncmerSelector: References ---------- - + .. footbibliography:: Examples @@ -335,7 +335,7 @@ class SyncmerSelector: self._alphabet = alphabet self._kmer_alph = KmerAlphabet(alphabet, k) self._smer_alph = KmerAlphabet(alphabet, s) - + self._permutation = permutation self._offset = np.asarray(offset, dtype=np.int64) @@ -351,7 +351,7 @@ class SyncmerSelector: ) if len(np.unique(self._offset)) != len(self._offset): raise ValueError("Offset must contain unique values") - + @property def alphabet(self): @@ -360,7 +360,7 @@ class SyncmerSelector: @property def kmer_alphabet(self): return self._kmer_alph - + @property def smer_alphabet(self): return self._smer_alph @@ -368,7 +368,7 @@ class SyncmerSelector: @property def permutation(self): return self._permutation - + def select(self, sequence, bint alphabet_check=True): """ @@ -387,7 +387,7 @@ class SyncmerSelector: of the sequence and the alphabet of the :class:`SyncmerSelector` is not checked to gain additional performance. - + Returns ------- syncmer_indices : ndarray, dtype=np.uint32 @@ -426,7 +426,7 @@ class SyncmerSelector: relative_min_pos = min_pos - np.arange(len(kmers)) syncmer_pos = self._filter_syncmer_pos(relative_min_pos) return syncmer_pos, kmers[syncmer_pos] - + def select_from_kmers(self, kmers): """ @@ -440,7 +440,7 @@ class SyncmerSelector: ---------- kmers : ndarray, dtype=np.int64 The *k-mer* codes to select the syncmers from. 
- + Returns ------- syncmer_indices : ndarray, dtype=np.uint32 @@ -457,9 +457,9 @@ class SyncmerSelector: :class:`Sequence` objects. """ cdef int64 i - + symbol_codes_for_each_kmer = self._kmer_alph.split(kmers) - + cdef int64[:] min_pos = np.zeros( len(symbol_codes_for_each_kmer), dtype=np.int64 ) @@ -475,10 +475,10 @@ class SyncmerSelector: f"sort keys for {len(smers)} s-mers" ) min_pos[i] = np.argmin(ordering) - + syncmer_pos = self._filter_syncmer_pos(min_pos) return syncmer_pos, kmers[syncmer_pos] - + def _filter_syncmer_pos(self, min_pos): """ @@ -536,7 +536,7 @@ class CachedSyncmerSelector(SyncmerSelector): *k-mer*. By default, the minimum position needs to be at the start of the *k-mer*, which is termed *open syncmer*. - + Attributes ---------- alphabet : Alphabet @@ -545,7 +545,7 @@ class CachedSyncmerSelector(SyncmerSelector): The :class:`KmerAlphabet` for *k* and *s*, respectively. permutation : Permutation The permutation. - + See also -------- SyncmerSelector @@ -560,7 +560,7 @@ class CachedSyncmerSelector(SyncmerSelector): References ---------- - + .. footbibliography:: Examples @@ -582,7 +582,7 @@ class CachedSyncmerSelector(SyncmerSelector): >>> print(["".join(kmer_alph.decode(kmer)) for kmer in syncmers]) ['GGCAA', 'AAGTG', 'AGTGA', 'GTGAC'] """ - + def __init__(self, alphabet, k, s, permutation=None, offset=(0,)): super().__init__(alphabet, k, s, permutation, offset) # Check for all possible *k-mers*, whether they are syncmers @@ -591,7 +591,7 @@ class CachedSyncmerSelector(SyncmerSelector): # Convert the index array into a boolean mask self._syncmer_mask = np.zeros(len(self.kmer_alphabet), dtype=bool) self._syncmer_mask[syncmer_indices] = True - + def select(self, sequence, bint alphabet_check=True): """ @@ -610,7 +610,7 @@ class CachedSyncmerSelector(SyncmerSelector): of the sequence and the alphabet of the :class:`CachedSyncmerSelector` is not checked to gain additional performance. 
- + Returns ------- syncmer_indices : ndarray, dtype=np.uint32 @@ -626,7 +626,7 @@ class CachedSyncmerSelector(SyncmerSelector): ) kmers = self.kmer_alphabet.create_kmers(sequence.code) return self.select_from_kmers(kmers) - + def select_from_kmers(self, kmers): """ @@ -640,7 +640,7 @@ class CachedSyncmerSelector(SyncmerSelector): ---------- kmers : ndarray, dtype=np.int64 The *k-mer* codes to select the syncmers from. - + Returns ------- syncmer_indices : ndarray, dtype=np.uint32 @@ -658,7 +658,7 @@ class MincodeSelector: Selects the :math:`1/\text{compression}` *smallest* *k-mers* from :class:`KmerAlphabet`. :footcite:`Edgar2021` - + '*Small*' refers to the lexicographical order, or alternatively a custom order if `permutation` is given. The *Mincode* approach tries to reduce the number of *k-mers* from a @@ -680,7 +680,7 @@ class MincodeSelector: By default, the standard order of the :class:`KmerAlphabet` is used. This standard order is often the lexicographical order. - + Attributes ---------- kmer_alphabet : KmerAlphabet @@ -693,10 +693,10 @@ class MincodeSelector: All *k-mers*, that are smaller than this value are selected. permutation : Permutation The permutation. - + References ---------- - + .. footbibliography:: Examples @@ -733,12 +733,12 @@ class MincodeSelector: permutation_offset = permutation.min permutation_range = permutation.max - permutation.min + 1 self._threshold = permutation_offset + permutation_range / compression - + @property def kmer_alphabet(self): return self._kmer_alph - + @property def compression(self): return self._compression @@ -750,7 +750,7 @@ class MincodeSelector: @property def permutation(self): return self._permutation - + def select(self, sequence, bint alphabet_check=True): """ @@ -769,7 +769,7 @@ class MincodeSelector: of the sequence and the alphabet of the :class:`MincodeSelector` is not checked to gain additional performance. 
- + Returns ------- mincode_indices : ndarray, dtype=np.uint32 @@ -784,7 +784,7 @@ class MincodeSelector: ) kmers = self._kmer_alph.create_kmers(sequence.code) return self.select_from_kmers(kmers) - + def select_from_kmers(self, kmers): """ @@ -798,7 +798,7 @@ class MincodeSelector: ---------- kmers : ndarray, dtype=np.int64 The *k-mer* codes to select the *Mincode k-mers* from. - + Returns ------- mincode_indices : ndarray, dtype=np.uint32 @@ -818,7 +818,7 @@ class MincodeSelector: mincode_pos = ordering < self._threshold return mincode_pos, kmers[mincode_pos] - + @cython.boundscheck(False) @cython.wraparound(False) @@ -833,7 +833,7 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window, instead of 'x - (window-1)/2' to 'x + (window-1)/2'. """ cdef uint32 seq_i - + cdef uint32 n_windows = kmers.shape[0] - (window - 1) # Pessimistic array allocation size # -> Expect that every window has a new minimizer @@ -863,14 +863,14 @@ def _minimize(int64[:] kmers, int64[:] ordering, uint32 window, reverse_argcummin = reverse_argcummins[seq_i] forward_cummin = ordering[forward_argcummin] reverse_cummin = ordering[reverse_argcummin] - + # At ties the leftmost position is taken, # which stems from the reverse pass if forward_cummin < reverse_cummin: combined_argcummin = forward_argcummin else: combined_argcummin = reverse_argcummin - + # If the same minimizer position was observed before, the # duplicate is simply ignored, if 'include_duplicates' is false if include_duplicates or combined_argcummin != prev_argcummin: @@ -897,7 +897,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size): cdef uint32 current_min_i = 0 cdef int64 current_min, current_val cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32) - + # Any actual value will be smaller than this placeholder current_min = MAX_INT_64 for seq_i in range(values.shape[0]): @@ -909,7 +909,7 @@ cdef _chunk_wise_forward_argcummin(int64[:] values, uint32 chunk_size): current_min_i = 
seq_i current_min = current_val min_pos[seq_i] = current_min_i - + return min_pos @cython.boundscheck(False) @@ -928,7 +928,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size): - There are issues in selecting the leftmost argument - An offset is necessary to ensure alignment of chunks with forward pass - + Hence, a separate 'reverse' variant of the function was implemented. """ cdef uint32 seq_i @@ -936,7 +936,7 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size): cdef uint32 current_min_i = 0 cdef int64 current_min, current_val cdef uint32[:] min_pos = np.empty(values.shape[0], dtype=np.uint32) - + current_min = MAX_INT_64 for seq_i in reversed(range(values.shape[0])): # The chunk beginning is a small difference to forward @@ -950,5 +950,5 @@ cdef _chunk_wise_reverse_argcummin(int64[:] values, uint32 chunk_size): current_min_i = seq_i current_min = current_val min_pos[seq_i] = current_min_i - + return min_pos From 29a8df64496042753e4f27be75933137d08ee0f5 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Mon, 10 Jun 2024 15:02:10 +0200 Subject: [PATCH 5/7] Adapt to changes in NumPy broadcast behavior --- src/biotite/sequence/align/alignment.py | 93 ++++++++++++------------ src/biotite/sequence/align/kmertable.pyx | 5 +- src/biotite/sequence/alphabet.py | 2 +- src/biotite/sequence/codon.py | 5 +- 4 files changed, 55 insertions(+), 50 deletions(-) diff --git a/src/biotite/sequence/align/alignment.py b/src/biotite/sequence/align/alignment.py index 2f824c7f0..7d97d15a8 100644 --- a/src/biotite/sequence/align/alignment.py +++ b/src/biotite/sequence/align/alignment.py @@ -22,7 +22,7 @@ class Alignment(object): An :class:`Alignment` object stores information about which symbols of *n* sequences are aligned to each other and it stores the corresponding alignment score. 
- + Instead of saving a list of aligned symbols, this class saves the original *n* sequences, that were aligned, and a so called *trace*, which indicate the aligned symbols of these sequences. @@ -31,16 +31,16 @@ class Alignment(object): Each element of the trace is the index in the corresponding sequence. A gap is represented by the value -1. - + Furthermore this class provides multiple utility functions for conversion into strings in order to make the alignment human readable. - + Unless an :class:`Alignment` object is the result of an multiple sequence alignment, the object will contain only two sequences. - + All attributes of this class are publicly accessible. - + Parameters ---------- sequences : list @@ -49,7 +49,7 @@ class Alignment(object): The alignment trace. score : int, optional Alignment score. - + Attributes ---------- sequences : list @@ -58,10 +58,10 @@ class Alignment(object): The alignment trace. score : int Alignment score. - + Examples -------- - + >>> seq1 = NucleotideSequence("CGTCAT") >>> seq2 = NucleotideSequence("TCATGC") >>> matrix = SubstitutionMatrix.std_nucleotide_matrix() @@ -107,11 +107,11 @@ def _gapped_str(self, seq_index): else: seq_str += "-" return seq_str - + def get_gapped_sequences(self): """ Get a the string representation of the gapped sequences. - + Returns ------- sequences : list of str @@ -119,7 +119,7 @@ def get_gapped_sequences(self): as in `Alignment.sequences`. 
""" return [self._gapped_str(i) for i in range(len(self.sequences))] - + def __str__(self): # Check if any of the sequences # has an non-single letter alphabet @@ -143,7 +143,7 @@ def __str__(self): return ali_str[:-2] else: return super().__str__() - + def __getitem__(self, index): if isinstance(index, tuple): if len(index) > 2: @@ -162,13 +162,13 @@ def __getitem__(self, index): ) else: return Alignment(self.sequences, self.trace[index], self.score) - + def __iter__(self): raise TypeError("'Alignment' object is not iterable") - + def __len__(self): return len(self.trace) - + def __eq__(self, item): if not isinstance(item, Alignment): return False @@ -179,7 +179,7 @@ def __eq__(self, item): if self.score != item.score: return False return True - + @staticmethod def _index_sequences(sequences, index): if isinstance(index, (list, tuple)) or \ @@ -193,19 +193,19 @@ def _index_sequences(sequences, index): raise IndexError( f"Invalid alignment index type '{type(index).__name__}'" ) - + @staticmethod def trace_from_strings(seq_str_list): """ Create a trace from strings that represent aligned sequences. - + Parameters ---------- seq_str_list : list of str The strings, where each each one represents a sequence (with gaps) in an alignment. A ``-`` is interpreted as gap. - + Returns ------- trace : ndarray, dtype=int, shape=(n,2) @@ -238,22 +238,22 @@ def get_codes(alignment): Instead of the indices of the aligned symbols (trace), the return value contains the corresponding symbol codes for each index. Gaps are still represented by *-1*. - + Parameters ---------- alignment : Alignment The alignment to get the sequence codes for. - + Returns ------- codes : ndarray, dtype=int, shape=(n,m) The sequence codes for the alignment. The shape is *(n,m)* for *n* sequences and *m* alignment cloumn. The array uses *-1* values for gaps. 
- + Examples -------- - + >>> seq1 = NucleotideSequence("CGTCAT") >>> seq2 = NucleotideSequence("TCATGC") >>> matrix = SubstitutionMatrix.std_nucleotide_matrix() @@ -267,14 +267,17 @@ def get_codes(alignment): """ trace = alignment.trace sequences = alignment.sequences - + # The number of sequences is the first dimension - codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=int) + codes = np.zeros((trace.shape[1], trace.shape[0]), dtype=np.int64) for i in range(len(sequences)): + # Mark -1 explicitly as int64 to avoid that the unsigned dtype + # of the sequence code is used + # (https://numpy.org/neps/nep-0050-scalar-promotion.html) codes[i] = np.where( - trace[:,i] != -1, sequences[i].code[trace[:,i]], -1 + trace[:,i] != -1, sequences[i].code[trace[:,i]], np.int64(-1) ) - + return np.stack(codes) @@ -283,24 +286,24 @@ def get_symbols(alignment): Similar to :func:`get_codes()`, but contains the decoded symbols instead of codes. Gaps are still represented by *None* values. - + Parameters ---------- alignment : Alignment The alignment to get the symbols for. - + Returns ------- symbols : list of list The nested list of symbols. 
- + See Also -------- get_codes Examples -------- - + >>> seq1 = NucleotideSequence("CGTCAT") >>> seq2 = NucleotideSequence("TCATGC") >>> matrix = SubstitutionMatrix.std_nucleotide_matrix() @@ -317,8 +320,8 @@ def get_symbols(alignment): alphabet = alignment.sequences[i].get_alphabet() codes_wo_gaps = codes[i, codes[i] != -1] symbols_wo_gaps = alphabet.decode_multiple(codes_wo_gaps) - if not isinstance(symbols_wo_gaps, list): - symbols_wo_gaps = list(symbols_wo_gaps) + if isinstance(symbols_wo_gaps, np.ndarray): + symbols_wo_gaps = symbols_wo_gaps.tolist() symbols_for_seq = np.full(len(codes[i]), None, dtype=object) symbols_for_seq[codes[i] != -1] = symbols_wo_gaps symbols[i] = symbols_for_seq.tolist() @@ -331,7 +334,7 @@ def get_sequence_identity(alignment, mode="not_terminal"): The identity is equal to the matches divided by a measure for the length of the alignment that depends on the `mode` parameter. - + Parameters ---------- alignment : Alignment @@ -348,12 +351,12 @@ def get_sequence_identity(alignment, mode="not_terminal"): length of the shortest sequence. Default is *not_terminal*. - + Returns ------- identity : float The sequence identity, ranging between 0 and 1. - + See also -------- get_pairwise_sequence_identity @@ -368,7 +371,7 @@ def get_sequence_identity(alignment, mode="not_terminal"): unique_symbols = np.unique(column) if len(unique_symbols) == 1 and unique_symbols[0] != -1: matches += 1 - + # Calculate length if mode == "all": length = len(alignment) @@ -394,7 +397,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"): The identity is equal to the matches divided by a measure for the length of the alignment that depends on the `mode` parameter. - + Parameters ---------- alignment : Alignment, length=n @@ -411,12 +414,12 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"): length of the shortest one of the two sequences. Default is *not_terminal*. 
- + Returns ------- identity : ndarray, dtype=float, shape=(n,n) The pairwise sequence identity, ranging between 0 and 1. - + See also -------- get_sequence_identity @@ -458,7 +461,7 @@ def get_pairwise_sequence_identity(alignment, mode="not_terminal"): ]) else: raise ValueError(f"'{mode}' is an invalid calculation mode") - + return matches / length @@ -468,7 +471,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True): If the alignment contains more than two sequences, all pairwise scores are counted. - + Parameters ---------- alignment : Alignment @@ -485,7 +488,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True): terminal_penalty : bool, optional If true, gap penalties are applied to terminal gaps. (Default: True) - + Returns ------- score : int @@ -509,7 +512,7 @@ def score(alignment, matrix, gap_penalty=-10, terminal_penalty=True): # Ignore gaps if code_i != -1 and code_j != -1: score += matrix[code_i, code_j] - + # Sum gap penalties if type(gap_penalty) == int: gap_open = gap_penalty @@ -598,7 +601,7 @@ def find_terminal_gaps(alignment): # The terminal gaps are before all sequences start and after any # sequence ends # Use exclusive stop -> -1 - return np.max(firsts), np.min(lasts) + 1 + return np.max(firsts).item(), np.min(lasts).item() + 1 def remove_terminal_gaps(alignment): diff --git a/src/biotite/sequence/align/kmertable.pyx b/src/biotite/sequence/align/kmertable.pyx index 98cc62dee..90d7b0569 100644 --- a/src/biotite/sequence/align/kmertable.pyx +++ b/src/biotite/sequence/align/kmertable.pyx @@ -1352,7 +1352,8 @@ cdef class KmerTable: def __iter__(self): - return iter(self.get_kmers()) + for kmer in self.get_kmers(): + yield kmer.item() def __reversed__(self): @@ -3394,7 +3395,7 @@ def _to_string(table): else: symbols = str(tuple(symbols)) line = symbols + ": " + ", ".join( - [str(tuple(pos)) for pos in table[kmer]] + [str((ref_id.item(), pos.item())) for ref_id, pos in table[kmer]] ) lines.append(line) return 
"\n".join(lines) diff --git a/src/biotite/sequence/alphabet.py b/src/biotite/sequence/alphabet.py index 2f0d409e6..4b9fe9683 100644 --- a/src/biotite/sequence/alphabet.py +++ b/src/biotite/sequence/alphabet.py @@ -352,7 +352,7 @@ def encode(self, symbol): raise AlphabetError( f"Symbol {repr(symbol)} is not in the alphabet" ) - return indices[0] + return indices[0].item() def decode(self, code, as_bytes=False): if code < 0 or code >= len(self._symbols): diff --git a/src/biotite/sequence/codon.py b/src/biotite/sequence/codon.py index 67d2ab291..fe50c791f 100644 --- a/src/biotite/sequence/codon.py +++ b/src/biotite/sequence/codon.py @@ -147,8 +147,9 @@ def __getitem__(self, item): elif isinstance(item, int): # Code for amino acid -> return possible codon codes codon_numbers = np.where(self._codons == item)[0] - codon_codes = tuple(CodonTable._to_codon(codon_numbers)) - codon_codes = tuple([tuple(code) for code in codon_codes]) + codon_codes = tuple( + [tuple(code.tolist()) for code in CodonTable._to_codon(codon_numbers)] + ) return codon_codes else: # Code for codon as any iterable object From d6ea5069454dbdf2f64b7f49dc0eeb5448889b86 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Sat, 29 Jun 2024 13:00:25 +0200 Subject: [PATCH 6/7] Temporarily disable trajectory functionalities MDTraj is incompatible with NumPy 2.0 --- doc/tutorial/structure/index.rst | 1 - environment.yml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/tutorial/structure/index.rst b/doc/tutorial/structure/index.rst index 46a2307f4..32b550994 100644 --- a/doc/tutorial/structure/index.rst +++ b/doc/tutorial/structure/index.rst @@ -54,4 +54,3 @@ contains functions for structure analysis and manipulation. 
measurement segments nucleotide - trajectories diff --git a/environment.yml b/environment.yml index 04672cb48..8e484d75c 100644 --- a/environment.yml +++ b/environment.yml @@ -22,7 +22,7 @@ dependencies: - numpy >=2.0 - requests >=2.12 # Testing - - mdtraj >=1.9.3, <1.10 + # - mdtraj >=1.9.3, <1.10 # temporarily disabled due to incompatibility with numpy 2.0 - pytest >=7.0 # Interfaced software in biotite.application (can also be installed separately) - autodock-vina From 5bbc62bc411e74d0289b963b4276fb697284594e Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann <padix.key@gmail.com> Date: Sat, 29 Jun 2024 17:06:02 +0200 Subject: [PATCH 7/7] Adapt to changed `argsort()` behavior --- src/biotite/sequence/align/permutation.pyx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/biotite/sequence/align/permutation.pyx b/src/biotite/sequence/align/permutation.pyx index 64a84289f..9c1ab87bd 100644 --- a/src/biotite/sequence/align/permutation.pyx +++ b/src/biotite/sequence/align/permutation.pyx @@ -186,6 +186,9 @@ class FrequencyPermutation(Permutation): The minimum and maximum value, the permutated value (i.e. the return value of :meth:`permute()`) can take. + kmer_alphabet : KmerAlphabet + The *k-mer* alphabet that defines the range of possible *k-mers* + that should be permuted. 
Notes ----- @@ -226,11 +229,11 @@ class FrequencyPermutation(Permutation): >>> permutation = FrequencyPermutation.from_table(kmer_table) >>> order = permutation.permute(kmer_codes) >>> print(order) - [ 0 24 20 19 16 15 14 13 12 22 21 10 11 8 7 18 6 5 4 3 23 2 1 9 + [ 0 22 18 19 1 2 3 4 5 23 20 6 7 8 9 21 10 11 12 13 24 14 15 16 17] >>> kmer_codes = kmer_codes[np.argsort(order)] >>> print(["..."] + ["".join(kmer_alph.decode(c)) for c in kmer_codes[-10:]]) - ['...', 'ba', 'ar', 'rr', 'da', 'ad', 'ac', 'ca', 'br', 'ra', 'ab'] + ['...', 'rc', 'rd', 'rr', 'ac', 'ad', 'ca', 'da', 'ab', 'br', 'ra'] """ def __init__(self, kmer_alphabet, counts): @@ -240,7 +243,9 @@ class FrequencyPermutation(Permutation): f"but {len(counts)} counts were given" ) # 'order' maps a permutation to a k-mer - order = np.argsort(counts) + # Stability is important to get the same k-mer subset selection + # on different architectures + order = np.argsort(counts, stable=True) # '_permutation_table' should perform the reverse mapping self._permutation_table = _invert_mapping(order) self._kmer_alph = kmer_alphabet @@ -259,8 +264,11 @@ class FrequencyPermutation(Permutation): return self._kmer_alph + @staticmethod def from_table(kmer_table): """ + from_table(kmer_table) + Create a :class:`FrequencyPermutation` from the *k-mer* counts of a :class:`KmerTable`.