diff --git a/gensim/_matutils.pyx b/gensim/_matutils.pyx
index 0162202224..aa4e9a1cee 100644
--- a/gensim/_matutils.pyx
+++ b/gensim/_matutils.pyx
@@ -42,7 +42,7 @@ def mean_absolute_difference(a, b):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
+cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) noexcept nogil:
     """Mean absolute difference between two arrays.
 
     Parameters
@@ -103,7 +103,7 @@ def logsumexp(x):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
+cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) noexcept nogil:
     """Log of sum of exponentials.
 
     Parameters
@@ -223,7 +223,7 @@ def dirichlet_expectation_1d(alpha):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
+cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
 
     Parameters
@@ -251,7 +251,7 @@ cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
+cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
 
     Parameters
@@ -298,7 +298,7 @@ def digamma(DTYPE_t x):
 
 
 @cython.cdivision(True)
-cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
+cdef inline DTYPE_t _digamma(DTYPE_t x,) noexcept nogil:
     """Digamma function for positive floats.
 
     Parameters
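Note on the `noexcept nogil` annotations used throughout this patch: Cython 3.0 changed the default exception semantics of `cdef` functions. A function with a non-object return type and no exception declaration now propagates Python exceptions by default, so the generated C code checks for a pending error after every call and, inside `nogil` blocks, re-acquires the GIL to do that check. Marking these hot-loop helpers `noexcept` restores the pre-3.0 behaviour (an exception raised inside is printed and swallowed instead of propagated), so no per-call error check or GIL acquisition is emitted. A minimal sketch of the two spellings, using a hypothetical `clip` helper that is not part of gensim:

    # Cython 3.0 default: may propagate exceptions, so each call site
    # performs an error check that needs the GIL.
    cdef double clip_checked(double x, double lo, double hi) nogil:
        return lo if x < lo else (hi if x > hi else x)

    # With noexcept: legacy 0.29.x behaviour, no error checking emitted;
    # any Python exception raised inside would be printed and discarded.
    cdef double clip(double x, double lo, double hi) noexcept nogil:
        return lo if x < lo else (hi if x > hi else x)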
diff --git a/gensim/models/doc2vec_corpusfile.pyx b/gensim/models/doc2vec_corpusfile.pyx
index da5b230b9f..29463954c1 100644
--- a/gensim/models/doc2vec_corpusfile.pyx
+++ b/gensim/models/doc2vec_corpusfile.pyx
@@ -61,7 +61,7 @@ cdef void prepare_c_structures_for_batch(
         np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *document_len, int train_words,
         int docvecs_count, int doc_tag, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem predict_word
     cdef string token
     cdef int i = 0
diff --git a/gensim/models/doc2vec_inner.pxd b/gensim/models/doc2vec_inner.pxd
index 525d20c6b6..c327ce462f 100644
--- a/gensim/models/doc2vec_inner.pxd
+++ b/gensim/models/doc2vec_inner.pxd
@@ -57,7 +57,7 @@ cdef void fast_document_dbow_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
         REAL_t *context_vectors, REAL_t *syn1, const int size, const np.uint32_t context_index,
         const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
-        REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil
+        REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil
 
 
 cdef unsigned long long fast_document_dbow_neg(
@@ -65,31 +65,31 @@ cdef unsigned long long fast_document_dbow_neg(
         REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
         const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, unsigned long long next_random,
         int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-        const np.uint32_t contexts_lockf_len) nogil
+        const np.uint32_t contexts_lockf_len) noexcept nogil
 
 
 cdef void fast_document_dm_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
         REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-        const int size, int learn_hidden) nogil
+        const int size, int learn_hidden) noexcept nogil
 
 
 cdef unsigned long long fast_document_dm_neg(
         const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
         REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-        const int size, int learn_hidden) nogil
+        const int size, int learn_hidden) noexcept nogil
 
 
 cdef void fast_document_dmc_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
         REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-        const int layer1_size, const int vector_size, int learn_hidden) nogil
+        const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil
 
 
 cdef unsigned long long fast_document_dmc_neg(
         const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
         REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-        const int layer1_size, const int vector_size, int learn_hidden) nogil
+        const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil
 
 
 cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, learn_hidden, train_words=*, work=*,
diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx
index 21964b79b6..24aafe2f8b 100644
--- a/gensim/models/doc2vec_inner.pyx
+++ b/gensim/models/doc2vec_inner.pyx
@@ -39,7 +39,7 @@ cdef void fast_document_dbow_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
         REAL_t *context_vectors, REAL_t *syn1, const int size, const np.uint32_t context_index, const REAL_t alpha,
         REAL_t *work, int learn_context, int learn_hidden,
-        REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil:
+        REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil:
 
     cdef long long a, b
     cdef long long row1 = context_index * size, row2
@@ -66,7 +66,7 @@ cdef unsigned long long fast_document_dbow_neg(
         REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
         const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, unsigned long long next_random,
         int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-        const np.uint32_t contexts_lockf_len) nogil:
+        const np.uint32_t contexts_lockf_len) noexcept nogil:
 
     cdef long long a
     cdef long long row1 = context_index * size, row2
@@ -106,7 +106,7 @@ cdef unsigned long long fast_document_dbow_neg(
 cdef void fast_document_dm_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
         REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-        const int size, int learn_hidden) nogil:
+        const int size, int learn_hidden) noexcept nogil:
 
     cdef long long b
     cdef long long row2
@@ -129,7 +129,7 @@ cdef void fast_document_dm_hs(
 cdef unsigned long long fast_document_dm_neg(
         const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
         REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-        const int size, int learn_hidden) nogil:
+        const int size, int learn_hidden) noexcept nogil:
 
     cdef long long row2
     cdef unsigned long long modulo = 281474976710655ULL
@@ -165,7 +165,7 @@ cdef unsigned long long fast_document_dm_neg(
 cdef void fast_document_dmc_hs(
         const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
         REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-        const int layer1_size, const int vector_size, int learn_hidden) nogil:
+        const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:
 
     cdef long long a, b
     cdef long long row2
@@ -189,7 +189,7 @@ cdef void fast_document_dmc_hs(
 cdef unsigned long long fast_document_dmc_neg(
         const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
         REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-        const int layer1_size, const int vector_size, int learn_hidden) nogil:
+        const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:
 
     cdef long long a
     cdef long long row2
diff --git a/gensim/models/fasttext_corpusfile.pyx b/gensim/models/fasttext_corpusfile.pyx
index 1f67785bf7..0de3762e15 100644
--- a/gensim/models/fasttext_corpusfile.pyx
+++ b/gensim/models/fasttext_corpusfile.pyx
@@ -48,7 +48,7 @@ cdef void prepare_c_structures_for_batch(
         int *effective_words, int *effective_sentences, unsigned long long *next_random, cvocab_t *vocab,
         int *sentence_idx, np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *subwords_idx_len, np.uint32_t **subwords_idx, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem word
     cdef string token
     cdef vector[string] sent
diff --git a/gensim/models/fasttext_inner.pxd b/gensim/models/fasttext_inner.pxd
index af7a531116..f383dc6616 100644
--- a/gensim/models/fasttext_inner.pxd
+++ b/gensim/models/fasttext_inner.pxd
@@ -135,16 +135,16 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1)
 cdef object populate_ft_config(FastTextConfig *c, vocab, buckets_word, sentences)
 
 
-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil
 
 
-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil
diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx
index 6e246b3579..682c515cfc 100644
--- a/gensim/models/fasttext_inner.pyx
+++ b/gensim/models/fasttext_inner.pyx
@@ -72,7 +72,7 @@ cdef int ONE = 1
 cdef REAL_t ONEF = <REAL_t>1.0
 
 
-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with negative sampling.
 
     Parameters
@@ -145,7 +145,7 @@ cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
             c.work, &ONE, &c.syn0_ngrams[subwords_index[d]*c.size], &ONE)
 
 
-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with hierarchical sampling.
 
     Parameters
@@ -221,7 +221,7 @@ cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
             &c.syn0_ngrams[row2], &ONE)
 
 
-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with negative sampling.
 
     Parameters
@@ -306,7 +306,7 @@ cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k
                 &c.syn0_ngrams[c.subwords_idx[m][d]*c.size], &ONE)
 
 
-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with hierarchical sampling.
 
     Parameters
@@ -510,7 +510,7 @@ cdef object populate_ft_config(FastTextConfig *c, wv, buckets_word, sentences):
     return effective_words, effective_sentences
 
 
-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil:
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil:
     """Performs training on a fully initialized and populated configuration.
 
     Parameters
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
index ffb808f6cc..bb6c6a1ab8 100644
--- a/gensim/models/keyedvectors.py
+++ b/gensim/models/keyedvectors.py
@@ -1667,7 +1667,7 @@ def save_word2vec_format(
             if binary:
                 fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
             else:
-                fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
+                fout.write(f"{prefix}{key} {' '.join(val.astype('str') for val in key_vector)}\n".encode('utf8'))
 
     @classmethod
     def load_word2vec_format(
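The `save_word2vec_format` change above tracks NumPy 2.0's new scalar repr (NEP 51): `repr()` of a float32 scalar now includes the type name, which would leak into the plain-text vector format. A small illustration, assuming NumPy >= 2.0:

    import numpy as np

    val = np.float32(0.5)
    print(repr(val))          # np.float32(0.5) -- would corrupt the "word v1 v2 ..." line
    print(val.astype('str'))  # 0.5             -- plain decimal, safe to write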
diff --git a/gensim/models/nmf_pgd.pyx b/gensim/models/nmf_pgd.pyx
index 2419272e5b..82f079fd2e 100644
--- a/gensim/models/nmf_pgd.pyx
+++ b/gensim/models/nmf_pgd.pyx
@@ -10,10 +10,10 @@ from libc.math cimport sqrt
 from cython.parallel import prange
 
 
-cdef double fmin(double x, double y) nogil:
+cdef double fmin(double x, double y) noexcept nogil:
     return x if x < y else y
 
-cdef double fmax(double x, double y) nogil:
+cdef double fmax(double x, double y) noexcept nogil:
     return x if x > y else y
 
 def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):
diff --git a/gensim/models/word2vec_corpusfile.pxd b/gensim/models/word2vec_corpusfile.pxd
index 2490c2ca37..c8614038e6 100644
--- a/gensim/models/word2vec_corpusfile.pxd
+++ b/gensim/models/word2vec_corpusfile.pxd
@@ -26,9 +26,9 @@ cdef extern from "fast_line_sentence.h":
     cdef cppclass FastLineSentence:
         FastLineSentence() except +
        FastLineSentence(string&, size_t) except +
-        vector[string] ReadSentence() nogil except +
-        bool_t IsEof() nogil
-        void Reset() nogil
+        vector[string] ReadSentence() except + nogil
+        bool_t IsEof() noexcept nogil
+        void Reset() noexcept nogil
 
 
 cdef class CythonLineSentence:
@@ -37,12 +37,12 @@ cdef class CythonLineSentence:
     cdef public size_t max_sentence_length, max_words_in_batch, offset
     cdef vector[vector[string]] buf_data
 
-    cpdef bool_t is_eof(self) nogil
-    cpdef vector[string] read_sentence(self) nogil except *
-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *
-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil
-    cpdef void reset(self) nogil
-    cpdef vector[vector[string]] next_batch(self) nogil except *
+    cpdef bool_t is_eof(self) noexcept nogil
+    cpdef vector[string] read_sentence(self) except * nogil
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil
+    cpdef void reset(self) noexcept nogil
+    cpdef vector[vector[string]] next_batch(self) except * nogil
 
 
 cdef struct VocabItem:
@@ -62,9 +62,9 @@ ctypedef unordered_map[string, VocabItem] cvocab_t
 cdef class CythonVocab:
     cdef cvocab_t vocab
     cdef subword_arrays
-    cdef cvocab_t* get_vocab_ptr(self) nogil except *
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil
 
-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil
 
 cdef REAL_t get_next_alpha(REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil
+                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil
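The reordered declarations above (`except + nogil` rather than `nogil except +`) follow Cython 3.0's stricter grammar: the exception clause must now precede the trailing `nogil`. A sketch of the accepted ordering for an external C++ member, using a hypothetical `Reader` class rather than gensim's actual header:

    from libcpp.string cimport string
    from libcpp.vector cimport vector

    cdef extern from "reader.h":
        cdef cppclass Reader:
            # Cython 3.0 accepts "except + nogil"; the old "nogil except +"
            # ordering is rejected at compile time.
            vector[string] Read() except + nogil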
diff --git a/gensim/models/word2vec_corpusfile.pyx b/gensim/models/word2vec_corpusfile.pyx
index 89012cfd81..0a7d401ea5 100644
--- a/gensim/models/word2vec_corpusfile.pyx
+++ b/gensim/models/word2vec_corpusfile.pyx
@@ -62,7 +62,7 @@ cdef class CythonVocab:
 
             self.vocab[token] = word
 
-    cdef cvocab_t* get_vocab_ptr(self) nogil except *:
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil:
         return &self.vocab
 
 
@@ -92,17 +92,17 @@ cdef class CythonLineSentence:
         if self._thisptr != NULL:
             del self._thisptr
 
-    cpdef bool_t is_eof(self) nogil:
+    cpdef bool_t is_eof(self) noexcept nogil:
         return self._thisptr.IsEof()
 
-    cpdef vector[string] read_sentence(self) nogil except *:
+    cpdef vector[string] read_sentence(self) except * nogil:
         return self._thisptr.ReadSentence()
 
-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *:
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil:
         cdef vector[string] sent = self.read_sentence()
         return self._chunk_sentence(sent)
 
-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil:
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil:
         cdef vector[vector[string]] res
         cdef vector[string] chunk
         cdef size_t cur_idx = 0
@@ -120,7 +120,7 @@ cdef class CythonLineSentence:
 
         return res
 
-    cpdef void reset(self) nogil:
+    cpdef void reset(self) noexcept nogil:
         self._thisptr.Reset()
 
     def __iter__(self):
@@ -135,7 +135,7 @@ cdef class CythonLineSentence:
         # This function helps pickle to correctly serialize objects of this class.
         return rebuild_cython_line_sentence, (self.source, self.max_sentence_length)
 
-    cpdef vector[vector[string]] next_batch(self) nogil except *:
+    cpdef vector[vector[string]] next_batch(self) except * nogil:
         cdef:
             vector[vector[string]] job_batch
            vector[vector[string]] chunked_sentence
@@ -235,13 +235,13 @@ cdef void prepare_c_structures_for_batch(
             reduced_windows[i] = 0
 
 
-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil:
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil:
    return alpha - ((alpha - end_alpha) * (<REAL_t> cur_epoch) / num_epochs)
 
 
 cdef REAL_t get_next_alpha(
         REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil:
+        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil:
     cdef REAL_t epoch_progress
 
     if expected_examples != -1:
diff --git a/gensim/models/word2vec_inner.pxd b/gensim/models/word2vec_inner.pxd
index 8a77a17041..fcab17c296 100644
--- a/gensim/models/word2vec_inner.pxd
+++ b/gensim/models/word2vec_inner.pxd
@@ -20,12 +20,12 @@ cdef extern from "voidptr.h":
 ctypedef np.float32_t REAL_t
 
 # BLAS routine signatures
-ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil
-ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
-ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil
-ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil
+ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
+ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
+ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) noexcept nogil
+ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) noexcept nogil
 
 cdef scopy_ptr scopy
 cdef saxpy_ptr saxpy
@@ -42,8 +42,8 @@ cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE
 DEF MAX_SENTENCE_LEN = 10000
 
 # function implementations swapped based on BLAS detected in word2vec_inner.pyx init()
-ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
+ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
 
 cdef our_dot_ptr our_dot
 cdef our_saxpy_ptr our_saxpy
@@ -78,26 +78,26 @@ cdef struct Word2VecConfig:
 
 
 # for when fblas.sdot returns a double
-cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
+cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
 
 # for when fblas.sdot returns a float
-cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
+cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
 
 # for when no blas available
-cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
+cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
 
 # to support random draws from negative-sampling cum_table
-cdef unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil
+cdef unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) noexcept nogil
 
-cdef unsigned long long random_int32(unsigned long long *next_random) nogil
+cdef unsigned long long random_int32(unsigned long long *next_random) noexcept nogil
 
 
 cdef void w2v_fast_sentence_sg_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
        REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index, const REAL_t alpha,
        REAL_t *work, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil
 
 
 cdef unsigned long long w2v_fast_sentence_sg_neg(
@@ -105,7 +105,7 @@ cdef unsigned long long w2v_fast_sentence_sg_neg(
        REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
        const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work,
        unsigned long long next_random, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil
 
 
 cdef void w2v_fast_sentence_cbow_hs(
@@ -113,7 +113,7 @@ cdef void w2v_fast_sentence_cbow_hs(
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil
 
 
 cdef unsigned long long w2v_fast_sentence_cbow_neg(
@@ -121,7 +121,7 @@ cdef unsigned long long w2v_fast_sentence_cbow_neg(
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil
 
 
 cdef init_w2v_config(Word2VecConfig *c, model, alpha, compute_loss, _work, _neu1=*)
diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx
index 5b5060bab5..f3f24b5472 100755
--- a/gensim/models/word2vec_inner.pyx
+++ b/gensim/models/word2vec_inner.pyx
@@ -44,15 +44,15 @@ cdef REAL_t ONEF = <REAL_t>1.0
 
 
 # for when fblas.sdot returns a double
-cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
    return <REAL_t>dsdot(N, X, incX, Y, incY)
 
 # for when fblas.sdot returns a float
-cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
    return <REAL_t>sdot(N, X, incX, Y, incY)
 
 # for when no blas available
-cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
     # not a true full dot()-implementation: just enough for our cases
     cdef int i
     cdef REAL_t a
@@ -62,7 +62,7 @@ cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const
     return a
 
 # for when no blas available
-cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil:
+cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil:
     cdef int i
     for i from 0 <= i < N[0] by 1:
         Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])]
@@ -71,7 +71,7 @@ cdef void w2v_fast_sentence_sg_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
        REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index, const REAL_t alpha,
        REAL_t *work, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the Skip-Gram model.
 
     In this model we are using a given word to predict a context word (a word that is
@@ -135,7 +135,7 @@ cdef void w2v_fast_sentence_sg_hs(
 
 
 # to support random draws from negative-sampling cum_table
-cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil:
+cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) noexcept nogil:
     cdef unsigned long long mid
     while hi > lo:
         mid = (lo + hi) >> 1
@@ -147,7 +147,7 @@ cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x,
 
 # this quick & dirty RNG apparently matches Java's (non-Secure)Random
 # note this function side-effects next_random to set up the next number
-cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil:
+cdef inline unsigned long long random_int32(unsigned long long *next_random) noexcept nogil:
     cdef unsigned long long this_random = next_random[0] >> 16
     next_random[0] = (next_random[0] * <unsigned long long>25214903917ULL + 11) & 281474976710655ULL
     return this_random
@@ -157,7 +157,7 @@ cdef unsigned long long w2v_fast_sentence_sg_neg(
        REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
        const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work,
        unsigned long long next_random, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the Skip-Gram model.
 
     In this model we are using a given word to predict a context word (a word that is
@@ -248,7 +248,7 @@ cdef void w2v_fast_sentence_cbow_hs(
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, REAL_t *words_lockf, const np.uint32_t lockf_len,
-        const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+        const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the CBOW method.
 
     Using this method we train the trainable neural network by attempting to predict a
@@ -346,7 +346,7 @@ cdef unsigned long long w2v_fast_sentence_cbow_neg(
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work,
        int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *words_lockf,
-        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+        const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the CBOW method.
 
     Using this method we train the trainable neural network by attempting to predict a
@@ -785,7 +785,7 @@ def score_sentence_sg(model, sentence, _work):
 cdef void score_pair_sg_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
        REAL_t *syn0, REAL_t *syn1, const int size,
-        const np.uint32_t word2_index, REAL_t *work) nogil:
+        const np.uint32_t word2_index, REAL_t *work) noexcept nogil:
 
     cdef long long b
     cdef long long row1 = word2_index * size, row2, sgn
@@ -879,7 +879,7 @@ cdef void score_pair_cbow_hs(
        const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN],
        REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size,
        const np.uint32_t indexes[MAX_SENTENCE_LEN], REAL_t *work,
-        int i, int j, int k, int cbow_mean) nogil:
+        int i, int j, int k, int cbow_mean) noexcept nogil:
 
     cdef long long a, b
     cdef long long row2
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py
index 153a083762..52a9cb9a2c 100644
--- a/gensim/test/test_similarities.py
+++ b/gensim/test/test_similarities.py
@@ -323,11 +323,11 @@ def test_full(self, num_best=None):
             # Sparse array.
             for i, sim in sims:
                 # Note that similarities are bigger than zero, as they are the 1/ 1 + distances.
-                self.assertTrue(numpy.alltrue(sim > 0.0))
+                self.assertTrue(numpy.all(sim > 0.0))
         else:
             self.assertTrue(sims[0] == 1.0)  # Similarity of a document with itself is 0.0.
-            self.assertTrue(numpy.alltrue(sims[1:] > 0.0))
-            self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
+            self.assertTrue(numpy.all(sims[1:] > 0.0))
+            self.assertTrue(numpy.all(sims[1:] < 1.0))
 
     @unittest.skipIf(POT_EXT is False, "POT not installed")
     def test_non_increasing(self):
@@ -354,15 +354,15 @@ def test_chunking(self):
         sims = index[query]
 
         for i in range(3):
-            self.assertTrue(numpy.alltrue(sims[i, i] == 1.0))  # Similarity of a document with itself is 0.0.
+            self.assertTrue(numpy.all(sims[i, i] == 1.0))  # Similarity of a document with itself is 0.0.
 
         # test the same thing but with num_best
         index.num_best = 3
         sims = index[query]
         for sims_temp in sims:
             for i, sim in sims_temp:
-                self.assertTrue(numpy.alltrue(sim > 0.0))
-                self.assertTrue(numpy.alltrue(sim <= 1.0))
+                self.assertTrue(numpy.all(sim > 0.0))
+                self.assertTrue(numpy.all(sim <= 1.0))
 
     @unittest.skipIf(POT_EXT is False, "POT not installed")
     def test_iter(self):
@@ -370,8 +370,8 @@ def test_iter(self):
         index = self.cls(TEXTS, self.w2v_model)
 
         for sims in index:
-            self.assertTrue(numpy.alltrue(sims >= 0.0))
-            self.assertTrue(numpy.alltrue(sims <= 1.0))
+            self.assertTrue(numpy.all(sims >= 0.0))
+            self.assertTrue(numpy.all(sims <= 1.0))
 
     @unittest.skipIf(POT_EXT is False, "POT not installed")
     def test_str(self):
@@ -399,12 +399,12 @@ def test_full(self, num_best=None):
         if num_best is not None:
             # Sparse array.
             for i, sim in sims:
-                self.assertTrue(numpy.alltrue(sim <= 1.0))
-                self.assertTrue(numpy.alltrue(sim >= 0.0))
+                self.assertTrue(numpy.all(sim <= 1.0))
+                self.assertTrue(numpy.all(sim >= 0.0))
         else:
             self.assertAlmostEqual(1.0, sims[0])  # Similarity of a document with itself is 1.0.
-            self.assertTrue(numpy.alltrue(sims[1:] >= 0.0))
-            self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
+            self.assertTrue(numpy.all(sims[1:] >= 0.0))
+            self.assertTrue(numpy.all(sims[1:] < 1.0))
 
         # Corpora
         for query in (
@@ -416,15 +416,15 @@ def test_full(self, num_best=None):
                 # Sparse array.
                 for result in sims:
                     for i, sim in result:
-                        self.assertTrue(numpy.alltrue(sim <= 1.0))
-                        self.assertTrue(numpy.alltrue(sim >= 0.0))
+                        self.assertTrue(numpy.all(sim <= 1.0))
+                        self.assertTrue(numpy.all(sim >= 0.0))
             else:
                 for i, result in enumerate(sims):
                     self.assertAlmostEqual(1.0, result[i])  # Similarity of a document with itself is 1.0.
-                    self.assertTrue(numpy.alltrue(result[:i] >= 0.0))
-                    self.assertTrue(numpy.alltrue(result[:i] < 1.0))
-                    self.assertTrue(numpy.alltrue(result[i + 1:] >= 0.0))
-                    self.assertTrue(numpy.alltrue(result[i + 1:] < 1.0))
+                    self.assertTrue(numpy.all(result[:i] >= 0.0))
+                    self.assertTrue(numpy.all(result[:i] < 1.0))
+                    self.assertTrue(numpy.all(result[i + 1:] >= 0.0))
+                    self.assertTrue(numpy.all(result[i + 1:] < 1.0))
 
     def test_non_increasing(self):
         """ Check that similarities are non-increasing when `num_best` is not `None`."""
@@ -445,7 +445,7 @@ def test_chunking(self):
         sims = index[query]
 
         for i in range(3):
-            self.assertTrue(numpy.alltrue(sims[i, i] == 1.0))  # Similarity of a document with itself is 1.0.
+            self.assertTrue(numpy.all(sims[i, i] == 1.0))  # Similarity of a document with itself is 1.0.
 
         # test the same thing but with num_best
         index.num_best = 5
@@ -459,8 +459,8 @@ def test_chunking(self):
     def test_iter(self):
         index = self.cls(CORPUS, self.similarity_matrix)
         for sims in index:
-            self.assertTrue(numpy.alltrue(sims >= 0.0))
-            self.assertTrue(numpy.alltrue(sims <= 1.0))
+            self.assertTrue(numpy.all(sims >= 0.0))
+            self.assertTrue(numpy.all(sims <= 1.0))
 
 
 class TestSparseMatrixSimilarity(_TestSimilarityABC):
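The test changes are mechanical: `numpy.alltrue` was deprecated in NumPy 1.25 and removed in 2.0, with `numpy.all` as the drop-in replacement. Illustrative only:

    import numpy

    sims = numpy.array([0.2, 0.7, 1.0])
    assert numpy.all(sims >= 0.0)   # replacement used throughout these tests
    # numpy.alltrue(sims >= 0.0)    # AttributeError on NumPy >= 2.0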
diff --git a/pyproject.toml b/pyproject.toml
index 1200f06c8b..0a9c20ae20 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,9 @@
 [build-system]
 requires = [
-    #
-    # If we build our extensions with Cython 3.0.0, then they will be an
-    # order of magnitude slower, so avoid it for now.
-    #
-    "Cython>=0.29.32,<3.0.0",
+    "Cython>=3.0.0",
     # oldest supported Numpy for this platform is 1.17 but the oldest supported by Gensim
     # is 1.18.5, remove the line when they increase oldest supported Numpy for this platform
-    "numpy>=1.26.4, <2.0.0; python_version>='3.9'",
+    "numpy>=2.0.0; python_version>='3.9'",
     "setuptools",
     "wheel",
 ]
diff --git a/setup.py b/setup.py
index a4688969be..c92d3e7780 100644
--- a/setup.py
+++ b/setup.py
@@ -59,6 +59,7 @@ def make_c_ext(use_cython=False):
             sources=[source],
             language='c',
             extra_compile_args=extra_args,
+            define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
         )
 
 
@@ -80,6 +81,7 @@ def make_cpp_ext(use_cython=False):
             language='c++',
             extra_compile_args=extra_args,
             extra_link_args=extra_args,
+            define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
        )
 
 
@@ -120,8 +122,8 @@ def finalize_options(self):
 
 if need_cython():
     import Cython.Build
-    Cython.Build.cythonize(list(make_c_ext(use_cython=True)), language_level=3)
-    Cython.Build.cythonize(list(make_cpp_ext(use_cython=True)), language_level=3)
+    Cython.Build.cythonize(list(make_c_ext(use_cython=True)))
+    Cython.Build.cythonize(list(make_cpp_ext(use_cython=True)))
 
 
 class CleanExt(distutils.cmd.Command):
@@ -324,17 +326,14 @@ def run(self):
 
     'pandas',
 ]
 
-#
-# see https://github.com/piskvorky/gensim/pull/3535
-#
-NUMPY_STR = 'numpy >= 1.26.0, < 2.0'
+NUMPY_STR = 'numpy >= 2.0.0'
 
 install_requires = [
     NUMPY_STR,
     #
     # scipy 1.14.0 and onwards removes deprecated sparsetools submodule
     #
-    'scipy >= 1.11.0, <1.14.0',
+    'scipy >= 1.13.0, <1.14.0',
     'smart_open >= 1.8.1',
 ]
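Two build-level details in the hunks above are worth spelling out. The `language_level=3` argument to `cythonize` was dropped because that is the default under Cython 3.0. Defining `NPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION` turns any use of NumPy C-API symbols deprecated since 1.7 into a compile-time error, which is what keeps these extensions building cleanly against NumPy 2.0 headers. A sketch of the same macro applied to a hypothetical extension outside gensim:

    from setuptools import Extension

    ext = Extension(
        "mypkg._fast",                 # hypothetical module name
        sources=["mypkg/_fast.pyx"],
        define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")],
    )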