Draft

piskvorky · Nov 8, 2024 · e104e53
1 parent 03aeb11
commit e104e53
Show file tree

Hide file tree

Showing 16 changed files with 112 additions and 117 deletions.
diff --git a/gensim/_matutils.pyx b/gensim/_matutils.pyx
@@ -42,7 +42,7 @@ def mean_absolute_difference(a, b):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
+cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) noexcept nogil:
     """Mean absolute difference between two arrays.
 
     Parameters
@@ -103,7 +103,7 @@ def logsumexp(x):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
+cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) noexcept nogil:
     """Log of sum of exponentials.
 
     Parameters
@@ -223,7 +223,7 @@ def dirichlet_expectation_1d(alpha):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
+cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
 
     Parameters
@@ -251,7 +251,7 @@ cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
+cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
 
     Parameters
@@ -298,7 +298,7 @@ def digamma(DTYPE_t x):
 
 
 @cython.cdivision(True)
-cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
+cdef inline DTYPE_t _digamma(DTYPE_t x,) noexcept nogil:
     """Digamma function for positive floats.
 
     Parameters

diff --git a/gensim/models/doc2vec_corpusfile.pyx b/gensim/models/doc2vec_corpusfile.pyx
@@ -61,7 +61,7 @@ cdef void prepare_c_structures_for_batch(
         np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *document_len, int train_words,
         int docvecs_count, int doc_tag, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem predict_word
     cdef string token
     cdef int i = 0

diff --git a/gensim/models/doc2vec_inner.pxd b/gensim/models/doc2vec_inner.pxd
@@ -57,39 +57,39 @@ cdef void fast_document_dbow_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *context_vectors, REAL_t *syn1, const int size,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
-    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil
+    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil
 
 
 cdef unsigned long long fast_document_dbow_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len,
     REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-    const np.uint32_t contexts_lockf_len) nogil
+    const np.uint32_t contexts_lockf_len) noexcept nogil
 
 
 cdef void fast_document_dm_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil
+    const int size, int learn_hidden) noexcept nogil
 
 
 cdef unsigned long long fast_document_dm_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil
+    const int size, int learn_hidden) noexcept nogil
 
 
 cdef void fast_document_dmc_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil
 
 
 cdef unsigned long long fast_document_dmc_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil
 
 
 cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, learn_hidden, train_words=*, work=*,

diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx
@@ -39,7 +39,7 @@ cdef void fast_document_dbow_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *context_vectors, REAL_t *syn1, const int size,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
-    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil:
+    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil:
 
     cdef long long a, b
     cdef long long row1 = context_index * size, row2
@@ -66,7 +66,7 @@ cdef unsigned long long fast_document_dbow_neg(
     REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-    const np.uint32_t contexts_lockf_len) nogil:
+    const np.uint32_t contexts_lockf_len) noexcept nogil:
 
     cdef long long a
     cdef long long row1 = context_index * size, row2
@@ -106,7 +106,7 @@ cdef unsigned long long fast_document_dbow_neg(
 cdef void fast_document_dm_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil:
+    const int size, int learn_hidden) noexcept nogil:
 
     cdef long long b
     cdef long long row2
@@ -129,7 +129,7 @@ cdef void fast_document_dm_hs(
 cdef unsigned long long fast_document_dm_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil:
+    const int size, int learn_hidden) noexcept nogil:
 
     cdef long long row2
     cdef unsigned long long modulo = 281474976710655ULL
@@ -165,7 +165,7 @@ cdef unsigned long long fast_document_dm_neg(
 cdef void fast_document_dmc_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil:
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:
 
     cdef long long a, b
     cdef long long row2
@@ -189,7 +189,7 @@ cdef void fast_document_dmc_hs(
 cdef unsigned long long fast_document_dmc_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil:
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:
 
     cdef long long a
     cdef long long row2

diff --git a/gensim/models/fasttext_corpusfile.pyx b/gensim/models/fasttext_corpusfile.pyx
@@ -48,7 +48,7 @@ cdef void prepare_c_structures_for_batch(
         int *effective_words, int *effective_sentences, unsigned long long *next_random, cvocab_t *vocab,
         int *sentence_idx, np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *subwords_idx_len, np.uint32_t **subwords_idx, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem word
     cdef string token
     cdef vector[string] sent

diff --git a/gensim/models/fasttext_inner.pxd b/gensim/models/fasttext_inner.pxd
@@ -135,16 +135,16 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1)
 cdef object populate_ft_config(FastTextConfig *c, vocab, buckets_word, sentences)
 
 
-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil
 
 
-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil
 
 
-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil
diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx
@@ -72,7 +72,7 @@ cdef int ONE = 1
 cdef REAL_t ONEF = <REAL_t>1.0
 
 
-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with negative sampling.
 
     Parameters
@@ -145,7 +145,7 @@ cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
                   c.work, &ONE, &c.syn0_ngrams[subwords_index[d]*c.size], &ONE)
 
 
-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with hierarchical sampling.
 
     Parameters
@@ -221,7 +221,7 @@ cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
             &c.syn0_ngrams[row2], &ONE)
 
 
-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with negative sampling.
 
     Parameters
@@ -306,7 +306,7 @@ cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k
                 &c.syn0_ngrams[c.subwords_idx[m][d]*c.size], &ONE)
 
 
-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with hierarchical sampling.
 
     Parameters
@@ -510,7 +510,7 @@ cdef object populate_ft_config(FastTextConfig *c, wv, buckets_word, sentences):
     return effective_words, effective_sentences
 
 
-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil:
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil:
     """Performs training on a fully initialized and populated configuration.
 
     Parameters

diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
@@ -1667,7 +1667,7 @@ def save_word2vec_format(
                 if binary:
                     fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
                 else:
-                    fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
+                    fout.write(f"{prefix}{key} {' '.join(val.astype('str') for val in key_vector)}\n".encode('utf8'))
 
     @classmethod
     def load_word2vec_format(

diff --git a/gensim/models/nmf_pgd.pyx b/gensim/models/nmf_pgd.pyx
@@ -10,10 +10,10 @@
 from libc.math cimport sqrt
 from cython.parallel import prange
 
-cdef double fmin(double x, double y) nogil:
+cdef double fmin(double x, double y) noexcept nogil:
     return x if x < y else y
 
-cdef double fmax(double x, double y) nogil:
+cdef double fmax(double x, double y) noexcept nogil:
     return x if x > y else y
 
 def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):

diff --git a/gensim/models/word2vec_corpusfile.pxd b/gensim/models/word2vec_corpusfile.pxd
@@ -26,9 +26,9 @@ cdef extern from "fast_line_sentence.h":
     cdef cppclass FastLineSentence:
         FastLineSentence() except +
         FastLineSentence(string&, size_t) except +
-        vector[string] ReadSentence() nogil except +
-        bool_t IsEof() nogil
-        void Reset() nogil
+        vector[string] ReadSentence() except + nogil
+        bool_t IsEof() noexcept nogil
+        void Reset() noexcept nogil
 
 
 cdef class CythonLineSentence:
@@ -37,12 +37,12 @@ cdef class CythonLineSentence:
     cdef public size_t max_sentence_length, max_words_in_batch, offset
     cdef vector[vector[string]] buf_data
 
-    cpdef bool_t is_eof(self) nogil
-    cpdef vector[string] read_sentence(self) nogil except *
-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *
-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil
-    cpdef void reset(self) nogil
-    cpdef vector[vector[string]] next_batch(self) nogil except *
+    cpdef bool_t is_eof(self) noexcept nogil
+    cpdef vector[string] read_sentence(self) except * nogil
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil
+    cpdef void reset(self) noexcept nogil
+    cpdef vector[vector[string]] next_batch(self) except * nogil
 
 
 cdef struct VocabItem:
@@ -62,9 +62,9 @@ ctypedef unordered_map[string, VocabItem] cvocab_t
 cdef class CythonVocab:
     cdef cvocab_t vocab
     cdef subword_arrays
-    cdef cvocab_t* get_vocab_ptr(self) nogil except *
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil
 
 
-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil
 cdef REAL_t get_next_alpha(REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil
+                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil
diff --git a/gensim/models/word2vec_corpusfile.pyx b/gensim/models/word2vec_corpusfile.pyx
@@ -62,7 +62,7 @@ cdef class CythonVocab:
 
             self.vocab[token] = word
 
-    cdef cvocab_t* get_vocab_ptr(self) nogil except *:
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil:
         return &self.vocab
 
 
@@ -92,17 +92,17 @@ cdef class CythonLineSentence:
         if self._thisptr != NULL:
             del self._thisptr
 
-    cpdef bool_t is_eof(self) nogil:
+    cpdef bool_t is_eof(self) noexcept nogil:
         return self._thisptr.IsEof()
 
-    cpdef vector[string] read_sentence(self) nogil except *:
+    cpdef vector[string] read_sentence(self) except * nogil:
         return self._thisptr.ReadSentence()
 
-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *:
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil:
         cdef vector[string] sent = self.read_sentence()
         return self._chunk_sentence(sent)
 
-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil:
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil:
         cdef vector[vector[string]] res
         cdef vector[string] chunk
         cdef size_t cur_idx = 0
@@ -120,7 +120,7 @@ cdef class CythonLineSentence:
 
         return res
 
-    cpdef void reset(self) nogil:
+    cpdef void reset(self) noexcept nogil:
         self._thisptr.Reset()
 
     def __iter__(self):
@@ -135,7 +135,7 @@ cdef class CythonLineSentence:
         # This function helps pickle to correctly serialize objects of this class.
         return rebuild_cython_line_sentence, (self.source, self.max_sentence_length)
 
-    cpdef vector[vector[string]] next_batch(self) nogil except *:
+    cpdef vector[vector[string]] next_batch(self) except * nogil:
         cdef:
             vector[vector[string]] job_batch
             vector[vector[string]] chunked_sentence
@@ -235,13 +235,13 @@ cdef void prepare_c_structures_for_batch(
             reduced_windows[i] = 0
 
 
-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil:
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil:
     return alpha - ((alpha - end_alpha) * (<REAL_t> cur_epoch) / num_epochs)
 
 
 cdef REAL_t get_next_alpha(
         REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil:
+        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil:
     cdef REAL_t epoch_progress
 
     if expected_examples != -1: