diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml
index c4c3444499..4fe6b56015 100644
--- a/.github/workflows/build-wheels.yml
+++ b/.github/workflows/build-wheels.yml
@@ -61,19 +61,16 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - {python: '3.8', os: macos-12}
           - {python: '3.9', os: macos-12}
           - {python: '3.10', os: macos-12}
           - {python: '3.11', os: macos-12}
           - {python: '3.12', os: macos-12}
-          - {python: '3.8', os: ubuntu-20.04}
           - {python: '3.9', os: ubuntu-20.04}
           - {python: '3.10', os: ubuntu-20.04}
           - {python: '3.11', os: ubuntu-20.04}
           - {python: '3.12', os: ubuntu-20.04}
-          - {python: '3.8', os: windows-2019}
           - {python: '3.9', os: windows-2019}
           - {python: '3.10', os: windows-2019}
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 2d342f2c11..07aa430ccd 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -63,13 +63,11 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - {python: '3.8', os: ubuntu-20.04}
           - {python: '3.9', os: ubuntu-20.04}
           - {python: '3.10', os: ubuntu-20.04}
           - {python: '3.11', os: ubuntu-20.04}
           - {python: '3.12', os: ubuntu-20.04}
-          - {python: '3.8', os: windows-2019}
           - {python: '3.9', os: windows-2019}
           - {python: '3.10', os: windows-2019}
           - {python: '3.11', os: windows-2019}
diff --git a/gensim/_matutils.pyx b/gensim/_matutils.pyx
index 0162202224..aa4e9a1cee 100644
--- a/gensim/_matutils.pyx
+++ b/gensim/_matutils.pyx
@@ -42,7 +42,7 @@ def mean_absolute_difference(a, b):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
+cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) noexcept nogil:
     """Mean absolute difference between two arrays.

     Parameters
@@ -103,7 +103,7 @@ def logsumexp(x):
 @cython.boundscheck(False)
 @cython.wraparound(False)
 @cython.cdivision(True)
-cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
+cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) noexcept nogil:
     """Log of sum of exponentials.

     Parameters
@@ -223,7 +223,7 @@ def dirichlet_expectation_1d(alpha):

 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
+cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.

     Parameters
@@ -251,7 +251,7 @@ cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:

 @cython.boundscheck(False)
 @cython.wraparound(False)
-cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
+cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) noexcept nogil:
     """Expected value of log(theta) where theta is drawn from a Dirichlet distribution.

     Parameters
@@ -298,7 +298,7 @@ def digamma(DTYPE_t x):


 @cython.cdivision(True)
-cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
+cdef inline DTYPE_t _digamma(DTYPE_t x,) noexcept nogil:
     """Digamma function for positive floats.

     Parameters
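A note on the recurring `nogil` to `noexcept nogil` edit in the Cython hunks above and below: Cython 3.0 changed the default so that `cdef` functions can propagate Python exceptions, which means a plain `) nogil:` signature now implies an exception check at every call site, and that check may need to re-acquire the GIL. Declaring these hot-path helpers `noexcept` restores the pre-3.0 contract (an error raised inside is printed and swallowed, never propagated) and keeps the training loops truly GIL-free. A minimal sketch of the difference, assuming Cython >= 3.0; this is illustrative code, not from gensim:

```cython
# cython: language_level=3

# Cython 3 default: this signature is implicitly allowed to raise, so every
# call site checks for a pending Python exception (and may re-acquire the GIL).
cdef double halve(double x) nogil:
    return x / 2.0

# `noexcept` restores the Cython 0.29.x behavior these loops were written
# against: no per-call exception check; errors inside are printed and ignored.
cdef double halve_fast(double x) noexcept nogil:
    return x / 2.0
```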
diff --git a/gensim/models/doc2vec_corpusfile.pyx b/gensim/models/doc2vec_corpusfile.pyx
index da5b230b9f..29463954c1 100644
--- a/gensim/models/doc2vec_corpusfile.pyx
+++ b/gensim/models/doc2vec_corpusfile.pyx
@@ -61,7 +61,7 @@ cdef void prepare_c_structures_for_batch(
         np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *document_len, int train_words,
         int docvecs_count, int doc_tag, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem predict_word
     cdef string token
     cdef int i = 0
diff --git a/gensim/models/doc2vec_inner.pxd b/gensim/models/doc2vec_inner.pxd
index 525d20c6b6..c327ce462f 100644
--- a/gensim/models/doc2vec_inner.pxd
+++ b/gensim/models/doc2vec_inner.pxd
@@ -57,7 +57,7 @@ cdef void fast_document_dbow_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *context_vectors, REAL_t *syn1, const int size, const np.uint32_t context_index,
     const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
-    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil
+    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil


 cdef unsigned long long fast_document_dbow_neg(
@@ -65,31 +65,31 @@ cdef unsigned long long fast_document_dbow_neg(
     REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-    const np.uint32_t contexts_lockf_len) nogil
+    const np.uint32_t contexts_lockf_len) noexcept nogil


 cdef void fast_document_dm_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil
+    const int size, int learn_hidden) noexcept nogil


 cdef unsigned long long fast_document_dm_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil
+    const int size, int learn_hidden) noexcept nogil


 cdef void fast_document_dmc_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


 cdef unsigned long long fast_document_dmc_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


 cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, learn_hidden,
                      train_words=*, work=*,
diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx
index 21964b79b6..24aafe2f8b 100644
--- a/gensim/models/doc2vec_inner.pyx
+++ b/gensim/models/doc2vec_inner.pyx
@@ -39,7 +39,7 @@ cdef void fast_document_dbow_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *context_vectors, REAL_t *syn1, const int size, const np.uint32_t context_index,
     const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
-    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil:
+    REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil:

     cdef long long a, b
     cdef long long row1 = context_index * size, row2
@@ -66,7 +66,7 @@ cdef unsigned long long fast_document_dbow_neg(
     REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
-    const np.uint32_t contexts_lockf_len) nogil:
+    const np.uint32_t contexts_lockf_len) noexcept nogil:

     cdef long long a
     cdef long long row1 = context_index * size, row2
@@ -106,7 +106,7 @@ cdef unsigned long long fast_document_dbow_neg(
 cdef void fast_document_dm_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil:
+    const int size, int learn_hidden) noexcept nogil:

     cdef long long b
     cdef long long row2
@@ -129,7 +129,7 @@ cdef void fast_document_dm_hs(
 cdef unsigned long long fast_document_dm_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int size, int learn_hidden) nogil:
+    const int size, int learn_hidden) noexcept nogil:

     cdef long long row2
     cdef unsigned long long modulo = 281474976710655ULL
@@ -165,7 +165,7 @@ cdef unsigned long long fast_document_dm_neg(
 cdef void fast_document_dmc_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
     REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil:
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

     cdef long long a, b
     cdef long long row2
@@ -189,7 +189,7 @@ cdef void fast_document_dmc_hs(
 cdef unsigned long long fast_document_dmc_neg(
     const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
     REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
-    const int layer1_size, const int vector_size, int learn_hidden) nogil:
+    const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

     cdef long long a
     cdef long long row2
diff --git a/gensim/models/fasttext_corpusfile.pyx b/gensim/models/fasttext_corpusfile.pyx
index 1f67785bf7..0de3762e15 100644
--- a/gensim/models/fasttext_corpusfile.pyx
+++ b/gensim/models/fasttext_corpusfile.pyx
@@ -48,7 +48,7 @@ cdef void prepare_c_structures_for_batch(
         int *effective_words, int *effective_sentences, unsigned long long *next_random, cvocab_t *vocab,
         int *sentence_idx, np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
         np.uint32_t *reduced_windows, int *subwords_idx_len, np.uint32_t **subwords_idx, int shrink_windows,
-    ) nogil:
+    ) noexcept nogil:
     cdef VocabItem word
     cdef string token
     cdef vector[string] sent
diff --git a/gensim/models/fasttext_inner.pxd b/gensim/models/fasttext_inner.pxd
index af7a531116..f383dc6616 100644
--- a/gensim/models/fasttext_inner.pxd
+++ b/gensim/models/fasttext_inner.pxd
@@ -135,16 +135,16 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1)
 cdef object populate_ft_config(FastTextConfig *c, vocab, buckets_word,
                                sentences)


-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil


-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil


-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil


-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil


-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil
diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx
index 6e246b3579..682c515cfc 100644
--- a/gensim/models/fasttext_inner.pyx
+++ b/gensim/models/fasttext_inner.pyx
@@ -72,7 +72,7 @@ cdef int ONE = 1
 cdef REAL_t ONEF = <REAL_t>1.0


-cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with negative sampling.

     Parameters
@@ -145,7 +145,7 @@ cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
         c.work, &ONE, &c.syn0_ngrams[subwords_index[d]*c.size], &ONE)


-cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
+cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil:
     """Perform skipgram training with hierarchical sampling.

     Parameters
@@ -221,7 +221,7 @@ cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
         &c.syn0_ngrams[row2], &ONE)


-cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with negative sampling.

     Parameters
@@ -306,7 +306,7 @@ cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k
         &c.syn0_ngrams[c.subwords_idx[m][d]*c.size], &ONE)


-cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil:
+cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil:
     """Perform CBOW training with hierarchical sampling.

     Parameters
@@ -510,7 +510,7 @@ cdef object populate_ft_config(FastTextConfig *c, wv, buckets_word, sentences):
     return effective_words, effective_sentences


-cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil:
+cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil:
     """Performs training on a fully initialized and populated configuration.

     Parameters
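The one-line keyedvectors.py change below is part of the NumPy 2 migration: NEP 51 changed the repr of NumPy scalars to include the type, so `repr(val)` would write `np.float32(0.1)` into the whitespace-delimited word2vec text format instead of the bare number. Routing each scalar through a string conversion keeps the plain digits on both major versions. A minimal demonstration (values are arbitrary):

```python
import numpy as np

vec = np.array([0.1, 0.2], dtype=np.float32)

# NumPy 1.x:  repr(vec[0]) == '0.1'
# NumPy 2.x:  repr(vec[0]) == 'np.float32(0.1)'  (NEP 51), which would corrupt
# the text format written by save_word2vec_format.
print(repr(vec[0]))

# Converting each scalar through a string dtype yields just the digits:
print(' '.join(val.astype('str') for val in vec))  # -> '0.1 0.2'
```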
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
index ffb808f6cc..bb6c6a1ab8 100644
--- a/gensim/models/keyedvectors.py
+++ b/gensim/models/keyedvectors.py
@@ -1667,7 +1667,7 @@ def save_word2vec_format(
             if binary:
                 fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
             else:
-                fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
+                fout.write(f"{prefix}{key} {' '.join(val.astype('str') for val in key_vector)}\n".encode('utf8'))

     @classmethod
     def load_word2vec_format(
diff --git a/gensim/models/nmf_pgd.pyx b/gensim/models/nmf_pgd.pyx
index 2419272e5b..82f079fd2e 100644
--- a/gensim/models/nmf_pgd.pyx
+++ b/gensim/models/nmf_pgd.pyx
@@ -10,10 +10,10 @@ from libc.math cimport sqrt
 from cython.parallel import prange

-cdef double fmin(double x, double y) nogil:
+cdef double fmin(double x, double y) noexcept nogil:
     return x if x < y else y

-cdef double fmax(double x, double y) nogil:
+cdef double fmax(double x, double y) noexcept nogil:
     return x if x > y else y


 def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):
diff --git a/gensim/models/word2vec_corpusfile.pxd b/gensim/models/word2vec_corpusfile.pxd
index 2490c2ca37..c8614038e6 100644
--- a/gensim/models/word2vec_corpusfile.pxd
+++ b/gensim/models/word2vec_corpusfile.pxd
@@ -26,9 +26,9 @@ cdef extern from "fast_line_sentence.h":
     cdef cppclass FastLineSentence:
         FastLineSentence() except +
         FastLineSentence(string&, size_t) except +
-        vector[string] ReadSentence() nogil except +
-        bool_t IsEof() nogil
-        void Reset() nogil
+        vector[string] ReadSentence() except + nogil
+        bool_t IsEof() noexcept nogil
+        void Reset() noexcept nogil


 cdef class CythonLineSentence:
@@ -37,12 +37,12 @@ cdef class CythonLineSentence:
     cdef public size_t max_sentence_length, max_words_in_batch, offset
     cdef vector[vector[string]] buf_data

-    cpdef bool_t is_eof(self) nogil
-    cpdef vector[string] read_sentence(self) nogil except *
-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *
-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil
-    cpdef void reset(self) nogil
-    cpdef vector[vector[string]] next_batch(self) nogil except *
+    cpdef bool_t is_eof(self) noexcept nogil
+    cpdef vector[string] read_sentence(self) except * nogil
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil
+    cpdef void reset(self) noexcept nogil
+    cpdef vector[vector[string]] next_batch(self) except * nogil


 cdef struct VocabItem:
@@ -62,9 +62,9 @@ ctypedef unordered_map[string, VocabItem] cvocab_t
 cdef class CythonVocab:
     cdef cvocab_t vocab
     cdef subword_arrays
-    cdef cvocab_t* get_vocab_ptr(self) nogil except *
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil

-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil

 cdef REAL_t get_next_alpha(REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil
+                           long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil
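Besides `noexcept`, the word2vec_corpusfile.pxd hunks above also reorder `nogil except +` and `nogil except *` into `except + nogil` and `except * nogil`: Cython 3 requires the exception specification to come before `nogil` in a declaration, while the declared behavior is unchanged. Schematically (a sketch of the same declaration style, not the full file):

```cython
from libcpp.string cimport string
from libcpp.vector cimport vector

cdef extern from "fast_line_sentence.h":
    cdef cppclass FastLineSentence:
        # Cython 3 ordering: exception clause first, then nogil.
        # The pre-3.0 spelling `ReadSentence() nogil except +` is rejected.
        vector[string] ReadSentence() except + nogil
```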
diff --git a/gensim/models/word2vec_corpusfile.pyx b/gensim/models/word2vec_corpusfile.pyx
index 89012cfd81..0a7d401ea5 100644
--- a/gensim/models/word2vec_corpusfile.pyx
+++ b/gensim/models/word2vec_corpusfile.pyx
@@ -62,7 +62,7 @@ cdef class CythonVocab:
             self.vocab[token] = word

-    cdef cvocab_t* get_vocab_ptr(self) nogil except *:
+    cdef cvocab_t* get_vocab_ptr(self) except * nogil:
         return &self.vocab

@@ -92,17 +92,17 @@ cdef class CythonLineSentence:
         if self._thisptr != NULL:
             del self._thisptr

-    cpdef bool_t is_eof(self) nogil:
+    cpdef bool_t is_eof(self) noexcept nogil:
         return self._thisptr.IsEof()

-    cpdef vector[string] read_sentence(self) nogil except *:
+    cpdef vector[string] read_sentence(self) except * nogil:
         return self._thisptr.ReadSentence()

-    cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *:
+    cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil:
         cdef vector[string] sent = self.read_sentence()
         return self._chunk_sentence(sent)

-    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil:
+    cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil:
         cdef vector[vector[string]] res
         cdef vector[string] chunk
         cdef size_t cur_idx = 0
@@ -120,7 +120,7 @@ cdef class CythonLineSentence:

         return res

-    cpdef void reset(self) nogil:
+    cpdef void reset(self) noexcept nogil:
         self._thisptr.Reset()

     def __iter__(self):
@@ -135,7 +135,7 @@ cdef class CythonLineSentence:
         # This function helps pickle to correctly serialize objects of this class.
         return rebuild_cython_line_sentence, (self.source, self.max_sentence_length)

-    cpdef vector[vector[string]] next_batch(self) nogil except *:
+    cpdef vector[vector[string]] next_batch(self) except * nogil:
         cdef:
             vector[vector[string]] job_batch
             vector[vector[string]] chunked_sentence
@@ -235,13 +235,13 @@ cdef void prepare_c_structures_for_batch(
             reduced_windows[i] = 0


-cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil:
+cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil:
     return alpha - ((alpha - end_alpha) * (<REAL_t>cur_epoch) / num_epochs)


 cdef REAL_t get_next_alpha(
         REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
-        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil:
+        long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil:
     cdef REAL_t epoch_progress

     if expected_examples != -1:
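For reference, `get_alpha` above is the linear learning-rate decay used by the corpus-file training path. The same expression in plain Python (an illustrative rendering, not the compiled code):

```python
def get_alpha(alpha, end_alpha, cur_epoch, num_epochs):
    # Linear interpolation from `alpha` at epoch 0 down to `end_alpha`
    # when cur_epoch == num_epochs.
    return alpha - (alpha - end_alpha) * cur_epoch / num_epochs

# With word2vec's usual defaults (alpha=0.025, min_alpha=0.0001) over 5 epochs:
print(get_alpha(0.025, 0.0001, 0, 5))  # 0.025
print(get_alpha(0.025, 0.0001, 1, 5))  # 0.02002
print(get_alpha(0.025, 0.0001, 5, 5))  # 0.0001 (up to float rounding)
```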
diff --git a/gensim/models/word2vec_inner.pxd b/gensim/models/word2vec_inner.pxd
index 8a77a17041..fcab17c296 100644
--- a/gensim/models/word2vec_inner.pxd
+++ b/gensim/models/word2vec_inner.pxd
@@ -20,12 +20,12 @@ cdef extern from "voidptr.h":
 ctypedef np.float32_t REAL_t

 # BLAS routine signatures
-ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil
-ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
-ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil
-ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil
+ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
+ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil
+ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) noexcept nogil
+ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) noexcept nogil

 cdef scopy_ptr scopy
 cdef saxpy_ptr saxpy
@@ -42,8 +42,8 @@ cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE
 DEF MAX_SENTENCE_LEN = 10000

 # function implementations swapped based on BLAS detected in word2vec_inner.pyx init()
-ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
+ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil

 cdef our_dot_ptr our_dot
 cdef our_saxpy_ptr our_saxpy
@@ -78,26 +78,26 @@ cdef struct Word2VecConfig:

 # for when fblas.sdot returns a double
-cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
+cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil

 # for when fblas.sdot returns a float
-cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
+cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil

 # for when no blas available
-cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil
-cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
+cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil
+cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil

 # to support random draws from negative-sampling cum_table
-cdef unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil
+cdef unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) noexcept nogil

-cdef unsigned long long random_int32(unsigned long long *next_random) nogil
+cdef unsigned long long random_int32(unsigned long long *next_random) noexcept nogil


 cdef void w2v_fast_sentence_sg_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index,
     const REAL_t alpha, REAL_t *work, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil


 cdef unsigned long long w2v_fast_sentence_sg_neg(
@@ -105,7 +105,7 @@ cdef unsigned long long w2v_fast_sentence_sg_neg(
     REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil


 cdef void w2v_fast_sentence_cbow_hs(
@@ -113,7 +113,7 @@ cdef void w2v_fast_sentence_cbow_hs(
     REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN],
     const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil


 cdef unsigned long long w2v_fast_sentence_cbow_neg(
@@ -121,7 +121,7 @@ cdef unsigned long long w2v_fast_sentence_cbow_neg(
     REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN],
     const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean,
     unsigned long long next_random, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil


 cdef init_w2v_config(Word2VecConfig *c, model, alpha, compute_loss, _work, _neu1=*)
diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx
index 5b5060bab5..f3f24b5472 100755
--- a/gensim/models/word2vec_inner.pyx
+++ b/gensim/models/word2vec_inner.pyx
@@ -44,15 +44,15 @@ cdef REAL_t ONEF = <REAL_t>1.0

 # for when fblas.sdot returns a double
-cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
     return <REAL_t>dsdot(N, X, incX, Y, incY)

 # for when fblas.sdot returns a float
-cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
     return <REAL_t>sdot(N, X, incX, Y, incY)

 # for when no blas available
-cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil:
+cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) noexcept nogil:
     # not a true full dot()-implementation: just enough for our cases
     cdef int i
     cdef REAL_t a
@@ -62,7 +62,7 @@ cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const
     return a

 # for when no blas available
-cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil:
+cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) noexcept nogil:
     cdef int i
     for i from 0 <= i < N[0] by 1:
         Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])]
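`our_saxpy_noblas` above is the scalar fallback installed when no usable BLAS is detected: a strided `y := alpha * x + y` update. Its contract in plain Python (illustrative only):

```python
def saxpy_noblas(n, alpha, x, incx, y, incy):
    # Mirror of the C loop above: update n strided elements of y in place.
    for i in range(n):
        y[i * incy] = alpha * x[i * incx] + y[i * incy]

y = [1.0, 1.0, 1.0]
saxpy_noblas(3, 2.0, [1.0, 2.0, 3.0], 1, y, 1)
print(y)  # [3.0, 5.0, 7.0]
```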
@@ -71,7 +71,7 @@ cdef void w2v_fast_sentence_sg_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index,
     const REAL_t alpha, REAL_t *work, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the Skip-Gram model.

     In this model we are using a given word to predict a context word (a word that is
@@ -135,7 +135,7 @@ cdef void w2v_fast_sentence_sg_hs(

 # to support random draws from negative-sampling cum_table
-cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil:
+cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) noexcept nogil:
     cdef unsigned long long mid
     while hi > lo:
         mid = (lo + hi) >> 1
@@ -147,7 +147,7 @@ cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x,

 # this quick & dirty RNG apparently matches Java's (non-Secure)Random
 # note this function side-effects next_random to set up the next number
-cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil:
+cdef inline unsigned long long random_int32(unsigned long long *next_random) noexcept nogil:
     cdef unsigned long long this_random = next_random[0] >> 16
     next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL
     return this_random
@@ -157,7 +157,7 @@ cdef unsigned long long w2v_fast_sentence_sg_neg(
     REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
     const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work,
     unsigned long long next_random, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the Skip-Gram model.

     In this model we are using a given word to predict a context word (a word that is
@@ -248,7 +248,7 @@ cdef void w2v_fast_sentence_cbow_hs(
     REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN],
     const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, REAL_t *words_lockf, const np.uint32_t lockf_len,
-    const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+    const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the CBOW method.

     Using this method we train the trainable neural network by attempting to predict a
@@ -346,7 +346,7 @@ cdef unsigned long long w2v_fast_sentence_cbow_neg(
     REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN],
     const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean,
     unsigned long long next_random, REAL_t *words_lockf,
-    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) nogil:
+    const np.uint32_t lockf_len, const int _compute_loss, REAL_t *_running_training_loss_param) noexcept nogil:
     """Train on a single effective word from the current batch, using the CBOW method.

     Using this method we train the trainable neural network by attempting to predict a
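The `random_int32` hunk above touches the inline RNG used for negative-sampling draws: a 48-bit linear congruential generator with the same constants as `java.util.Random` (as the source comment notes), emitting the top 32 bits of the state. In plain Python (illustrative rendering):

```python
MASK_48 = (1 << 48) - 1  # 281474976710655, i.e. keep 48 bits of state

def random_int32(state):
    # Return the top 32 bits of the current state, then advance the state
    # using java.util.Random's LCG constants (multiplier 25214903917, increment 11).
    value = state >> 16
    state = (state * 25214903917 + 11) & MASK_48
    return value, state

value, state = random_int32(1)
```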
@@ -785,7 +785,7 @@ def score_sentence_sg(model, sentence, _work):
 cdef void score_pair_sg_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
     REAL_t *syn0, REAL_t *syn1, const int size,
-    const np.uint32_t word2_index, REAL_t *work) nogil:
+    const np.uint32_t word2_index, REAL_t *work) noexcept nogil:

     cdef long long b
     cdef long long row1 = word2_index * size, row2, sgn
@@ -879,7 +879,7 @@ cdef void score_pair_cbow_hs(
     const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN],
     REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], REAL_t *work,
-    int i, int j, int k, int cbow_mean) nogil:
+    int i, int j, int k, int cbow_mean) noexcept nogil:

     cdef long long a, b
     cdef long long row2
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py
index 314336dadc..52a9cb9a2c 100644
--- a/gensim/test/test_similarities.py
+++ b/gensim/test/test_similarities.py
@@ -323,11 +323,11 @@ def test_full(self, num_best=None):
             # Sparse array.
             for i, sim in sims:
                 # Note that similarities are bigger than zero, as they are the 1/ 1 + distances.
-                self.assertTrue(numpy.alltrue(sim > 0.0))
+                self.assertTrue(numpy.all(sim > 0.0))
         else:
             self.assertTrue(sims[0] == 1.0)  # Similarity of a document with itself is 0.0.
-            self.assertTrue(numpy.alltrue(sims[1:] > 0.0))
-            self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
+            self.assertTrue(numpy.all(sims[1:] > 0.0))
+            self.assertTrue(numpy.all(sims[1:] < 1.0))
@@ -354,15 +354,15 @@ def test_chunking(self):
         sims = index[query]

         for i in range(3):
-            self.assertTrue(numpy.alltrue(sims[i, i] == 1.0))  # Similarity of a document with itself is 0.0.
+            self.assertTrue(numpy.all(sims[i, i] == 1.0))  # Similarity of a document with itself is 1.0.

         # test the same thing but with num_best
         index.num_best = 3
         sims = index[query]
         for sims_temp in sims:
             for i, sim in sims_temp:
-                self.assertTrue(numpy.alltrue(sim > 0.0))
-                self.assertTrue(numpy.alltrue(sim <= 1.0))
+                self.assertTrue(numpy.all(sim > 0.0))
+                self.assertTrue(numpy.all(sim <= 1.0))

     @unittest.skipIf(POT_EXT is False, "POT not installed")
     def test_iter(self):
@@ -370,8 +370,8 @@ def test_iter(self):
         index = self.cls(TEXTS, self.w2v_model)

         for sims in index:
-            self.assertTrue(numpy.alltrue(sims >= 0.0))
-            self.assertTrue(numpy.alltrue(sims <= 1.0))
+            self.assertTrue(numpy.all(sims >= 0.0))
+            self.assertTrue(numpy.all(sims <= 1.0))

     @unittest.skipIf(POT_EXT is False, "POT not installed")
     def test_str(self):
@@ -399,12 +399,12 @@ def test_full(self, num_best=None):
         if num_best is not None:
             # Sparse array.
             for i, sim in sims:
-                self.assertTrue(numpy.alltrue(sim <= 1.0))
-                self.assertTrue(numpy.alltrue(sim >= 0.0))
+                self.assertTrue(numpy.all(sim <= 1.0))
+                self.assertTrue(numpy.all(sim >= 0.0))
         else:
             self.assertAlmostEqual(1.0, sims[0])  # Similarity of a document with itself is 1.0.
-            self.assertTrue(numpy.alltrue(sims[1:] >= 0.0))
-            self.assertTrue(numpy.alltrue(sims[1:] < 1.0))
+            self.assertTrue(numpy.all(sims[1:] >= 0.0))
+            self.assertTrue(numpy.all(sims[1:] < 1.0))

         # Corpora
         for query in (
@@ -416,15 +416,15 @@ def test_full(self, num_best=None):
             # Sparse array.
             for result in sims:
                 for i, sim in result:
-                    self.assertTrue(numpy.alltrue(sim <= 1.0))
-                    self.assertTrue(numpy.alltrue(sim >= 0.0))
+                    self.assertTrue(numpy.all(sim <= 1.0))
+                    self.assertTrue(numpy.all(sim >= 0.0))
         else:
             for i, result in enumerate(sims):
                 self.assertAlmostEqual(1.0, result[i])  # Similarity of a document with itself is 1.0.
-                self.assertTrue(numpy.alltrue(result[:i] >= 0.0))
-                self.assertTrue(numpy.alltrue(result[:i] < 1.0))
-                self.assertTrue(numpy.alltrue(result[i + 1:] >= 0.0))
-                self.assertTrue(numpy.alltrue(result[i + 1:] < 1.0))
+                self.assertTrue(numpy.all(result[:i] >= 0.0))
+                self.assertTrue(numpy.all(result[:i] < 1.0))
+                self.assertTrue(numpy.all(result[i + 1:] >= 0.0))
+                self.assertTrue(numpy.all(result[i + 1:] < 1.0))

     def test_non_increasing(self):
         """ Check that similarities are non-increasing when `num_best` is not `None`."""
@@ -445,7 +445,7 @@ def test_chunking(self):
         sims = index[query]

         for i in range(3):
-            self.assertTrue(numpy.alltrue(sims[i, i] == 1.0))  # Similarity of a document with itself is 1.0.
+            self.assertTrue(numpy.all(sims[i, i] == 1.0))  # Similarity of a document with itself is 1.0.

         # test the same thing but with num_best
         index.num_best = 5
@@ -459,8 +459,8 @@ def test_chunking(self):
     def test_iter(self):
         index = self.cls(CORPUS, self.similarity_matrix)
         for sims in index:
-            self.assertTrue(numpy.alltrue(sims >= 0.0))
-            self.assertTrue(numpy.alltrue(sims <= 1.0))
+            self.assertTrue(numpy.all(sims >= 0.0))
+            self.assertTrue(numpy.all(sims <= 1.0))


 class TestSparseMatrixSimilarity(_TestSimilarityABC):
@@ -625,7 +625,7 @@ def assertIndexSaved(self, index):
         fname = get_tmpfile('gensim_similarities.tst.pkl')
         index.save(fname)
         self.assertTrue(os.path.exists(fname))
-        self.assertTrue(os.path.exists(fname + '.d'))
+        self.assertTrue(os.path.exists(fname + ".dict"))

     def assertLoadedIndexEqual(self, index, model):
         from gensim.similarities.annoy import AnnoyIndexer
@@ -676,7 +676,7 @@ def test_save(self):
         fname = get_tmpfile('gensim_similarities.tst.pkl')
         self.index.save(fname)
         self.assertTrue(os.path.exists(fname))
-        self.assertTrue(os.path.exists(fname + '.d'))
+        self.assertTrue(os.path.exists(fname + ".dict"))

     def test_load_not_exist(self):
         from gensim.similarities.annoy import AnnoyIndexer
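Two independent fixes ride along in test_similarities.py above: `numpy.alltrue`, a long-deprecated alias of `numpy.all`, was removed outright in NumPy 2.0, so the assertions switch to `numpy.all` with identical semantics; and the expected suffix of the saved Annoy index's companion file changes from `'.d'` to `".dict"`. The alias swap in isolation (runnable under either NumPy major version):

```python
import numpy as np

sims = np.array([0.2, 0.5, 0.9])

# numpy.alltrue(sims > 0.0) raises AttributeError on NumPy >= 2.0;
# numpy.all is the drop-in replacement with the same behavior.
assert np.all(sims > 0.0)
assert np.all(sims <= 1.0)
```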
- # - "Cython>=0.29.32,<3.0.0", + "Cython>=3.0.0", # oldest supported Numpy for this platform is 1.17 but the oldest supported by Gensim # is 1.18.5, remove the line when they increase oldest supported Numpy for this platform - "numpy==1.18.5; python_version=='3.8' and platform_machine not in 'arm64|aarch64'", - "oldest-supported-numpy; python_version>'3.8' or platform_machine in 'arm64|aarch64'", + "numpy>=2.0.0; python_version>='3.9'", "setuptools", "wheel", ] diff --git a/setup.py b/setup.py index 01cda61aba..c92d3e7780 100644 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ def make_c_ext(use_cython=False): sources=[source], language='c', extra_compile_args=extra_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ) @@ -80,6 +81,7 @@ def make_cpp_ext(use_cython=False): language='c++', extra_compile_args=extra_args, extra_link_args=extra_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], ) @@ -120,8 +122,8 @@ def finalize_options(self): if need_cython(): import Cython.Build - Cython.Build.cythonize(list(make_c_ext(use_cython=True)), language_level=3) - Cython.Build.cythonize(list(make_cpp_ext(use_cython=True)), language_level=3) + Cython.Build.cythonize(list(make_c_ext(use_cython=True))) + Cython.Build.cythonize(list(make_cpp_ext(use_cython=True))) class CleanExt(distutils.cmd.Command): @@ -324,17 +326,14 @@ def run(self): 'pandas', ] -# -# see https://github.com/piskvorky/gensim/pull/3535 -# -NUMPY_STR = 'numpy >= 1.18.5, < 2.0' +NUMPY_STR = 'numpy >= 2.0.0' install_requires = [ NUMPY_STR, # # scipy 1.14.0 and onwards removes deprecated sparsetools submodule # - 'scipy >= 1.7.0, <1.14.0', + 'scipy >= 1.13.0, <1.14.0', 'smart_open >= 1.8.1', ] @@ -373,7 +372,6 @@ def run(self): 'Environment :: Console', 'Intended Audience :: Science/Research', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', @@ -385,7 +383,7 @@ def run(self): ], test_suite="gensim.test", - python_requires='>=3.8', + python_requires='>=3.9', install_requires=install_requires, tests_require=linux_testenv, extras_require={