Skip to content

Commit

Permalink
Draft
Browse files Browse the repository at this point in the history
  • Loading branch information
YoungMind1 committed Nov 8, 2024
1 parent 03aeb11 commit e104e53
Show file tree
Hide file tree
Showing 16 changed files with 112 additions and 117 deletions.
10 changes: 5 additions & 5 deletions gensim/_matutils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def mean_absolute_difference(a, b):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) nogil:
cdef DTYPE_t _mean_absolute_difference(DTYPE_t[:] a, DTYPE_t[:] b) noexcept nogil:
"""Mean absolute difference between two arrays.
Parameters
Expand Down Expand Up @@ -103,7 +103,7 @@ def logsumexp(x):
@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) nogil:
cdef DTYPE_t _logsumexp_2d(DTYPE_t[:, :] data) noexcept nogil:
"""Log of sum of exponentials.
Parameters
Expand Down Expand Up @@ -223,7 +223,7 @@ def dirichlet_expectation_1d(alpha):

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:
cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) noexcept nogil:
"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
Parameters
Expand Down Expand Up @@ -251,7 +251,7 @@ cdef void _dirichlet_expectation_1d(DTYPE_t[:] alpha, DTYPE_t[:] out) nogil:

@cython.boundscheck(False)
@cython.wraparound(False)
cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) nogil:
cdef void _dirichlet_expectation_2d(DTYPE_t[:, :] alpha, DTYPE_t[:, :] out) noexcept nogil:
"""Expected value of log(theta) where theta is drawn from a Dirichlet distribution.
Parameters
Expand Down Expand Up @@ -298,7 +298,7 @@ def digamma(DTYPE_t x):


@cython.cdivision(True)
cdef inline DTYPE_t _digamma(DTYPE_t x,) nogil:
cdef inline DTYPE_t _digamma(DTYPE_t x,) noexcept nogil:
"""Digamma function for positive floats.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/doc2vec_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ cdef void prepare_c_structures_for_batch(
np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
np.uint32_t *reduced_windows, int *document_len, int train_words,
int docvecs_count, int doc_tag, int shrink_windows,
) nogil:
) noexcept nogil:
cdef VocabItem predict_word
cdef string token
cdef int i = 0
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/doc2vec_inner.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -57,39 +57,39 @@ cdef void fast_document_dbow_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
REAL_t *context_vectors, REAL_t *syn1, const int size,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil


cdef unsigned long long fast_document_dbow_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len,
REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
const np.uint32_t contexts_lockf_len) nogil
const np.uint32_t contexts_lockf_len) noexcept nogil


cdef void fast_document_dm_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil
const int size, int learn_hidden) noexcept nogil


cdef unsigned long long fast_document_dm_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil
const int size, int learn_hidden) noexcept nogil


cdef void fast_document_dmc_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


cdef unsigned long long fast_document_dmc_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil


cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, learn_hidden, train_words=*, work=*,
Expand Down
12 changes: 6 additions & 6 deletions gensim/models/doc2vec_inner.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ cdef void fast_document_dbow_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen,
REAL_t *context_vectors, REAL_t *syn1, const int size,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden,
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) nogil:
REAL_t *contexts_lockf, const np.uint32_t contexts_lockf_len) noexcept nogil:

cdef long long a, b
cdef long long row1 = context_index * size, row2
Expand All @@ -66,7 +66,7 @@ cdef unsigned long long fast_document_dbow_neg(
REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index,
const np.uint32_t context_index, const REAL_t alpha, REAL_t *work,
unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *contexts_lockf,
const np.uint32_t contexts_lockf_len) nogil:
const np.uint32_t contexts_lockf_len) noexcept nogil:

cdef long long a
cdef long long row1 = context_index * size, row2
Expand Down Expand Up @@ -106,7 +106,7 @@ cdef unsigned long long fast_document_dbow_neg(
cdef void fast_document_dm_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil:
const int size, int learn_hidden) noexcept nogil:

cdef long long b
cdef long long row2
Expand All @@ -129,7 +129,7 @@ cdef void fast_document_dm_hs(
cdef unsigned long long fast_document_dm_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int size, int learn_hidden) nogil:
const int size, int learn_hidden) noexcept nogil:

cdef long long row2
cdef unsigned long long modulo = 281474976710655ULL
Expand Down Expand Up @@ -165,7 +165,7 @@ cdef unsigned long long fast_document_dm_neg(
cdef void fast_document_dmc_hs(
const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len,
REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil:
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

cdef long long a, b
cdef long long row2
Expand All @@ -189,7 +189,7 @@ cdef void fast_document_dmc_hs(
cdef unsigned long long fast_document_dmc_neg(
const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, unsigned long long next_random,
REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work,
const int layer1_size, const int vector_size, int learn_hidden) nogil:
const int layer1_size, const int vector_size, int learn_hidden) noexcept nogil:

cdef long long a
cdef long long row2
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/fasttext_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ cdef void prepare_c_structures_for_batch(
int *effective_words, int *effective_sentences, unsigned long long *next_random, cvocab_t *vocab,
int *sentence_idx, np.uint32_t *indexes, int *codelens, np.uint8_t **codes, np.uint32_t **points,
np.uint32_t *reduced_windows, int *subwords_idx_len, np.uint32_t **subwords_idx, int shrink_windows,
) nogil:
) noexcept nogil:
cdef VocabItem word
cdef string token
cdef vector[string] sent
Expand Down
10 changes: 5 additions & 5 deletions gensim/models/fasttext_inner.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -135,16 +135,16 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1)
cdef object populate_ft_config(FastTextConfig *c, vocab, buckets_word, sentences)


cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil
cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil


cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil
cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil


cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil
cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil


cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil
cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil


cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil
cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil
10 changes: 5 additions & 5 deletions gensim/models/fasttext_inner.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ cdef int ONE = 1
cdef REAL_t ONEF = <REAL_t>1.0


cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) noexcept nogil:
"""Perform skipgram training with negative sampling.
Parameters
Expand Down Expand Up @@ -145,7 +145,7 @@ cdef void fasttext_fast_sentence_sg_neg(FastTextConfig *c, int i, int j) nogil:
c.work, &ONE, &c.syn0_ngrams[subwords_index[d]*c.size], &ONE)


cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) noexcept nogil:
"""Perform skipgram training with hierarchical softmax.
Parameters
Expand Down Expand Up @@ -221,7 +221,7 @@ cdef void fasttext_fast_sentence_sg_hs(FastTextConfig *c, int i, int j) nogil:
&c.syn0_ngrams[row2], &ONE)


cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) nogil:
cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k) noexcept nogil:
"""Perform CBOW training with negative sampling.
Parameters
Expand Down Expand Up @@ -306,7 +306,7 @@ cdef void fasttext_fast_sentence_cbow_neg(FastTextConfig *c, int i, int j, int k
&c.syn0_ngrams[c.subwords_idx[m][d]*c.size], &ONE)


cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) nogil:
cdef void fasttext_fast_sentence_cbow_hs(FastTextConfig *c, int i, int j, int k) noexcept nogil:
"""Perform CBOW training with hierarchical softmax.
Parameters
Expand Down Expand Up @@ -510,7 +510,7 @@ cdef object populate_ft_config(FastTextConfig *c, wv, buckets_word, sentences):
return effective_words, effective_sentences


cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) nogil:
cdef void fasttext_train_any(FastTextConfig *c, int num_sentences) noexcept nogil:
"""Performs training on a fully initialized and populated configuration.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1667,7 +1667,7 @@ def save_word2vec_format(
if binary:
fout.write(f"{prefix}{key} ".encode('utf8') + key_vector.astype(REAL).tobytes())
else:
fout.write(f"{prefix}{key} {' '.join(repr(val) for val in key_vector)}\n".encode('utf8'))
fout.write(f"{prefix}{key} {' '.join(val.astype('str') for val in key_vector)}\n".encode('utf8'))

@classmethod
def load_word2vec_format(
Expand Down
4 changes: 2 additions & 2 deletions gensim/models/nmf_pgd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from libc.math cimport sqrt
from cython.parallel import prange

cdef double fmin(double x, double y) nogil:
cdef double fmin(double x, double y) noexcept nogil:
return x if x < y else y

cdef double fmax(double x, double y) nogil:
cdef double fmax(double x, double y) noexcept nogil:
return x if x > y else y

def solve_h(double[:, ::1] h, double[:, :] Wtv, double[:, ::1] WtW, int[::1] permutation, double kappa):
Expand Down
24 changes: 12 additions & 12 deletions gensim/models/word2vec_corpusfile.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ cdef extern from "fast_line_sentence.h":
cdef cppclass FastLineSentence:
FastLineSentence() except +
FastLineSentence(string&, size_t) except +
vector[string] ReadSentence() nogil except +
bool_t IsEof() nogil
void Reset() nogil
vector[string] ReadSentence() except + nogil
bool_t IsEof() noexcept nogil
void Reset() noexcept nogil


cdef class CythonLineSentence:
Expand All @@ -37,12 +37,12 @@ cdef class CythonLineSentence:
cdef public size_t max_sentence_length, max_words_in_batch, offset
cdef vector[vector[string]] buf_data

cpdef bool_t is_eof(self) nogil
cpdef vector[string] read_sentence(self) nogil except *
cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil
cpdef void reset(self) nogil
cpdef vector[vector[string]] next_batch(self) nogil except *
cpdef bool_t is_eof(self) noexcept nogil
cpdef vector[string] read_sentence(self) except * nogil
cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil
cpdef void reset(self) noexcept nogil
cpdef vector[vector[string]] next_batch(self) except * nogil


cdef struct VocabItem:
Expand All @@ -62,9 +62,9 @@ ctypedef unordered_map[string, VocabItem] cvocab_t
cdef class CythonVocab:
cdef cvocab_t vocab
cdef subword_arrays
cdef cvocab_t* get_vocab_ptr(self) nogil except *
cdef cvocab_t* get_vocab_ptr(self) except * nogil


cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil
cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil
cdef REAL_t get_next_alpha(REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil
18 changes: 9 additions & 9 deletions gensim/models/word2vec_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ cdef class CythonVocab:

self.vocab[token] = word

cdef cvocab_t* get_vocab_ptr(self) nogil except *:
cdef cvocab_t* get_vocab_ptr(self) except * nogil:
return &self.vocab


Expand Down Expand Up @@ -92,17 +92,17 @@ cdef class CythonLineSentence:
if self._thisptr != NULL:
del self._thisptr

cpdef bool_t is_eof(self) nogil:
cpdef bool_t is_eof(self) noexcept nogil:
return self._thisptr.IsEof()

cpdef vector[string] read_sentence(self) nogil except *:
cpdef vector[string] read_sentence(self) except * nogil:
return self._thisptr.ReadSentence()

cpdef vector[vector[string]] _read_chunked_sentence(self) nogil except *:
cpdef vector[vector[string]] _read_chunked_sentence(self) except * nogil:
cdef vector[string] sent = self.read_sentence()
return self._chunk_sentence(sent)

cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) nogil:
cpdef vector[vector[string]] _chunk_sentence(self, vector[string] sent) noexcept nogil:
cdef vector[vector[string]] res
cdef vector[string] chunk
cdef size_t cur_idx = 0
Expand All @@ -120,7 +120,7 @@ cdef class CythonLineSentence:

return res

cpdef void reset(self) nogil:
cpdef void reset(self) noexcept nogil:
self._thisptr.Reset()

def __iter__(self):
Expand All @@ -135,7 +135,7 @@ cdef class CythonLineSentence:
# This function helps pickle to correctly serialize objects of this class.
return rebuild_cython_line_sentence, (self.source, self.max_sentence_length)

cpdef vector[vector[string]] next_batch(self) nogil except *:
cpdef vector[vector[string]] next_batch(self) except * nogil:
cdef:
vector[vector[string]] job_batch
vector[vector[string]] chunked_sentence
Expand Down Expand Up @@ -235,13 +235,13 @@ cdef void prepare_c_structures_for_batch(
reduced_windows[i] = 0


cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) nogil:
cdef REAL_t get_alpha(REAL_t alpha, REAL_t end_alpha, int cur_epoch, int num_epochs) noexcept nogil:
return alpha - ((alpha - end_alpha) * (<REAL_t> cur_epoch) / num_epochs)


cdef REAL_t get_next_alpha(
REAL_t start_alpha, REAL_t end_alpha, long long total_examples, long long total_words,
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) nogil:
long long expected_examples, long long expected_words, int cur_epoch, int num_epochs) noexcept nogil:
cdef REAL_t epoch_progress

if expected_examples != -1:
Expand Down
Loading

0 comments on commit e104e53

Please sign in to comment.