Skip to content

Commit

Permalink
use long long for value multiplied by vector_size for > 2^31 indexing
Browse files Browse the repository at this point in the history
  • Loading branch information
gojomo committed Aug 1, 2020
1 parent 779fe46 commit 492ce1a
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions gensim/models/doc2vec_corpusfile.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def d2v_train_epoch_dbow(
cdef int sent_idx, idx_start, idx_end

cdef vector[string] doc_words
cdef int _doc_tag = start_doctag
cdef long long _doc_tag = start_doctag

init_d2v_config(
&c, model, _alpha, learn_doctags, learn_words, learn_hidden, train_words=train_words,
Expand Down Expand Up @@ -302,7 +302,7 @@ def d2v_train_epoch_dm(
cdef REAL_t count, inv_count = 1.0

cdef vector[string] doc_words
cdef int _doc_tag = start_doctag
cdef long long _doc_tag = start_doctag

init_d2v_config(
&c, model, _alpha, learn_doctags, learn_words, learn_hidden, train_words=False,
Expand Down Expand Up @@ -455,7 +455,7 @@ def d2v_train_epoch_dm_concat(
cdef int sent_idx, idx_start, idx_end

cdef vector[string] doc_words
cdef int _doc_tag = start_doctag
cdef long long _doc_tag = start_doctag

init_d2v_config(
&c, model, _alpha, learn_doctags, learn_words, learn_hidden, train_words=False,
Expand All @@ -469,8 +469,8 @@ def d2v_train_epoch_dm_concat(
effective_words = 0

doc_words = input_stream.read_sentence()
_doc_tag = total_documents
c.doctag_len = _doc_tag < c.docvecs_count
_doc_tag = total_documents # WTF? skipping to end no matter what start_doctag passed in?
c.doctag_len = _doc_tag < c.docvecs_count # WTF? if only 1 acceptable as this scalar, odd to set from bool

# skip doc either empty or without expected number of tags
if doc_words.empty() or c.expected_doctag_len != c.doctag_len:
Expand Down

0 comments on commit 492ce1a

Please sign in to comment.