diff --git a/.gitignore b/.gitignore index 1029443e9d..70a08894d7 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ *.pkl *.bak *.npy +*.npz # OS generated files # ###################### @@ -44,6 +45,7 @@ Thumbs.db .ropeproject .settings/ .eggs +cython_debug docs/src/_build/ docs/_static dedan_gensim.tmproj diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index b5c8f1086c..aa9e80843d 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -49,13 +49,13 @@ repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap, \ sqrt, newaxis, ndarray, dot, vstack -logger = logging.getLogger(__name__) - from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_sentence_sg -from six.moves import xrange +from six.moves import xrange, zip from six import string_types, integer_types +logger = logging.getLogger(__name__) + try: from gensim.models.doc2vec_inner import train_document_dbow, train_document_dm, train_document_dm_concat from gensim.models.word2vec_inner import FAST_VERSION # blas-adaptation shared from word2vec @@ -63,7 +63,7 @@ # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ @@ -71,9 +71,9 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - The document is provided as `word_vocabs`, a list of Vocab objects which provide - indexes into the word_vector array, and `doctag_indexes`, which provide indexes - int the doctag_vectors array. (See `_prepare_items()`.) + The document is provided as `doc_words`, a list of word tokens which are looked up + in the model's vocab dictionary, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. If `train_words` is True, simultaneously train word-to-word (not just doc-to-word) examples, exactly as per Word2Vec skip-gram training. (Without this option, @@ -93,18 +93,16 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, doctag_locks = model.docvecs.doctag_syn0_lockf if train_words and learn_words: - train_sentence_sg(model, word_vocabs, alpha, work) # TODO: adapt for word_vectors/word_locks + train_sentence_sg(model, doc_words, alpha, work) for doctag_index in doctag_indexes: - for word in word_vocabs: - if word is None: - continue # OOV word in the input document => skip + for word in doc_words: train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags, learn_hidden=learn_hidden, context_vectors=doctag_vectors, context_locks=doctag_locks) - return len([word for word in word_vocabs if word is not None]) + return len(doc_words) - def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ @@ -116,9 +114,9 @@ def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 `dm_mean` configuration field. See `train_dm_concat()` for the DM model with a concatenated input layer. - The document is provided as `word_vocabs`, a list of Vocab objects which provide - indexes into the word_vector array, and `doctag_indexes`, which provide indexes - int the doctag_vectors array. (See `_prepare_items()`.) + The document is provided as `doc_words`, a list of word tokens which are looked up + in the model's vocab dictionary, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to prevent learning-updates to those respective model weights, as if using the @@ -137,33 +135,33 @@ def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf + word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and + model.vocab[w].sample_int > model.random.randint(2**32)] doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0) doctag_len = len(doctag_indexes) for pos, word in enumerate(word_vocabs): - if word is None: - continue # OOV word in the input document => skip - reduced_window = random.randint(model.window) # `b` in the original doc2vec code + reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(word_vocabs[start : pos + model.window + 1 - reduced_window], start) + window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) word2_indexes = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] l1 = np_sum(word_vectors[word2_indexes], axis=0) + doctag_sum # 1 x layer1_size if word2_indexes and model.cbow_mean: l1 /= (len(word2_indexes) + doctag_len) - neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha, learn_vectors=False, learn_hidden=True) + neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha, + learn_vectors=False, learn_hidden=learn_hidden) if word2_indexes and not model.cbow_mean: neu1e /= (len(word2_indexes) + doctag_len) if learn_doctags: - doctag_vectors[doctag_indexes] += \ - neu1e * np_repeat(doctag_locks[doctag_indexes],model.vector_size).reshape(-1,model.vector_size) + doctag_vectors[doctag_indexes] += neu1e * \ + np_repeat(doctag_locks[doctag_indexes], model.vector_size).reshape(-1, model.vector_size) if learn_words: - word_vectors[word2_indexes] += \ - neu1e * np_repeat(word_locks[word2_indexes],model.vector_size).reshape(-1,model.vector_size) - - return len([word for word in word_vocabs if word is not None]) + word_vectors[word2_indexes] += neu1e * \ + np_repeat(word_locks[word2_indexes], model.vector_size).reshape(-1, model.vector_size) + return len(word_vocabs) - def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ @@ -172,9 +170,9 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - The document is provided as `word_vocabs`, a list of Vocab objects which provide - indexes into the word_vector array, and `doctag_indexes`, which provide indexes - int the doctag_vectors array. (See `_prepare_items()`.) + The document is provided as `doc_words`, a list of word tokens which are looked up + in the model's vocab dictionary, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to prevent learning-updates to those respective model weights, as if using the @@ -193,9 +191,11 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf + word_vocabs = [model.vocab[w] for w in doc_words if w in model.vocab and + model.vocab[w].sample_int > model.random.randint(2**32)] doctag_len = len(doctag_indexes) if doctag_len != model.dm_tag_count: - return 0 # skip doc without expected doctag(s) + return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?) null_word = model.vocab['\0'] pre_pad_count = model.window @@ -208,19 +208,20 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non for pos in range(pre_pad_count, len(padded_document_indexes) - post_pad_count): word_context_indexes = ( - padded_document_indexes[pos - pre_pad_count : pos] # preceding words - + padded_document_indexes[pos + 1 : pos + 1 + post_pad_count] # following words + padded_document_indexes[(pos - pre_pad_count): pos] # preceding words + + padded_document_indexes[(pos + 1):(pos + 1 + post_pad_count)] # following words ) word_context_len = len(word_context_indexes) predict_word = model.vocab[model.index2word[padded_document_indexes[pos]]] # numpy advanced-indexing copies; concatenate, flatten to 1d l1 = concatenate((doctag_vectors[doctag_indexes], word_vectors[word_context_indexes])).ravel() - neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, learn_hidden=learn_hidden, learn_vectors=False) + neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, + learn_hidden=learn_hidden, learn_vectors=False) # filter by locks and shape for addition to source vectors e_locks = concatenate((doctag_locks[doctag_indexes], word_locks[word_context_indexes])) - neu1e_r = (neu1e.reshape(-1,model.vector_size) - * np_repeat(e_locks,model.vector_size).reshape(-1,model.vector_size)) + neu1e_r = (neu1e.reshape(-1, model.vector_size) + * np_repeat(e_locks, model.vector_size).reshape(-1, model.vector_size)) if learn_doctags: np_add.at(doctag_vectors, doctag_indexes, neu1e_r[:doctag_len]) @@ -230,7 +231,7 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non return len(padded_document_indexes) - pre_pad_count - post_pad_count -class TaggedDocument(namedtuple('TaggedDocument','words tags')): +class TaggedDocument(namedtuple('TaggedDocument', 'words tags')): """ A single document, made up of `words` (a list of unicode string tokens) and `tags` (a list of tokens). Tags may be one or more unicode string @@ -243,6 +244,7 @@ class TaggedDocument(namedtuple('TaggedDocument','words tags')): def __str__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) + # for compatibility class LabeledSentence(TaggedDocument): def __init__(self, *args, **kwargs): @@ -277,7 +279,7 @@ class DocvecsArray(utils.SaveLoad): def __init__(self, mapfile_path=None): self.doctags = {} # string -> Doctag (only filled if necessary) self.index2doctag = [] # int index -> String (only filled if necessary) - self.count = -1 + self.count = 0 self.mapfile_path = mapfile_path def note_doctag(self, key, document_no, document_length): @@ -288,16 +290,16 @@ def note_doctag(self, key, document_no, document_length): if key in self.doctags: self.doctags[key] = self.doctags[key].repeat(document_length) else: - self.doctags[key] = Doctag(document_no, document_length, 1) + self.doctags[key] = Doctag(len(self.index2doctag), document_length, 1) self.index2doctag.append(key) self.count = max(self.count, len(self.index2doctag)) def indexed_doctags(self, doctag_tokens): """Return indexes and backing-arrays used in training examples.""" - return ([i for i in [self._int_index(index,-1) for index in doctag_tokens] if i > -1], + return ([i for i in [self._int_index(index, -1) for index in doctag_tokens] if i > -1], self.doctag_syn0, self.doctag_syn0_lockf, doctag_tokens) - def trained_items(self, indexed_tuples): + def trained_item(self, indexed_tuple): """Persist any changes made to the given indexes (matching tuple previously returned by indexed_doctags()); a no-op for this implementation""" pass @@ -319,6 +321,9 @@ def _key_index(self, i_index, missing=None): def __getitem__(self, index): return self.doctag_syn0[self._int_index(index)] + def __len__(self): + return self.count + def __contains__(self, index): if isinstance(index, int): return index < self.count @@ -333,11 +338,17 @@ def borrow_from(self, other_docvecs): def clear_sims(self): self.doctag_syn0norm = None + def estimated_lookup_memory(self): + """Estimated memory for tag lookup; 0 if using pure int tags.""" + return 60 * len(self.index2doctag) + 140 * len(self.doctags) + def reset_weights(self, model): - length = max(len(self.doctags),self.count) + length = max(len(self.doctags), self.count) if self.mapfile_path: - self.doctag_syn0 = np_memmap(self.mapfile_path+'.doctag_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) - self.doctag_syn0_lockf = np_memmap(self.mapfile_path+'.doctag_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) + self.doctag_syn0 = np_memmap(self.mapfile_path+'.doctag_syn0', dtype=REAL, + mode='w+', shape=(length, model.vector_size)) + self.doctag_syn0_lockf = np_memmap(self.mapfile_path+'.doctag_syn0_lockf', dtype=REAL, + mode='w+', shape=(length,)) self.doctag_syn0_lockf.fill(1.0) else: self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL) @@ -345,7 +356,7 @@ def reset_weights(self, model): for i in xrange(length): # construct deterministic seed from index AND model seed - seed = "%d %s" % (model.seed, self.index2doctag[i] if len(self.index2doctag)>0 else str(i)) + seed = "%d %s" % (model.seed, self.index2doctag[i] if len(self.index2doctag) > 0 else str(i)) self.doctag_syn0[i] = model.seeded_vector(seed) def init_sims(self, replace=False): @@ -366,7 +377,8 @@ def init_sims(self, replace=False): self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1)) self.doctag_syn0norm = self.doctag_syn0 else: - self.doctag_syn0norm = (self.doctag_syn0 / sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL) + self.doctag_syn0norm = (self.doctag_syn0 / + sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL) def most_similar(self, positive=[], negative=[], topn=10): """ @@ -386,9 +398,9 @@ def most_similar(self, positive=[], negative=[], topn=10): # add weights for each doc, if not already present; default to 1.0 for positive and -1.0 for negative docs positive = [(doc, 1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) - else doc for doc in positive] + else doc for doc in positive] negative = [(doc, -1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) - else doc for doc in negative] + else doc for doc in negative] # compute the weighted average of all docs all_docs, mean = set(), [] @@ -456,6 +468,7 @@ class Doctag(namedtuple('Doctag', 'index, word_count, doc_count')): Will not be used if all presented document tags are ints. """ __slots__ = () + def repeat(self, word_count): return self._replace(word_count=self.word_count + word_count, doc_count=self.doc_count + 1) @@ -517,42 +530,52 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, """ super(Doc2Vec, self).__init__(size=size, alpha=alpha, window=window, min_count=min_count, - sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, - sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, - null_word=dm_concat, **kwargs) + sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, + sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, + null_word=dm_concat, **kwargs) self.dbow_words = dbow_words self.dm_concat = dm_concat self.dm_tag_count = dm_tag_count - self.docvecs = docvecs - if not self.docvecs: - self.docvecs = DocvecsArray(docvecs_mapfile) + if self.dm and self.dm_concat: + self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size + else: + self.layer1_size = size + self.docvecs = docvecs or DocvecsArray(docvecs_mapfile) self.comment = comment if documents is not None: self.build_vocab(documents) self.train(documents) + @property + def dm(self): + return not self.sg # opposite of SG + + @property + def dbow(self): + return self.sg # same as SG + def clear_sims(self): - super(Doc2Vec,self).clear_sims() + super(Doc2Vec, self).clear_sims() self.docvecs.clear_sims() def reset_weights(self): - if self.dm_concat: + if self.dm and self.dm_concat: # expand l1 size to match concatenated tags+words length self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size - logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) - super(Doc2Vec,self).reset_weights() + logger.info("using concatenative %d-dimensional layer1" % (self.layer1_size)) + super(Doc2Vec, self).reset_weights() self.docvecs.reset_weights(self) def reset_from(self, other_model): """Reuse shareable structures from other_model.""" self.docvecs.borrow_from(other_model.docvecs) - super(Doc2Vec,self).reset_from(other_model) + super(Doc2Vec, self).reset_from(other_model) - def _vocab_from(self, documents): + def _vocab_from(self, documents, progress_per=10000): document_no, vocab = -1, {} total_words = 0 for document_no, document in enumerate(documents): - if document_no % 10000 == 0: + if document_no % progress_per == 0: logger.info("PROGRESS: at document #%i, processed %i words and %i word types" % (document_no, total_words, len(vocab))) document_length = len(document.words) @@ -566,51 +589,38 @@ def _vocab_from(self, documents): vocab[word] = Vocab(count=1) logger.info("collected %i word types from a corpus of %i words and %i documents" % (len(vocab), total_words, document_no + 1)) + self.corpus_count = document_no + 1 return vocab - def _prepare_items(self, documents): - for document in documents: - yield (self._tokens_to_vocabs(document.words), - self.docvecs.indexed_doctags(document.tags)) - - def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): - """Convert list of tokens to items (Vocabs) from source_dict.""" - if source_dict is None: - source_dict = self.vocab - if sample: - return [source_dict[token] for token in tokens if token in source_dict - and (source_dict[token].sample_probability >= 1.0 or - source_dict[token].sample_probability >= random.random_sample())] - else: - return [source_dict[token] for token in tokens if token in source_dict] - - def _get_job_words(self, alpha, work, job, neu1): - if self.sg: - tally = sum(train_document_dbow(self, word_vocabs, doctag_indexes, alpha, work, train_words=self.dbow_words, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) - elif self.dm_concat: - tally = sum(train_document_dm_concat(self, word_vocabs, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) - else: - tally = sum(train_document_dm(self, word_vocabs, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) - self.docvecs.trained_items(item for s, item in job) + def _do_train_job(self, job, alpha, inits): + work, neu1 = inits + tally = 0 + for doc in job: + indexed_doctags = self.docvecs.indexed_doctags(doc.tags) + doctag_indexes, doctag_vectors, doctag_locks, ignored = indexed_doctags + if self.sg: + tally += train_document_dbow(self, doc.words, doctag_indexes, alpha, work, + train_words=self.dbow_words, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + elif self.dm_concat: + tally += train_document_dm_concat(self, doc.words, doctag_indexes, alpha, work, neu1, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + else: + tally += train_document_dm(self, doc.words, doctag_indexes, alpha, work, neu1, + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + self.docvecs.trained_item(indexed_doctags) return tally - def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): + def infer_vector(self, doc_words, alpha=0.1, min_alpha=0.0001, steps=5): """ Infer a vector for given post-bulk training document. Document should be a list of (word) tokens. """ doctag_vectors = empty((1, self.vector_size), dtype=REAL) - doctag_vectors[0] = self.seeded_vector(' '.join(document)) + doctag_vectors[0] = self.seeded_vector(' '.join(doc_words)) doctag_locks = ones(1, dtype=REAL) doctag_indexes = [0] - word_vocabs = self._tokens_to_vocabs(document) work = zeros(self.layer1_size, dtype=REAL) if not self.sg: @@ -618,21 +628,28 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): for i in range(steps): if self.sg: - train_document_dbow(self, word_vocabs, doctag_indexes, alpha, work, + train_document_dbow(self, doc_words, doctag_indexes, alpha, work, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) elif self.dm_concat: - train_document_dm_concat(self, word_vocabs, doctag_indexes, alpha, work, neu1, + train_document_dm_concat(self, doc_words, doctag_indexes, alpha, work, neu1, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) else: - train_document_dm(self, word_vocabs, doctag_indexes, alpha, work, neu1, + train_document_dm(self, doc_words, doctag_indexes, alpha, work, neu1, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha return doctag_vectors[0] + def estimate_memory(self, vocab_size=None): + """Estimate required memory for a model using current settings.""" + report = super(Doc2Vec, self).estimate_memory(vocab_size) + report['doctag_lookup'] = self.docvecs.estimated_lookup_memory() + report['doctag_syn0'] = self.docvecs.count * self.vector_size * 4 + return report + def __str__(self): """Abbreviated name reflecting major configuration paramaters.""" segments = [] @@ -662,7 +679,7 @@ def __str__(self): if self.min_count > 1: segments.append('mc%d' % self.min_count) if self.sample > 0: - segments.append('s%E' % self.sample) + segments.append('s%g' % self.sample) if self.workers > 1: segments.append('t%d' % self.workers) return 'Doc2Vec(%s)' % ','.join(segments) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 4212c1da13..11799301c6 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -459,7 +459,7 @@ static const char *__pyx_f[] = { "type.pxd", }; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 * # in Cython to enable them only on the right systems. * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -468,7 +468,7 @@ static const char *__pyx_f[] = { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -477,7 +477,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -486,7 +486,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":729 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":729 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -495,7 +495,7 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 * #ctypedef npy_int128 int128_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< @@ -504,7 +504,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -513,7 +513,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -522,7 +522,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -531,7 +531,7 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 * #ctypedef npy_uint128 uint128_t * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< @@ -540,7 +540,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":741 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":741 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -549,7 +549,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -558,7 +558,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< @@ -567,7 +567,7 @@ typedef npy_long __pyx_t_5numpy_int_t; */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -576,7 +576,7 @@ typedef npy_longlong __pyx_t_5numpy_long_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 * ctypedef npy_longlong longlong_t * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< @@ -585,7 +585,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< @@ -594,7 +594,7 @@ typedef npy_ulong __pyx_t_5numpy_uint_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -603,7 +603,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -612,7 +612,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -621,7 +621,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -630,7 +630,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -639,7 +639,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -679,7 +679,7 @@ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t /*--- Type declarations ---*/ -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 * ctypedef npy_longdouble longdouble_t * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< @@ -688,7 +688,7 @@ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< @@ -697,7 +697,7 @@ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< @@ -706,7 +706,7 @@ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 * ctypedef npy_clongdouble clongdouble_t * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< @@ -886,6 +886,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); +static CYTHON_INLINE int __Pyx_PySequence_Contains(PyObject* item, PyObject* seq, int eq) { + int result = PySequence_Contains(seq, item); + return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); +} + #define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \ @@ -969,6 +974,8 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG(PyObject *); +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value); + static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); #ifndef __PYX_FORCE_INIT_THREADS @@ -1151,6 +1158,7 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas)(int const *, float const *, int const *, float const *, int const *); /*proto*/ static void (*__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas)(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_bisect_left)(__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG); /*proto*/ +static unsigned PY_LONG_LONG (*__pyx_f_6gensim_6models_14word2vec_inner_random_int32)(unsigned PY_LONG_LONG *); /*proto*/ /* Module declarations from 'gensim.models.doc2vec_inner' */ static int __pyx_v_6gensim_6models_13doc2vec_inner_ONE; @@ -1169,9 +1177,9 @@ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ static char __pyx_k_B[] = "B"; @@ -1192,6 +1200,7 @@ static char __pyx_k_l[] = "l"; static char __pyx_k_m[] = "m"; static char __pyx_k_n[] = "n"; static char __pyx_k_q[] = "q"; +static char __pyx_k_r[] = "r"; static char __pyx_k_Zd[] = "Zd"; static char __pyx_k_Zf[] = "Zf"; static char __pyx_k_Zg[] = "Zg"; @@ -1207,7 +1216,6 @@ static char __pyx_k_size[] = "size"; static char __pyx_k_syn0[] = "syn0"; static char __pyx_k_syn1[] = "syn1"; static char __pyx_k_test[] = "__test__"; -static char __pyx_k_word[] = "word"; static char __pyx_k_work[] = "work"; static char __pyx_k_alpha[] = "alpha"; static char __pyx_k_codes[] = "codes"; @@ -1219,6 +1227,7 @@ static char __pyx_k_model[] = "model"; static char __pyx_k_numpy[] = "numpy"; static char __pyx_k_point[] = "point"; static char __pyx_k_range[] = "range"; +static char __pyx_k_token[] = "token"; static char __pyx_k_vocab[] = "vocab"; static char __pyx_k_zeros[] = "zeros"; static char __pyx_k_import[] = "__import__"; @@ -1226,6 +1235,7 @@ static char __pyx_k_neu1_2[] = "_neu1"; static char __pyx_k_points[] = "points"; static char __pyx_k_random[] = "random"; static char __pyx_k_result[] = "result"; +static char __pyx_k_sample[] = "sample"; static char __pyx_k_window[] = "window"; static char __pyx_k_work_2[] = "_work"; static char __pyx_k_alpha_2[] = "_alpha"; @@ -1234,15 +1244,18 @@ static char __pyx_k_float32[] = "float32"; static char __pyx_k_indexes[] = "indexes"; static char __pyx_k_randint[] = "randint"; static char __pyx_k_syn1neg[] = "syn1neg"; +static char __pyx_k_vlookup[] = "vlookup"; static char __pyx_k_codelens[] = "codelens"; static char __pyx_k_negative[] = "negative"; static char __pyx_k_word2vec[] = "word2vec"; static char __pyx_k_cbow_mean[] = "cbow_mean"; static char __pyx_k_cum_table[] = "cum_table"; +static char __pyx_k_doc_words[] = "doc_words"; static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_inv_count[] = "inv_count"; static char __pyx_k_ValueError[] = "ValueError"; static char __pyx_k_doctag_len[] = "doctag_len"; +static char __pyx_k_sample_int[] = "sample_int"; static char __pyx_k_syn0_lockf[] = "syn0_lockf"; static char __pyx_k_word_locks[] = "word_locks"; static char __pyx_k_doctag_syn0[] = "doctag_syn0"; @@ -1251,7 +1264,6 @@ static char __pyx_k_learn_words[] = "learn_words"; static char __pyx_k_next_random[] = "next_random"; static char __pyx_k_train_words[] = "train_words"; static char __pyx_k_vector_size[] = "vector_size"; -static char __pyx_k_word_vocabs[] = "word_vocabs"; static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static char __pyx_k_RuntimeError[] = "RuntimeError"; static char __pyx_k_dm_tag_count[] = "dm_tag_count"; @@ -1284,7 +1296,7 @@ static char __pyx_k_train_document_dbow[] = "train_document_dbow"; static char __pyx_k_train_document_dm_concat[] = "train_document_dm_concat"; static char __pyx_k_gensim_models_doc2vec_inner[] = "gensim.models.doc2vec_inner"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; -static char __pyx_k_Volumes_work_workspace_gensim_t[] = "/Volumes/work/workspace/gensim/trunk/gensim/models/doc2vec_inner.pyx"; +static char __pyx_k_home_ubuntu_src_gensim_bigdocve[] = "/home/ubuntu/src/gensim-bigdocvec-pr/gensim/models/doc2vec_inner.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; @@ -1297,7 +1309,6 @@ static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; static PyObject *__pyx_n_s_REAL; static PyObject *__pyx_n_s_RuntimeError; static PyObject *__pyx_n_s_ValueError; -static PyObject *__pyx_kp_s_Volumes_work_workspace_gensim_t; static PyObject *__pyx_kp_s__5; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; @@ -1309,6 +1320,7 @@ static PyObject *__pyx_n_s_count; static PyObject *__pyx_n_s_cum_table; static PyObject *__pyx_n_s_cum_table_len; static PyObject *__pyx_n_s_dm_tag_count; +static PyObject *__pyx_n_s_doc_words; static PyObject *__pyx_n_s_doctag_indexes; static PyObject *__pyx_n_s_doctag_indexes_2; static PyObject *__pyx_n_s_doctag_len; @@ -1326,6 +1338,7 @@ static PyObject *__pyx_n_s_expected_doctag_len; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_gensim_models_doc2vec_inner; +static PyObject *__pyx_kp_s_home_ubuntu_src_gensim_bigdocve; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_import; @@ -1358,11 +1371,14 @@ static PyObject *__pyx_n_s_numpy; static PyObject *__pyx_n_s_point; static PyObject *__pyx_n_s_points; static PyObject *__pyx_n_s_predict_word; +static PyObject *__pyx_n_s_r; static PyObject *__pyx_n_s_randint; static PyObject *__pyx_n_s_random; static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_reduced_windows; static PyObject *__pyx_n_s_result; +static PyObject *__pyx_n_s_sample; +static PyObject *__pyx_n_s_sample_int; static PyObject *__pyx_n_s_scipy_linalg_blas; static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_syn0; @@ -1370,6 +1386,7 @@ static PyObject *__pyx_n_s_syn0_lockf; static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_token; static PyObject *__pyx_n_s_train_document_dbow; static PyObject *__pyx_n_s_train_document_dm; static PyObject *__pyx_n_s_train_document_dm_concat; @@ -1377,16 +1394,15 @@ static PyObject *__pyx_n_s_train_words; static PyObject *__pyx_n_s_train_words_2; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; static PyObject *__pyx_n_s_vector_size; +static PyObject *__pyx_n_s_vlookup; static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_window_indexes; -static PyObject *__pyx_n_s_word; static PyObject *__pyx_n_s_word2vec; static PyObject *__pyx_n_s_word_locks; static PyObject *__pyx_n_s_word_locks_2; static PyObject *__pyx_n_s_word_vectors; static PyObject *__pyx_n_s_word_vectors_2; -static PyObject *__pyx_n_s_word_vocabs; static PyObject *__pyx_n_s_work; static PyObject *__pyx_n_s_work_2; static PyObject *__pyx_n_s_zeros; @@ -2633,7 +2649,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_13doc2vec_inner_fast_docume /* "gensim/models/doc2vec_inner.pyx":222 * * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -2643,7 +2659,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_1train_document_dbow = {"train_document_dbow", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_word_vocabs = 0; + PyObject *__pyx_v_doc_words = 0; PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; @@ -2662,13 +2678,13 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_document_dbow (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_doc_words,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); /* "gensim/models/doc2vec_inner.pyx":223 * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -2679,7 +2695,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P values[8] = ((PyObject *)Py_True); /* "gensim/models/doc2vec_inner.pyx":224 - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -2715,7 +2731,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } @@ -2798,7 +2814,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P } } __pyx_v_model = values[0]; - __pyx_v_word_vocabs = values[1]; + __pyx_v_doc_words = values[1]; __pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; @@ -2819,12 +2835,12 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "gensim/models/doc2vec_inner.pyx":222 * * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -2834,9 +2850,10 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_1train_document_dbow(P return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; + int __pyx_v_sample; int __pyx_v__train_words; int __pyx_v__learn_words; int __pyx_v__learn_hidden; @@ -2865,6 +2882,8 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_predict_word = NULL; PyObject *__pyx_v_item = NULL; long __pyx_v_k; @@ -2872,21 +2891,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_3; - int __pyx_t_4; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; + int __pyx_t_6; Py_ssize_t __pyx_t_7; PyObject *__pyx_t_8 = NULL; unsigned PY_LONG_LONG __pyx_t_9; PyObject *__pyx_t_10 = NULL; - long __pyx_t_11; - Py_ssize_t __pyx_t_12; - int __pyx_t_13; - __pyx_t_5numpy_uint32_t __pyx_t_14; + PyObject *(*__pyx_t_11)(PyObject *); + __pyx_t_5numpy_uint32_t __pyx_t_12; + Py_ssize_t __pyx_t_13; + PyObject *__pyx_t_14 = NULL; PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; - PyObject *(*__pyx_t_17)(PyObject *); + long __pyx_t_16; + int __pyx_t_17; int __pyx_t_18; int __pyx_t_19; int __pyx_lineno = 0; @@ -2904,7 +2923,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int _train_words = train_words + * cdef int sample = (model.sample != 0) */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 225; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -2916,8 +2935,8 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) * cdef int _train_words = train_words - * cdef int _learn_words = learn_words */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 226; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -2928,589 +2947,720 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY /* "gensim/models/doc2vec_inner.pyx":227 * cdef int hs = model.hs * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * cdef int _train_words = train_words + * cdef int _learn_words = learn_words + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/doc2vec_inner.pyx":228 + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) * cdef int _train_words = train_words # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 227; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 228; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__train_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":228 - * cdef int negative = model.negative + /* "gensim/models/doc2vec_inner.pyx":229 + * cdef int sample = (model.sample != 0) * cdef int _train_words = train_words * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int _learn_doctags = learn_doctags */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 228; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":229 + /* "gensim/models/doc2vec_inner.pyx":230 * cdef int _train_words = train_words * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 229; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":230 + /* "gensim/models/doc2vec_inner.pyx":231 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 230; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 231; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":237 + /* "gensim/models/doc2vec_inner.pyx":238 * cdef REAL_t *_doctag_locks * cdef REAL_t *_work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 237; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__alpha = __pyx_t_3; + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":238 + /* "gensim/models/doc2vec_inner.pyx":239 * cdef REAL_t *_work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 238; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 239; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":246 + /* "gensim/models/doc2vec_inner.pyx":247 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":249 - * + /* "gensim/models/doc2vec_inner.pyx":251 * cdef int i, j + * cdef unsigned long long r * cdef long result = 0 # <<<<<<<<<<<<<< * * # For hierarchical softmax */ __pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":263 + /* "gensim/models/doc2vec_inner.pyx":265 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) */ - __pyx_t_4 = (__pyx_v_word_vectors == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_word_vectors == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":264 + /* "gensim/models/doc2vec_inner.pyx":266 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 266; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L3; } __pyx_L3:; - /* "gensim/models/doc2vec_inner.pyx":265 + /* "gensim/models/doc2vec_inner.pyx":267 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 265; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":266 + /* "gensim/models/doc2vec_inner.pyx":268 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) */ - __pyx_t_5 = (__pyx_v_doctag_vectors == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { + __pyx_t_6 = (__pyx_v_doctag_vectors == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":267 + /* "gensim/models/doc2vec_inner.pyx":269 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 269; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 267; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L4; } __pyx_L4:; - /* "gensim/models/doc2vec_inner.pyx":268 + /* "gensim/models/doc2vec_inner.pyx":270 * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 270; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":269 + /* "gensim/models/doc2vec_inner.pyx":271 * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) */ - __pyx_t_4 = (__pyx_v_word_locks == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_word_locks == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":270 + /* "gensim/models/doc2vec_inner.pyx":272 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 270; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_6); - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L5; } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":271 + /* "gensim/models/doc2vec_inner.pyx":273 * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":272 + /* "gensim/models/doc2vec_inner.pyx":274 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) */ - __pyx_t_5 = (__pyx_v_doctag_locks == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { + __pyx_t_6 = (__pyx_v_doctag_locks == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":273 + /* "gensim/models/doc2vec_inner.pyx":275 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L6; } __pyx_L6:; - /* "gensim/models/doc2vec_inner.pyx":274 + /* "gensim/models/doc2vec_inner.pyx":276 * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":276 + /* "gensim/models/doc2vec_inner.pyx":278 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) * */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":277 + /* "gensim/models/doc2vec_inner.pyx":279 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 279; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L7; } __pyx_L7:; - /* "gensim/models/doc2vec_inner.pyx":279 + /* "gensim/models/doc2vec_inner.pyx":281 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":280 + /* "gensim/models/doc2vec_inner.pyx":282 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":281 + /* "gensim/models/doc2vec_inner.pyx":283 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * if negative or sample: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":282 + /* "gensim/models/doc2vec_inner.pyx":284 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; + goto __pyx_L8; + } + __pyx_L8:; - /* "gensim/models/doc2vec_inner.pyx":283 + /* "gensim/models/doc2vec_inner.pyx":285 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_6 = (__pyx_v_negative != 0); + if (!__pyx_t_6) { + } else { + __pyx_t_5 = __pyx_t_6; + goto __pyx_L10_bool_binop_done; + } + __pyx_t_6 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_6; + __pyx_L10_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/doc2vec_inner.pyx":286 + * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - goto __pyx_L8; + goto __pyx_L9; } - __pyx_L8:; + __pyx_L9:; - /* "gensim/models/doc2vec_inner.pyx":286 + /* "gensim/models/doc2vec_inner.pyx":289 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) */ - __pyx_t_4 = (__pyx_v_work == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_work == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":287 + /* "gensim/models/doc2vec_inner.pyx":290 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + * */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L9; + goto __pyx_L12; } - __pyx_L9:; + __pyx_L12:; - /* "gensim/models/doc2vec_inner.pyx":288 + /* "gensim/models/doc2vec_inner.pyx":291 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) + * + * vlookup = model.vocab */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 288; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":289 - * work = zeros(model.layer1_size, dtype=REAL) + /* "gensim/models/doc2vec_inner.pyx":293 * _work = np.PyArray_DATA(work) - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< - * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * + * vlookup = model.vocab # <<<<<<<<<<<<<< + * i = 0 + * for token in doc_words: */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = 10000; - if (((__pyx_t_7 < __pyx_t_11) != 0)) { - __pyx_t_12 = __pyx_t_7; - } else { - __pyx_t_12 = __pyx_t_11; - } - __pyx_v_document_len = ((int)__pyx_t_12); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_v_vlookup = __pyx_t_10; + __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":290 - * _work = np.PyArray_DATA(work) - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + /* "gensim/models/doc2vec_inner.pyx":294 * - * for i in range(document_len): + * vlookup = model.vocab + * i = 0 # <<<<<<<<<<<<<< + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None + */ + __pyx_v_i = 0; + + /* "gensim/models/doc2vec_inner.pyx":295 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word */ - __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = 10000; - if (((__pyx_t_12 < __pyx_t_11) != 0)) { - __pyx_t_7 = __pyx_t_12; + if (likely(PyList_CheckExact(__pyx_v_doc_words)) || PyTuple_CheckExact(__pyx_v_doc_words)) { + __pyx_t_10 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_10); __pyx_t_7 = 0; + __pyx_t_11 = NULL; } else { - __pyx_t_7 = __pyx_t_11; + __pyx_t_7 = -1; __pyx_t_10 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_11 = Py_TYPE(__pyx_t_10)->tp_iternext; if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __pyx_v_doctag_len = ((int)__pyx_t_7); + for (;;) { + if (likely(!__pyx_t_11)) { + if (likely(PyList_CheckExact(__pyx_t_10))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_10)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_10, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_10)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_10, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_3 = __pyx_t_11(__pyx_t_10); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":292 - * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) - * - * for i in range(document_len): # <<<<<<<<<<<<<< - * predict_word = word_vocabs[i] - * if predict_word is None: - */ - __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + /* "gensim/models/doc2vec_inner.pyx":296 + * i = 0 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + */ + __pyx_t_6 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ((__pyx_t_6 != 0)) { + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_t_1; + __pyx_t_1 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_3 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":293 - * - * for i in range(document_len): - * predict_word = word_vocabs[i] # <<<<<<<<<<<<<< - * if predict_word is None: - * # shrink document to leave out word + /* "gensim/models/doc2vec_inner.pyx":297 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_10); - __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_10); - __pyx_t_10 = 0; + __pyx_t_6 = (__pyx_v_predict_word == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":294 - * for i in range(document_len): - * predict_word = word_vocabs[i] - * if predict_word is None: # <<<<<<<<<<<<<< - * # shrink document to leave out word - * document_len = document_len - 1 + /* "gensim/models/doc2vec_inner.pyx":298 + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged # <<<<<<<<<<<<<< + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue */ - __pyx_t_5 = (__pyx_v_predict_word == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { - - /* "gensim/models/doc2vec_inner.pyx":296 - * if predict_word is None: - * # shrink document to leave out word - * document_len = document_len - 1 # <<<<<<<<<<<<<< - * continue # leaving j unchanged - * else: + goto __pyx_L13_continue; + } + + /* "gensim/models/doc2vec_inner.pyx":299 + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[i] = predict_word.index */ - __pyx_v_document_len = (__pyx_v_document_len - 1); + __pyx_t_6 = (__pyx_v_sample != 0); + if (__pyx_t_6) { + } else { + __pyx_t_5 = __pyx_t_6; + goto __pyx_L17_bool_binop_done; + } + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_8 = PyObject_RichCompare(__pyx_t_3, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_8); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_8); if (unlikely(__pyx_t_6 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_5 = __pyx_t_6; + __pyx_L17_bool_binop_done:; + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":297 - * # shrink document to leave out word - * document_len = document_len - 1 - * continue # leaving j unchanged # <<<<<<<<<<<<<< - * else: - * indexes[i] = predict_word.index + /* "gensim/models/doc2vec_inner.pyx":300 + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[i] = predict_word.index + * if hs: */ - goto __pyx_L10_continue; + goto __pyx_L13_continue; } - /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":299 - * continue # leaving j unchanged - * else: - * indexes[i] = predict_word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[i] = len(predict_word.code) + /* "gensim/models/doc2vec_inner.pyx":301 + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue + * indexes[i] = predict_word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(predict_word.code) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":300 - * else: - * indexes[i] = predict_word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(predict_word.code) - * codes[i] = np.PyArray_DATA(predict_word.code) + /* "gensim/models/doc2vec_inner.pyx":302 + * continue + * indexes[i] = predict_word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":301 - * indexes[i] = predict_word.index - * if hs: - * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(predict_word.code) - * points[i] = np.PyArray_DATA(predict_word.point) + /* "gensim/models/doc2vec_inner.pyx":303 + * indexes[i] = predict_word.index + * if hs: + * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_7 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_7); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_13 = PyObject_Length(__pyx_t_8); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":302 - * if hs: - * codelens[i] = len(predict_word.code) - * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(predict_word.point) - * else: + /* "gensim/models/doc2vec_inner.pyx":304 + * if hs: + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - - /* "gensim/models/doc2vec_inner.pyx":303 - * codelens[i] = len(predict_word.code) - * codes[i] = np.PyArray_DATA(predict_word.code) - * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< - * else: - * codelens[i] = 1 + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + + /* "gensim/models/doc2vec_inner.pyx":305 + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< + * result += 1 + * i += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 303; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L13; - } - /*else*/ { + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + goto __pyx_L19; + } + __pyx_L19:; - /* "gensim/models/doc2vec_inner.pyx":305 - * points[i] = np.PyArray_DATA(predict_word.point) - * else: - * codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 - * + /* "gensim/models/doc2vec_inner.pyx":306 + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 # <<<<<<<<<<<<<< + * i += 1 + * if i == MAX_DOCUMENT_LEN: */ - (__pyx_v_codelens[__pyx_v_i]) = 1; - } - __pyx_L13:; + __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":306 - * else: - * codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< + /* "gensim/models/doc2vec_inner.pyx":307 + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 + * i += 1 # <<<<<<<<<<<<<< + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "gensim/models/doc2vec_inner.pyx":308 + * result += 1 + * i += 1 + * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * document_len = i + */ + __pyx_t_5 = ((__pyx_v_i == 10000) != 0); + if (__pyx_t_5) { + + /* "gensim/models/doc2vec_inner.pyx":309 + * i += 1 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * document_len = i * - * if _train_words: */ - __pyx_v_result = (__pyx_v_result + 1); + goto __pyx_L14_break; } - __pyx_L10_continue:; + + /* "gensim/models/doc2vec_inner.pyx":295 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + */ + __pyx_L13_continue:; } + __pyx_L14_break:; + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + + /* "gensim/models/doc2vec_inner.pyx":310 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? + * document_len = i # <<<<<<<<<<<<<< + * + * if _train_words: + */ + __pyx_v_document_len = __pyx_v_i; - /* "gensim/models/doc2vec_inner.pyx":308 - * result += 1 + /* "gensim/models/doc2vec_inner.pyx":312 + * document_len = i * * if _train_words: # <<<<<<<<<<<<<< * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): */ - __pyx_t_4 = (__pyx_v__train_words != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v__train_words != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":310 + /* "gensim/models/doc2vec_inner.pyx":314 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -3518,78 +3668,78 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * */ __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_15 = NULL; + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_14 = NULL; __pyx_t_7 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_15); + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_14 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_14)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_1, function); __pyx_t_7 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + __pyx_t_15 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + if (__pyx_t_14) { + PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_7, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_15, 0+__pyx_t_7, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_7, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_7, __pyx_t_8); + PyTuple_SET_ITEM(__pyx_t_15, 1+__pyx_t_7, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); - __pyx_t_1 = 0; + PyTuple_SET_ITEM(__pyx_t_15, 2+__pyx_t_7, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); __pyx_t_8 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_16, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_15, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (likely(PyList_CheckExact(__pyx_t_10)) || PyTuple_CheckExact(__pyx_t_10)) { - __pyx_t_6 = __pyx_t_10; __Pyx_INCREF(__pyx_t_6); __pyx_t_7 = 0; - __pyx_t_17 = NULL; + __pyx_t_1 = __pyx_t_10; __Pyx_INCREF(__pyx_t_1); __pyx_t_7 = 0; + __pyx_t_11 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_17 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_6)) break; + if (likely(!__pyx_t_11)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_6, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_10 = __pyx_t_17(__pyx_t_6); + __pyx_t_10 = __pyx_t_11(__pyx_t_1); if (unlikely(!__pyx_t_10)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -3600,17 +3750,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/doc2vec_inner.pyx":311 + /* "gensim/models/doc2vec_inner.pyx":315 * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * for i in range(doctag_len): + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 315; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":310 + /* "gensim/models/doc2vec_inner.pyx":314 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< @@ -3618,36 +3768,52 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * */ } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L14; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L21; } - __pyx_L14:; + __pyx_L21:; - /* "gensim/models/doc2vec_inner.pyx":313 + /* "gensim/models/doc2vec_inner.pyx":317 * reduced_windows[i] = item * + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] + */ + __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = 10000; + if (((__pyx_t_7 < __pyx_t_16) != 0)) { + __pyx_t_13 = __pyx_t_7; + } else { + __pyx_t_13 = __pyx_t_16; + } + __pyx_v_doctag_len = ((int)__pyx_t_13); + + /* "gensim/models/doc2vec_inner.pyx":318 + * + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< * _doctag_indexes[i] = doctag_indexes[i] * result += 1 */ __pyx_t_2 = __pyx_v_doctag_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_i = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":314 - * + /* "gensim/models/doc2vec_inner.pyx":319 + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":315 + /* "gensim/models/doc2vec_inner.pyx":320 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -3657,12 +3823,12 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":318 + /* "gensim/models/doc2vec_inner.pyx":323 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< * for i in range(document_len): - * if codelens[i] == 0: + * if _train_words: # simultaneous skip-gram wordvec-training */ { #ifdef WITH_THREAD @@ -3671,49 +3837,29 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":319 + /* "gensim/models/doc2vec_inner.pyx":324 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< - * if codelens[i] == 0: - * continue + * if _train_words: # simultaneous skip-gram wordvec-training + * j = i - window + reduced_windows[i] */ __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_i = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":320 + /* "gensim/models/doc2vec_inner.pyx":325 * with nogil: * for i in range(document_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * if _train_words: # simultaneous skip-gram wordvec-training - */ - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); - if (__pyx_t_4) { - - /* "gensim/models/doc2vec_inner.pyx":321 - * for i in range(document_len): - * if codelens[i] == 0: - * continue # <<<<<<<<<<<<<< - * if _train_words: # simultaneous skip-gram wordvec-training - * j = i - window + reduced_windows[i] - */ - goto __pyx_L22_continue; - } - - /* "gensim/models/doc2vec_inner.pyx":322 - * if codelens[i] == 0: - * continue * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < 0: */ - __pyx_t_4 = (__pyx_v__train_words != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v__train_words != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":323 - * continue + /* "gensim/models/doc2vec_inner.pyx":326 + * for i in range(document_len): * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< * if j < 0: @@ -3721,17 +3867,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":324 + /* "gensim/models/doc2vec_inner.pyx":327 * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 * k = i + window + 1 - reduced_windows[i] */ - __pyx_t_4 = ((__pyx_v_j < 0) != 0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":325 + /* "gensim/models/doc2vec_inner.pyx":328 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3739,11 +3885,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * if k > document_len: */ __pyx_v_j = 0; - goto __pyx_L26; + goto __pyx_L32; } - __pyx_L26:; + __pyx_L32:; - /* "gensim/models/doc2vec_inner.pyx":326 + /* "gensim/models/doc2vec_inner.pyx":329 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3752,78 +3898,70 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":327 + /* "gensim/models/doc2vec_inner.pyx":330 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< * k = document_len * for j in range(j, k): */ - __pyx_t_4 = ((__pyx_v_k > __pyx_v_document_len) != 0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_k > __pyx_v_document_len) != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":328 + /* "gensim/models/doc2vec_inner.pyx":331 * k = i + window + 1 - reduced_windows[i] * if k > document_len: * k = document_len # <<<<<<<<<<<<<< * for j in range(j, k): - * if j == i or codelens[j] == 0: + * if j == i: */ __pyx_v_k = __pyx_v_document_len; - goto __pyx_L27; + goto __pyx_L33; } - __pyx_L27:; + __pyx_L33:; - /* "gensim/models/doc2vec_inner.pyx":329 + /* "gensim/models/doc2vec_inner.pyx":332 * if k > document_len: * k = document_len * for j in range(j, k): # <<<<<<<<<<<<<< - * if j == i or codelens[j] == 0: + * if j == i: * continue */ - __pyx_t_11 = __pyx_v_k; - for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_11; __pyx_t_18+=1) { + __pyx_t_16 = __pyx_v_k; + for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_16; __pyx_t_18+=1) { __pyx_v_j = __pyx_t_18; - /* "gensim/models/doc2vec_inner.pyx":330 + /* "gensim/models/doc2vec_inner.pyx":333 * k = document_len * for j in range(j, k): - * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< + * if j == i: # <<<<<<<<<<<<<< * continue * if hs: */ __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L31_bool_binop_done; - } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L31_bool_binop_done:; - if (__pyx_t_4) { + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":331 + /* "gensim/models/doc2vec_inner.pyx":334 * for j in range(j, k): - * if j == i or codelens[j] == 0: + * if j == i: * continue # <<<<<<<<<<<<<< * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose */ - goto __pyx_L28_continue; + goto __pyx_L34_continue; } - /* "gensim/models/doc2vec_inner.pyx":332 - * if j == i or codelens[j] == 0: + /* "gensim/models/doc2vec_inner.pyx":335 + * if j == i: * continue * if hs: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":334 + /* "gensim/models/doc2vec_inner.pyx":337 * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< @@ -3831,21 +3969,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * if negative: */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - goto __pyx_L33; + goto __pyx_L37; } - __pyx_L33:; + __pyx_L37:; - /* "gensim/models/doc2vec_inner.pyx":336 + /* "gensim/models/doc2vec_inner.pyx":339 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _word_vectors, syn1neg, size, */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":338 + /* "gensim/models/doc2vec_inner.pyx":341 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -3853,16 +3991,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * _learn_words, _learn_hidden, _word_locks) */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - goto __pyx_L34; + goto __pyx_L38; } - __pyx_L34:; - __pyx_L28_continue:; + __pyx_L38:; + __pyx_L34_continue:; } - goto __pyx_L25; + goto __pyx_L31; } - __pyx_L25:; + __pyx_L31:; - /* "gensim/models/doc2vec_inner.pyx":343 + /* "gensim/models/doc2vec_inner.pyx":346 * * # docvec-training * for j in range(doctag_len): # <<<<<<<<<<<<<< @@ -3873,17 +4011,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":344 + /* "gensim/models/doc2vec_inner.pyx":347 * # docvec-training * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":345 + /* "gensim/models/doc2vec_inner.pyx":348 * for j in range(doctag_len): * if hs: * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], # <<<<<<<<<<<<<< @@ -3891,21 +4029,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * if negative: */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doctag_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); - goto __pyx_L37; + goto __pyx_L41; } - __pyx_L37:; + __pyx_L41:; - /* "gensim/models/doc2vec_inner.pyx":347 + /* "gensim/models/doc2vec_inner.pyx":350 * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _doctag_vectors, syn1neg, size, * indexes[i], _doctag_indexes[j], _alpha, _work, next_random, */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":348 + /* "gensim/models/doc2vec_inner.pyx":351 * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: * next_random = fast_document_dbow_neg(negative, cum_table, cum_table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -3913,33 +4051,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * _learn_doctags, _learn_hidden, _doctag_locks) */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v__doctag_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); - goto __pyx_L38; + goto __pyx_L42; } - __pyx_L38:; + __pyx_L42:; } - __pyx_L22_continue:; } } - /* "gensim/models/doc2vec_inner.pyx":318 + /* "gensim/models/doc2vec_inner.pyx":323 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< * for i in range(document_len): - * if codelens[i] == 0: + * if _train_words: # simultaneous skip-gram wordvec-training */ /*finally:*/ { /*normal exit:*/{ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L21; + goto __pyx_L28; } - __pyx_L21:; + __pyx_L28:; } } - /* "gensim/models/doc2vec_inner.pyx":352 + /* "gensim/models/doc2vec_inner.pyx":355 * _learn_doctags, _learn_hidden, _doctag_locks) * * return result # <<<<<<<<<<<<<< @@ -3947,16 +4084,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 352; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; /* "gensim/models/doc2vec_inner.pyx":222 * * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -3964,14 +4101,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_predict_word); __Pyx_XDECREF(__pyx_v_item); __Pyx_XDECREF(__pyx_v_work); @@ -3984,10 +4123,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_train_document_dbow(CY return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":355 +/* "gensim/models/doc2vec_inner.pyx":358 * * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -3997,7 +4136,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm = {"train_document_dm", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_word_vocabs = 0; + PyObject *__pyx_v_doc_words = 0; PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; @@ -4016,14 +4155,14 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_document_dm (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_doc_words,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "gensim/models/doc2vec_inner.pyx":356 + /* "gensim/models/doc2vec_inner.pyx":359 * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -4032,8 +4171,8 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "gensim/models/doc2vec_inner.pyx":357 - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + /* "gensim/models/doc2vec_inner.pyx":360 + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -4069,19 +4208,19 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -4130,7 +4269,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -4152,7 +4291,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO } } __pyx_v_model = values[0]; - __pyx_v_word_vocabs = values[1]; + __pyx_v_doc_words = values[1]; __pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; @@ -4167,18 +4306,18 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":358 * * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -4188,9 +4327,10 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_3train_document_dm(PyO return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; + int __pyx_v_sample; int __pyx_v__learn_doctags; int __pyx_v__learn_words; int __pyx_v__learn_hidden; @@ -4224,27 +4364,29 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_token = NULL; + PyObject *__pyx_v_predict_word = NULL; PyObject *__pyx_v_item = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_3; - int __pyx_t_4; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; int __pyx_t_5; - PyObject *__pyx_t_6 = NULL; + int __pyx_t_6; Py_ssize_t __pyx_t_7; PyObject *__pyx_t_8 = NULL; unsigned PY_LONG_LONG __pyx_t_9; PyObject *__pyx_t_10 = NULL; - long __pyx_t_11; - Py_ssize_t __pyx_t_12; - int __pyx_t_13; - __pyx_t_5numpy_uint32_t __pyx_t_14; + PyObject *(*__pyx_t_11)(PyObject *); + __pyx_t_5numpy_uint32_t __pyx_t_12; + Py_ssize_t __pyx_t_13; + PyObject *__pyx_t_14 = NULL; PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; - PyObject *(*__pyx_t_17)(PyObject *); + long __pyx_t_16; + int __pyx_t_17; int __pyx_t_18; int __pyx_t_19; int __pyx_lineno = 0; @@ -4258,76 +4400,91 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":358 + /* "gensim/models/doc2vec_inner.pyx":361 * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int _learn_doctags = learn_doctags + * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":359 + /* "gensim/models/doc2vec_inner.pyx":362 * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags - * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":360 + /* "gensim/models/doc2vec_inner.pyx":363 * cdef int hs = model.hs * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * cdef int _learn_doctags = learn_doctags + * cdef int _learn_words = learn_words + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/doc2vec_inner.pyx":364 + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":361 - * cdef int negative = model.negative + /* "gensim/models/doc2vec_inner.pyx":365 + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":362 + /* "gensim/models/doc2vec_inner.pyx":366 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":363 + /* "gensim/models/doc2vec_inner.pyx":367 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * cdef REAL_t count, inv_count = 1.0 * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":364 + /* "gensim/models/doc2vec_inner.pyx":368 * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 # <<<<<<<<<<<<<< @@ -4336,43 +4493,43 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_inv_count = 1.0; - /* "gensim/models/doc2vec_inner.pyx":372 + /* "gensim/models/doc2vec_inner.pyx":376 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__alpha = __pyx_t_3; + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":373 + /* "gensim/models/doc2vec_inner.pyx":377 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":381 + /* "gensim/models/doc2vec_inner.pyx":385 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k, m */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":384 + /* "gensim/models/doc2vec_inner.pyx":388 * * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< @@ -4381,621 +4538,746 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":398 + /* "gensim/models/doc2vec_inner.pyx":402 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) */ - __pyx_t_4 = (__pyx_v_word_vectors == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_word_vectors == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":399 + /* "gensim/models/doc2vec_inner.pyx":403 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L3; } __pyx_L3:; - /* "gensim/models/doc2vec_inner.pyx":400 + /* "gensim/models/doc2vec_inner.pyx":404 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":401 + /* "gensim/models/doc2vec_inner.pyx":405 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) */ - __pyx_t_5 = (__pyx_v_doctag_vectors == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { + __pyx_t_6 = (__pyx_v_doctag_vectors == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":402 + /* "gensim/models/doc2vec_inner.pyx":406 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L4; } __pyx_L4:; - /* "gensim/models/doc2vec_inner.pyx":403 + /* "gensim/models/doc2vec_inner.pyx":407 * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":404 + /* "gensim/models/doc2vec_inner.pyx":408 * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) */ - __pyx_t_4 = (__pyx_v_word_locks == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_word_locks == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":405 + /* "gensim/models/doc2vec_inner.pyx":409 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_6); - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L5; } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":406 + /* "gensim/models/doc2vec_inner.pyx":410 * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":407 + /* "gensim/models/doc2vec_inner.pyx":411 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) */ - __pyx_t_5 = (__pyx_v_doctag_locks == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { + __pyx_t_6 = (__pyx_v_doctag_locks == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":408 + /* "gensim/models/doc2vec_inner.pyx":412 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L6; } __pyx_L6:; - /* "gensim/models/doc2vec_inner.pyx":409 + /* "gensim/models/doc2vec_inner.pyx":413 * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 413; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":411 + /* "gensim/models/doc2vec_inner.pyx":415 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) * */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":412 + /* "gensim/models/doc2vec_inner.pyx":416 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L7; } __pyx_L7:; - /* "gensim/models/doc2vec_inner.pyx":414 + /* "gensim/models/doc2vec_inner.pyx":418 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":415 + /* "gensim/models/doc2vec_inner.pyx":419 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 419; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":416 + /* "gensim/models/doc2vec_inner.pyx":420 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * if negative or sample: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 420; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":417 + /* "gensim/models/doc2vec_inner.pyx":421 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; + goto __pyx_L8; + } + __pyx_L8:; - /* "gensim/models/doc2vec_inner.pyx":418 + /* "gensim/models/doc2vec_inner.pyx":422 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_6 = (__pyx_v_negative != 0); + if (!__pyx_t_6) { + } else { + __pyx_t_5 = __pyx_t_6; + goto __pyx_L10_bool_binop_done; + } + __pyx_t_6 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_6; + __pyx_L10_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/doc2vec_inner.pyx":423 + * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyNumber_Add(__pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __pyx_v_next_random = __pyx_t_9; - goto __pyx_L8; + goto __pyx_L9; } - __pyx_L8:; + __pyx_L9:; - /* "gensim/models/doc2vec_inner.pyx":421 + /* "gensim/models/doc2vec_inner.pyx":426 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) */ - __pyx_t_4 = (__pyx_v_work == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + __pyx_t_5 = (__pyx_v_work == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":422 + /* "gensim/models/doc2vec_inner.pyx":427 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 422; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 427; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L9; + goto __pyx_L12; } - __pyx_L9:; + __pyx_L12:; - /* "gensim/models/doc2vec_inner.pyx":423 + /* "gensim/models/doc2vec_inner.pyx":428 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":424 + /* "gensim/models/doc2vec_inner.pyx":429 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) */ - __pyx_t_5 = (__pyx_v_neu1 == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { + __pyx_t_6 = (__pyx_v_neu1 == Py_None); + __pyx_t_5 = (__pyx_t_6 != 0); + if (__pyx_t_5) { - /* "gensim/models/doc2vec_inner.pyx":425 + /* "gensim/models/doc2vec_inner.pyx":430 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_8) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L10; + goto __pyx_L13; } - __pyx_L10:; + __pyx_L13:; - /* "gensim/models/doc2vec_inner.pyx":426 + /* "gensim/models/doc2vec_inner.pyx":431 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + * vlookup = model.vocab */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "gensim/models/doc2vec_inner.pyx":428 + /* "gensim/models/doc2vec_inner.pyx":433 * _neu1 = np.PyArray_DATA(neu1) * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< - * j = 0 - * for i in range(document_len): + * vlookup = model.vocab # <<<<<<<<<<<<<< + * i = 0 + * for token in doc_words: */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = 10000; - if (((__pyx_t_7 < __pyx_t_11) != 0)) { - __pyx_t_12 = __pyx_t_7; - } else { - __pyx_t_12 = __pyx_t_11; - } - __pyx_v_document_len = ((int)__pyx_t_12); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_v_vlookup = __pyx_t_8; + __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":429 + /* "gensim/models/doc2vec_inner.pyx":434 * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * j = 0 # <<<<<<<<<<<<<< - * for i in range(document_len): - * word = word_vocabs[i] + * vlookup = model.vocab + * i = 0 # <<<<<<<<<<<<<< + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None */ - __pyx_v_j = 0; + __pyx_v_i = 0; - /* "gensim/models/doc2vec_inner.pyx":430 - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * j = 0 - * for i in range(document_len): # <<<<<<<<<<<<<< - * word = word_vocabs[i] - * if word is None: + /* "gensim/models/doc2vec_inner.pyx":435 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word */ - __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + if (likely(PyList_CheckExact(__pyx_v_doc_words)) || PyTuple_CheckExact(__pyx_v_doc_words)) { + __pyx_t_8 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_8); __pyx_t_7 = 0; + __pyx_t_11 = NULL; + } else { + __pyx_t_7 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_11 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_11)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_3 = __pyx_t_11(__pyx_t_8); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":431 - * j = 0 - * for i in range(document_len): - * word = word_vocabs[i] # <<<<<<<<<<<<<< - * if word is None: - * # shrink document to leave out word + /* "gensim/models/doc2vec_inner.pyx":436 + * i = 0 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + */ + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ((__pyx_t_5 != 0)) { + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_t_1; + __pyx_t_1 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_3 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_3); + __pyx_t_3 = 0; + + /* "gensim/models/doc2vec_inner.pyx":437 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_8); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_8); - __pyx_t_8 = 0; + __pyx_t_5 = (__pyx_v_predict_word == Py_None); + __pyx_t_6 = (__pyx_t_5 != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":432 - * for i in range(document_len): - * word = word_vocabs[i] - * if word is None: # <<<<<<<<<<<<<< - * # shrink document to leave out word - * document_len = document_len - 1 + /* "gensim/models/doc2vec_inner.pyx":438 + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged # <<<<<<<<<<<<<< + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue */ - __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_5 = (__pyx_t_4 != 0); - if (__pyx_t_5) { + goto __pyx_L14_continue; + } - /* "gensim/models/doc2vec_inner.pyx":434 - * if word is None: - * # shrink document to leave out word - * document_len = document_len - 1 # <<<<<<<<<<<<<< - * continue # leaving j unchanged - * else: + /* "gensim/models/doc2vec_inner.pyx":439 + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[i] = predict_word.index */ - __pyx_v_document_len = (__pyx_v_document_len - 1); + __pyx_t_5 = (__pyx_v_sample != 0); + if (__pyx_t_5) { + } else { + __pyx_t_6 = __pyx_t_5; + goto __pyx_L18_bool_binop_done; + } + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_10 = PyObject_RichCompare(__pyx_t_3, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_10); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_10); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_6 = __pyx_t_5; + __pyx_L18_bool_binop_done:; + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":435 - * # shrink document to leave out word - * document_len = document_len - 1 - * continue # leaving j unchanged # <<<<<<<<<<<<<< - * else: - * indexes[j] = word.index + /* "gensim/models/doc2vec_inner.pyx":440 + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[i] = predict_word.index + * if hs: */ - goto __pyx_L11_continue; + goto __pyx_L14_continue; } - /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":437 - * continue # leaving j unchanged - * else: - * indexes[j] = word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[j] = len(word.code) + /* "gensim/models/doc2vec_inner.pyx":441 + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue + * indexes[i] = predict_word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(predict_word.code) */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":438 - * else: - * indexes[j] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) + /* "gensim/models/doc2vec_inner.pyx":442 + * continue + * indexes[i] = predict_word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) */ - __pyx_t_5 = (__pyx_v_hs != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v_hs != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":439 - * indexes[j] = word.index - * if hs: - * codelens[j] = len(word.code) # <<<<<<<<<<<<<< - * codes[j] = np.PyArray_DATA(word.code) - * points[j] = np.PyArray_DATA(word.point) + /* "gensim/models/doc2vec_inner.pyx":443 + * indexes[i] = predict_word.index + * if hs: + * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_12 = PyObject_Length(__pyx_t_8); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_12); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_13 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":440 - * if hs: - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[j] = np.PyArray_DATA(word.point) - * result += 1 - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 440; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - - /* "gensim/models/doc2vec_inner.pyx":441 - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) - * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * result += 1 - * j = j + 1 - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - if (!(likely(((__pyx_t_8) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_8, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_8))); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - goto __pyx_L14; - } - __pyx_L14:; + /* "gensim/models/doc2vec_inner.pyx":444 + * if hs: + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 + */ + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":442 - * codes[j] = np.PyArray_DATA(word.code) - * points[j] = np.PyArray_DATA(word.point) - * result += 1 # <<<<<<<<<<<<<< - * j = j + 1 - * # single randint() call avoids a big thread-sync slowdown + /* "gensim/models/doc2vec_inner.pyx":445 + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< + * result += 1 + * i += 1 + */ + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + goto __pyx_L20; + } + __pyx_L20:; + + /* "gensim/models/doc2vec_inner.pyx":446 + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 # <<<<<<<<<<<<<< + * i += 1 + * if i == MAX_DOCUMENT_LEN: */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":443 - * points[j] = np.PyArray_DATA(word.point) - * result += 1 - * j = j + 1 # <<<<<<<<<<<<<< - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(model.random.randint(0, window, document_len)): + /* "gensim/models/doc2vec_inner.pyx":447 + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 + * i += 1 # <<<<<<<<<<<<<< + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "gensim/models/doc2vec_inner.pyx":448 + * result += 1 + * i += 1 + * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * document_len = i */ - __pyx_v_j = (__pyx_v_j + 1); + __pyx_t_6 = ((__pyx_v_i == 10000) != 0); + if (__pyx_t_6) { + + /* "gensim/models/doc2vec_inner.pyx":449 + * i += 1 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * document_len = i + * + */ + goto __pyx_L15_break; } - __pyx_L11_continue:; + + /* "gensim/models/doc2vec_inner.pyx":435 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + */ + __pyx_L14_continue:; } + __pyx_L15_break:; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/doc2vec_inner.pyx":445 - * j = j + 1 + /* "gensim/models/doc2vec_inner.pyx":450 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? + * document_len = i # <<<<<<<<<<<<<< + * + * # single randint() call avoids a big thread-sync slowdown + */ + __pyx_v_document_len = __pyx_v_i; + + /* "gensim/models/doc2vec_inner.pyx":453 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_15 = NULL; - __pyx_t_12 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_15); + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_14 = NULL; + __pyx_t_7 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_14 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_14)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - __pyx_t_12 = 1; + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_7 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + __pyx_t_15 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + if (__pyx_t_14) { + PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_12, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_15, 0+__pyx_t_7, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_12, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_12, __pyx_t_10); + PyTuple_SET_ITEM(__pyx_t_15, 1+__pyx_t_7, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); - __pyx_t_1 = 0; + PyTuple_SET_ITEM(__pyx_t_15, 2+__pyx_t_7, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); __pyx_t_10 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_16, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = 0; + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_15, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (likely(PyList_CheckExact(__pyx_t_8)) || PyTuple_CheckExact(__pyx_t_8)) { - __pyx_t_6 = __pyx_t_8; __Pyx_INCREF(__pyx_t_6); __pyx_t_12 = 0; - __pyx_t_17 = NULL; + __pyx_t_1 = __pyx_t_8; __Pyx_INCREF(__pyx_t_1); __pyx_t_7 = 0; + __pyx_t_11 = NULL; } else { - __pyx_t_12 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_17 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_8); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_11 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_6)) break; + if (likely(!__pyx_t_11)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_8 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_12); __Pyx_INCREF(__pyx_t_8); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_6, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_12); __Pyx_INCREF(__pyx_t_8); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_8 = PySequence_ITEM(__pyx_t_6, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_8 = __pyx_t_17(__pyx_t_6); + __pyx_t_8 = __pyx_t_11(__pyx_t_1); if (unlikely(!__pyx_t_8)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -5006,43 +5288,43 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/doc2vec_inner.pyx":446 + /* "gensim/models/doc2vec_inner.pyx":454 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":445 - * j = j + 1 + /* "gensim/models/doc2vec_inner.pyx":453 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/doc2vec_inner.pyx":448 + /* "gensim/models/doc2vec_inner.pyx":456 * reduced_windows[i] = item * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_11 = 10000; - if (((__pyx_t_12 < __pyx_t_11) != 0)) { - __pyx_t_7 = __pyx_t_12; + __pyx_t_7 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = 10000; + if (((__pyx_t_7 < __pyx_t_16) != 0)) { + __pyx_t_13 = __pyx_t_7; } else { - __pyx_t_7 = __pyx_t_11; + __pyx_t_13 = __pyx_t_16; } - __pyx_v_doctag_len = ((int)__pyx_t_7); + __pyx_v_doctag_len = ((int)__pyx_t_13); - /* "gensim/models/doc2vec_inner.pyx":449 + /* "gensim/models/doc2vec_inner.pyx":457 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -5050,23 +5332,23 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * result += 1 */ __pyx_t_2 = __pyx_v_doctag_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_i = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":450 + /* "gensim/models/doc2vec_inner.pyx":458 * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/doc2vec_inner.pyx":451 + /* "gensim/models/doc2vec_inner.pyx":459 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -5076,7 +5358,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":454 + /* "gensim/models/doc2vec_inner.pyx":462 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -5090,7 +5372,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":455 + /* "gensim/models/doc2vec_inner.pyx":463 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< @@ -5098,10 +5380,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * if j < 0: */ __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_2; __pyx_t_17+=1) { + __pyx_v_i = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":456 + /* "gensim/models/doc2vec_inner.pyx":464 * with nogil: * for i in range(document_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -5110,17 +5392,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":457 + /* "gensim/models/doc2vec_inner.pyx":465 * for i in range(document_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 * k = i + window + 1 - reduced_windows[i] */ - __pyx_t_5 = ((__pyx_v_j < 0) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":458 + /* "gensim/models/doc2vec_inner.pyx":466 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5128,11 +5410,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * if k > document_len: */ __pyx_v_j = 0; - goto __pyx_L24; + goto __pyx_L31; } - __pyx_L24:; + __pyx_L31:; - /* "gensim/models/doc2vec_inner.pyx":459 + /* "gensim/models/doc2vec_inner.pyx":467 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -5141,17 +5423,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/doc2vec_inner.pyx":460 + /* "gensim/models/doc2vec_inner.pyx":468 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > document_len: # <<<<<<<<<<<<<< * k = document_len * */ - __pyx_t_5 = ((__pyx_v_k > __pyx_v_document_len) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((__pyx_v_k > __pyx_v_document_len) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":461 + /* "gensim/models/doc2vec_inner.pyx":469 * k = i + window + 1 - reduced_windows[i] * if k > document_len: * k = document_len # <<<<<<<<<<<<<< @@ -5159,11 +5441,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * # compose l1 (in _neu1) & clear _work */ __pyx_v_k = __pyx_v_document_len; - goto __pyx_L25; + goto __pyx_L32; } - __pyx_L25:; + __pyx_L32:; - /* "gensim/models/doc2vec_inner.pyx":464 + /* "gensim/models/doc2vec_inner.pyx":472 * * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -5172,7 +5454,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ memset(__pyx_v__neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":465 + /* "gensim/models/doc2vec_inner.pyx":473 * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -5181,7 +5463,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/doc2vec_inner.pyx":466 + /* "gensim/models/doc2vec_inner.pyx":474 * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5192,28 +5474,28 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":467 + /* "gensim/models/doc2vec_inner.pyx":475 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":468 + /* "gensim/models/doc2vec_inner.pyx":476 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L26_continue; + goto __pyx_L33_continue; } /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":470 + /* "gensim/models/doc2vec_inner.pyx":478 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -5222,7 +5504,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_13doc2vec_inner_ONEF); - /* "gensim/models/doc2vec_inner.pyx":471 + /* "gensim/models/doc2vec_inner.pyx":479 * else: * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -5231,10 +5513,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L26_continue:; + __pyx_L33_continue:; } - /* "gensim/models/doc2vec_inner.pyx":472 + /* "gensim/models/doc2vec_inner.pyx":480 * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -5245,7 +5527,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":473 + /* "gensim/models/doc2vec_inner.pyx":481 * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): * count += ONEF # <<<<<<<<<<<<<< @@ -5254,7 +5536,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_13doc2vec_inner_ONEF); - /* "gensim/models/doc2vec_inner.pyx":474 + /* "gensim/models/doc2vec_inner.pyx":482 * for m in range(doctag_len): * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -5264,17 +5546,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - /* "gensim/models/doc2vec_inner.pyx":475 + /* "gensim/models/doc2vec_inner.pyx":483 * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count * if cbow_mean: */ - __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":476 + /* "gensim/models/doc2vec_inner.pyx":484 * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -5282,21 +5564,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - goto __pyx_L31; + goto __pyx_L38; } - __pyx_L31:; + __pyx_L38:; - /* "gensim/models/doc2vec_inner.pyx":477 + /* "gensim/models/doc2vec_inner.pyx":485 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error */ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":478 + /* "gensim/models/doc2vec_inner.pyx":486 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -5304,11 +5586,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * if hs: */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__neu1, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L32; + goto __pyx_L39; } - __pyx_L32:; + __pyx_L39:; - /* "gensim/models/doc2vec_inner.pyx":479 + /* "gensim/models/doc2vec_inner.pyx":487 * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -5317,17 +5599,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":480 + /* "gensim/models/doc2vec_inner.pyx":488 * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: # <<<<<<<<<<<<<< * fast_document_dm_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, */ - __pyx_t_5 = (__pyx_v_hs != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v_hs != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":481 + /* "gensim/models/doc2vec_inner.pyx":489 * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: * fast_document_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -5335,21 +5617,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * size, _learn_hidden) */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - goto __pyx_L33; + goto __pyx_L40; } - __pyx_L33:; + __pyx_L40:; - /* "gensim/models/doc2vec_inner.pyx":484 + /* "gensim/models/doc2vec_inner.pyx":492 * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< * next_random = fast_document_dm_neg(negative, cum_table, cum_table_len, next_random, * _neu1, syn1neg, indexes[i], _alpha, _work, */ - __pyx_t_5 = (__pyx_v_negative != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v_negative != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":485 + /* "gensim/models/doc2vec_inner.pyx":493 * size, _learn_hidden) * if negative: * next_random = fast_document_dm_neg(negative, cum_table, cum_table_len, next_random, # <<<<<<<<<<<<<< @@ -5357,21 +5639,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * size, _learn_hidden) */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dm_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - goto __pyx_L34; + goto __pyx_L41; } - __pyx_L34:; + __pyx_L41:; - /* "gensim/models/doc2vec_inner.pyx":489 + /* "gensim/models/doc2vec_inner.pyx":497 * size, _learn_hidden) * * if not cbow_mean: # <<<<<<<<<<<<<< * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) * # apply accumulated error in work */ - __pyx_t_5 = ((!(__pyx_v_cbow_mean != 0)) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":490 + /* "gensim/models/doc2vec_inner.pyx":498 * * if not cbow_mean: * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -5379,21 +5661,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * if _learn_doctags: */ __pyx_v_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L35; + goto __pyx_L42; } - __pyx_L35:; + __pyx_L42:; - /* "gensim/models/doc2vec_inner.pyx":492 + /* "gensim/models/doc2vec_inner.pyx":500 * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) * # apply accumulated error in work * if _learn_doctags: # <<<<<<<<<<<<<< * for m in range(doctag_len): * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, */ - __pyx_t_5 = (__pyx_v__learn_doctags != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v__learn_doctags != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":493 + /* "gensim/models/doc2vec_inner.pyx":501 * # apply accumulated error in work * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -5404,7 +5686,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":494 + /* "gensim/models/doc2vec_inner.pyx":502 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, # <<<<<<<<<<<<<< @@ -5413,21 +5695,21 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L36; + goto __pyx_L43; } - __pyx_L36:; + __pyx_L43:; - /* "gensim/models/doc2vec_inner.pyx":496 + /* "gensim/models/doc2vec_inner.pyx":504 * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i: */ - __pyx_t_5 = (__pyx_v__learn_words != 0); - if (__pyx_t_5) { + __pyx_t_6 = (__pyx_v__learn_words != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":497 + /* "gensim/models/doc2vec_inner.pyx":505 * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5438,28 +5720,28 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "gensim/models/doc2vec_inner.pyx":498 + /* "gensim/models/doc2vec_inner.pyx":506 * if _learn_words: * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (__pyx_t_5) { + __pyx_t_6 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_6) { - /* "gensim/models/doc2vec_inner.pyx":499 + /* "gensim/models/doc2vec_inner.pyx":507 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, */ - goto __pyx_L40_continue; + goto __pyx_L47_continue; } /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":501 + /* "gensim/models/doc2vec_inner.pyx":509 * continue * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, # <<<<<<<<<<<<<< @@ -5468,15 +5750,15 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L40_continue:; + __pyx_L47_continue:; } - goto __pyx_L39; + goto __pyx_L46; } - __pyx_L39:; + __pyx_L46:; } } - /* "gensim/models/doc2vec_inner.pyx":454 + /* "gensim/models/doc2vec_inner.pyx":462 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -5488,13 +5770,13 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L21; + goto __pyx_L28; } - __pyx_L21:; + __pyx_L28:; } } - /* "gensim/models/doc2vec_inner.pyx":504 + /* "gensim/models/doc2vec_inner.pyx":512 * &_word_vectors[indexes[m] * size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -5502,16 +5784,16 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 504; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":358 * * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -5519,15 +5801,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; - __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_predict_word); __Pyx_XDECREF(__pyx_v_item); __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_neu1); @@ -5540,10 +5824,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_2train_document_dm(CYT return __pyx_r; } -/* "gensim/models/doc2vec_inner.pyx":507 +/* "gensim/models/doc2vec_inner.pyx":515 * * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -5553,7 +5837,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con static PyMethodDef __pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat = {"train_document_dm_concat", (PyCFunction)__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_word_vocabs = 0; + PyObject *__pyx_v_doc_words = 0; PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; @@ -5572,14 +5856,14 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_document_dm_concat (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_doc_words,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "gensim/models/doc2vec_inner.pyx":508 + /* "gensim/models/doc2vec_inner.pyx":516 * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -5588,8 +5872,8 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "gensim/models/doc2vec_inner.pyx":509 - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + /* "gensim/models/doc2vec_inner.pyx":517 + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -5625,19 +5909,19 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doc_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -5686,7 +5970,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -5708,7 +5992,7 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con } } __pyx_v_model = values[0]; - __pyx_v_word_vocabs = values[1]; + __pyx_v_doc_words = values[1]; __pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; @@ -5723,18 +6007,18 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_doc_words, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":507 + /* "gensim/models/doc2vec_inner.pyx":515 * * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -5744,9 +6028,10 @@ static PyObject *__pyx_pw_6gensim_6models_13doc2vec_inner_5train_document_dm_con return __pyx_r; } -static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_doc_words, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; + int __pyx_v_sample; int __pyx_v__learn_doctags; int __pyx_v__learn_words; int __pyx_v__learn_hidden; @@ -5781,13 +6066,15 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_token = NULL; + PyObject *__pyx_v_predict_word = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_3; - PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; Py_ssize_t __pyx_t_5; long __pyx_t_6; Py_ssize_t __pyx_t_7; @@ -5796,10 +6083,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con PyObject *__pyx_t_10 = NULL; unsigned PY_LONG_LONG __pyx_t_11; PyObject *__pyx_t_12 = NULL; - int __pyx_t_13; + PyObject *(*__pyx_t_13)(PyObject *); __pyx_t_5numpy_uint32_t __pyx_t_14; int __pyx_t_15; int __pyx_t_16; + int __pyx_t_17; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -5811,125 +6099,140 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "gensim/models/doc2vec_inner.pyx":510 + /* "gensim/models/doc2vec_inner.pyx":518 * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int _learn_doctags = learn_doctags + * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 510; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 518; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 510; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 518; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":511 + /* "gensim/models/doc2vec_inner.pyx":519 * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags - * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 519; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":512 + /* "gensim/models/doc2vec_inner.pyx":520 * cdef int hs = model.hs * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * cdef int _learn_doctags = learn_doctags + * cdef int _learn_words = learn_words + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/doc2vec_inner.pyx":521 + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doctags = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":513 - * cdef int negative = model.negative + /* "gensim/models/doc2vec_inner.pyx":522 + * cdef int sample = (model.sample != 0) * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":514 + /* "gensim/models/doc2vec_inner.pyx":523 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 514; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 523; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":522 + /* "gensim/models/doc2vec_inner.pyx":531 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__alpha = __pyx_t_3; + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 531; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/doc2vec_inner.pyx":523 + /* "gensim/models/doc2vec_inner.pyx":532 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size # <<<<<<<<<<<<<< * cdef int vector_size = model.vector_size * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 523; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 523; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_layer1_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":524 + /* "gensim/models/doc2vec_inner.pyx":533 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_DOCUMENT_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 524; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 524; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 533; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 533; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_vector_size = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":532 + /* "gensim/models/doc2vec_inner.pyx":541 * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * cdef int expected_doctag_len = model.dm_tag_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 541; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 541; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":533 + /* "gensim/models/doc2vec_inner.pyx":542 * cdef int doctag_len * cdef int window = model.window * cdef int expected_doctag_len = model.dm_tag_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 533; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 533; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 542; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 542; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_expected_doctag_len = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":536 + /* "gensim/models/doc2vec_inner.pyx":545 * * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< @@ -5938,33 +6241,33 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_result = 0; - /* "gensim/models/doc2vec_inner.pyx":537 + /* "gensim/models/doc2vec_inner.pyx":546 * cdef int i, j, k, m, n * cdef long result = 0 * cdef int null_word_index = model.vocab['\0'].index # <<<<<<<<<<<<<< * * # For hierarchical softmax - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 537; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 537; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 537; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + */ + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 546; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyObject_GetItem(__pyx_t_3, __pyx_kp_s__5); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 546; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 537; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_index); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 546; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 546; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_null_word_index = __pyx_t_2; - /* "gensim/models/doc2vec_inner.pyx":550 + /* "gensim/models/doc2vec_inner.pyx":559 * cdef unsigned long long next_random * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * if doctag_len != expected_doctag_len: - * return 0 # skip doc without expected nmber of tags + * return 0 # skip doc without expected number of tags */ - __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 559; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; @@ -5973,20 +6276,20 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con } __pyx_v_doctag_len = ((int)__pyx_t_7); - /* "gensim/models/doc2vec_inner.pyx":551 + /* "gensim/models/doc2vec_inner.pyx":560 * * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< - * return 0 # skip doc without expected nmber of tags + * return 0 # skip doc without expected number of tags * */ __pyx_t_8 = ((__pyx_v_doctag_len != __pyx_v_expected_doctag_len) != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":552 + /* "gensim/models/doc2vec_inner.pyx":561 * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: - * return 0 # skip doc without expected nmber of tags # <<<<<<<<<<<<<< + * return 0 # skip doc without expected number of tags # <<<<<<<<<<<<<< * * # default vectors, locks from syn0/doctag_syn0 */ @@ -5996,7 +6299,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con goto __pyx_L0; } - /* "gensim/models/doc2vec_inner.pyx":555 + /* "gensim/models/doc2vec_inner.pyx":564 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -6007,32 +6310,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":556 + /* "gensim/models/doc2vec_inner.pyx":565 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 556; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L4; } __pyx_L4:; - /* "gensim/models/doc2vec_inner.pyx":557 + /* "gensim/models/doc2vec_inner.pyx":566 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "gensim/models/doc2vec_inner.pyx":558 + /* "gensim/models/doc2vec_inner.pyx":567 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -6043,35 +6346,35 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":559 + /* "gensim/models/doc2vec_inner.pyx":568 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 559; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 568; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 568; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 559; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_4); - __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L5; } __pyx_L5:; - /* "gensim/models/doc2vec_inner.pyx":560 + /* "gensim/models/doc2vec_inner.pyx":569 * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 569; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_vectors = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "gensim/models/doc2vec_inner.pyx":561 + /* "gensim/models/doc2vec_inner.pyx":570 * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -6082,32 +6385,32 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":562 + /* "gensim/models/doc2vec_inner.pyx":571 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_4); - __pyx_t_4 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 571; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; - /* "gensim/models/doc2vec_inner.pyx":563 + /* "gensim/models/doc2vec_inner.pyx":572 * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 563; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "gensim/models/doc2vec_inner.pyx":564 + /* "gensim/models/doc2vec_inner.pyx":573 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -6118,35 +6421,35 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":565 + /* "gensim/models/doc2vec_inner.pyx":574 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_3); + __pyx_t_3 = 0; goto __pyx_L7; } __pyx_L7:; - /* "gensim/models/doc2vec_inner.pyx":566 + /* "gensim/models/doc2vec_inner.pyx":575 * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "gensim/models/doc2vec_inner.pyx":568 + /* "gensim/models/doc2vec_inner.pyx":577 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -6156,23 +6459,23 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":569 + /* "gensim/models/doc2vec_inner.pyx":578 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 569; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 569; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 578; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L8; } __pyx_L8:; - /* "gensim/models/doc2vec_inner.pyx":571 + /* "gensim/models/doc2vec_inner.pyx":580 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6182,83 +6485,104 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":572 + /* "gensim/models/doc2vec_inner.pyx":581 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":573 + /* "gensim/models/doc2vec_inner.pyx":582 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * if negative or sample: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":574 + /* "gensim/models/doc2vec_inner.pyx":583 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_7 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cum_table_len = __pyx_t_7; + goto __pyx_L9; + } + __pyx_L9:; - /* "gensim/models/doc2vec_inner.pyx":575 + /* "gensim/models/doc2vec_inner.pyx":584 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_9 = (__pyx_v_negative != 0); + if (!__pyx_t_9) { + } else { + __pyx_t_8 = __pyx_t_9; + goto __pyx_L11_bool_binop_done; + } + __pyx_t_9 = (__pyx_v_sample != 0); + __pyx_t_8 = __pyx_t_9; + __pyx_L11_bool_binop_done:; + if (__pyx_t_8) { + + /* "gensim/models/doc2vec_inner.pyx":585 + * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyNumber_Add(__pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_10); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_10); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_v_next_random = __pyx_t_11; - goto __pyx_L9; + goto __pyx_L10; } - __pyx_L9:; + __pyx_L10:; - /* "gensim/models/doc2vec_inner.pyx":578 + /* "gensim/models/doc2vec_inner.pyx":588 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -6269,50 +6593,50 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":579 + /* "gensim/models/doc2vec_inner.pyx":589 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 579; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 589; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_12); __pyx_t_12 = 0; - goto __pyx_L10; + goto __pyx_L13; } - __pyx_L10:; + __pyx_L13:; - /* "gensim/models/doc2vec_inner.pyx":580 + /* "gensim/models/doc2vec_inner.pyx":590 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "gensim/models/doc2vec_inner.pyx":581 + /* "gensim/models/doc2vec_inner.pyx":591 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -6323,250 +6647,343 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "gensim/models/doc2vec_inner.pyx":582 + /* "gensim/models/doc2vec_inner.pyx":592 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = PyDict_New(); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_3, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_4, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 592; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L11; + goto __pyx_L14; } - __pyx_L11:; + __pyx_L14:; - /* "gensim/models/doc2vec_inner.pyx":583 + /* "gensim/models/doc2vec_inner.pyx":593 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + * vlookup = model.vocab */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "gensim/models/doc2vec_inner.pyx":585 + /* "gensim/models/doc2vec_inner.pyx":595 * _neu1 = np.PyArray_DATA(neu1) * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< - * j = 0 - * for i in range(document_len): + * vlookup = model.vocab # <<<<<<<<<<<<<< + * i = 0 + * for token in doc_words: */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_6 = 10000; - if (((__pyx_t_7 < __pyx_t_6) != 0)) { - __pyx_t_5 = __pyx_t_7; - } else { - __pyx_t_5 = __pyx_t_6; - } - __pyx_v_document_len = ((int)__pyx_t_5); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_v_vlookup = __pyx_t_10; + __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":586 + /* "gensim/models/doc2vec_inner.pyx":596 * - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * j = 0 # <<<<<<<<<<<<<< - * for i in range(document_len): - * word = word_vocabs[i] + * vlookup = model.vocab + * i = 0 # <<<<<<<<<<<<<< + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None */ - __pyx_v_j = 0; + __pyx_v_i = 0; - /* "gensim/models/doc2vec_inner.pyx":587 - * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - * j = 0 - * for i in range(document_len): # <<<<<<<<<<<<<< - * word = word_vocabs[i] - * if word is None: + /* "gensim/models/doc2vec_inner.pyx":597 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word */ - __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; - - /* "gensim/models/doc2vec_inner.pyx":588 - * j = 0 - * for i in range(document_len): - * word = word_vocabs[i] # <<<<<<<<<<<<<< - * if word is None: - * # shrink document to leave out word - */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 588; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + if (likely(PyList_CheckExact(__pyx_v_doc_words)) || PyTuple_CheckExact(__pyx_v_doc_words)) { + __pyx_t_10 = __pyx_v_doc_words; __Pyx_INCREF(__pyx_t_10); __pyx_t_7 = 0; + __pyx_t_13 = NULL; + } else { + __pyx_t_7 = -1; __pyx_t_10 = PyObject_GetIter(__pyx_v_doc_words); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_10); - __pyx_t_10 = 0; + __pyx_t_13 = Py_TYPE(__pyx_t_10)->tp_iternext; if (unlikely(!__pyx_t_13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_13)) { + if (likely(PyList_CheckExact(__pyx_t_10))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_10)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_10, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_10)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_10, __pyx_t_7); __Pyx_INCREF(__pyx_t_3); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_10, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_3 = __pyx_t_13(__pyx_t_10); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/doc2vec_inner.pyx":589 - * for i in range(document_len): - * word = word_vocabs[i] - * if word is None: # <<<<<<<<<<<<<< - * # shrink document to leave out word - * document_len = document_len - 1 + /* "gensim/models/doc2vec_inner.pyx":598 + * i = 0 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + */ + __pyx_t_8 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ((__pyx_t_8 != 0)) { + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_t_1; + __pyx_t_1 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_3 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_3); + __pyx_t_3 = 0; + + /* "gensim/models/doc2vec_inner.pyx":599 + * for token in doc_words: + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word # <<<<<<<<<<<<<< + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): */ - __pyx_t_8 = (__pyx_v_word == Py_None); + __pyx_t_8 = (__pyx_v_predict_word == Py_None); __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":591 - * if word is None: - * # shrink document to leave out word - * document_len = document_len - 1 # <<<<<<<<<<<<<< - * continue # leaving j unchanged - * else: + /* "gensim/models/doc2vec_inner.pyx":600 + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged # <<<<<<<<<<<<<< + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue */ - __pyx_v_document_len = (__pyx_v_document_len - 1); + goto __pyx_L15_continue; + } - /* "gensim/models/doc2vec_inner.pyx":592 - * # shrink document to leave out word - * document_len = document_len - 1 - * continue # leaving j unchanged # <<<<<<<<<<<<<< - * else: - * indexes[j] = word.index + /* "gensim/models/doc2vec_inner.pyx":601 + * if predict_word is None: # shrink document to leave out word + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[i] = predict_word.index */ - goto __pyx_L12_continue; + __pyx_t_8 = (__pyx_v_sample != 0); + if (__pyx_t_8) { + } else { + __pyx_t_9 = __pyx_t_8; + goto __pyx_L19_bool_binop_done; } - /*else*/ { + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_12 = PyObject_RichCompare(__pyx_t_3, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_12); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_12); if (unlikely(__pyx_t_8 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + __pyx_t_9 = __pyx_t_8; + __pyx_L19_bool_binop_done:; + if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":594 - * continue # leaving j unchanged - * else: - * indexes[j] = word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[j] = len(word.code) + /* "gensim/models/doc2vec_inner.pyx":602 + * continue # leaving i unchanged + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[i] = predict_word.index + * if hs: */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 594; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 594; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; + goto __pyx_L15_continue; + } - /* "gensim/models/doc2vec_inner.pyx":595 - * else: - * indexes[j] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) + /* "gensim/models/doc2vec_inner.pyx":603 + * if sample and predict_word.sample_int < random_int32(&next_random): + * continue + * indexes[i] = predict_word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(predict_word.code) */ - __pyx_t_9 = (__pyx_v_hs != 0); - if (__pyx_t_9) { + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_12); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; - /* "gensim/models/doc2vec_inner.pyx":596 - * indexes[j] = word.index - * if hs: - * codelens[j] = len(word.code) # <<<<<<<<<<<<<< - * codes[j] = np.PyArray_DATA(word.code) - * points[j] = np.PyArray_DATA(word.point) + /* "gensim/models/doc2vec_inner.pyx":604 + * continue + * indexes[i] = predict_word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __pyx_t_5 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_5); + __pyx_t_9 = (__pyx_v_hs != 0); + if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":597 - * if hs: - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[j] = np.PyArray_DATA(word.point) - * else: + /* "gensim/models/doc2vec_inner.pyx":605 + * indexes[i] = predict_word.index + * if hs: + * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) + */ + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_5 = PyObject_Length(__pyx_t_12); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); + + /* "gensim/models/doc2vec_inner.pyx":606 + * if hs: + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 597; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - - /* "gensim/models/doc2vec_inner.pyx":598 - * codelens[j] = len(word.code) - * codes[j] = np.PyArray_DATA(word.code) - * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * else: - * codelens[j] = 1 + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_12))); + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + + /* "gensim/models/doc2vec_inner.pyx":607 + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< + * result += 1 + * i += 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 598; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - goto __pyx_L15; - } - /*else*/ { + __pyx_t_12 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 607; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + if (!(likely(((__pyx_t_12) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_12, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 607; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_12))); + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + goto __pyx_L21; + } + __pyx_L21:; - /* "gensim/models/doc2vec_inner.pyx":600 - * points[j] = np.PyArray_DATA(word.point) - * else: - * codelens[j] = 1 # <<<<<<<<<<<<<< - * result += 1 - * j = j + 1 + /* "gensim/models/doc2vec_inner.pyx":608 + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 # <<<<<<<<<<<<<< + * i += 1 + * if i == MAX_DOCUMENT_LEN: */ - (__pyx_v_codelens[__pyx_v_j]) = 1; - } - __pyx_L15:; + __pyx_v_result = (__pyx_v_result + 1); - /* "gensim/models/doc2vec_inner.pyx":601 - * else: - * codelens[j] = 1 - * result += 1 # <<<<<<<<<<<<<< - * j = j + 1 - * + /* "gensim/models/doc2vec_inner.pyx":609 + * points[i] = np.PyArray_DATA(predict_word.point) + * result += 1 + * i += 1 # <<<<<<<<<<<<<< + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_v_i = (__pyx_v_i + 1); - /* "gensim/models/doc2vec_inner.pyx":602 - * codelens[j] = 1 - * result += 1 - * j = j + 1 # <<<<<<<<<<<<<< + /* "gensim/models/doc2vec_inner.pyx":610 + * result += 1 + * i += 1 + * if i == MAX_DOCUMENT_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * document_len = i + */ + __pyx_t_9 = ((__pyx_v_i == 10000) != 0); + if (__pyx_t_9) { + + /* "gensim/models/doc2vec_inner.pyx":611 + * i += 1 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * document_len = i * - * for i in range(doctag_len): */ - __pyx_v_j = (__pyx_v_j + 1); + goto __pyx_L16_break; } - __pyx_L12_continue:; + + /* "gensim/models/doc2vec_inner.pyx":597 + * vlookup = model.vocab + * i = 0 + * for token in doc_words: # <<<<<<<<<<<<<< + * predict_word = vlookup[token] if token in vlookup else None + * if predict_word is None: # shrink document to leave out word + */ + __pyx_L15_continue:; } + __pyx_L16_break:; + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "gensim/models/doc2vec_inner.pyx":604 - * j = j + 1 + /* "gensim/models/doc2vec_inner.pyx":612 + * if i == MAX_DOCUMENT_LEN: + * break # TODO: log warning, tally overflow? + * document_len = i # <<<<<<<<<<<<<< + * + * for i in range(doctag_len): + */ + __pyx_v_document_len = __pyx_v_i; + + /* "gensim/models/doc2vec_inner.pyx":614 + * document_len = i * * for i in range(doctag_len): # <<<<<<<<<<<<<< * _doctag_indexes[i] = doctag_indexes[i] * result += 1 */ __pyx_t_2 = __pyx_v_doctag_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { + __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":605 + /* "gensim/models/doc2vec_inner.pyx":615 * * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_10); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; - /* "gensim/models/doc2vec_inner.pyx":606 + /* "gensim/models/doc2vec_inner.pyx":616 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -6576,7 +6993,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_v_result = (__pyx_v_result + 1); } - /* "gensim/models/doc2vec_inner.pyx":609 + /* "gensim/models/doc2vec_inner.pyx":619 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -6590,7 +7007,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con #endif /*try:*/ { - /* "gensim/models/doc2vec_inner.pyx":610 + /* "gensim/models/doc2vec_inner.pyx":620 * # release GIL & train on the document * with nogil: * for i in range(document_len): # <<<<<<<<<<<<<< @@ -6598,10 +7015,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * k = i + window + 1 # past document end OK: will pad with null word */ __pyx_t_2 = __pyx_v_document_len; - for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { - __pyx_v_i = __pyx_t_13; + for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_2; __pyx_t_15+=1) { + __pyx_v_i = __pyx_t_15; - /* "gensim/models/doc2vec_inner.pyx":611 + /* "gensim/models/doc2vec_inner.pyx":621 * with nogil: * for i in range(document_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< @@ -6610,7 +7027,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "gensim/models/doc2vec_inner.pyx":612 + /* "gensim/models/doc2vec_inner.pyx":622 * for i in range(document_len): * j = i - window # negative OK: will pad with null word * k = i + window + 1 # past document end OK: will pad with null word # <<<<<<<<<<<<<< @@ -6619,18 +7036,18 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "gensim/models/doc2vec_inner.pyx":615 + /* "gensim/models/doc2vec_inner.pyx":625 * * # compose l1 & clear work * for m in range(doctag_len): # <<<<<<<<<<<<<< * # doc vector(s) * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], */ - __pyx_t_15 = __pyx_v_doctag_len; - for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { - __pyx_v_m = __pyx_t_16; + __pyx_t_16 = __pyx_v_doctag_len; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { + __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":617 + /* "gensim/models/doc2vec_inner.pyx":627 * for m in range(doctag_len): * # doc vector(s) * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -6640,7 +7057,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); } - /* "gensim/models/doc2vec_inner.pyx":619 + /* "gensim/models/doc2vec_inner.pyx":629 * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * n = 0 # <<<<<<<<<<<<<< @@ -6649,18 +7066,18 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_n = 0; - /* "gensim/models/doc2vec_inner.pyx":620 + /* "gensim/models/doc2vec_inner.pyx":630 * vector_size * cython.sizeof(REAL_t)) * n = 0 * for m in range(j, k): # <<<<<<<<<<<<<< * # word vectors in window * if m == i: */ - __pyx_t_15 = __pyx_v_k; - for (__pyx_t_16 = __pyx_v_j; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { - __pyx_v_m = __pyx_t_16; + __pyx_t_16 = __pyx_v_k; + for (__pyx_t_17 = __pyx_v_j; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { + __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":622 + /* "gensim/models/doc2vec_inner.pyx":632 * for m in range(j, k): * # word vectors in window * if m == i: # <<<<<<<<<<<<<< @@ -6670,17 +7087,17 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":623 + /* "gensim/models/doc2vec_inner.pyx":633 * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< * if m < 0 or m >= document_len: * window_indexes[n] = null_word_index */ - goto __pyx_L25_continue; + goto __pyx_L32_continue; } - /* "gensim/models/doc2vec_inner.pyx":624 + /* "gensim/models/doc2vec_inner.pyx":634 * if m == i: * continue * if m < 0 or m >= document_len: # <<<<<<<<<<<<<< @@ -6691,14 +7108,14 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con if (!__pyx_t_8) { } else { __pyx_t_9 = __pyx_t_8; - goto __pyx_L29_bool_binop_done; + goto __pyx_L36_bool_binop_done; } __pyx_t_8 = ((__pyx_v_m >= __pyx_v_document_len) != 0); __pyx_t_9 = __pyx_t_8; - __pyx_L29_bool_binop_done:; + __pyx_L36_bool_binop_done:; if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":625 + /* "gensim/models/doc2vec_inner.pyx":635 * continue * if m < 0 or m >= document_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< @@ -6706,11 +7123,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * window_indexes[n] = indexes[m] */ (__pyx_v_window_indexes[__pyx_v_n]) = __pyx_v_null_word_index; - goto __pyx_L28; + goto __pyx_L35; } /*else*/ { - /* "gensim/models/doc2vec_inner.pyx":627 + /* "gensim/models/doc2vec_inner.pyx":637 * window_indexes[n] = null_word_index * else: * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< @@ -6719,9 +7136,9 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ (__pyx_v_window_indexes[__pyx_v_n]) = (__pyx_v_indexes[__pyx_v_m]); } - __pyx_L28:; + __pyx_L35:; - /* "gensim/models/doc2vec_inner.pyx":628 + /* "gensim/models/doc2vec_inner.pyx":638 * else: * window_indexes[n] = indexes[m] * n = n + 1 # <<<<<<<<<<<<<< @@ -6729,10 +7146,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], */ __pyx_v_n = (__pyx_v_n + 1); - __pyx_L25_continue:; + __pyx_L32_continue:; } - /* "gensim/models/doc2vec_inner.pyx":629 + /* "gensim/models/doc2vec_inner.pyx":639 * window_indexes[n] = indexes[m] * n = n + 1 * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -6740,10 +7157,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * vector_size * cython.sizeof(REAL_t)) */ __pyx_t_6 = (2 * __pyx_v_window); - for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { - __pyx_v_m = __pyx_t_15; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_6; __pyx_t_16+=1) { + __pyx_v_m = __pyx_t_16; - /* "gensim/models/doc2vec_inner.pyx":630 + /* "gensim/models/doc2vec_inner.pyx":640 * n = n + 1 * for m in range(2 * window): * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -6753,7 +7170,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con memcpy((&(__pyx_v__neu1[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); } - /* "gensim/models/doc2vec_inner.pyx":632 + /* "gensim/models/doc2vec_inner.pyx":642 * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -6762,7 +7179,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ memset(__pyx_v__work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/doc2vec_inner.pyx":634 + /* "gensim/models/doc2vec_inner.pyx":644 * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< @@ -6772,7 +7189,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":635 + /* "gensim/models/doc2vec_inner.pyx":645 * * if hs: * fast_document_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -6780,11 +7197,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * layer1_size, vector_size, _learn_hidden) */ __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - goto __pyx_L33; + goto __pyx_L40; } - __pyx_L33:; + __pyx_L40:; - /* "gensim/models/doc2vec_inner.pyx":638 + /* "gensim/models/doc2vec_inner.pyx":648 * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -6794,7 +7211,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v_negative != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":639 + /* "gensim/models/doc2vec_inner.pyx":649 * layer1_size, vector_size, _learn_hidden) * if negative: * next_random = fast_document_dmc_neg(negative, cum_table, cum_table_len, next_random, # <<<<<<<<<<<<<< @@ -6802,11 +7219,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * layer1_size, vector_size, _learn_hidden) */ __pyx_v_next_random = __pyx_f_6gensim_6models_13doc2vec_inner_fast_document_dmc_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - goto __pyx_L34; + goto __pyx_L41; } - __pyx_L34:; + __pyx_L41:; - /* "gensim/models/doc2vec_inner.pyx":643 + /* "gensim/models/doc2vec_inner.pyx":653 * layer1_size, vector_size, _learn_hidden) * * if _learn_doctags: # <<<<<<<<<<<<<< @@ -6816,18 +7233,18 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v__learn_doctags != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":644 + /* "gensim/models/doc2vec_inner.pyx":654 * * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) */ - __pyx_t_15 = __pyx_v_doctag_len; - for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { - __pyx_v_m = __pyx_t_16; + __pyx_t_16 = __pyx_v_doctag_len; + for (__pyx_t_17 = 0; __pyx_t_17 < __pyx_t_16; __pyx_t_17+=1) { + __pyx_v_m = __pyx_t_17; - /* "gensim/models/doc2vec_inner.pyx":645 + /* "gensim/models/doc2vec_inner.pyx":655 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< @@ -6836,11 +7253,11 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L35; + goto __pyx_L42; } - __pyx_L35:; + __pyx_L42:; - /* "gensim/models/doc2vec_inner.pyx":647 + /* "gensim/models/doc2vec_inner.pyx":657 * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -6850,7 +7267,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con __pyx_t_9 = (__pyx_v__learn_words != 0); if (__pyx_t_9) { - /* "gensim/models/doc2vec_inner.pyx":648 + /* "gensim/models/doc2vec_inner.pyx":658 * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -6858,10 +7275,10 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) */ __pyx_t_6 = (2 * __pyx_v_window); - for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { - __pyx_v_m = __pyx_t_15; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_6; __pyx_t_16+=1) { + __pyx_v_m = __pyx_t_16; - /* "gensim/models/doc2vec_inner.pyx":649 + /* "gensim/models/doc2vec_inner.pyx":659 * if _learn_words: * for m in range(2 * window): * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], # <<<<<<<<<<<<<< @@ -6870,13 +7287,13 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L38; + goto __pyx_L45; } - __pyx_L38:; + __pyx_L45:; } } - /* "gensim/models/doc2vec_inner.pyx":609 + /* "gensim/models/doc2vec_inner.pyx":619 * * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< @@ -6888,28 +7305,28 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L20; + goto __pyx_L27; } - __pyx_L20:; + __pyx_L27:; } } - /* "gensim/models/doc2vec_inner.pyx":652 + /* "gensim/models/doc2vec_inner.pyx":662 * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * * return result # <<<<<<<<<<<<<< */ __Pyx_XDECREF(__pyx_r); - __pyx_t_10 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 652; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 662; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_r = __pyx_t_10; __pyx_t_10 = 0; goto __pyx_L0; - /* "gensim/models/doc2vec_inner.pyx":507 + /* "gensim/models/doc2vec_inner.pyx":515 * * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -6917,13 +7334,15 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_3); __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_12); __Pyx_AddTraceback("gensim.models.doc2vec_inner.train_document_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; - __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_predict_word); __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_neu1); __Pyx_XDECREF(__pyx_v_word_vectors); @@ -6935,7 +7354,7 @@ static PyObject *__pyx_pf_6gensim_6models_13doc2vec_inner_4train_document_dm_con return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -6985,7 +7404,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_GIVEREF(__pyx_v_info->obj); } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 * # of flags * * if info == NULL: return # <<<<<<<<<<<<<< @@ -6998,7 +7417,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L0; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 * * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -7007,7 +7426,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_endian_detector = 1; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -7016,7 +7435,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 * cdef bint little_endian = ((&endian_detector)[0] != 0) * * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -7025,7 +7444,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -7035,7 +7454,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 * * if sizeof(npy_intp) != sizeof(Py_ssize_t): * copy_shape = 1 # <<<<<<<<<<<<<< @@ -7047,7 +7466,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 * copy_shape = 1 * else: * copy_shape = 0 # <<<<<<<<<<<<<< @@ -7058,7 +7477,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L4:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -7072,7 +7491,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L6_bool_binop_done; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -7084,7 +7503,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L6_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -7098,7 +7517,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -7112,7 +7531,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L9_bool_binop_done; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -7124,7 +7543,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L9_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -7138,7 +7557,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 * raise ValueError(u"ndarray is not Fortran contiguous") * * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< @@ -7147,7 +7566,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 * * info.buf = PyArray_DATA(self) * info.ndim = ndim # <<<<<<<<<<<<<< @@ -7156,7 +7575,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->ndim = __pyx_v_ndim; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -7166,7 +7585,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (__pyx_v_copy_shape != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 * # Allocate new buffer for strides and shape info. * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<< @@ -7175,7 +7594,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2))); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim # <<<<<<<<<<<<<< @@ -7184,7 +7603,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim * for i in range(ndim): # <<<<<<<<<<<<<< @@ -7195,7 +7614,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 * info.shape = info.strides + ndim * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< @@ -7204,7 +7623,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< @@ -7217,7 +7636,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * info.shape[i] = PyArray_DIMS(self)[i] * else: * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -7226,7 +7645,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 * else: * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -7237,7 +7656,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L11:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -7246,7 +7665,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->suboffsets = NULL; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< @@ -7255,7 +7674,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< @@ -7264,7 +7683,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 * * cdef int t * cdef char* f = NULL # <<<<<<<<<<<<<< @@ -7273,7 +7692,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_f = NULL; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 * cdef int t * cdef char* f = NULL * cdef dtype descr = self.descr # <<<<<<<<<<<<<< @@ -7285,7 +7704,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 * cdef int offset * * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< @@ -7294,7 +7713,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -7312,7 +7731,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L15_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251 * if not hasfields and not copy_shape: * # do not call releasebuffer * info.obj = None # <<<<<<<<<<<<<< @@ -7328,7 +7747,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 * else: * # need to call releasebuffer * info.obj = self # <<<<<<<<<<<<<< @@ -7343,7 +7762,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L14:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -7353,7 +7772,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 * * if not hasfields: * t = descr.type_num # <<<<<<<<<<<<<< @@ -7363,7 +7782,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_4 = __pyx_v_descr->type_num; __pyx_v_t = __pyx_t_4; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -7383,7 +7802,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L20_next_or:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -7401,7 +7820,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L19_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -7415,7 +7834,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -7424,7 +7843,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ switch (__pyx_v_t) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< @@ -7435,7 +7854,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_b; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< @@ -7446,7 +7865,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_B; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< @@ -7457,7 +7876,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_h; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< @@ -7468,7 +7887,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_H; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< @@ -7479,7 +7898,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_i; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< @@ -7490,7 +7909,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_I; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< @@ -7501,7 +7920,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_l; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< @@ -7512,7 +7931,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_L; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< @@ -7523,7 +7942,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_q; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< @@ -7534,7 +7953,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Q; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< @@ -7545,7 +7964,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_f; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< @@ -7556,7 +7975,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_d; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< @@ -7567,7 +7986,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_g; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< @@ -7578,7 +7997,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zf; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< @@ -7589,7 +8008,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zd; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< @@ -7600,7 +8019,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zg; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -7612,7 +8031,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; default: - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 * elif t == NPY_OBJECT: f = "O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -7638,7 +8057,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f # <<<<<<<<<<<<<< @@ -7647,7 +8066,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = __pyx_v_f; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f * return # <<<<<<<<<<<<<< @@ -7659,7 +8078,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 * return * else: * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< @@ -7668,7 +8087,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = ((char *)malloc(255)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 * else: * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< @@ -7677,7 +8096,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->format[0]) = '^'; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 # <<<<<<<<<<<<<< @@ -7686,7 +8105,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_offset = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< @@ -7696,7 +8115,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 255), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_f = __pyx_t_7; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 * info.format + _buffer_format_string_len, * &offset) * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< @@ -7706,7 +8125,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_f[0]) = '\x00'; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -7738,7 +8157,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7762,7 +8181,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("__releasebuffer__", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -7772,7 +8191,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): * stdlib.free(info.format) # <<<<<<<<<<<<<< @@ -7784,7 +8203,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s } __pyx_L3:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 * if PyArray_HASFIELDS(self): * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -7794,7 +8213,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): * stdlib.free(info.strides) # <<<<<<<<<<<<<< @@ -7806,7 +8225,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s } __pyx_L4:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7818,7 +8237,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __Pyx_RefNannyFinishContext(); } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7835,7 +8254,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -7849,7 +8268,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7868,7 +8287,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7885,7 +8304,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -7899,7 +8318,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7918,7 +8337,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7935,7 +8354,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -7949,7 +8368,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7968,7 +8387,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -7985,7 +8404,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":781 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":781 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -7999,7 +8418,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -8018,7 +8437,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -8035,7 +8454,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -8049,7 +8468,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -8068,7 +8487,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -8100,7 +8519,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_util_dtypestring", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":793 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":793 * cdef int delta_offset * cdef tuple i * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -8109,7 +8528,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_endian_detector = 1; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 * cdef tuple i * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -8118,7 +8537,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -8140,7 +8559,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 * * for childname in descr.names: * fields = descr.fields[childname] # <<<<<<<<<<<<<< @@ -8157,7 +8576,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 * for childname in descr.names: * fields = descr.fields[childname] * child, new_offset = fields # <<<<<<<<<<<<<< @@ -8196,7 +8615,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ -8213,7 +8632,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -8227,7 +8646,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -8247,7 +8666,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L8_next_or:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 * * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -8265,7 +8684,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_L7_bool_binop_done:; if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -8279,7 +8698,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 * * # Output padding bytes * while offset[0] < new_offset: # <<<<<<<<<<<<<< @@ -8295,7 +8714,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_6) break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":817 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":817 * # Output padding bytes * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< @@ -8304,7 +8723,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ (__pyx_v_f[0]) = 120; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte * f += 1 # <<<<<<<<<<<<<< @@ -8313,7 +8732,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 * f[0] = 120 # "x"; pad byte * f += 1 * offset[0] += 1 # <<<<<<<<<<<<<< @@ -8324,7 +8743,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 * offset[0] += 1 * * offset[0] += child.itemsize # <<<<<<<<<<<<<< @@ -8334,7 +8753,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -8344,7 +8763,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":824 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":824 * * if not PyDataType_HASFIELDS(child): * t = child.type_num # <<<<<<<<<<<<<< @@ -8356,7 +8775,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -8366,7 +8785,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -8380,7 +8799,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 * * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< @@ -8398,7 +8817,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< @@ -8416,7 +8835,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< @@ -8434,7 +8853,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< @@ -8452,7 +8871,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< @@ -8470,7 +8889,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< @@ -8488,7 +8907,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< @@ -8506,7 +8925,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< @@ -8524,7 +8943,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< @@ -8542,7 +8961,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< @@ -8560,7 +8979,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< @@ -8578,7 +8997,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< @@ -8596,7 +9015,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< @@ -8614,7 +9033,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< @@ -8634,7 +9053,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":843 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":843 * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< @@ -8654,7 +9073,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< @@ -8674,7 +9093,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< @@ -8693,7 +9112,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * elif t == NPY_OBJECT: f[0] = 79 #"O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -8716,7 +9135,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L15:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":848 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":848 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * f += 1 # <<<<<<<<<<<<<< @@ -8728,7 +9147,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 * # Cython ignores struct boundary information ("T{...}"), * # so don't output it * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< @@ -8740,7 +9159,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L13:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -8750,7 +9169,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 * # so don't output it * f = _util_dtypestring(child, f, end, offset) * return f # <<<<<<<<<<<<<< @@ -8760,7 +9179,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_r = __pyx_v_f; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -8785,7 +9204,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8800,7 +9219,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a int __pyx_t_2; __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -8811,7 +9230,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 * cdef PyObject* baseptr * if base is None: * baseptr = NULL # <<<<<<<<<<<<<< @@ -8823,7 +9242,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 * baseptr = NULL * else: * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< @@ -8832,7 +9251,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_INCREF(__pyx_v_base); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 * else: * Py_INCREF(base) # important to do this before decref below! * baseptr = base # <<<<<<<<<<<<<< @@ -8843,7 +9262,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } __pyx_L3:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 * Py_INCREF(base) # important to do this before decref below! * baseptr = base * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< @@ -8852,7 +9271,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_XDECREF(__pyx_v_arr->base); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 * baseptr = base * Py_XDECREF(arr.base) * arr.base = baseptr # <<<<<<<<<<<<<< @@ -8861,7 +9280,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_arr->base = __pyx_v_baseptr; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8873,7 +9292,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8887,7 +9306,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -8897,7 +9316,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: * return None # <<<<<<<<<<<<<< @@ -8911,7 +9330,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 * return None * else: * return arr.base # <<<<<<<<<<<<<< @@ -8922,7 +9341,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py goto __pyx_L0; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8967,7 +9386,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_REAL, __pyx_k_REAL, sizeof(__pyx_k_REAL), 0, 0, 1, 1}, {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, - {&__pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_k_Volumes_work_workspace_gensim_t, sizeof(__pyx_k_Volumes_work_workspace_gensim_t), 0, 0, 1, 0}, {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, @@ -8979,6 +9397,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_cum_table, __pyx_k_cum_table, sizeof(__pyx_k_cum_table), 0, 0, 1, 1}, {&__pyx_n_s_cum_table_len, __pyx_k_cum_table_len, sizeof(__pyx_k_cum_table_len), 0, 0, 1, 1}, {&__pyx_n_s_dm_tag_count, __pyx_k_dm_tag_count, sizeof(__pyx_k_dm_tag_count), 0, 0, 1, 1}, + {&__pyx_n_s_doc_words, __pyx_k_doc_words, sizeof(__pyx_k_doc_words), 0, 0, 1, 1}, {&__pyx_n_s_doctag_indexes, __pyx_k_doctag_indexes, sizeof(__pyx_k_doctag_indexes), 0, 0, 1, 1}, {&__pyx_n_s_doctag_indexes_2, __pyx_k_doctag_indexes_2, sizeof(__pyx_k_doctag_indexes_2), 0, 0, 1, 1}, {&__pyx_n_s_doctag_len, __pyx_k_doctag_len, sizeof(__pyx_k_doctag_len), 0, 0, 1, 1}, @@ -8996,6 +9415,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_doc2vec_inner, __pyx_k_gensim_models_doc2vec_inner, sizeof(__pyx_k_gensim_models_doc2vec_inner), 0, 0, 1, 1}, + {&__pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_k_home_ubuntu_src_gensim_bigdocve, sizeof(__pyx_k_home_ubuntu_src_gensim_bigdocve), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, @@ -9028,11 +9448,14 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, {&__pyx_n_s_points, __pyx_k_points, sizeof(__pyx_k_points), 0, 0, 1, 1}, {&__pyx_n_s_predict_word, __pyx_k_predict_word, sizeof(__pyx_k_predict_word), 0, 0, 1, 1}, + {&__pyx_n_s_r, __pyx_k_r, sizeof(__pyx_k_r), 0, 0, 1, 1}, {&__pyx_n_s_randint, __pyx_k_randint, sizeof(__pyx_k_randint), 0, 0, 1, 1}, {&__pyx_n_s_random, __pyx_k_random, sizeof(__pyx_k_random), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_reduced_windows, __pyx_k_reduced_windows, sizeof(__pyx_k_reduced_windows), 0, 0, 1, 1}, {&__pyx_n_s_result, __pyx_k_result, sizeof(__pyx_k_result), 0, 0, 1, 1}, + {&__pyx_n_s_sample, __pyx_k_sample, sizeof(__pyx_k_sample), 0, 0, 1, 1}, + {&__pyx_n_s_sample_int, __pyx_k_sample_int, sizeof(__pyx_k_sample_int), 0, 0, 1, 1}, {&__pyx_n_s_scipy_linalg_blas, __pyx_k_scipy_linalg_blas, sizeof(__pyx_k_scipy_linalg_blas), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, {&__pyx_n_s_syn0, __pyx_k_syn0, sizeof(__pyx_k_syn0), 0, 0, 1, 1}, @@ -9040,6 +9463,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, {&__pyx_n_s_train_document_dbow, __pyx_k_train_document_dbow, sizeof(__pyx_k_train_document_dbow), 0, 0, 1, 1}, {&__pyx_n_s_train_document_dm, __pyx_k_train_document_dm, sizeof(__pyx_k_train_document_dm), 0, 0, 1, 1}, {&__pyx_n_s_train_document_dm_concat, __pyx_k_train_document_dm_concat, sizeof(__pyx_k_train_document_dm_concat), 0, 0, 1, 1}, @@ -9047,16 +9471,15 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_train_words_2, __pyx_k_train_words_2, sizeof(__pyx_k_train_words_2), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, + {&__pyx_n_s_vlookup, __pyx_k_vlookup, sizeof(__pyx_k_vlookup), 0, 0, 1, 1}, {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_window_indexes, __pyx_k_window_indexes, sizeof(__pyx_k_window_indexes), 0, 0, 1, 1}, - {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {&__pyx_n_s_word2vec, __pyx_k_word2vec, sizeof(__pyx_k_word2vec), 0, 0, 1, 1}, {&__pyx_n_s_word_locks, __pyx_k_word_locks, sizeof(__pyx_k_word_locks), 0, 0, 1, 1}, {&__pyx_n_s_word_locks_2, __pyx_k_word_locks_2, sizeof(__pyx_k_word_locks_2), 0, 0, 1, 1}, {&__pyx_n_s_word_vectors, __pyx_k_word_vectors, sizeof(__pyx_k_word_vectors), 0, 0, 1, 1}, {&__pyx_n_s_word_vectors_2, __pyx_k_word_vectors_2, sizeof(__pyx_k_word_vectors_2), 0, 0, 1, 1}, - {&__pyx_n_s_word_vocabs, __pyx_k_word_vocabs, sizeof(__pyx_k_word_vocabs), 0, 0, 1, 1}, {&__pyx_n_s_work, __pyx_k_work, sizeof(__pyx_k_work), 0, 0, 1, 1}, {&__pyx_n_s_work_2, __pyx_k_work_2, sizeof(__pyx_k_work_2), 0, 0, 1, 1}, {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, @@ -9064,7 +9487,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -9076,49 +9499,49 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/doc2vec_inner.pyx":283 - * cum_table = (np.PyArray_DATA(model.cum_table)) + /* "gensim/models/doc2vec_inner.pyx":286 * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/doc2vec_inner.pyx":418 - * cum_table = (np.PyArray_DATA(model.cum_table)) + /* "gensim/models/doc2vec_inner.pyx":423 * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "gensim/models/doc2vec_inner.pyx":575 - * cum_table = (np.PyArray_DATA(model.cum_table)) + /* "gensim/models/doc2vec_inner.pyx":585 * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 585; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -9129,7 +9552,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -9140,7 +9563,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -9151,7 +9574,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -9162,7 +9585,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -9173,7 +9596,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__12); __Pyx_GIVEREF(__pyx_tuple__12); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -9187,38 +9610,38 @@ static int __Pyx_InitCachedConstants(void) { /* "gensim/models/doc2vec_inner.pyx":222 * * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_r, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_document_dbow, 222, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_train_document_dbow, 222, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":358 * * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_document_dm, 355, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_train_document_dm, 358, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/doc2vec_inner.pyx":507 + /* "gensim/models/doc2vec_inner.pyx":515 * * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(53, __pyx_n_s_model, __pyx_n_s_doc_words, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_predict_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_document_dm_concat, 507, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 53, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_train_document_dm_concat, 515, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9350,6 +9773,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (__Pyx_ImportFunction(__pyx_t_2, "our_dot_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ImportFunction(__pyx_t_2, "our_saxpy_noblas", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas, "void (int const *, float const *, float const *, int const *, float *, int const *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ImportFunction(__pyx_t_2, "bisect_left", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_bisect_left, "unsigned PY_LONG_LONG (__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ImportFunction(__pyx_t_2, "random_int32", (void (**)(void))&__pyx_f_6gensim_6models_14word2vec_inner_random_int32, "unsigned PY_LONG_LONG (unsigned PY_LONG_LONG *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} Py_DECREF(__pyx_t_2); __pyx_t_2 = 0; /*--- Execution code ---*/ @@ -9398,7 +9822,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) * * from scipy.linalg.blas import fblas # <<<<<<<<<<<<<< * - * from word2vec_inner cimport bisect_left, \ + * from word2vec_inner cimport bisect_left, random_int32, \ */ __pyx_t_4 = PyList_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 18; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -9456,7 +9880,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "gensim/models/doc2vec_inner.pyx":222 * * - * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -9465,28 +9889,28 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dbow, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/doc2vec_inner.pyx":355 + /* "gensim/models/doc2vec_inner.pyx":358 * * - * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_3train_document_dm, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "gensim/models/doc2vec_inner.pyx":507 + /* "gensim/models/doc2vec_inner.pyx":515 * * - * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, NULL, __pyx_n_s_gensim_models_doc2vec_inner); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm_concat, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 507; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm_concat, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 515; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; /* "gensim/models/doc2vec_inner.pyx":1 @@ -9499,7 +9923,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -10633,6 +11057,32 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( return (unsigned PY_LONG_LONG) -1; } +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value) { + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned PY_LONG_LONG) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long)) { + return PyLong_FromUnsignedLongLong((unsigned long long) value); + } + } else { + if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long long)) { + return PyLong_FromLongLong((long long) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(unsigned PY_LONG_LONG), + little, !is_unsigned); + } +} + static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { const npy_uint32 neg_one = (npy_uint32) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index b8610f48a6..7515af5544 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -17,7 +17,7 @@ from libc.string cimport memset, memcpy from scipy.linalg.blas import fblas -from word2vec_inner cimport bisect_left, \ +from word2vec_inner cimport bisect_left, random_int32, \ scopy, saxpy, sdot, dsdot, snrm2, sscal, \ REAL_t, EXP_TABLE, \ our_dot, our_saxpy, \ @@ -219,11 +219,12 @@ cdef unsigned long long fast_document_dmc_neg( return next_random -def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, +def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative + cdef int sample = (model.sample != 0) cdef int _train_words = train_words cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden @@ -246,6 +247,7 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, cdef int window = model.window cdef int i, j + cdef unsigned long long r cdef long result = 0 # For hierarchical softmax @@ -280,36 +282,39 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, syn1neg = (np.PyArray_DATA(model.syn1neg)) cum_table = (np.PyArray_DATA(model.cum_table)) cum_table_len = len(model.cum_table) + if negative or sample: next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL if work is None: work = zeros(model.layer1_size, dtype=REAL) _work = np.PyArray_DATA(work) - document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) - for i in range(document_len): - predict_word = word_vocabs[i] - if predict_word is None: - # shrink document to leave out word - document_len = document_len - 1 - continue # leaving j unchanged - else: - indexes[i] = predict_word.index - if hs: - codelens[i] = len(predict_word.code) - codes[i] = np.PyArray_DATA(predict_word.code) - points[i] = np.PyArray_DATA(predict_word.point) - else: - codelens[i] = 1 - result += 1 + vlookup = model.vocab + i = 0 + for token in doc_words: + predict_word = vlookup[token] if token in vlookup else None + if predict_word is None: # shrink document to leave out word + continue # leaving i unchanged + if sample and predict_word.sample_int < random_int32(&next_random): + continue + indexes[i] = predict_word.index + if hs: + codelens[i] = len(predict_word.code) + codes[i] = np.PyArray_DATA(predict_word.code) + points[i] = np.PyArray_DATA(predict_word.point) + result += 1 + i += 1 + if i == MAX_DOCUMENT_LEN: + break # TODO: log warning, tally overflow? + document_len = i if _train_words: # single randint() call avoids a big thread-synchronization slowdown for i, item in enumerate(model.random.randint(0, window, document_len)): reduced_windows[i] = item + doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) for i in range(doctag_len): _doctag_indexes[i] = doctag_indexes[i] result += 1 @@ -317,8 +322,6 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # release GIL & train on the document with nogil: for i in range(document_len): - if codelens[i] == 0: - continue if _train_words: # simultaneous skip-gram wordvec-training j = i - window + reduced_windows[i] if j < 0: @@ -327,7 +330,7 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, if k > document_len: k = document_len for j in range(j, k): - if j == i or codelens[j] == 0: + if j == i: continue if hs: # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose @@ -352,11 +355,12 @@ def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, return result -def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, +def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative + cdef int sample = (model.sample != 0) cdef int _learn_doctags = learn_doctags cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden @@ -415,6 +419,7 @@ def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 syn1neg = (np.PyArray_DATA(model.syn1neg)) cum_table = (np.PyArray_DATA(model.cum_table)) cum_table_len = len(model.cum_table) + if negative or sample: next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL @@ -425,22 +430,25 @@ def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 neu1 = zeros(model.layer1_size, dtype=REAL) _neu1 = np.PyArray_DATA(neu1) - document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - j = 0 - for i in range(document_len): - word = word_vocabs[i] - if word is None: - # shrink document to leave out word - document_len = document_len - 1 - continue # leaving j unchanged - else: - indexes[j] = word.index - if hs: - codelens[j] = len(word.code) - codes[j] = np.PyArray_DATA(word.code) - points[j] = np.PyArray_DATA(word.point) - result += 1 - j = j + 1 + vlookup = model.vocab + i = 0 + for token in doc_words: + predict_word = vlookup[token] if token in vlookup else None + if predict_word is None: # shrink document to leave out word + continue # leaving i unchanged + if sample and predict_word.sample_int < random_int32(&next_random): + continue + indexes[i] = predict_word.index + if hs: + codelens[i] = len(predict_word.code) + codes[i] = np.PyArray_DATA(predict_word.code) + points[i] = np.PyArray_DATA(predict_word.point) + result += 1 + i += 1 + if i == MAX_DOCUMENT_LEN: + break # TODO: log warning, tally overflow? + document_len = i + # single randint() call avoids a big thread-sync slowdown for i, item in enumerate(model.random.randint(0, window, document_len)): reduced_windows[i] = item @@ -504,11 +512,12 @@ def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 return result -def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, +def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative + cdef int sample = (model.sample != 0) cdef int _learn_doctags = learn_doctags cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden @@ -549,7 +558,7 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) if doctag_len != expected_doctag_len: - return 0 # skip doc without expected nmber of tags + return 0 # skip doc without expected number of tags # default vectors, locks from syn0/doctag_syn0 if word_vectors is None: @@ -572,6 +581,7 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non syn1neg = (np.PyArray_DATA(model.syn1neg)) cum_table = (np.PyArray_DATA(model.cum_table)) cum_table_len = len(model.cum_table) + if negative or sample: next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL @@ -582,24 +592,24 @@ def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non neu1 = zeros(model.layer1_size, dtype=REAL) _neu1 = np.PyArray_DATA(neu1) - document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) - j = 0 - for i in range(document_len): - word = word_vocabs[i] - if word is None: - # shrink document to leave out word - document_len = document_len - 1 - continue # leaving j unchanged - else: - indexes[j] = word.index - if hs: - codelens[j] = len(word.code) - codes[j] = np.PyArray_DATA(word.code) - points[j] = np.PyArray_DATA(word.point) - else: - codelens[j] = 1 - result += 1 - j = j + 1 + vlookup = model.vocab + i = 0 + for token in doc_words: + predict_word = vlookup[token] if token in vlookup else None + if predict_word is None: # shrink document to leave out word + continue # leaving i unchanged + if sample and predict_word.sample_int < random_int32(&next_random): + continue + indexes[i] = predict_word.index + if hs: + codelens[i] = len(predict_word.code) + codes[i] = np.PyArray_DATA(predict_word.code) + points[i] = np.PyArray_DATA(predict_word.point) + result += 1 + i += 1 + if i == MAX_DOCUMENT_LEN: + break # TODO: log warning, tally overflow? + document_len = i for i in range(doctag_len): _doctag_indexes[i] = doctag_indexes[i] diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index af40205ee6..1fe914036c 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -66,30 +66,31 @@ In Proceedings of NIPS, 2013. .. [3] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ """ +from __future__ import division # py3 "true division" import logging import sys import os import heapq -import time +from timeit import default_timer from copy import deepcopy import threading try: - from queue import Queue + from queue import Queue, Empty except ImportError: - from Queue import Queue + from Queue import Queue, Empty from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\ uint32, seterr, array, uint8, vstack, fromstring, sqrt, newaxis,\ ndarray, empty, sum as np_sum, prod, ones, repeat as np_repeat -logger = logging.getLogger("gensim.models.word2vec") - from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from six import iteritems, itervalues, string_types from six.moves import xrange from types import GeneratorType +logger = logging.getLogger("gensim.models.word2vec") + try: from gensim.models.word2vec_inner import train_sentence_sg, train_sentence_cbow, FAST_VERSION except ImportError: @@ -100,61 +101,65 @@ def train_sentence_sg(model, sentence, alpha, work=None): """ Update skip-gram model by training on a single sentence. - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Word2Vec.train()`. + The sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from `Word2Vec.train()`. This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from word2vec_inner instead. """ + word_vocabs = [model.vocab[w] for w in sentence if w in model.vocab and + model.vocab[w].sample_int > model.random.randint(2**32)] for pos, word in enumerate(sentence): - if word is None: - continue # OOV word in the input sentence => skip reduced_window = model.random.randint(model.window) # `b` in the original word2vec code # now go over all words from the (reduced) window, predicting each one in turn start = max(0, pos - model.window + reduced_window) - for pos2, word2 in enumerate(sentence[start : pos + model.window + 1 - reduced_window], start): + for pos2, word2_vocab in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): # don't train on OOV words and on the `word` itself - if word2 and not (pos2 == pos): - train_sg_pair(model, word, word2.index, alpha) + if (pos2 == pos): + train_sg_pair(model, word, word2_vocab.index, alpha) - return len([word for word in sentence if word is not None]) + return len(word_vocabs) def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): """ Update CBOW model by training on a single sentence. - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Word2Vec.train()`. + The sentence is a list of string tokens, which are looked up in the model's + vocab dictionary. Called internally from `Word2Vec.train()`. This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from word2vec_inner instead. """ - for pos, word in enumerate(sentence): - if word is None: - continue # OOV word in the input sentence => skip - reduced_window = model.random.randint(model.window) # `b` in the original word2vec code + word_vocabs = [model.vocab[w] for w in sentence if w in model.vocab and + model.vocab[w].sample_int > model.random.randint(2**32)] + for pos, word in enumerate(word_vocabs): + reduced_window = model.random.randint(model.window) # `b` in the original word2vec code start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start) + window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x vector_size + l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x vector_size if word2_indices and model.cbow_mean: l1 /= len(word2_indices) train_cbow_pair(model, word, word2_indices, l1, alpha) - return len([word for word in sentence if word is not None]) + return len(word_vocabs) -def train_sg_pair(model, predict_word, context_index, alpha, learn_vectors=True, learn_hidden=True, +def train_sg_pair(model, word, context_index, alpha, learn_vectors=True, learn_hidden=True, context_vectors=None, context_locks=None): if context_vectors is None: context_vectors = model.syn0 if context_locks is None: context_locks = model.syn0_lockf - l1 = context_vectors[context_index] + if word not in model.vocab: + return + predict_word = model.vocab[word] # target word (NN output) + + l1 = context_vectors[context_index] # input word (NN input/projection layer) lock_factor = context_locks[context_index] neu1e = zeros(l1.shape) @@ -190,12 +195,12 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=Tr neu1e = zeros(l1.shape) if model.hs: - l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size - fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output - ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate + l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size + fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output + ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate if learn_hidden: - model.syn1[word.point] += outer(ga, l1) # learn hidden -> output - neu1e += dot(ga, l2a) # save error + model.syn1[word.point] += outer(ga, l1) # learn hidden -> output + neu1e += dot(ga, l2a) # save error if model.negative: # use this word (label = 1) + `negative` other random words not from this sentence (label = 0) @@ -204,19 +209,20 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=Tr w = model.cum_table.searchsorted(model.random.randint(model.cum_table[-1])) if w != word.index: word_indices.append(w) - l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size - fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output - gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate + l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size + fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output + gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate if learn_hidden: - model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output - neu1e += dot(gb, l2b) # save error + model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output + neu1e += dot(gb, l2b) # save error if learn_vectors: # learn input -> hidden, here for all words in the window separately l = len(input_word_indices) - model.syn0[input_word_indices] += np_repeat(neu1e, l).reshape(l, model.vector_size) * model.syn0_lockf[input_word_indices][:, None] + model.syn0[input_word_indices] += np_repeat(neu1e, l).reshape(l, model.vector_size) * \ + model.syn0_lockf[input_word_indices][:, None] return neu1e -# could move this import up to where train_* is imported, +# could move this import up to where train_* is imported, # but for now just do it separately incase there are unforseen bugs in score_ try: from gensim.models.word2vec_inner import score_sentence_sg, score_sentence_cbow @@ -243,10 +249,10 @@ def score_sentence_sg(model, sentence, work=None): # now go over all words from the window, predicting each one in turn start = max(0, pos - model.window) - for pos2, word2 in enumerate(sentence[start : pos + model.window + 1], start): + for pos2, word2 in enumerate(sentence[start:(pos + model.window + 1)], start): # don't train on OOV words and on the `word` itself if word2 and not (pos2 == pos): - log_prob_sentence += score_sg_pair(model, word, word2) + log_prob_sentence += score_sg_pair(model, word, word2) return log_prob_sentence @@ -261,7 +267,6 @@ def score_sentence_cbow(model, sentence, alpha, work=None, neu1=None): will use the optimized version from word2vec_inner instead. """ - log_prob_sentence = 0.0 if model.negative: raise RuntimeError("scoring is only available for HS=True") @@ -271,31 +276,37 @@ def score_sentence_cbow(model, sentence, alpha, work=None, neu1=None): continue # OOV word in the input sentence => skip start = max(0, pos - model.window) - window_pos = enumerate(sentence[start : pos + model.window + 1], start) + window_pos = enumerate(sentence[start:(pos + model.window + 1)], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size + l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size if word2_indices and model.cbow_mean: l1 /= len(word2_indices) log_prob_sentence += score_cbow_pair(model, word, word2_indices, l1) return log_prob_sentence + def score_sg_pair(model, word, word2): l1 = model.syn0[word2.index] l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size - sgn = -1.0**word.code # ch function, 0-> 1, 1 -> -1 + sgn = -1.0**word.code # ch function, 0-> 1, 1 -> -1 lprob = -log(1.0 + exp(-sgn*dot(l1, l2a.T))) return sum(lprob) + def score_cbow_pair(model, word, word2_indices, l1): - l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size - sgn = -1.0**word.code # ch function, 0-> 1, 1 -> -1 + l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size + sgn = -1.0**word.code # ch function, 0-> 1, 1 -> -1 lprob = -log(1.0 + exp(-sgn*dot(l1, l2a.T))) return sum(lprob) class Vocab(object): - """A single vocabulary item, used internally for constructing binary trees (incl. both word leaves and inner nodes).""" + """ + A single vocabulary item, used internally for collecting per-word frequency/sampling info, + and for constructing binary trees (incl. both word leaves and inner nodes). + + """ def __init__(self, **kwargs): self.count = 0 self.__dict__.update(kwargs) @@ -317,8 +328,8 @@ class Word2Vec(utils.SaveLoad): """ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, - sample=0, seed=1, workers=1, min_alpha=0.0001, sg=1, hs=1, negative=0, - cbow_mean=0, hashfxn=hash, iter=1, null_word=0): + sample=0, seed=1, workers=1, min_alpha=0.0001, sg=1, hs=1, negative=0, + cbow_mean=0, hashfxn=hash, iter=1, null_word=0): """ Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. @@ -331,7 +342,8 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. - `sg` defines the training algorithm. By default (`sg=1`), skip-gram is used. Otherwise, `cbow` is employed. + `sg` defines the training algorithm. By default (`sg=1`), skip-gram is used. + Otherwise, `cbow` is employed. `size` is the dimensionality of the feature vectors. @@ -366,7 +378,7 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, self.vocab = {} # mapping from a word (string) to a Vocab object self.index2word = [] # map from a word's matrix index (int) to word (string) self.sg = int(sg) - self.cum_table = None # for negative sampling + self.cum_table = None # for negative sampling self.vector_size = int(size) self.layer1_size = int(size) if size % 4 != 0: @@ -385,13 +397,14 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, self.hashfxn = hashfxn self.iter = iter self.null_word = null_word + self.train_count = 0 + self.total_train_time = 0 if sentences is not None: if isinstance(sentences, GeneratorType): raise TypeError("You can't pass a generator as the sentences argument. Try an iterator.") self.build_vocab(sentences) self.train(sentences) - def make_cum_table(self, power=0.75, domain=2**31 - 1): """ Create a cumulative-distribution table using stored vocabulary word counts for @@ -412,8 +425,8 @@ def make_cum_table(self, power=0.75, domain=2**31 - 1): for word_index in range(vocab_size): cumulative += self.vocab[self.index2word[word_index]].count**power / train_words_pow self.cum_table[word_index] = round(cumulative * domain) - assert self.cum_table[-1] == domain - + if len(self.cum_table) > 0: + assert self.cum_table[-1] == domain def create_binary_tree(self): """ @@ -447,56 +460,26 @@ def create_binary_tree(self): logger.info("built huffman tree with maximum node depth %i" % max_depth) - def precalc_sampling(self): - """Precalculate each vocabulary item's threshold for sampling""" - if self.sample: - logger.info("frequent-word downsampling, threshold %g; progress tallies will be approximate" % (self.sample)) - total_words = sum(v.count for v in itervalues(self.vocab)) - threshold_count = float(self.sample) * total_words - for v in itervalues(self.vocab): - prob = (sqrt(v.count / threshold_count) + 1) * (threshold_count / v.count) if self.sample else 1.0 - v.sample_probability = min(prob, 1.0) - def build_vocab(self, sentences): """ Build vocabulary from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. """ - logger.info("collecting all words and their counts") - vocab = self._vocab_from(sentences) - # assign a unique index to each word - self.vocab, self.index2word = {}, [] - for word, v in iteritems(vocab): - if v.count >= self.min_count: - v.index = len(self.vocab) - self.index2word.append(word) - self.vocab[word] = v - logger.info("total %i word types after removing those with count<%s" % (len(self.vocab), self.min_count)) + self.scan_vocab(sentences) # initial survey + self.scale_vocab() # trim by min_count & precalculate downsampling + self.finalize_vocab() # build tables & arrays - if self.null_word: - # create null pseudo-word for padding when using concatenative L1 (run-of-words) - # this word is only ever input – never predicted – so count, huffman-point doesn't matter - word, v = '\0', Vocab(count=1) - v.index = len(self.vocab) - self.index2word.append(word) - self.vocab[word] = v - if self.hs: - # add info about each word's Huffman encoding - self.create_binary_tree() - if self.negative: - # build the table for drawing random words (for negative sampling) - self.make_cum_table() - # precalculate downsampling thresholds - self.precalc_sampling() - self.reset_weights() - sys.stderr.flush() + def scan_vocab(self, sentences, progress_per=10000): + """Do an initial scan of all words appearing in sentences.""" + logger.info("collecting all words and their counts") + self.vocab = self._vocab_from(sentences, progress_per=progress_per) - def _vocab_from(self, sentences): + def _vocab_from(self, sentences, progress_per=10000): sentence_no, vocab = -1, {} total_words = 0 for sentence_no, sentence in enumerate(sentences): - if sentence_no % 10000 == 0: + if sentence_no % progress_per == 0: logger.info("PROGRESS: at sentence #%i, processed %i words and %i word types" % (sentence_no, total_words, len(vocab))) for word in sentence: @@ -507,8 +490,109 @@ def _vocab_from(self, sentences): vocab[word] = Vocab(count=1) logger.info("collected %i word types from a corpus of %i words and %i sentences" % (len(vocab), total_words, sentence_no + 1)) + self.corpus_count = sentence_no + 1 return vocab + def scale_vocab(self, min_count=None, sample=None, dry_run=False): + """ + Apply vocabulary settings for `min_count` (discarding less-frequent words) + and `sample` (controlling the downsampling of more-frequent words). + + Calling with `dry_run=True` will only simulate the provided settings and + report the size of the retained vocabulary, effective corpus length, and + estimated memory requirements. Results are both printed via logging and + returned as a dict. + + """ + min_count = min_count or self.min_count + sample = sample or self.sample + + # Discard words less-frequent than min_count + if not dry_run: + self.index2word = [] + # make stored settings match these applied settings + self.min_count = min_count + self.sample = sample + drop_unique, drop_total, retain_total, original_total = 0, 0, 0, 0 + retain_words = [] + for word, v in iteritems(self.vocab): + if v.count >= min_count: + retain_words.append(word) + retain_total += v.count + original_total += v.count + else: + drop_unique += 1 + drop_total += v.count + original_total += v.count + logger.info("min_count=%d retains %i unique words (drops %i)" + % (min_count, len(retain_words), drop_unique)) + logger.info("min_count leaves %i word corpus (%i%% of original %i)" + % (retain_total, retain_total * 100 / max(original_total, 1), original_total)) + + # Precalculate each vocabulary item's threshold for sampling + if not sample: + # no words downsampled + threshold_count = retain_total + elif sample < 1.0: + # traditional meaning: set parameter as proportion of total + threshold_count = sample * retain_total + else: + # new shorthand: sample >= 1 means downsample all words with higher count than sample + threshold_count = int(sample * (3 + sqrt(5)) / 2) + downsample_total, downsample_unique = 0, 0 + for w in retain_words: + v = self.vocab[w] + word_probability = (sqrt(v.count / threshold_count) + 1) * (threshold_count / v.count) + if word_probability < 1.0: + downsample_unique += 1 + downsample_total += word_probability * v.count + else: + word_probability = 1.0 + downsample_total += v.count + if not dry_run: + v.sample_int = int(round(word_probability * 2**32)) + logger.info("sample=%g downsamples %i most-common words" % (sample, downsample_unique)) + logger.info("downsampling leaves estimated %i word corpus (%i%% of prior %i)" + % (downsample_total, round(downsample_total * 100 / max(retain_total, 1)), retain_total)) + + # return from each step: words-affected, resulting-corpus-size + report_values = {'drop_unique': drop_unique, 'retain_total': retain_total, + 'downsample_unique': downsample_unique, 'downsample_total': int(downsample_total)} + + # print extra memory estimates + report_values['memory'] = self.estimate_memory(vocab_size=len(retain_words)) + + if not dry_run: + new_vocab = {} + for w in retain_words: + new_vocab[w] = self.vocab[w] + new_vocab[w].index = len(self.index2word) + self.index2word.append(w) + self.vocab = new_vocab + logger.info("vocabulary min_count & sample applied, and indexes assigned") + return report_values + + def finalize_vocab(self): + """Build tables and model weights based on final vocabulary settings.""" + if not self.index2word: + self.scale_vocab() + if self.hs: + # add info about each word's Huffman encoding + self.create_binary_tree() + if self.negative: + # build the table for drawing random words (for negative sampling) + self.make_cum_table() + if self.null_word: + # create null pseudo-word for padding when using concatenative L1 (run-of-words) + # this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter + word, v = '\0', Vocab(count=1, sample_int=0) + v.index = len(self.vocab) + self.index2word.append(word) + self.vocab[word] = v + # set initial input/projection and hidden weights + self.reset_weights() + sys.stderr.flush() + def reset_from(self, other_model): """ Borrow shareable pre-built structures (like vocab) from the other_model. Useful @@ -517,23 +601,20 @@ def reset_from(self, other_model): self.vocab = other_model.vocab self.index2word = other_model.index2word self.cum_table = other_model.cum_table + self.corpus_count = other_model.corpus_count self.reset_weights() - def _prepare_items(self, items): - for sentence in items: - # avoid calling random_sample() where prob >= 1, to speed things up a little: - sampled = [self.vocab[word] for word in sentence - if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or - self.vocab[word].sample_probability >= random.random_sample())] - yield sampled - - def _get_job_words(self, alpha, work, job, neu1): - if self.sg: - return sum(train_sentence_sg(self, sentence, alpha, work) for sentence in job) - else: - return sum(train_sentence_cbow(self, sentence, alpha, work, neu1) for sentence in job) + def _do_train_job(self, job, alpha, inits): + work, neu1 = inits + tally = 0 + for sentence in job: + if self.sg: + tally += train_sentence_sg(self, sentence, alpha, work) + else: + tally += train_sentence_cbow(self, sentence, alpha, work, neu1) + return tally - def train(self, sentences, total_words=None, word_count=0, chunksize=100): + def train(self, sentences, total_words=None, word_count=0, chunksize=100, queue_factor=2, report_delay=1): """ Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. @@ -541,7 +622,8 @@ def train(self, sentences, total_words=None, word_count=0, chunksize=100): """ if FAST_VERSION < 0: import warnings - warnings.warn("C extension compilation failed, training will be slow. Install a C compiler and reinstall gensim for fast training.") + warnings.warn("C extension not loaded for Word2Vec, training will be slow. " + "Install a C compiler and reinstall gensim for fast training.") self.neg_labels = [] if self.negative > 0: # precompute negative labels optimization for pure-python training @@ -549,70 +631,101 @@ def train(self, sentences, total_words=None, word_count=0, chunksize=100): self.neg_labels[0] = 1. logger.info("training model with %i workers on %i vocabulary and %i features, " - "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" % - (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) + "using sg=%s hs=%s sample=%s and negative=%s" + % (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) if not self.vocab: raise RuntimeError("you must first build vocabulary before training the model") + if not hasattr(self, 'syn0'): + raise RuntimeError("you must first finalize vocabulary before training the model") if self.iter > 1: sentences = utils.RepeatCorpusNTimes(sentences, self.iter) - start, next_report = time.time(), [1.0] - word_count = [word_count] - total_words = total_words or int(sum(v.count * v.sample_probability for v in itervalues(self.vocab)) * self.iter) - jobs = Queue(maxsize=2 * self.workers) # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :( - lock = threading.Lock() # for shared state (=number of words trained so far, log reports...) - - def worker_train(): - """Train the model, lifting lists of sentences from the jobs queue.""" - work = zeros(self.layer1_size, dtype=REAL) # each thread must have its own work memory + def worker_init(): + work = matutils.zeros_aligned(self.layer1_size, dtype=REAL) # per-thread private work memory neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) - + return (work, neu1) + def worker_one_job(job, inits): + items, alpha = job + if items is None: # signal to finish + return False + # train & return tally + job_words = self._do_train_job(items, alpha, inits) + progress_queue.put(job_words) # report progress + return True + def worker_loop(): + """Train the model, lifting lists of sentences from the jobs queue.""" + init = worker_init() while True: - job = jobs.get() - if job is None: # data finished, exit + job = job_queue.get() + if not worker_one_job(job, init): break - # update the learning rate before every job - alpha = max(self.min_alpha, self.alpha * (1 - 1.0 * word_count[0] / total_words)) - # how many words did we train on? out-of-vocabulary (unknown) words do not count - job_words = self._get_job_words(alpha, work, job, neu1) - with lock: - word_count[0] += job_words - elapsed = time.time() - start - if elapsed >= next_report[0]: - logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" % - (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0)) - next_report[0] = elapsed + 1.0 # don't flood the log, wait at least a second between progress reports - sys.stderr.flush() - workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)] + start, next_report = default_timer(), 1.0 + total_words = total_words or int(sum(v.count * (v.sample_int/2**32) for v in itervalues(self.vocab)) * + self.iter) + # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :( + if self.workers > 0: + job_queue = Queue(maxsize=queue_factor * self.workers) + else: + job_queue = FakeJobQueue(worker_init, worker_one_job) + progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) + + workers = [threading.Thread(target=worker_loop) for _ in xrange(self.workers)] for thread in workers: thread.daemon = True # make interrupting the process with ctrl+c easier thread.start() - # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue - for job_no, job in enumerate(utils.grouper(self._prepare_items(sentences), chunksize)): - logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize())) - jobs.put(job) - logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize()) - for _ in xrange(self.workers): - jobs.put(None) # give the workers heads up that they can finish -- no more work! - - for thread in workers: - thread.join() + pushed_words = 0 + push_done = False + done_jobs = 0 + next_alpha = self.alpha + jobs_source = enumerate(utils.grouper(sentences, chunksize)) + # fill jobs queue with (sentence, alpha) job tuples + while True: + try: + job_no, items = next(jobs_source) + logger.debug("putting job #%i in the queue", job_no) + job_queue.put((items, next_alpha)) + # update the learning rate before every job + pushed_words += round((chunksize/self.corpus_count)/total_words) + next_alpha = self.alpha - (self.alpha - self.min_alpha) * (pushed_words / total_words) + except StopIteration: + logger.info("reached end of input; waiting to finish %i outstanding jobs" % (job_no-done_jobs+1)) + for _ in xrange(self.workers): + job_queue.put((None, 0)) # give the workers heads up that they can finish -- no more work! + push_done = True + try: + while done_jobs < (job_no+1): + word_count += progress_queue.get(push_done) # only block after all jobs pushed + done_jobs += 1 + elapsed = default_timer() - start + if elapsed >= next_report: + est_alpha = self.alpha - (self.alpha - self.min_alpha) * (word_count / total_words) + logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s", + 100.0 * word_count / total_words, est_alpha, word_count / elapsed) + next_report = elapsed + report_delay # don't flood log, wait report_delay seconds + sys.stderr.flush() + else: + # loop ended by job count; really done + break + except Empty: + pass # already out of loop; continue to next push - elapsed = time.time() - start + elapsed = default_timer() - start logger.info("training on %i words took %.1fs, %.0f words/s" % - (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0)) + (word_count, elapsed, word_count / elapsed if elapsed else 0.0)) + self.train_count += 1 + self.total_train_time += elapsed self.clear_sims() - return word_count[0] + return word_count def _score_job_words(self, sentence, work, neu1): if self.sg: - return score_sentence_sg(self, sentence, work) + return score_sentence_sg(self, sentence, work) else: - return score_sentence_cbow(self, sentence, work, neu1) + return score_sentence_cbow(self, sentence, work, neu1) # basics copied from the train() function def score(self, sentences, total_sentences=None, chunksize=100): @@ -628,11 +741,12 @@ def score(self, sentences, total_sentences=None, chunksize=100): """ if FAST_VERSION < 0: import warnings - warnings.warn("C extension compilation failed, scoring will be slow. Install a C compiler and reinstall gensim for fastness.") + warnings.warn("C extension compilation failed, scoring will be slow. " + "Install a C compiler and reinstall gensim for fastness.") logger.info("scoring sentences with %i workers on %i vocabulary and %i features, " - "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" % - (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) + "using sg=%s hs=%s sample=%s and negative=%s" + % (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) if not self.vocab: raise RuntimeError("you must first build vocabulary before scoring new data") @@ -641,11 +755,12 @@ def score(self, sentences, total_sentences=None, chunksize=100): raise RuntimeError("we have only implemented score for hs") start, next_report = time.time(), [1.0] - jobs = Queue(maxsize=2 * self.workers) # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :( + # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :( + jobs = Queue(maxsize=2 * self.workers) lock = threading.Lock() # for shared state (scores, log reports...) total_sentences = total_sentences or int(1e9) sentence_scores = matutils.zeros_aligned(total_sentences, dtype=REAL) - sentence_count = [0] + sentence_count = [0] def worker_score(): """score the enumerated sentences, lifting lists of sentences from the jobs queue.""" @@ -657,17 +772,17 @@ def worker_score(): if job is None: # data finished, exit break ns = 0 - for (id,sentence) in job: - sentence_scores[id] = self._score_job_words(sentence, work, neu1) - ns += 1 + for (id, sentence) in job: + sentence_scores[id] = self._score_job_words(sentence, work, neu1) + ns += 1 with lock: sentence_count[0] += ns elapsed = time.time() - start if elapsed >= next_report[0]: - logger.info("PROGRESS: at %i sentences, %.0f sentences/s" % - (sentence_count[0], sentence_count[0] / elapsed if elapsed else 0.0)) - next_report[0] = elapsed + 1.0 # don't flood the log, wait at least a second between progress reports + logger.info("PROGRESS: at %i sentences, %.0f sentences/s" + % (sentence_count[0], sentence_count[0] / elapsed if elapsed else 0.0)) + next_report[0] = elapsed + 1.0 # wait at least a second between progress reports workers = [threading.Thread(target=worker_score) for _ in xrange(self.workers)] for thread in workers: @@ -686,8 +801,8 @@ def worker_score(): thread.join() elapsed = time.time() - start - logger.info("scoring %i sentences took %.1fs, %.0f sentences/s" % - (sentence_count[0], elapsed, sentence_count[0] / elapsed if elapsed else 0.0)) + logger.info("scoring %i sentences took %.1fs, %.0f sentences/s" + % (sentence_count[0], elapsed, sentence_count[0] / elapsed if elapsed else 0.0)) self.syn0norm = None return sentence_scores[:sentence_count[0]] @@ -739,7 +854,6 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): else: fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row)))) - @classmethod def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, encoding='utf8'): """ @@ -782,7 +896,8 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True, ch = fin.read(1) if ch == b' ': break - if ch != b'\n': # ignore newlines in front of words (some binary files have newline, some don't) + if ch != b'\n': # ignore newlines in front of words (some binary files have) + word.append(ch) word = utils.to_unicode(b''.join(word), encoding=encoding) if counts is None: @@ -840,14 +955,14 @@ def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): ch = fin.read(1) if ch == b' ': break - if ch != b'\n': # ignore newlines in front of words (some binary files have newline, some don't) + if ch != b'\n': # ignore newlines in front of words (some binary files have) word.append(ch) word = utils.to_unicode(b''.join(word), encoding=encoding) weights = fromstring(fin.read(binary_len), dtype=REAL) if word in self.vocab: overlap_count += 1 self.syn0[self.vocab[word].index] = weights - self.syn0_lockf[self.vocab[word].index] = 0.0 # lock it + self.syn0_lockf[self.vocab[word].index] = 0.0 # lock it else: for line_no, line in enumerate(fin): parts = utils.to_unicode(line[:-1], encoding=encoding).split(" ") @@ -859,15 +974,15 @@ def intersect_word2vec_format(self, fname, binary=False, encoding='utf8'): self.syn0[self.vocab[word].index] = weights logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.syn0.shape, fname)) - def most_similar(self, positive=[], negative=[], topn=10): """ Find the top-N most similar words. Positive words contribute positively towards the similarity, negative words negatively. This method computes cosine similarity between a simple mean of the projection - weight vectors of the given words and the vectors for each word in the model. The method corresponds to the `word-analogy` and - `distance` scripts in the original word2vec implementation. + weight vectors of the given words and the vectors for each word in the model. + The method corresponds to the `word-analogy` and `distance` scripts in the original + word2vec implementation. If topn is False, most_similar returns the vector of similarity scores. @@ -968,7 +1083,6 @@ def word_vec(word): result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words] return result[:topn] - def doesnt_match(self, words): """ Which word from the given list doesn't go with the others? @@ -990,7 +1104,6 @@ def doesnt_match(self, words): dists = dot(vectors, mean) return sorted(zip(dists, words))[0][1] - def __getitem__(self, word): """ Return a word's representations in vector space, as a 1D numpy array. @@ -1003,11 +1116,9 @@ def __getitem__(self, word): """ return self.syn0[self.vocab[word].index] - def __contains__(self, word): return word in self.vocab - def similarity(self, w1, w2): """ Compute cosine similarity between two words. @@ -1023,7 +1134,6 @@ def similarity(self, w1, w2): """ return dot(matutils.unitvec(self[w1]), matutils.unitvec(self[w2])) - def n_similarity(self, ws1, ws2): """ Compute cosine similarity between two sets of words. @@ -1044,7 +1154,6 @@ def n_similarity(self, ws1, ws2): v2 = [self[word] for word in ws2] return dot(matutils.unitvec(array(v1).mean(axis=0)), matutils.unitvec(array(v2).mean(axis=0))) - def init_sims(self, replace=False): """ Precompute L2-normalized vectors. @@ -1067,13 +1176,28 @@ def init_sims(self, replace=False): else: self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL) + def estimate_memory(self, vocab_size=None): + """Estimate required memory for a model using current settings and provided vocabulary size.""" + vocab_size = vocab_size or len(self.vocab) + report = {} + report['vocab'] = vocab_size * (700 if self.hs else 500) + report['syn0'] = vocab_size * self.vector_size * 4 + if self.hs: + report['syn1'] = vocab_size * self.layer1_size * 4 + if self.negative: + report['syn1neg'] = vocab_size * self.layer1_size * 4 + report['total'] = sum(report.values()) + logger.info("estimated required memory for %i words and %i dimensions: %i bytes" + % (vocab_size, self.vector_size, report['total'])) + return report + @staticmethod def log_accuracy(section): correct, incorrect = len(section['correct']), len(section['incorrect']) if correct + incorrect > 0: logger.info("%s: %.1f%% (%i/%i)" % - (section['section'], 100.0 * correct / (correct + incorrect), - correct, correct + incorrect)) + (section['section'], 100.0 * correct / (correct + incorrect), + correct, correct + incorrect)) def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar): """ @@ -1123,7 +1247,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar): if index in ok_index and index not in ignore: predicted = self.index2word[index] if predicted != expected: - logger.debug("%s: expected %s, predicted %s" % (line.strip(), expected, predicted)) + logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) break if predicted == expected: section['correct'].append((a, b, c, expected)) @@ -1143,29 +1267,36 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar): sections.append(total) return sections - def __str__(self): return "Word2Vec(vocab=%s, size=%s, alpha=%s)" % (len(self.index2word), self.vector_size, self.alpha) - def save(self, *args, **kwargs): # don't bother storing the cached normalized vectors, recalculable table - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm','table','cum_table']) + kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'table', 'cum_table']) super(Word2Vec, self).save(*args, **kwargs) save.__doc__ = utils.SaveLoad.save.__doc__ - @classmethod def load(cls, *args, **kwargs): model = super(Word2Vec, cls).load(*args, **kwargs) - if hasattr(model,'table'): - delattr(model,'table') # discard in favor of cum_table + if hasattr(model, 'table'): + delattr(model, 'table') # discard in favor of cum_table if model.negative: model.make_cum_table() # rebuild cum_table from vocabulary return model +class FakeJobQueue(object): + """Pretends to be a Queue; does equivalent of work_loop in calling thread.""" + def __init__(self, init_fn, job_fn): + self.inits = init_fn() + self.job_fn = job_fn + + def put(self, job): + self.job_fn(job, self.inits) + + class BrownCorpus(object): """Iterate over sentences from the Brown corpus (part of NLTK data).""" def __init__(self, dirname): @@ -1202,12 +1333,13 @@ def __iter__(self): while True: text = rest + fin.read(8192) # avoid loading the entire file (=1 line) into RAM if text == rest: # EOF - sentence.extend(rest.split()) # return the last chunk of words, too (may be shorter/longer) + sentence.extend(rest.split()) # return the last chunk of words, too (may be shorter/longer) if sentence: yield sentence break - last_token = text.rfind(b' ') # the last token may have been split in two... keep it for the next iteration - words, rest = (utils.to_unicode(text[:last_token]).split(), text[last_token:].strip()) if last_token >= 0 else ([], text) + last_token = text.rfind(b' ') # last token may have been split in two... keep for next iteration + words, rest = (utils.to_unicode(text[:last_token]).split(), + text[last_token:].strip()) if last_token >= 0 else ([], text) sentence.extend(words) while len(sentence) >= self.max_sentence_length: yield sentence[:self.max_sentence_length] @@ -1247,7 +1379,6 @@ def __iter__(self): yield utils.to_unicode(line).split() - # Example: ./word2vec.py ~/workspace/word2vec/text8 ~/workspace/word2vec/questions-words.txt ./text8 if __name__ == "__main__": logging.basicConfig(format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', level=logging.INFO) diff --git a/gensim/models/word2vec_inner.c b/gensim/models/word2vec_inner.c index c6fab520d8..28c37ea940 100644 --- a/gensim/models/word2vec_inner.c +++ b/gensim/models/word2vec_inner.c @@ -459,7 +459,7 @@ static const char *__pyx_f[] = { "type.pxd", }; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":726 * # in Cython to enable them only on the right systems. * * ctypedef npy_int8 int8_t # <<<<<<<<<<<<<< @@ -468,7 +468,7 @@ static const char *__pyx_f[] = { */ typedef npy_int8 __pyx_t_5numpy_int8_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":727 * * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t # <<<<<<<<<<<<<< @@ -477,7 +477,7 @@ typedef npy_int8 __pyx_t_5numpy_int8_t; */ typedef npy_int16 __pyx_t_5numpy_int16_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":728 * ctypedef npy_int8 int8_t * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t # <<<<<<<<<<<<<< @@ -486,7 +486,7 @@ typedef npy_int16 __pyx_t_5numpy_int16_t; */ typedef npy_int32 __pyx_t_5numpy_int32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":729 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":729 * ctypedef npy_int16 int16_t * ctypedef npy_int32 int32_t * ctypedef npy_int64 int64_t # <<<<<<<<<<<<<< @@ -495,7 +495,7 @@ typedef npy_int32 __pyx_t_5numpy_int32_t; */ typedef npy_int64 __pyx_t_5numpy_int64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":733 * #ctypedef npy_int128 int128_t * * ctypedef npy_uint8 uint8_t # <<<<<<<<<<<<<< @@ -504,7 +504,7 @@ typedef npy_int64 __pyx_t_5numpy_int64_t; */ typedef npy_uint8 __pyx_t_5numpy_uint8_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":734 * * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t # <<<<<<<<<<<<<< @@ -513,7 +513,7 @@ typedef npy_uint8 __pyx_t_5numpy_uint8_t; */ typedef npy_uint16 __pyx_t_5numpy_uint16_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":735 * ctypedef npy_uint8 uint8_t * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t # <<<<<<<<<<<<<< @@ -522,7 +522,7 @@ typedef npy_uint16 __pyx_t_5numpy_uint16_t; */ typedef npy_uint32 __pyx_t_5numpy_uint32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":736 * ctypedef npy_uint16 uint16_t * ctypedef npy_uint32 uint32_t * ctypedef npy_uint64 uint64_t # <<<<<<<<<<<<<< @@ -531,7 +531,7 @@ typedef npy_uint32 __pyx_t_5numpy_uint32_t; */ typedef npy_uint64 __pyx_t_5numpy_uint64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":740 * #ctypedef npy_uint128 uint128_t * * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< @@ -540,7 +540,7 @@ typedef npy_uint64 __pyx_t_5numpy_uint64_t; */ typedef npy_float32 __pyx_t_5numpy_float32_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":741 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":741 * * ctypedef npy_float32 float32_t * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< @@ -549,7 +549,7 @@ typedef npy_float32 __pyx_t_5numpy_float32_t; */ typedef npy_float64 __pyx_t_5numpy_float64_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":750 * # The int types are mapped a bit surprising -- * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t # <<<<<<<<<<<<<< @@ -558,7 +558,7 @@ typedef npy_float64 __pyx_t_5numpy_float64_t; */ typedef npy_long __pyx_t_5numpy_int_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":751 * # numpy.int corresponds to 'l' and numpy.long to 'q' * ctypedef npy_long int_t * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< @@ -567,7 +567,7 @@ typedef npy_long __pyx_t_5numpy_int_t; */ typedef npy_longlong __pyx_t_5numpy_long_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":752 * ctypedef npy_long int_t * ctypedef npy_longlong long_t * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< @@ -576,7 +576,7 @@ typedef npy_longlong __pyx_t_5numpy_long_t; */ typedef npy_longlong __pyx_t_5numpy_longlong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":754 * ctypedef npy_longlong longlong_t * * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< @@ -585,7 +585,7 @@ typedef npy_longlong __pyx_t_5numpy_longlong_t; */ typedef npy_ulong __pyx_t_5numpy_uint_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":755 * * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< @@ -594,7 +594,7 @@ typedef npy_ulong __pyx_t_5numpy_uint_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":756 * ctypedef npy_ulong uint_t * ctypedef npy_ulonglong ulong_t * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< @@ -603,7 +603,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulong_t; */ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":758 * ctypedef npy_ulonglong ulonglong_t * * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< @@ -612,7 +612,7 @@ typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; */ typedef npy_intp __pyx_t_5numpy_intp_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":759 * * ctypedef npy_intp intp_t * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< @@ -621,7 +621,7 @@ typedef npy_intp __pyx_t_5numpy_intp_t; */ typedef npy_uintp __pyx_t_5numpy_uintp_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":761 * ctypedef npy_uintp uintp_t * * ctypedef npy_double float_t # <<<<<<<<<<<<<< @@ -630,7 +630,7 @@ typedef npy_uintp __pyx_t_5numpy_uintp_t; */ typedef npy_double __pyx_t_5numpy_float_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":762 * * ctypedef npy_double float_t * ctypedef npy_double double_t # <<<<<<<<<<<<<< @@ -639,7 +639,7 @@ typedef npy_double __pyx_t_5numpy_float_t; */ typedef npy_double __pyx_t_5numpy_double_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":763 * ctypedef npy_double float_t * ctypedef npy_double double_t * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< @@ -679,7 +679,7 @@ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t /*--- Type declarations ---*/ -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":765 * ctypedef npy_longdouble longdouble_t * * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< @@ -688,7 +688,7 @@ typedef __pyx_t_5numpy_float32_t __pyx_t_6gensim_6models_14word2vec_inner_REAL_t */ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":766 * * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< @@ -697,7 +697,7 @@ typedef npy_cfloat __pyx_t_5numpy_cfloat_t; */ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":767 * ctypedef npy_cfloat cfloat_t * ctypedef npy_cdouble cdouble_t * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< @@ -706,7 +706,7 @@ typedef npy_cdouble __pyx_t_5numpy_cdouble_t; */ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":769 * ctypedef npy_clongdouble clongdouble_t * * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< @@ -884,6 +884,11 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg #define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) #endif +static CYTHON_INLINE int __Pyx_PySequence_Contains(PyObject* item, PyObject* seq, int eq) { + int result = PySequence_Contains(seq, item); + return unlikely(result < 0) ? result : (result == (eq == Py_EQ)); +} + #define __Pyx_GetItemInt(o, i, type, is_signed, to_py_func, is_list, wraparound, boundscheck) \ (__Pyx_fits_Py_ssize_t(i, type, is_signed) ? \ __Pyx_GetItemInt_Fast(o, (Py_ssize_t)i, is_list, wraparound, boundscheck) : \ @@ -981,6 +986,8 @@ static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG(PyObject *); +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value); + static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *); #ifndef __PYX_FORCE_INIT_THREADS @@ -1176,6 +1183,7 @@ static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_1 static __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ static void __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG); /*proto*/ +static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_random_int32(unsigned PY_LONG_LONG *); /*proto*/ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ @@ -1248,11 +1256,14 @@ static char __pyx_k_saxpy[] = "saxpy"; static char __pyx_k_scopy[] = "scopy"; static char __pyx_k_snrm2[] = "snrm2"; static char __pyx_k_sscal[] = "sscal"; +static char __pyx_k_token[] = "token"; +static char __pyx_k_vocab[] = "vocab"; static char __pyx_k_import[] = "__import__"; static char __pyx_k_neu1_2[] = "neu1"; static char __pyx_k_points[] = "points"; static char __pyx_k_random[] = "random"; static char __pyx_k_result[] = "result"; +static char __pyx_k_sample[] = "sample"; static char __pyx_k_window[] = "window"; static char __pyx_k_work_2[] = "work"; static char __pyx_k_alpha_2[] = "_alpha"; @@ -1261,6 +1272,7 @@ static char __pyx_k_indexes[] = "indexes"; static char __pyx_k_our_dot[] = "our_dot"; static char __pyx_k_randint[] = "randint"; static char __pyx_k_syn1neg[] = "syn1neg"; +static char __pyx_k_vlookup[] = "vlookup"; static char __pyx_k_codelens[] = "codelens"; static char __pyx_k_cpointer[] = "_cpointer"; static char __pyx_k_expected[] = "expected"; @@ -1273,6 +1285,7 @@ static char __pyx_k_cum_table[] = "cum_table"; static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_our_saxpy[] = "our_saxpy"; static char __pyx_k_ValueError[] = "ValueError"; +static char __pyx_k_sample_int[] = "sample_int"; static char __pyx_k_syn0_lockf[] = "syn0_lockf"; static char __pyx_k_word_locks[] = "word_locks"; static char __pyx_k_layer1_size[] = "layer1_size"; @@ -1289,7 +1302,7 @@ static char __pyx_k_score_sentence_cbow[] = "score_sentence_cbow"; static char __pyx_k_train_sentence_cbow[] = "train_sentence_cbow"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; static char __pyx_k_gensim_models_word2vec_inner[] = "gensim.models.word2vec_inner"; -static char __pyx_k_Volumes_work_workspace_gensim_t[] = "/Volumes/work/workspace/gensim/trunk/gensim/models/word2vec_inner.pyx"; +static char __pyx_k_home_ubuntu_src_gensim_bigdocve[] = "/home/ubuntu/src/gensim-bigdocvec-pr/gensim/models/word2vec_inner.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; static char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; static char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; @@ -1303,7 +1316,6 @@ static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; static PyObject *__pyx_n_s_REAL; static PyObject *__pyx_n_s_RuntimeError; static PyObject *__pyx_n_s_ValueError; -static PyObject *__pyx_kp_s_Volumes_work_workspace_gensim_t; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; static PyObject *__pyx_n_s_cbow_mean; @@ -1320,6 +1332,7 @@ static PyObject *__pyx_n_s_expected; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_gensim_models_word2vec_inner; +static PyObject *__pyx_kp_s_home_ubuntu_src_gensim_bigdocve; static PyObject *__pyx_n_s_hs; static PyObject *__pyx_n_s_i; static PyObject *__pyx_n_s_import; @@ -1351,6 +1364,8 @@ static PyObject *__pyx_n_s_random; static PyObject *__pyx_n_s_range; static PyObject *__pyx_n_s_reduced_windows; static PyObject *__pyx_n_s_result; +static PyObject *__pyx_n_s_sample; +static PyObject *__pyx_n_s_sample_int; static PyObject *__pyx_n_s_saxpy; static PyObject *__pyx_n_s_scipy_linalg_blas; static PyObject *__pyx_n_s_scopy; @@ -1367,9 +1382,12 @@ static PyObject *__pyx_n_s_syn0_lockf; static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_token; static PyObject *__pyx_n_s_train_sentence_cbow; static PyObject *__pyx_n_s_train_sentence_sg; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; +static PyObject *__pyx_n_s_vlookup; +static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_word; static PyObject *__pyx_n_s_word_locks; @@ -1803,7 +1821,7 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in * lo = mid + 1 * return lo # <<<<<<<<<<<<<< * - * + * # this quick & dirty RNG apparently matches Java's (non-Secure)Random */ __pyx_r = __pyx_v_lo; goto __pyx_L0; @@ -1821,8 +1839,61 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_in return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":99 +/* "gensim/models/word2vec_inner.pyx":100 + * # this quick & dirty RNG apparently matches Java's (non-Secure)Random + * # note this function side-effects next_random to set up the next number + * cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil: # <<<<<<<<<<<<<< + * cdef unsigned long long this_random = next_random[0] >> 16 + * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL + */ + +static CYTHON_INLINE unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_random_int32(unsigned PY_LONG_LONG *__pyx_v_next_random) { + unsigned PY_LONG_LONG __pyx_v_this_random; + unsigned PY_LONG_LONG __pyx_r; + + /* "gensim/models/word2vec_inner.pyx":101 + * # note this function side-effects next_random to set up the next number + * cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil: + * cdef unsigned long long this_random = next_random[0] >> 16 # <<<<<<<<<<<<<< + * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL + * return this_random + */ + __pyx_v_this_random = ((__pyx_v_next_random[0]) >> 16); + + /* "gensim/models/word2vec_inner.pyx":102 + * cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil: + * cdef unsigned long long this_random = next_random[0] >> 16 + * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL # <<<<<<<<<<<<<< + * return this_random + * + */ + (__pyx_v_next_random[0]) = ((((__pyx_v_next_random[0]) * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & 281474976710655ULL); + + /* "gensim/models/word2vec_inner.pyx":103 + * cdef unsigned long long this_random = next_random[0] >> 16 + * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL + * return this_random # <<<<<<<<<<<<<< * + * cdef unsigned long long fast_sentence_sg_neg( + */ + __pyx_r = __pyx_v_this_random; + goto __pyx_L0; + + /* "gensim/models/word2vec_inner.pyx":100 + * # this quick & dirty RNG apparently matches Java's (non-Secure)Random + * # note this function side-effects next_random to set up the next number + * cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil: # <<<<<<<<<<<<<< + * cdef unsigned long long this_random = next_random[0] >> 16 + * next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "gensim/models/word2vec_inner.pyx":105 + * return this_random * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -1844,7 +1915,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente int __pyx_t_3; int __pyx_t_4; - /* "gensim/models/word2vec_inner.pyx":106 + /* "gensim/models/word2vec_inner.pyx":112 * * cdef long long a * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1853,7 +1924,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":107 + /* "gensim/models/word2vec_inner.pyx":113 * cdef long long a * cdef long long row1 = word2_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -1862,7 +1933,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/word2vec_inner.pyx":112 + /* "gensim/models/word2vec_inner.pyx":118 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1871,7 +1942,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":114 + /* "gensim/models/word2vec_inner.pyx":120 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -1882,7 +1953,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":115 + /* "gensim/models/word2vec_inner.pyx":121 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -1892,7 +1963,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":116 + /* "gensim/models/word2vec_inner.pyx":122 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -1901,7 +1972,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/word2vec_inner.pyx":117 + /* "gensim/models/word2vec_inner.pyx":123 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -1913,7 +1984,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":119 + /* "gensim/models/word2vec_inner.pyx":125 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -1922,7 +1993,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/word2vec_inner.pyx":120 + /* "gensim/models/word2vec_inner.pyx":126 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -1931,7 +2002,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/word2vec_inner.pyx":121 + /* "gensim/models/word2vec_inner.pyx":127 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -1941,7 +2012,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":122 + /* "gensim/models/word2vec_inner.pyx":128 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -1951,7 +2022,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente goto __pyx_L3_continue; } - /* "gensim/models/word2vec_inner.pyx":123 + /* "gensim/models/word2vec_inner.pyx":129 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -1962,7 +2033,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } __pyx_L5:; - /* "gensim/models/word2vec_inner.pyx":125 + /* "gensim/models/word2vec_inner.pyx":131 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -1971,7 +2042,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":126 + /* "gensim/models/word2vec_inner.pyx":132 * * row2 = target_index * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -1980,7 +2051,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":127 + /* "gensim/models/word2vec_inner.pyx":133 * row2 = target_index * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -1998,7 +2069,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":128 + /* "gensim/models/word2vec_inner.pyx":134 * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2008,7 +2079,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente goto __pyx_L3_continue; } - /* "gensim/models/word2vec_inner.pyx":129 + /* "gensim/models/word2vec_inner.pyx":135 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2017,7 +2088,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/word2vec_inner.pyx":130 + /* "gensim/models/word2vec_inner.pyx":136 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2026,7 +2097,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/word2vec_inner.pyx":131 + /* "gensim/models/word2vec_inner.pyx":137 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2035,7 +2106,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":132 + /* "gensim/models/word2vec_inner.pyx":138 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2046,7 +2117,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L3_continue:; } - /* "gensim/models/word2vec_inner.pyx":134 + /* "gensim/models/word2vec_inner.pyx":140 * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< @@ -2055,7 +2126,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":136 + /* "gensim/models/word2vec_inner.pyx":142 * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2065,8 +2136,8 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":99 - * + /* "gensim/models/word2vec_inner.pyx":105 + * return this_random * * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -2078,7 +2149,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":139 +/* "gensim/models/word2vec_inner.pyx":145 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -2100,7 +2171,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx int __pyx_t_4; PY_LONG_LONG __pyx_t_5; - /* "gensim/models/word2vec_inner.pyx":147 + /* "gensim/models/word2vec_inner.pyx":153 * cdef long long a, b * cdef long long row2 * cdef REAL_t f, g, count, inv_count = 1.0 # <<<<<<<<<<<<<< @@ -2109,7 +2180,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_inv_count = 1.0; - /* "gensim/models/word2vec_inner.pyx":150 + /* "gensim/models/word2vec_inner.pyx":156 * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2118,7 +2189,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":151 + /* "gensim/models/word2vec_inner.pyx":157 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2127,7 +2198,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/word2vec_inner.pyx":152 + /* "gensim/models/word2vec_inner.pyx":158 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2138,7 +2209,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":153 + /* "gensim/models/word2vec_inner.pyx":159 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2156,7 +2227,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":154 + /* "gensim/models/word2vec_inner.pyx":160 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2167,7 +2238,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":156 + /* "gensim/models/word2vec_inner.pyx":162 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -2176,7 +2247,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); - /* "gensim/models/word2vec_inner.pyx":157 + /* "gensim/models/word2vec_inner.pyx":163 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -2188,7 +2259,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L3_continue:; } - /* "gensim/models/word2vec_inner.pyx":158 + /* "gensim/models/word2vec_inner.pyx":164 * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -2198,7 +2269,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":159 + /* "gensim/models/word2vec_inner.pyx":165 * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -2210,7 +2281,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } __pyx_L8:; - /* "gensim/models/word2vec_inner.pyx":160 + /* "gensim/models/word2vec_inner.pyx":166 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2220,7 +2291,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":161 + /* "gensim/models/word2vec_inner.pyx":167 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2232,7 +2303,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } __pyx_L9:; - /* "gensim/models/word2vec_inner.pyx":163 + /* "gensim/models/word2vec_inner.pyx":169 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2241,7 +2312,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":164 + /* "gensim/models/word2vec_inner.pyx":170 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -2252,7 +2323,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { __pyx_v_b = __pyx_t_5; - /* "gensim/models/word2vec_inner.pyx":165 + /* "gensim/models/word2vec_inner.pyx":171 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2261,7 +2332,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":166 + /* "gensim/models/word2vec_inner.pyx":172 * for b in range(codelens[i]): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2270,7 +2341,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":167 + /* "gensim/models/word2vec_inner.pyx":173 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2288,7 +2359,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L13_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":168 + /* "gensim/models/word2vec_inner.pyx":174 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2298,7 +2369,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx goto __pyx_L10_continue; } - /* "gensim/models/word2vec_inner.pyx":169 + /* "gensim/models/word2vec_inner.pyx":175 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2307,7 +2378,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/word2vec_inner.pyx":170 + /* "gensim/models/word2vec_inner.pyx":176 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2316,7 +2387,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/word2vec_inner.pyx":171 + /* "gensim/models/word2vec_inner.pyx":177 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2325,7 +2396,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":172 + /* "gensim/models/word2vec_inner.pyx":178 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2336,7 +2407,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L10_continue:; } - /* "gensim/models/word2vec_inner.pyx":174 + /* "gensim/models/word2vec_inner.pyx":180 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2346,7 +2417,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":175 + /* "gensim/models/word2vec_inner.pyx":181 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2358,7 +2429,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } __pyx_L15:; - /* "gensim/models/word2vec_inner.pyx":177 + /* "gensim/models/word2vec_inner.pyx":183 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) * * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2369,7 +2440,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":178 + /* "gensim/models/word2vec_inner.pyx":184 * * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2387,7 +2458,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L19_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":179 + /* "gensim/models/word2vec_inner.pyx":185 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2398,7 +2469,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":181 + /* "gensim/models/word2vec_inner.pyx":187 * continue * else: * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< @@ -2410,7 +2481,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx __pyx_L16_continue:; } - /* "gensim/models/word2vec_inner.pyx":139 + /* "gensim/models/word2vec_inner.pyx":145 * * * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< @@ -2421,7 +2492,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx /* function exit code */ } -/* "gensim/models/word2vec_inner.pyx":184 +/* "gensim/models/word2vec_inner.pyx":190 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2448,7 +2519,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente int __pyx_t_4; long __pyx_t_5; - /* "gensim/models/word2vec_inner.pyx":192 + /* "gensim/models/word2vec_inner.pyx":198 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2457,7 +2528,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_modulo = 281474976710655ULL; - /* "gensim/models/word2vec_inner.pyx":193 + /* "gensim/models/word2vec_inner.pyx":199 * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL * cdef REAL_t f, g, count, inv_count = 1.0, label # <<<<<<<<<<<<<< @@ -2466,7 +2537,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_inv_count = 1.0; - /* "gensim/models/word2vec_inner.pyx":197 + /* "gensim/models/word2vec_inner.pyx":203 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< @@ -2475,7 +2546,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "gensim/models/word2vec_inner.pyx":199 + /* "gensim/models/word2vec_inner.pyx":205 * word_index = indexes[i] * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2484,7 +2555,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":200 + /* "gensim/models/word2vec_inner.pyx":206 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2493,7 +2564,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/word2vec_inner.pyx":201 + /* "gensim/models/word2vec_inner.pyx":207 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2504,7 +2575,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":202 + /* "gensim/models/word2vec_inner.pyx":208 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2522,7 +2593,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":203 + /* "gensim/models/word2vec_inner.pyx":209 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2533,7 +2604,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":205 + /* "gensim/models/word2vec_inner.pyx":211 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -2542,7 +2613,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); - /* "gensim/models/word2vec_inner.pyx":206 + /* "gensim/models/word2vec_inner.pyx":212 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -2554,7 +2625,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L3_continue:; } - /* "gensim/models/word2vec_inner.pyx":207 + /* "gensim/models/word2vec_inner.pyx":213 * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -2564,7 +2635,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":208 + /* "gensim/models/word2vec_inner.pyx":214 * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -2576,7 +2647,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } __pyx_L8:; - /* "gensim/models/word2vec_inner.pyx":209 + /* "gensim/models/word2vec_inner.pyx":215 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -2586,7 +2657,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = (__pyx_v_cbow_mean != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":210 + /* "gensim/models/word2vec_inner.pyx":216 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2598,7 +2669,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } __pyx_L9:; - /* "gensim/models/word2vec_inner.pyx":212 + /* "gensim/models/word2vec_inner.pyx":218 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2607,7 +2678,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":214 + /* "gensim/models/word2vec_inner.pyx":220 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2618,7 +2689,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "gensim/models/word2vec_inner.pyx":215 + /* "gensim/models/word2vec_inner.pyx":221 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2628,7 +2699,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":216 + /* "gensim/models/word2vec_inner.pyx":222 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2637,7 +2708,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_target_index = __pyx_v_word_index; - /* "gensim/models/word2vec_inner.pyx":217 + /* "gensim/models/word2vec_inner.pyx":223 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2649,7 +2720,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":219 + /* "gensim/models/word2vec_inner.pyx":225 * label = ONEF * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) # <<<<<<<<<<<<<< @@ -2658,7 +2729,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_target_index = __pyx_f_6gensim_6models_14word2vec_inner_bisect_left(__pyx_v_cum_table, ((__pyx_v_next_random >> 16) % (__pyx_v_cum_table[(__pyx_v_cum_table_len - 1)])), 0, __pyx_v_cum_table_len); - /* "gensim/models/word2vec_inner.pyx":220 + /* "gensim/models/word2vec_inner.pyx":226 * else: * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2667,7 +2738,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "gensim/models/word2vec_inner.pyx":221 + /* "gensim/models/word2vec_inner.pyx":227 * target_index = bisect_left(cum_table, (next_random >> 16) % cum_table[cum_table_len-1], 0, cum_table_len) * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2677,7 +2748,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":222 + /* "gensim/models/word2vec_inner.pyx":228 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2687,7 +2758,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente goto __pyx_L10_continue; } - /* "gensim/models/word2vec_inner.pyx":223 + /* "gensim/models/word2vec_inner.pyx":229 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2698,7 +2769,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } __pyx_L12:; - /* "gensim/models/word2vec_inner.pyx":225 + /* "gensim/models/word2vec_inner.pyx":231 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2707,7 +2778,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":226 + /* "gensim/models/word2vec_inner.pyx":232 * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2716,7 +2787,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":227 + /* "gensim/models/word2vec_inner.pyx":233 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2734,7 +2805,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":228 + /* "gensim/models/word2vec_inner.pyx":234 * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2744,7 +2815,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente goto __pyx_L10_continue; } - /* "gensim/models/word2vec_inner.pyx":229 + /* "gensim/models/word2vec_inner.pyx":235 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2753,7 +2824,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/word2vec_inner.pyx":230 + /* "gensim/models/word2vec_inner.pyx":236 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2762,7 +2833,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "gensim/models/word2vec_inner.pyx":231 + /* "gensim/models/word2vec_inner.pyx":237 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2771,7 +2842,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":232 + /* "gensim/models/word2vec_inner.pyx":238 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2782,7 +2853,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L10_continue:; } - /* "gensim/models/word2vec_inner.pyx":234 + /* "gensim/models/word2vec_inner.pyx":240 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< @@ -2792,7 +2863,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":235 + /* "gensim/models/word2vec_inner.pyx":241 * * if not cbow_mean: # divide error over summed window vectors * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< @@ -2804,7 +2875,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } __pyx_L17:; - /* "gensim/models/word2vec_inner.pyx":237 + /* "gensim/models/word2vec_inner.pyx":243 * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) * * for m in range(j,k): # <<<<<<<<<<<<<< @@ -2815,7 +2886,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":238 + /* "gensim/models/word2vec_inner.pyx":244 * * for m in range(j,k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2833,7 +2904,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L21_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":239 + /* "gensim/models/word2vec_inner.pyx":245 * for m in range(j,k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2844,7 +2915,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":241 + /* "gensim/models/word2vec_inner.pyx":247 * continue * else: * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< @@ -2856,7 +2927,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_L18_continue:; } - /* "gensim/models/word2vec_inner.pyx":243 + /* "gensim/models/word2vec_inner.pyx":249 * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2866,7 +2937,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":184 + /* "gensim/models/word2vec_inner.pyx":190 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2879,7 +2950,7 @@ static unsigned PY_LONG_LONG __pyx_f_6gensim_6models_14word2vec_inner_fast_sente return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":247 +/* "gensim/models/word2vec_inner.pyx":253 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< @@ -2923,21 +2994,21 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_1train_sentence_sg(Py case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; @@ -2954,7 +3025,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_1train_sentence_sg(Py } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.word2vec_inner.train_sentence_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -2970,6 +3041,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_1train_sentence_sg(Py static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work) { int __pyx_v_hs; int __pyx_v_negative; + int __pyx_v_sample; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; @@ -2991,123 +3063,139 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_word = NULL; PyObject *__pyx_v_item = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_3; - int __pyx_t_4; - Py_ssize_t __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - unsigned PY_LONG_LONG __pyx_t_8; - long __pyx_t_9; - Py_ssize_t __pyx_t_10; - int __pyx_t_11; - int __pyx_t_12; - __pyx_t_5numpy_uint32_t __pyx_t_13; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; + int __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + PyObject *__pyx_t_11 = NULL; + __pyx_t_5numpy_uint32_t __pyx_t_12; + Py_ssize_t __pyx_t_13; PyObject *__pyx_t_14 = NULL; PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; - PyObject *(*__pyx_t_17)(PyObject *); + int __pyx_t_16; + int __pyx_t_17; int __pyx_t_18; - int __pyx_t_19; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_sg", 0); - /* "gensim/models/word2vec_inner.pyx":248 + /* "gensim/models/word2vec_inner.pyx":254 * * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * + * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":249 + /* "gensim/models/word2vec_inner.pyx":255 * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":251 + /* "gensim/models/word2vec_inner.pyx":256 + * cdef int hs = model.hs * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * + * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/word2vec_inner.pyx":258 + * cdef int sample = (model.sample != 0) * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":252 + /* "gensim/models/word2vec_inner.pyx":259 * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t _alpha = alpha */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 259; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":254 + /* "gensim/models/word2vec_inner.pyx":261 * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__alpha = __pyx_t_3; + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 261; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/word2vec_inner.pyx":255 + /* "gensim/models/word2vec_inner.pyx":262 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 262; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":261 + /* "gensim/models/word2vec_inner.pyx":268 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 261; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 261; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":264 + /* "gensim/models/word2vec_inner.pyx":271 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -3116,394 +3204,534 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT */ __pyx_v_result = 0; - /* "gensim/models/word2vec_inner.pyx":277 + /* "gensim/models/word2vec_inner.pyx":285 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) * */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/word2vec_inner.pyx":278 + /* "gensim/models/word2vec_inner.pyx":286 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L3; } __pyx_L3:; - /* "gensim/models/word2vec_inner.pyx":280 + /* "gensim/models/word2vec_inner.pyx":288 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/word2vec_inner.pyx":281 + /* "gensim/models/word2vec_inner.pyx":289 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":282 + /* "gensim/models/word2vec_inner.pyx":290 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * if negative or sample: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":283 + /* "gensim/models/word2vec_inner.pyx":291 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_cum_table_len = __pyx_t_5; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_cum_table_len = __pyx_t_6; + goto __pyx_L4; + } + __pyx_L4:; - /* "gensim/models/word2vec_inner.pyx":284 + /* "gensim/models/word2vec_inner.pyx":292 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_7 = (__pyx_v_negative != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L6_bool_binop_done; + } + __pyx_t_7 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_7; + __pyx_L6_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/word2vec_inner.pyx":293 + * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_v_next_random = __pyx_t_8; - goto __pyx_L4; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_next_random = __pyx_t_9; + goto __pyx_L5; } - __pyx_L4:; + __pyx_L5:; - /* "gensim/models/word2vec_inner.pyx":287 + /* "gensim/models/word2vec_inner.pyx":296 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * + * vlookup = model.vocab */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 296; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/word2vec_inner.pyx":288 - * # convert Python structures to primitive types, so we can release the GIL + /* "gensim/models/word2vec_inner.pyx":298 * work = np.PyArray_DATA(_work) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * - * for i in range(sentence_len): + * vlookup = model.vocab # <<<<<<<<<<<<<< + * i = 0 + * for token in sentence: */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 288; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_5 < __pyx_t_9) != 0)) { - __pyx_t_10 = __pyx_t_5; - } else { - __pyx_t_10 = __pyx_t_9; - } - __pyx_v_sentence_len = ((int)__pyx_t_10); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_v_vlookup = __pyx_t_8; + __pyx_t_8 = 0; - /* "gensim/models/word2vec_inner.pyx":290 - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + /* "gensim/models/word2vec_inner.pyx":299 * - * for i in range(sentence_len): # <<<<<<<<<<<<<< - * word = sentence[i] + * vlookup = model.vocab + * i = 0 # <<<<<<<<<<<<<< + * for token in sentence: + * word = vlookup[token] if token in vlookup else None + */ + __pyx_v_i = 0; + + /* "gensim/models/word2vec_inner.pyx":300 + * vlookup = model.vocab + * i = 0 + * for token in sentence: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None * if word is None: */ - __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + if (likely(PyList_CheckExact(__pyx_v_sentence)) || PyTuple_CheckExact(__pyx_v_sentence)) { + __pyx_t_8 = __pyx_v_sentence; __Pyx_INCREF(__pyx_t_8); __pyx_t_6 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sentence); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_10 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_3 = __pyx_t_10(__pyx_t_8); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":291 - * - * for i in range(sentence_len): - * word = sentence[i] # <<<<<<<<<<<<<< + /* "gensim/models/word2vec_inner.pyx":301 + * i = 0 + * for token in sentence: + * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: - * codelens[i] = 0 + * continue # leaving i unchanged/shortening sentence */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ((__pyx_t_5 != 0)) { + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_t_1; + __pyx_t_1 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_3 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":292 - * for i in range(sentence_len): - * word = sentence[i] + /* "gensim/models/word2vec_inner.pyx":302 + * for token in sentence: + * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< - * codelens[i] = 0 - * else: + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): */ - __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_4 != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_v_word == Py_None); + __pyx_t_7 = (__pyx_t_5 != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":293 - * word = sentence[i] + /* "gensim/models/word2vec_inner.pyx":303 + * word = vlookup[token] if token in vlookup else None * if word is None: - * codelens[i] = 0 # <<<<<<<<<<<<<< - * else: - * indexes[i] = word.index + * continue # leaving i unchanged/shortening sentence # <<<<<<<<<<<<<< + * if sample and word.sample_int < random_int32(&next_random): + * continue */ - (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L7; + goto __pyx_L8_continue; } - /*else*/ { - /* "gensim/models/word2vec_inner.pyx":295 - * codelens[i] = 0 - * else: - * indexes[i] = word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[i] = len(word.code) + /* "gensim/models/word2vec_inner.pyx":304 + * if word is None: + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[i] = word.index */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 295; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + __pyx_t_5 = (__pyx_v_sample != 0); + if (__pyx_t_5) { + } else { + __pyx_t_7 = __pyx_t_5; + goto __pyx_L12_bool_binop_done; + } + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_11 = PyObject_RichCompare(__pyx_t_3, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_t_7 = __pyx_t_5; + __pyx_L12_bool_binop_done:; + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":296 - * else: - * indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) + /* "gensim/models/word2vec_inner.pyx":305 + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[i] = word.index + * if hs: */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + goto __pyx_L8_continue; + } - /* "gensim/models/word2vec_inner.pyx":297 - * indexes[i] = word.index - * if hs: - * codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) + /* "gensim/models/word2vec_inner.pyx":306 + * if sample and word.sample_int < random_int32(&next_random): + * continue + * indexes[i] = word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_10 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_11); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/word2vec_inner.pyx":298 - * if hs: - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(word.point) - * else: + /* "gensim/models/word2vec_inner.pyx":307 + * continue + * indexes[i] = word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = (__pyx_v_hs != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":299 - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * else: - * codelens[i] = 1 - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L8; - } - /*else*/ { + /* "gensim/models/word2vec_inner.pyx":308 + * indexes[i] = word.index + * if hs: + * codelens[i] = len(word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) + */ + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_13 = PyObject_Length(__pyx_t_11); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/word2vec_inner.pyx":301 - * points[i] = np.PyArray_DATA(word.point) - * else: - * codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 - * # single randint() call avoids a big thread-sync slowdown + /* "gensim/models/word2vec_inner.pyx":309 + * if hs: + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(word.point) + * result += 1 */ - (__pyx_v_codelens[__pyx_v_i]) = 1; - } - __pyx_L8:; + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "gensim/models/word2vec_inner.pyx":302 - * else: - * codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(model.random.randint(0, window, sentence_len)): + /* "gensim/models/word2vec_inner.pyx":310 + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * result += 1 + * i += 1 */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + goto __pyx_L14; } - __pyx_L7:; + __pyx_L14:; + + /* "gensim/models/word2vec_inner.pyx":311 + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) + * result += 1 # <<<<<<<<<<<<<< + * i += 1 + * if i == MAX_SENTENCE_LEN: + */ + __pyx_v_result = (__pyx_v_result + 1); + + /* "gensim/models/word2vec_inner.pyx":312 + * points[i] = np.PyArray_DATA(word.point) + * result += 1 + * i += 1 # <<<<<<<<<<<<<< + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "gensim/models/word2vec_inner.pyx":313 + * result += 1 + * i += 1 + * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * sentence_len = i + */ + __pyx_t_7 = ((__pyx_v_i == 10000) != 0); + if (__pyx_t_7) { + + /* "gensim/models/word2vec_inner.pyx":314 + * i += 1 + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * sentence_len = i + * + */ + goto __pyx_L9_break; + } + + /* "gensim/models/word2vec_inner.pyx":300 + * vlookup = model.vocab + * i = 0 + * for token in sentence: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + __pyx_L8_continue:; } + __pyx_L9_break:; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/word2vec_inner.pyx":304 - * result += 1 + /* "gensim/models/word2vec_inner.pyx":315 + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + * sentence_len = i # <<<<<<<<<<<<<< + * + * # single randint() call avoids a big thread-sync slowdown + */ + __pyx_v_sentence_len = __pyx_v_i; + + /* "gensim/models/word2vec_inner.pyx":318 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_11, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = NULL; - __pyx_t_10 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_15); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_14 = NULL; + __pyx_t_6 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_14 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_14)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - __pyx_t_10 = 1; + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_6 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + __pyx_t_15 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + if (__pyx_t_14) { + PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_10, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_15, 0+__pyx_t_6, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_10, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_10, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_16, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (likely(PyList_CheckExact(__pyx_t_7)) || PyTuple_CheckExact(__pyx_t_7)) { - __pyx_t_6 = __pyx_t_7; __Pyx_INCREF(__pyx_t_6); __pyx_t_10 = 0; - __pyx_t_17 = NULL; + PyTuple_SET_ITEM(__pyx_t_15, 1+__pyx_t_6, __pyx_t_11); + __Pyx_GIVEREF(__pyx_t_11); + PyTuple_SET_ITEM(__pyx_t_15, 2+__pyx_t_6, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_11 = 0; + __pyx_t_3 = 0; + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_15, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (likely(PyList_CheckExact(__pyx_t_8)) || PyTuple_CheckExact(__pyx_t_8)) { + __pyx_t_1 = __pyx_t_8; __Pyx_INCREF(__pyx_t_1); __pyx_t_6 = 0; + __pyx_t_10 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_17 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_8); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_10 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_6)) break; + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_7); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_7); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_7 = __pyx_t_17(__pyx_t_6); - if (unlikely(!__pyx_t_7)) { + __pyx_t_8 = __pyx_t_10(__pyx_t_1); + if (unlikely(!__pyx_t_8)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } - __Pyx_GOTREF(__pyx_t_7); + __Pyx_GOTREF(__pyx_t_8); } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_7); - __pyx_t_7 = 0; + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_8); + __pyx_t_8 = 0; __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/word2vec_inner.pyx":305 + /* "gensim/models/word2vec_inner.pyx":319 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 305; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/word2vec_inner.pyx":304 - * result += 1 + /* "gensim/models/word2vec_inner.pyx":318 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":308 + /* "gensim/models/word2vec_inner.pyx":322 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window + reduced_windows[i] */ { #ifdef WITH_THREAD @@ -3512,57 +3740,37 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT #endif /*try:*/ { - /* "gensim/models/word2vec_inner.pyx":309 + /* "gensim/models/word2vec_inner.pyx":323 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< - * if codelens[i] == 0: - * continue + * j = i - window + reduced_windows[i] + * if j < 0: */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_2; __pyx_t_16+=1) { + __pyx_v_i = __pyx_t_16; - /* "gensim/models/word2vec_inner.pyx":310 + /* "gensim/models/word2vec_inner.pyx":324 * with nogil: * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * j = i - window + reduced_windows[i] - */ - __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); - if (__pyx_t_12) { - - /* "gensim/models/word2vec_inner.pyx":311 - * for i in range(sentence_len): - * if codelens[i] == 0: - * continue # <<<<<<<<<<<<<< - * j = i - window + reduced_windows[i] - * if j < 0: - */ - goto __pyx_L14_continue; - } - - /* "gensim/models/word2vec_inner.pyx":312 - * if codelens[i] == 0: - * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< * if j < 0: * j = 0 */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/word2vec_inner.pyx":313 - * continue + /* "gensim/models/word2vec_inner.pyx":325 + * for i in range(sentence_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 * k = i + window + 1 - reduced_windows[i] */ - __pyx_t_12 = ((__pyx_v_j < 0) != 0); - if (__pyx_t_12) { + __pyx_t_7 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":314 + /* "gensim/models/word2vec_inner.pyx":326 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3570,11 +3778,11 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L17; + goto __pyx_L23; } - __pyx_L17:; + __pyx_L23:; - /* "gensim/models/word2vec_inner.pyx":315 + /* "gensim/models/word2vec_inner.pyx":327 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3583,78 +3791,70 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/word2vec_inner.pyx":316 + /* "gensim/models/word2vec_inner.pyx":328 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< * k = sentence_len * for j in range(j, k): */ - __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); - if (__pyx_t_12) { + __pyx_t_7 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":317 + /* "gensim/models/word2vec_inner.pyx":329 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< * for j in range(j, k): - * if j == i or codelens[j] == 0: + * if j == i: */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L18; + goto __pyx_L24; } - __pyx_L18:; + __pyx_L24:; - /* "gensim/models/word2vec_inner.pyx":318 + /* "gensim/models/word2vec_inner.pyx":330 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< - * if j == i or codelens[j] == 0: + * if j == i: * continue */ - __pyx_t_18 = __pyx_v_k; - for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { - __pyx_v_j = __pyx_t_19; + __pyx_t_17 = __pyx_v_k; + for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_17; __pyx_t_18+=1) { + __pyx_v_j = __pyx_t_18; - /* "gensim/models/word2vec_inner.pyx":319 + /* "gensim/models/word2vec_inner.pyx":331 * k = sentence_len * for j in range(j, k): - * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< + * if j == i: # <<<<<<<<<<<<<< * continue * if hs: */ - __pyx_t_4 = ((__pyx_v_j == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_12 = __pyx_t_4; - goto __pyx_L22_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); - __pyx_t_12 = __pyx_t_4; - __pyx_L22_bool_binop_done:; - if (__pyx_t_12) { + __pyx_t_7 = ((__pyx_v_j == __pyx_v_i) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":320 + /* "gensim/models/word2vec_inner.pyx":332 * for j in range(j, k): - * if j == i or codelens[j] == 0: + * if j == i: * continue # <<<<<<<<<<<<<< * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) */ - goto __pyx_L19_continue; + goto __pyx_L25_continue; } - /* "gensim/models/word2vec_inner.pyx":321 - * if j == i or codelens[j] == 0: + /* "gensim/models/word2vec_inner.pyx":333 + * if j == i: * continue * if hs: # <<<<<<<<<<<<<< * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + __pyx_t_7 = (__pyx_v_hs != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":322 + /* "gensim/models/word2vec_inner.pyx":334 * continue * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) # <<<<<<<<<<<<<< @@ -3662,21 +3862,21 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) */ __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_word_locks); - goto __pyx_L24; + goto __pyx_L28; } - __pyx_L24:; + __pyx_L28:; - /* "gensim/models/word2vec_inner.pyx":323 + /* "gensim/models/word2vec_inner.pyx":335 * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) * */ - __pyx_t_12 = (__pyx_v_negative != 0); - if (__pyx_t_12) { + __pyx_t_7 = (__pyx_v_negative != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":324 + /* "gensim/models/word2vec_inner.pyx":336 * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) # <<<<<<<<<<<<<< @@ -3684,34 +3884,33 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * return result */ __pyx_v_next_random = __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_word_locks); - goto __pyx_L25; + goto __pyx_L29; } - __pyx_L25:; - __pyx_L19_continue:; + __pyx_L29:; + __pyx_L25_continue:; } - __pyx_L14_continue:; } } - /* "gensim/models/word2vec_inner.pyx":308 + /* "gensim/models/word2vec_inner.pyx":322 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window + reduced_windows[i] */ /*finally:*/ { /*normal exit:*/{ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L13; + goto __pyx_L20; } - __pyx_L13:; + __pyx_L20:; } } - /* "gensim/models/word2vec_inner.pyx":326 + /* "gensim/models/word2vec_inner.pyx":338 * next_random = fast_sentence_sg_neg(negative, cum_table, cum_table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) * * return result # <<<<<<<<<<<<<< @@ -3719,13 +3918,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":247 + /* "gensim/models/word2vec_inner.pyx":253 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< @@ -3736,14 +3935,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_11); __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("gensim.models.word2vec_inner.train_sentence_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_word); __Pyx_XDECREF(__pyx_v_item); __Pyx_XGIVEREF(__pyx_r); @@ -3751,7 +3952,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_train_sentence_sg(CYT return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":329 +/* "gensim/models/word2vec_inner.pyx":341 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -3797,26 +3998,26 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_3train_sentence_cbow( case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3835,7 +4036,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_3train_sentence_cbow( } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.word2vec_inner.train_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3851,6 +4052,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_3train_sentence_cbow( static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1) { int __pyx_v_hs; int __pyx_v_negative; + int __pyx_v_sample; int __pyx_v_cbow_mean; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0; __pyx_t_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks; @@ -3874,134 +4076,150 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( __pyx_t_5numpy_uint32_t *__pyx_v_cum_table; unsigned PY_LONG_LONG __pyx_v_cum_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; + PyObject *__pyx_v_vlookup = NULL; + PyObject *__pyx_v_token = NULL; PyObject *__pyx_v_word = NULL; PyObject *__pyx_v_item = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; - __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_3; - int __pyx_t_4; - Py_ssize_t __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - unsigned PY_LONG_LONG __pyx_t_8; - long __pyx_t_9; - Py_ssize_t __pyx_t_10; - int __pyx_t_11; - int __pyx_t_12; - __pyx_t_5numpy_uint32_t __pyx_t_13; + PyObject *__pyx_t_3 = NULL; + __pyx_t_6gensim_6models_14word2vec_inner_REAL_t __pyx_t_4; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; + int __pyx_t_7; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *(*__pyx_t_10)(PyObject *); + PyObject *__pyx_t_11 = NULL; + __pyx_t_5numpy_uint32_t __pyx_t_12; + Py_ssize_t __pyx_t_13; PyObject *__pyx_t_14 = NULL; PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; - PyObject *(*__pyx_t_17)(PyObject *); + int __pyx_t_16; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_cbow", 0); - /* "gensim/models/word2vec_inner.pyx":330 + /* "gensim/models/word2vec_inner.pyx":342 * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int cbow_mean = model.cbow_mean + * cdef int sample = (model.sample != 0) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":331 + /* "gensim/models/word2vec_inner.pyx":343 * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":332 - * cdef int hs = model.hs + /* "gensim/models/word2vec_inner.pyx":344 + * cdef int hs = model.hs + * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) # <<<<<<<<<<<<<< + * cdef int cbow_mean = model.cbow_mean + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_sample); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyObject_RichCompare(__pyx_t_1, __pyx_int_0, Py_NE); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 344; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_sample = __pyx_t_2; + + /* "gensim/models/word2vec_inner.pyx":345 * cdef int negative = model.negative + * cdef int sample = (model.sample != 0) * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":334 + /* "gensim/models/word2vec_inner.pyx":347 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":335 + /* "gensim/models/word2vec_inner.pyx":348 * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_word_locks = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":338 + /* "gensim/models/word2vec_inner.pyx":351 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__alpha = __pyx_t_3; + __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_4 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 351; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_4; - /* "gensim/models/word2vec_inner.pyx":339 + /* "gensim/models/word2vec_inner.pyx":352 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 352; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 352; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":345 + /* "gensim/models/word2vec_inner.pyx":358 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":348 + /* "gensim/models/word2vec_inner.pyx":361 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -4010,399 +4228,539 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( */ __pyx_v_result = 0; - /* "gensim/models/word2vec_inner.pyx":361 + /* "gensim/models/word2vec_inner.pyx":375 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) * */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "gensim/models/word2vec_inner.pyx":362 + /* "gensim/models/word2vec_inner.pyx":376 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; goto __pyx_L3; } __pyx_L3:; - /* "gensim/models/word2vec_inner.pyx":364 + /* "gensim/models/word2vec_inner.pyx":378 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "gensim/models/word2vec_inner.pyx":365 + /* "gensim/models/word2vec_inner.pyx":379 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 379; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 379; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":366 + /* "gensim/models/word2vec_inner.pyx":380 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) # <<<<<<<<<<<<<< * cum_table_len = len(model.cum_table) - * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * if negative or sample: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_cum_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_3))); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":367 + /* "gensim/models/word2vec_inner.pyx":381 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) # <<<<<<<<<<<<<< + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) - * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_cum_table_len = __pyx_t_5; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cum_table); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyObject_Length(__pyx_t_3); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_v_cum_table_len = __pyx_t_6; + goto __pyx_L4; + } + __pyx_L4:; - /* "gensim/models/word2vec_inner.pyx":368 + /* "gensim/models/word2vec_inner.pyx":382 * cum_table = (np.PyArray_DATA(model.cum_table)) * cum_table_len = len(model.cum_table) + * if negative or sample: # <<<<<<<<<<<<<< + * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) + * + */ + __pyx_t_7 = (__pyx_v_negative != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_5 = __pyx_t_7; + goto __pyx_L6_bool_binop_done; + } + __pyx_t_7 = (__pyx_v_sample != 0); + __pyx_t_5 = __pyx_t_7; + __pyx_L6_bool_binop_done:; + if (__pyx_t_5) { + + /* "gensim/models/word2vec_inner.pyx":383 + * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_t_6, __pyx_t_1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_3, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = PyNumber_Add(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_v_next_random = __pyx_t_8; - goto __pyx_L4; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_8); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_v_next_random = __pyx_t_9; + goto __pyx_L5; } - __pyx_L4:; + __pyx_L5:; - /* "gensim/models/word2vec_inner.pyx":371 + /* "gensim/models/word2vec_inner.pyx":386 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 386; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/word2vec_inner.pyx":372 + /* "gensim/models/word2vec_inner.pyx":387 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * + * vlookup = model.vocab */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/word2vec_inner.pyx":373 - * work = np.PyArray_DATA(_work) + /* "gensim/models/word2vec_inner.pyx":389 * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * - * for i in range(sentence_len): + * vlookup = model.vocab # <<<<<<<<<<<<<< + * i = 0 + * for token in sentence: */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_5 < __pyx_t_9) != 0)) { - __pyx_t_10 = __pyx_t_5; - } else { - __pyx_t_10 = __pyx_t_9; - } - __pyx_v_sentence_len = ((int)__pyx_t_10); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_v_vlookup = __pyx_t_8; + __pyx_t_8 = 0; - /* "gensim/models/word2vec_inner.pyx":375 - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + /* "gensim/models/word2vec_inner.pyx":390 * - * for i in range(sentence_len): # <<<<<<<<<<<<<< - * word = sentence[i] + * vlookup = model.vocab + * i = 0 # <<<<<<<<<<<<<< + * for token in sentence: + * word = vlookup[token] if token in vlookup else None + */ + __pyx_v_i = 0; + + /* "gensim/models/word2vec_inner.pyx":391 + * vlookup = model.vocab + * i = 0 + * for token in sentence: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None * if word is None: */ - __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + if (likely(PyList_CheckExact(__pyx_v_sentence)) || PyTuple_CheckExact(__pyx_v_sentence)) { + __pyx_t_8 = __pyx_v_sentence; __Pyx_INCREF(__pyx_t_8); __pyx_t_6 = 0; + __pyx_t_10 = NULL; + } else { + __pyx_t_6 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_v_sentence); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_10 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + for (;;) { + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_3); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_3 = __pyx_t_10(__pyx_t_8); + if (unlikely(!__pyx_t_3)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_3); + } + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":376 - * - * for i in range(sentence_len): - * word = sentence[i] # <<<<<<<<<<<<<< + /* "gensim/models/word2vec_inner.pyx":392 + * i = 0 + * for token in sentence: + * word = vlookup[token] if token in vlookup else None # <<<<<<<<<<<<<< * if word is None: - * codelens[i] = 0 + * continue # leaving i unchanged/shortening sentence */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; + __pyx_t_5 = (__Pyx_PySequence_Contains(__pyx_v_token, __pyx_v_vlookup, Py_EQ)); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if ((__pyx_t_5 != 0)) { + __pyx_t_1 = PyObject_GetItem(__pyx_v_vlookup, __pyx_v_token); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = __pyx_t_1; + __pyx_t_1 = 0; + } else { + __Pyx_INCREF(Py_None); + __pyx_t_3 = Py_None; + } + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_3); + __pyx_t_3 = 0; - /* "gensim/models/word2vec_inner.pyx":377 - * for i in range(sentence_len): - * word = sentence[i] + /* "gensim/models/word2vec_inner.pyx":393 + * for token in sentence: + * word = vlookup[token] if token in vlookup else None * if word is None: # <<<<<<<<<<<<<< - * codelens[i] = 0 - * else: + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): */ - __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_4 != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_v_word == Py_None); + __pyx_t_7 = (__pyx_t_5 != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":378 - * word = sentence[i] + /* "gensim/models/word2vec_inner.pyx":394 + * word = vlookup[token] if token in vlookup else None * if word is None: - * codelens[i] = 0 # <<<<<<<<<<<<<< - * else: - * indexes[i] = word.index + * continue # leaving i unchanged/shortening sentence # <<<<<<<<<<<<<< + * if sample and word.sample_int < random_int32(&next_random): + * continue */ - (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L7; + goto __pyx_L8_continue; } - /*else*/ { - /* "gensim/models/word2vec_inner.pyx":380 - * codelens[i] = 0 - * else: - * indexes[i] = word.index # <<<<<<<<<<<<<< - * if hs: - * codelens[i] = len(word.code) + /* "gensim/models/word2vec_inner.pyx":395 + * if word is None: + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): # <<<<<<<<<<<<<< + * continue + * indexes[i] = word.index */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + __pyx_t_5 = (__pyx_v_sample != 0); + if (__pyx_t_5) { + } else { + __pyx_t_7 = __pyx_t_5; + goto __pyx_L12_bool_binop_done; + } + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_sample_int); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_1 = __Pyx_PyInt_From_unsigned_PY_LONG_LONG(__pyx_f_6gensim_6models_14word2vec_inner_random_int32((&__pyx_v_next_random))); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_11 = PyObject_RichCompare(__pyx_t_3, __pyx_t_1, Py_LT); __Pyx_XGOTREF(__pyx_t_11); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_5 = __Pyx_PyObject_IsTrue(__pyx_t_11); if (unlikely(__pyx_t_5 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 395; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_t_7 = __pyx_t_5; + __pyx_L12_bool_binop_done:; + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":381 - * else: - * indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) + /* "gensim/models/word2vec_inner.pyx":396 + * continue # leaving i unchanged/shortening sentence + * if sample and word.sample_int < random_int32(&next_random): + * continue # <<<<<<<<<<<<<< + * indexes[i] = word.index + * if hs: */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + goto __pyx_L8_continue; + } - /* "gensim/models/word2vec_inner.pyx":382 - * indexes[i] = word.index - * if hs: - * codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) + /* "gensim/models/word2vec_inner.pyx":397 + * if sample and word.sample_int < random_int32(&next_random): + * continue + * indexes[i] = word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_10 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_t_11); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/word2vec_inner.pyx":383 - * if hs: - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(word.point) - * else: + /* "gensim/models/word2vec_inner.pyx":398 + * continue + * indexes[i] = word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = (__pyx_v_hs != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":384 - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * else: - * codelens[i] = 1 - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - goto __pyx_L8; - } - /*else*/ { + /* "gensim/models/word2vec_inner.pyx":399 + * indexes[i] = word.index + * if hs: + * codelens[i] = len(word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) + */ + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_13 = PyObject_Length(__pyx_t_11); if (unlikely(__pyx_t_13 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_13); - /* "gensim/models/word2vec_inner.pyx":386 - * points[i] = np.PyArray_DATA(word.point) - * else: - * codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 - * # single randint() call avoids a big thread-sync slowdown + /* "gensim/models/word2vec_inner.pyx":400 + * if hs: + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(word.point) + * result += 1 */ - (__pyx_v_codelens[__pyx_v_i]) = 1; - } - __pyx_L8:; + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; - /* "gensim/models/word2vec_inner.pyx":387 - * else: - * codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(model.random.randint(0, window, sentence_len)): + /* "gensim/models/word2vec_inner.pyx":401 + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * result += 1 + * i += 1 */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + if (!(likely(((__pyx_t_11) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_11, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_11))); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + goto __pyx_L14; + } + __pyx_L14:; + + /* "gensim/models/word2vec_inner.pyx":402 + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) + * result += 1 # <<<<<<<<<<<<<< + * i += 1 + * if i == MAX_SENTENCE_LEN: + */ + __pyx_v_result = (__pyx_v_result + 1); + + /* "gensim/models/word2vec_inner.pyx":403 + * points[i] = np.PyArray_DATA(word.point) + * result += 1 + * i += 1 # <<<<<<<<<<<<<< + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + */ + __pyx_v_i = (__pyx_v_i + 1); + + /* "gensim/models/word2vec_inner.pyx":404 + * result += 1 + * i += 1 + * if i == MAX_SENTENCE_LEN: # <<<<<<<<<<<<<< + * break # TODO: log warning, tally overflow? + * sentence_len = i + */ + __pyx_t_7 = ((__pyx_v_i == 10000) != 0); + if (__pyx_t_7) { + + /* "gensim/models/word2vec_inner.pyx":405 + * i += 1 + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? # <<<<<<<<<<<<<< + * sentence_len = i + * + */ + goto __pyx_L9_break; } - __pyx_L7:; + + /* "gensim/models/word2vec_inner.pyx":391 + * vlookup = model.vocab + * i = 0 + * for token in sentence: # <<<<<<<<<<<<<< + * word = vlookup[token] if token in vlookup else None + * if word is None: + */ + __pyx_L8_continue:; } + __pyx_L9_break:; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "gensim/models/word2vec_inner.pyx":389 - * result += 1 + /* "gensim/models/word2vec_inner.pyx":406 + * if i == MAX_SENTENCE_LEN: + * break # TODO: log warning, tally overflow? + * sentence_len = i # <<<<<<<<<<<<<< + * + * # single randint() call avoids a big thread-sync slowdown + */ + __pyx_v_sentence_len = __pyx_v_i; + + /* "gensim/models/word2vec_inner.pyx":409 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ __pyx_t_2 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_random); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_11, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = NULL; - __pyx_t_10 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_15); + __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; + __pyx_t_11 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_11); + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_14 = NULL; + __pyx_t_6 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_14 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_14)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_14); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); - __pyx_t_10 = 1; + __Pyx_DECREF_SET(__pyx_t_1, function); + __pyx_t_6 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + __pyx_t_15 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + if (__pyx_t_14) { + PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_14 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_10, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_15, 0+__pyx_t_6, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_10, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_10, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_16, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (likely(PyList_CheckExact(__pyx_t_7)) || PyTuple_CheckExact(__pyx_t_7)) { - __pyx_t_6 = __pyx_t_7; __Pyx_INCREF(__pyx_t_6); __pyx_t_10 = 0; - __pyx_t_17 = NULL; + PyTuple_SET_ITEM(__pyx_t_15, 1+__pyx_t_6, __pyx_t_11); + __Pyx_GIVEREF(__pyx_t_11); + PyTuple_SET_ITEM(__pyx_t_15, 2+__pyx_t_6, __pyx_t_3); + __Pyx_GIVEREF(__pyx_t_3); + __pyx_t_11 = 0; + __pyx_t_3 = 0; + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_15, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (likely(PyList_CheckExact(__pyx_t_8)) || PyTuple_CheckExact(__pyx_t_8)) { + __pyx_t_1 = __pyx_t_8; __Pyx_INCREF(__pyx_t_1); __pyx_t_6 = 0; + __pyx_t_10 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_17 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = -1; __pyx_t_1 = PyObject_GetIter(__pyx_t_8); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_10 = Py_TYPE(__pyx_t_1)->tp_iternext; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_6)) break; + if (likely(!__pyx_t_10)) { + if (likely(PyList_CheckExact(__pyx_t_1))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_7); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyList_GET_ITEM(__pyx_t_1, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_1)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_10); __Pyx_INCREF(__pyx_t_7); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_6); __Pyx_INCREF(__pyx_t_8); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_6, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PySequence_ITEM(__pyx_t_1, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_7 = __pyx_t_17(__pyx_t_6); - if (unlikely(!__pyx_t_7)) { + __pyx_t_8 = __pyx_t_10(__pyx_t_1); + if (unlikely(!__pyx_t_8)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 389; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } - __Pyx_GOTREF(__pyx_t_7); + __Pyx_GOTREF(__pyx_t_8); } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_7); - __pyx_t_7 = 0; + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_8); + __pyx_t_8 = 0; __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "gensim/models/word2vec_inner.pyx":390 + /* "gensim/models/word2vec_inner.pyx":410 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + __pyx_t_12 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_12 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 410; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_12; - /* "gensim/models/word2vec_inner.pyx":389 - * result += 1 + /* "gensim/models/word2vec_inner.pyx":409 + * * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(model.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":393 + /* "gensim/models/word2vec_inner.pyx":413 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4416,7 +4774,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( #endif /*try:*/ { - /* "gensim/models/word2vec_inner.pyx":394 + /* "gensim/models/word2vec_inner.pyx":414 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4424,30 +4782,30 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * continue */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_2; __pyx_t_16+=1) { + __pyx_v_i = __pyx_t_16; - /* "gensim/models/word2vec_inner.pyx":395 + /* "gensim/models/word2vec_inner.pyx":415 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< * continue * j = i - window + reduced_windows[i] */ - __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); - if (__pyx_t_12) { + __pyx_t_7 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":396 + /* "gensim/models/word2vec_inner.pyx":416 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < 0: */ - goto __pyx_L14_continue; + goto __pyx_L21_continue; } - /* "gensim/models/word2vec_inner.pyx":397 + /* "gensim/models/word2vec_inner.pyx":417 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4456,17 +4814,17 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/word2vec_inner.pyx":398 + /* "gensim/models/word2vec_inner.pyx":418 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 * k = i + window + 1 - reduced_windows[i] */ - __pyx_t_12 = ((__pyx_v_j < 0) != 0); - if (__pyx_t_12) { + __pyx_t_7 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":399 + /* "gensim/models/word2vec_inner.pyx":419 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -4474,11 +4832,11 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L17; + goto __pyx_L24; } - __pyx_L17:; + __pyx_L24:; - /* "gensim/models/word2vec_inner.pyx":400 + /* "gensim/models/word2vec_inner.pyx":420 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4487,17 +4845,17 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "gensim/models/word2vec_inner.pyx":401 + /* "gensim/models/word2vec_inner.pyx":421 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< * k = sentence_len * if hs: */ - __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); - if (__pyx_t_12) { + __pyx_t_7 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":402 + /* "gensim/models/word2vec_inner.pyx":422 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -4505,21 +4863,21 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L18; + goto __pyx_L25; } - __pyx_L18:; + __pyx_L25:; - /* "gensim/models/word2vec_inner.pyx":403 + /* "gensim/models/word2vec_inner.pyx":423 * if k > sentence_len: * k = sentence_len * if hs: # <<<<<<<<<<<<<< * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + __pyx_t_7 = (__pyx_v_hs != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":404 + /* "gensim/models/word2vec_inner.pyx":424 * k = sentence_len * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) # <<<<<<<<<<<<<< @@ -4527,21 +4885,21 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) */ __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks); - goto __pyx_L19; + goto __pyx_L26; } - __pyx_L19:; + __pyx_L26:; - /* "gensim/models/word2vec_inner.pyx":405 + /* "gensim/models/word2vec_inner.pyx":425 * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) * */ - __pyx_t_12 = (__pyx_v_negative != 0); - if (__pyx_t_12) { + __pyx_t_7 = (__pyx_v_negative != 0); + if (__pyx_t_7) { - /* "gensim/models/word2vec_inner.pyx":406 + /* "gensim/models/word2vec_inner.pyx":426 * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) # <<<<<<<<<<<<<< @@ -4549,14 +4907,14 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * return result */ __pyx_v_next_random = __pyx_f_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_cum_table, __pyx_v_cum_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks); - goto __pyx_L20; + goto __pyx_L27; } - __pyx_L20:; - __pyx_L14_continue:; + __pyx_L27:; + __pyx_L21_continue:; } } - /* "gensim/models/word2vec_inner.pyx":393 + /* "gensim/models/word2vec_inner.pyx":413 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4568,13 +4926,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L13; + goto __pyx_L20; } - __pyx_L13:; + __pyx_L20:; } } - /* "gensim/models/word2vec_inner.pyx":408 + /* "gensim/models/word2vec_inner.pyx":428 * next_random = fast_sentence_cbow_neg(negative, cum_table, cum_table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) * * return result # <<<<<<<<<<<<<< @@ -4582,13 +4940,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_6 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_r = __pyx_t_6; - __pyx_t_6 = 0; + __pyx_t_1 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 428; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":329 + /* "gensim/models/word2vec_inner.pyx":341 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -4599,14 +4957,16 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_11); __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("gensim.models.word2vec_inner.train_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_vlookup); + __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_word); __Pyx_XDECREF(__pyx_v_item); __Pyx_XGIVEREF(__pyx_r); @@ -4614,7 +4974,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_2train_sentence_cbow( return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":412 +/* "gensim/models/word2vec_inner.pyx":432 * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< @@ -4656,16 +5016,16 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_5score_sentence_sg(Py case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "score_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "score_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -4680,7 +5040,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_5score_sentence_sg(Py } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_sg", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.word2vec_inner.score_sentence_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -4727,46 +5087,46 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY int __pyx_clineno = 0; __Pyx_RefNannySetupContext("score_sentence_sg", 0); - /* "gensim/models/word2vec_inner.pyx":414 + /* "gensim/models/word2vec_inner.pyx":434 * def score_sentence_sg(model, sentence, _work): * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef int size = model.layer1_size */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":416 + /* "gensim/models/word2vec_inner.pyx":436 * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *work * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 436; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":421 + /* "gensim/models/word2vec_inner.pyx":441 * cdef np.uint32_t indexes[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":424 + /* "gensim/models/word2vec_inner.pyx":444 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -4775,37 +5135,37 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY */ __pyx_v_result = 0; - /* "gensim/models/word2vec_inner.pyx":430 + /* "gensim/models/word2vec_inner.pyx":450 * cdef np.uint8_t *codes[MAX_SENTENCE_LEN] * * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":433 + /* "gensim/models/word2vec_inner.pyx":453 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/word2vec_inner.pyx":434 + /* "gensim/models/word2vec_inner.pyx":454 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_3 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 434; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_4 = 10000; if (((__pyx_t_3 < __pyx_t_4) != 0)) { __pyx_t_5 = __pyx_t_3; @@ -4814,7 +5174,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "gensim/models/word2vec_inner.pyx":436 + /* "gensim/models/word2vec_inner.pyx":456 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4825,19 +5185,19 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_2; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "gensim/models/word2vec_inner.pyx":437 + /* "gensim/models/word2vec_inner.pyx":457 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 437; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":438 + /* "gensim/models/word2vec_inner.pyx":458 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -4848,7 +5208,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_t_8 = (__pyx_t_7 != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":439 + /* "gensim/models/word2vec_inner.pyx":459 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -4860,59 +5220,59 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":441 + /* "gensim/models/word2vec_inner.pyx":461 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_9 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_9 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_9 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_9; - /* "gensim/models/word2vec_inner.pyx":442 + /* "gensim/models/word2vec_inner.pyx":462 * else: * indexes[i] = word.index * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - /* "gensim/models/word2vec_inner.pyx":443 + /* "gensim/models/word2vec_inner.pyx":463 * indexes[i] = word.index * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * result += 1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":444 + /* "gensim/models/word2vec_inner.pyx":464 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":445 + /* "gensim/models/word2vec_inner.pyx":465 * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) * result += 1 # <<<<<<<<<<<<<< @@ -4924,7 +5284,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_L5:; } - /* "gensim/models/word2vec_inner.pyx":448 + /* "gensim/models/word2vec_inner.pyx":468 * * # release GIL & train on the sentence * work[0] = 0.0 # <<<<<<<<<<<<<< @@ -4933,7 +5293,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY */ (__pyx_v_work[0]) = 0.0; - /* "gensim/models/word2vec_inner.pyx":450 + /* "gensim/models/word2vec_inner.pyx":470 * work[0] = 0.0 * * with nogil: # <<<<<<<<<<<<<< @@ -4947,7 +5307,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY #endif /*try:*/ { - /* "gensim/models/word2vec_inner.pyx":451 + /* "gensim/models/word2vec_inner.pyx":471 * * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4958,7 +5318,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_2; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "gensim/models/word2vec_inner.pyx":452 + /* "gensim/models/word2vec_inner.pyx":472 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -4968,7 +5328,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_t_8 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":453 + /* "gensim/models/word2vec_inner.pyx":473 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -4978,7 +5338,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY goto __pyx_L9_continue; } - /* "gensim/models/word2vec_inner.pyx":454 + /* "gensim/models/word2vec_inner.pyx":474 * if codelens[i] == 0: * continue * j = i - window # <<<<<<<<<<<<<< @@ -4987,7 +5347,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "gensim/models/word2vec_inner.pyx":455 + /* "gensim/models/word2vec_inner.pyx":475 * continue * j = i - window * if j < 0: # <<<<<<<<<<<<<< @@ -4997,7 +5357,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_t_8 = ((__pyx_v_j < 0) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":456 + /* "gensim/models/word2vec_inner.pyx":476 * j = i - window * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5009,7 +5369,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } __pyx_L12:; - /* "gensim/models/word2vec_inner.pyx":457 + /* "gensim/models/word2vec_inner.pyx":477 * if j < 0: * j = 0 * k = i + window + 1 # <<<<<<<<<<<<<< @@ -5018,7 +5378,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "gensim/models/word2vec_inner.pyx":458 + /* "gensim/models/word2vec_inner.pyx":478 * j = 0 * k = i + window + 1 * if k > sentence_len: # <<<<<<<<<<<<<< @@ -5028,7 +5388,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_t_8 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":459 + /* "gensim/models/word2vec_inner.pyx":479 * k = i + window + 1 * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -5040,7 +5400,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } __pyx_L13:; - /* "gensim/models/word2vec_inner.pyx":460 + /* "gensim/models/word2vec_inner.pyx":480 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -5051,7 +5411,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY for (__pyx_t_11 = __pyx_v_j; __pyx_t_11 < __pyx_t_10; __pyx_t_11+=1) { __pyx_v_j = __pyx_t_11; - /* "gensim/models/word2vec_inner.pyx":461 + /* "gensim/models/word2vec_inner.pyx":481 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -5069,7 +5429,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY __pyx_L17_bool_binop_done:; if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":462 + /* "gensim/models/word2vec_inner.pyx":482 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< @@ -5079,7 +5439,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY goto __pyx_L14_continue; } - /* "gensim/models/word2vec_inner.pyx":463 + /* "gensim/models/word2vec_inner.pyx":483 * if j == i or codelens[j] == 0: * continue * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) # <<<<<<<<<<<<<< @@ -5093,7 +5453,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } } - /* "gensim/models/word2vec_inner.pyx":450 + /* "gensim/models/word2vec_inner.pyx":470 * work[0] = 0.0 * * with nogil: # <<<<<<<<<<<<<< @@ -5111,7 +5471,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY } } - /* "gensim/models/word2vec_inner.pyx":465 + /* "gensim/models/word2vec_inner.pyx":485 * score_pair_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], work) * * return work[0] # <<<<<<<<<<<<<< @@ -5119,13 +5479,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY * cdef void score_pair_sg_hs( */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble((__pyx_v_work[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyFloat_FromDouble((__pyx_v_work[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":412 + /* "gensim/models/word2vec_inner.pyx":432 * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< @@ -5145,7 +5505,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_4score_sentence_sg(CY return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":467 +/* "gensim/models/word2vec_inner.pyx":487 * return work[0] * * cdef void score_pair_sg_hs( # <<<<<<<<<<<<<< @@ -5165,7 +5525,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n int __pyx_t_4; long __pyx_t_5; - /* "gensim/models/word2vec_inner.pyx":473 + /* "gensim/models/word2vec_inner.pyx":493 * * cdef long long b * cdef long long row1 = word2_index * size, row2, sgn # <<<<<<<<<<<<<< @@ -5174,7 +5534,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":476 + /* "gensim/models/word2vec_inner.pyx":496 * cdef REAL_t f * * for b in range(codelen): # <<<<<<<<<<<<<< @@ -5185,7 +5545,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":477 + /* "gensim/models/word2vec_inner.pyx":497 * * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -5194,7 +5554,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":478 + /* "gensim/models/word2vec_inner.pyx":498 * for b in range(codelen): * row2 = word_point[b] * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -5203,7 +5563,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":479 + /* "gensim/models/word2vec_inner.pyx":499 * row2 = word_point[b] * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 # <<<<<<<<<<<<<< @@ -5212,7 +5572,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_sgn = __Pyx_pow_long(-1, ((long)(__pyx_v_word_code[__pyx_v_b]))); - /* "gensim/models/word2vec_inner.pyx":480 + /* "gensim/models/word2vec_inner.pyx":500 * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 * f = sgn*f # <<<<<<<<<<<<<< @@ -5221,7 +5581,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_f = (__pyx_v_sgn * __pyx_v_f); - /* "gensim/models/word2vec_inner.pyx":481 + /* "gensim/models/word2vec_inner.pyx":501 * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 * f = sgn*f * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -5239,7 +5599,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":482 + /* "gensim/models/word2vec_inner.pyx":502 * f = sgn*f * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -5249,7 +5609,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n goto __pyx_L3_continue; } - /* "gensim/models/word2vec_inner.pyx":483 + /* "gensim/models/word2vec_inner.pyx":503 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -5258,7 +5618,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/word2vec_inner.pyx":484 + /* "gensim/models/word2vec_inner.pyx":504 * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * work[0] += f # <<<<<<<<<<<<<< @@ -5270,7 +5630,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n __pyx_L3_continue:; } - /* "gensim/models/word2vec_inner.pyx":467 + /* "gensim/models/word2vec_inner.pyx":487 * return work[0] * * cdef void score_pair_sg_hs( # <<<<<<<<<<<<<< @@ -5281,7 +5641,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_sg_hs(__pyx_t_5n /* function exit code */ } -/* "gensim/models/word2vec_inner.pyx":486 +/* "gensim/models/word2vec_inner.pyx":506 * work[0] += f * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< @@ -5325,21 +5685,21 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_7score_sentence_cbow( case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "score_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "score_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; @@ -5356,7 +5716,7 @@ static PyObject *__pyx_pw_6gensim_6models_14word2vec_inner_7score_sentence_cbow( } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("score_sentence_cbow", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("gensim.models.word2vec_inner.score_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5403,59 +5763,59 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( int __pyx_clineno = 0; __Pyx_RefNannySetupContext("score_sentence_cbow", 0); - /* "gensim/models/word2vec_inner.pyx":488 + /* "gensim/models/word2vec_inner.pyx":508 * def score_sentence_cbow(model, sentence, _work, _neu1): * * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":490 + /* "gensim/models/word2vec_inner.pyx":510 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 510; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 510; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":493 + /* "gensim/models/word2vec_inner.pyx":513 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":498 + /* "gensim/models/word2vec_inner.pyx":518 * cdef np.uint32_t indexes[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 518; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 518; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":501 + /* "gensim/models/word2vec_inner.pyx":521 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -5464,47 +5824,47 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( */ __pyx_v_result = 0; - /* "gensim/models/word2vec_inner.pyx":508 + /* "gensim/models/word2vec_inner.pyx":528 * cdef np.uint8_t *codes[MAX_SENTENCE_LEN] * * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 528; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":511 + /* "gensim/models/word2vec_inner.pyx":531 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 531; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "gensim/models/word2vec_inner.pyx":512 + /* "gensim/models/word2vec_inner.pyx":532 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 532; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "gensim/models/word2vec_inner.pyx":513 + /* "gensim/models/word2vec_inner.pyx":533 * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_3 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 513; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_3 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 533; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_4 = 10000; if (((__pyx_t_3 < __pyx_t_4) != 0)) { __pyx_t_5 = __pyx_t_3; @@ -5513,7 +5873,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "gensim/models/word2vec_inner.pyx":515 + /* "gensim/models/word2vec_inner.pyx":535 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5524,19 +5884,19 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_2; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "gensim/models/word2vec_inner.pyx":516 + /* "gensim/models/word2vec_inner.pyx":536 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 516; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_1 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_1 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 536; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_1); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":517 + /* "gensim/models/word2vec_inner.pyx":537 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -5547,7 +5907,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( __pyx_t_8 = (__pyx_t_7 != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":518 + /* "gensim/models/word2vec_inner.pyx":538 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -5559,59 +5919,59 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":520 + /* "gensim/models/word2vec_inner.pyx":540 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 540; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_9 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_9 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyInt_As_npy_uint32(__pyx_t_1); if (unlikely((__pyx_t_9 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 540; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_9; - /* "gensim/models/word2vec_inner.pyx":521 + /* "gensim/models/word2vec_inner.pyx":541 * else: * indexes[i] = word.index * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 541; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 521; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 541; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - /* "gensim/models/word2vec_inner.pyx":522 + /* "gensim/models/word2vec_inner.pyx":542 * indexes[i] = word.index * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * result += 1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 542; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 542; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":523 + /* "gensim/models/word2vec_inner.pyx":543 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 523; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 543; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 523; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 543; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":524 + /* "gensim/models/word2vec_inner.pyx":544 * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) * result += 1 # <<<<<<<<<<<<<< @@ -5623,7 +5983,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( __pyx_L5:; } - /* "gensim/models/word2vec_inner.pyx":527 + /* "gensim/models/word2vec_inner.pyx":547 * * # release GIL & train on the sentence * work[0] = 0.0 # <<<<<<<<<<<<<< @@ -5632,7 +5992,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( */ (__pyx_v_work[0]) = 0.0; - /* "gensim/models/word2vec_inner.pyx":528 + /* "gensim/models/word2vec_inner.pyx":548 * # release GIL & train on the sentence * work[0] = 0.0 * with nogil: # <<<<<<<<<<<<<< @@ -5646,7 +6006,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( #endif /*try:*/ { - /* "gensim/models/word2vec_inner.pyx":529 + /* "gensim/models/word2vec_inner.pyx":549 * work[0] = 0.0 * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5657,7 +6017,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_2; __pyx_t_6+=1) { __pyx_v_i = __pyx_t_6; - /* "gensim/models/word2vec_inner.pyx":530 + /* "gensim/models/word2vec_inner.pyx":550 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -5667,7 +6027,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( __pyx_t_8 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":531 + /* "gensim/models/word2vec_inner.pyx":551 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -5677,7 +6037,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( goto __pyx_L9_continue; } - /* "gensim/models/word2vec_inner.pyx":532 + /* "gensim/models/word2vec_inner.pyx":552 * if codelens[i] == 0: * continue * j = i - window # <<<<<<<<<<<<<< @@ -5686,7 +6046,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "gensim/models/word2vec_inner.pyx":533 + /* "gensim/models/word2vec_inner.pyx":553 * continue * j = i - window * if j < 0: # <<<<<<<<<<<<<< @@ -5696,7 +6056,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( __pyx_t_8 = ((__pyx_v_j < 0) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":534 + /* "gensim/models/word2vec_inner.pyx":554 * j = i - window * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5708,7 +6068,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } __pyx_L12:; - /* "gensim/models/word2vec_inner.pyx":535 + /* "gensim/models/word2vec_inner.pyx":555 * if j < 0: * j = 0 * k = i + window + 1 # <<<<<<<<<<<<<< @@ -5717,7 +6077,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "gensim/models/word2vec_inner.pyx":536 + /* "gensim/models/word2vec_inner.pyx":556 * j = 0 * k = i + window + 1 * if k > sentence_len: # <<<<<<<<<<<<<< @@ -5727,7 +6087,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( __pyx_t_8 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_8) { - /* "gensim/models/word2vec_inner.pyx":537 + /* "gensim/models/word2vec_inner.pyx":557 * k = i + window + 1 * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -5739,7 +6099,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } __pyx_L13:; - /* "gensim/models/word2vec_inner.pyx":538 + /* "gensim/models/word2vec_inner.pyx":558 * if k > sentence_len: * k = sentence_len * score_pair_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) # <<<<<<<<<<<<<< @@ -5751,7 +6111,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } } - /* "gensim/models/word2vec_inner.pyx":528 + /* "gensim/models/word2vec_inner.pyx":548 * # release GIL & train on the sentence * work[0] = 0.0 * with nogil: # <<<<<<<<<<<<<< @@ -5769,7 +6129,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( } } - /* "gensim/models/word2vec_inner.pyx":540 + /* "gensim/models/word2vec_inner.pyx":560 * score_pair_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, work, i, j, k, cbow_mean) * * return work[0] # <<<<<<<<<<<<<< @@ -5777,13 +6137,13 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( * cdef void score_pair_cbow_hs( */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = PyFloat_FromDouble((__pyx_v_work[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 540; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyFloat_FromDouble((__pyx_v_work[0])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_r = __pyx_t_1; __pyx_t_1 = 0; goto __pyx_L0; - /* "gensim/models/word2vec_inner.pyx":486 + /* "gensim/models/word2vec_inner.pyx":506 * work[0] += f * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< @@ -5803,7 +6163,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_6score_sentence_cbow( return __pyx_r; } -/* "gensim/models/word2vec_inner.pyx":542 +/* "gensim/models/word2vec_inner.pyx":562 * return work[0] * * cdef void score_pair_cbow_hs( # <<<<<<<<<<<<<< @@ -5826,7 +6186,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ PY_LONG_LONG __pyx_t_5; long __pyx_t_6; - /* "gensim/models/word2vec_inner.pyx":553 + /* "gensim/models/word2vec_inner.pyx":573 * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -5835,7 +6195,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "gensim/models/word2vec_inner.pyx":554 + /* "gensim/models/word2vec_inner.pyx":574 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -5844,7 +6204,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_count = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "gensim/models/word2vec_inner.pyx":555 + /* "gensim/models/word2vec_inner.pyx":575 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5855,7 +6215,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":556 + /* "gensim/models/word2vec_inner.pyx":576 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -5873,7 +6233,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":557 + /* "gensim/models/word2vec_inner.pyx":577 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -5884,7 +6244,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":559 + /* "gensim/models/word2vec_inner.pyx":579 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -5893,7 +6253,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_count = (__pyx_v_count + __pyx_v_6gensim_6models_14word2vec_inner_ONEF); - /* "gensim/models/word2vec_inner.pyx":560 + /* "gensim/models/word2vec_inner.pyx":580 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< @@ -5905,7 +6265,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_L3_continue:; } - /* "gensim/models/word2vec_inner.pyx":561 + /* "gensim/models/word2vec_inner.pyx":581 * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< @@ -5923,7 +6283,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_L9_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":562 + /* "gensim/models/word2vec_inner.pyx":582 * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -5932,7 +6292,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_inv_count = (__pyx_v_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - /* "gensim/models/word2vec_inner.pyx":563 + /* "gensim/models/word2vec_inner.pyx":583 * if cbow_mean and count > (0.5): * inv_count = ONEF/count * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< @@ -5944,7 +6304,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ } __pyx_L8:; - /* "gensim/models/word2vec_inner.pyx":565 + /* "gensim/models/word2vec_inner.pyx":585 * sscal(&size, &inv_count, neu1, &ONE) * * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -5955,7 +6315,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { __pyx_v_b = __pyx_t_5; - /* "gensim/models/word2vec_inner.pyx":566 + /* "gensim/models/word2vec_inner.pyx":586 * * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -5964,7 +6324,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "gensim/models/word2vec_inner.pyx":567 + /* "gensim/models/word2vec_inner.pyx":587 * for b in range(codelens[i]): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -5973,7 +6333,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_f = __pyx_v_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":568 + /* "gensim/models/word2vec_inner.pyx":588 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 # <<<<<<<<<<<<<< @@ -5982,7 +6342,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_sgn = __Pyx_pow_long(-1, ((long)(__pyx_v_word_code[__pyx_v_b]))); - /* "gensim/models/word2vec_inner.pyx":569 + /* "gensim/models/word2vec_inner.pyx":589 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 * f = sgn*f # <<<<<<<<<<<<<< @@ -5991,7 +6351,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_f = (__pyx_v_sgn * __pyx_v_f); - /* "gensim/models/word2vec_inner.pyx":570 + /* "gensim/models/word2vec_inner.pyx":590 * sgn = (-1)**word_code[b] # ch function: 0-> 1, 1 -> -1 * f = sgn*f * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -6009,7 +6369,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_L14_bool_binop_done:; if (__pyx_t_3) { - /* "gensim/models/word2vec_inner.pyx":571 + /* "gensim/models/word2vec_inner.pyx":591 * f = sgn*f * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -6019,7 +6379,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ goto __pyx_L11_continue; } - /* "gensim/models/word2vec_inner.pyx":572 + /* "gensim/models/word2vec_inner.pyx":592 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -6028,7 +6388,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ */ __pyx_v_f = (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "gensim/models/word2vec_inner.pyx":573 + /* "gensim/models/word2vec_inner.pyx":593 * continue * f = LOG_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * work[0] += f # <<<<<<<<<<<<<< @@ -6040,7 +6400,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ __pyx_L11_continue:; } - /* "gensim/models/word2vec_inner.pyx":542 + /* "gensim/models/word2vec_inner.pyx":562 * return work[0] * * cdef void score_pair_cbow_hs( # <<<<<<<<<<<<<< @@ -6051,7 +6411,7 @@ static void __pyx_f_6gensim_6models_14word2vec_inner_score_pair_cbow_hs(__pyx_t_ /* function exit code */ } -/* "gensim/models/word2vec_inner.pyx":576 +/* "gensim/models/word2vec_inner.pyx":596 * * * def init(): # <<<<<<<<<<<<<< @@ -6090,7 +6450,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "gensim/models/word2vec_inner.pyx":586 + /* "gensim/models/word2vec_inner.pyx":606 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6100,7 +6460,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "gensim/models/word2vec_inner.pyx":587 + /* "gensim/models/word2vec_inner.pyx":607 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6110,7 +6470,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "gensim/models/word2vec_inner.pyx":588 + /* "gensim/models/word2vec_inner.pyx":608 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6119,7 +6479,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_expected = ((float)0.1); - /* "gensim/models/word2vec_inner.pyx":589 + /* "gensim/models/word2vec_inner.pyx":609 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6128,7 +6488,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_size = 1; - /* "gensim/models/word2vec_inner.pyx":594 + /* "gensim/models/word2vec_inner.pyx":614 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6138,7 +6498,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "gensim/models/word2vec_inner.pyx":595 + /* "gensim/models/word2vec_inner.pyx":615 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6147,7 +6507,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "gensim/models/word2vec_inner.pyx":596 + /* "gensim/models/word2vec_inner.pyx":616 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6156,7 +6516,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ (__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); - /* "gensim/models/word2vec_inner.pyx":597 + /* "gensim/models/word2vec_inner.pyx":617 * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) * LOG_TABLE[i] = log( EXP_TABLE[i] ) # <<<<<<<<<<<<<< @@ -6166,7 +6526,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P (__pyx_v_6gensim_6models_14word2vec_inner_LOG_TABLE[__pyx_v_i]) = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)log((__pyx_v_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]))); } - /* "gensim/models/word2vec_inner.pyx":600 + /* "gensim/models/word2vec_inner.pyx":620 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6175,7 +6535,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_d_res = __pyx_v_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE), __pyx_v_y, (&__pyx_v_6gensim_6models_14word2vec_inner_ONE)); - /* "gensim/models/word2vec_inner.pyx":601 + /* "gensim/models/word2vec_inner.pyx":621 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6184,7 +6544,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "gensim/models/word2vec_inner.pyx":602 + /* "gensim/models/word2vec_inner.pyx":622 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6194,7 +6554,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/word2vec_inner.pyx":603 + /* "gensim/models/word2vec_inner.pyx":623 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6203,7 +6563,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_double; - /* "gensim/models/word2vec_inner.pyx":604 + /* "gensim/models/word2vec_inner.pyx":624 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6212,7 +6572,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/word2vec_inner.pyx":605 + /* "gensim/models/word2vec_inner.pyx":625 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6225,7 +6585,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/word2vec_inner.pyx":606 + /* "gensim/models/word2vec_inner.pyx":626 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6235,7 +6595,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "gensim/models/word2vec_inner.pyx":607 + /* "gensim/models/word2vec_inner.pyx":627 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6244,7 +6604,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_float; - /* "gensim/models/word2vec_inner.pyx":608 + /* "gensim/models/word2vec_inner.pyx":628 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6253,7 +6613,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_6gensim_6models_14word2vec_inner_saxpy; - /* "gensim/models/word2vec_inner.pyx":609 + /* "gensim/models/word2vec_inner.pyx":629 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6267,7 +6627,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P } /*else*/ { - /* "gensim/models/word2vec_inner.pyx":613 + /* "gensim/models/word2vec_inner.pyx":633 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6276,7 +6636,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "gensim/models/word2vec_inner.pyx":614 + /* "gensim/models/word2vec_inner.pyx":634 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6285,7 +6645,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P */ __pyx_v_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "gensim/models/word2vec_inner.pyx":615 + /* "gensim/models/word2vec_inner.pyx":635 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6298,7 +6658,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P goto __pyx_L0; } - /* "gensim/models/word2vec_inner.pyx":576 + /* "gensim/models/word2vec_inner.pyx":596 * * * def init(): # <<<<<<<<<<<<<< @@ -6313,7 +6673,7 @@ static PyObject *__pyx_pf_6gensim_6models_14word2vec_inner_8init(CYTHON_UNUSED P return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -6363,7 +6723,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __Pyx_GIVEREF(__pyx_v_info->obj); } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":203 * # of flags * * if info == NULL: return # <<<<<<<<<<<<<< @@ -6376,7 +6736,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L0; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":206 * * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -6385,7 +6745,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_endian_detector = 1; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":207 * cdef int copy_shape, i, ndim * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -6394,7 +6754,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":209 * cdef bint little_endian = ((&endian_detector)[0] != 0) * * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< @@ -6403,7 +6763,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":211 * ndim = PyArray_NDIM(self) * * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -6413,7 +6773,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":212 * * if sizeof(npy_intp) != sizeof(Py_ssize_t): * copy_shape = 1 # <<<<<<<<<<<<<< @@ -6425,7 +6785,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":214 * copy_shape = 1 * else: * copy_shape = 0 # <<<<<<<<<<<<<< @@ -6436,7 +6796,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L4:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":216 * copy_shape = 0 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6450,7 +6810,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L6_bool_binop_done; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":217 * * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -6462,7 +6822,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L6_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -6476,7 +6836,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":220 * raise ValueError(u"ndarray is not C contiguous") * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< @@ -6490,7 +6850,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P goto __pyx_L9_bool_binop_done; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":221 * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< @@ -6502,7 +6862,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L9_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -6516,7 +6876,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":224 * raise ValueError(u"ndarray is not Fortran contiguous") * * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< @@ -6525,7 +6885,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":225 * * info.buf = PyArray_DATA(self) * info.ndim = ndim # <<<<<<<<<<<<<< @@ -6534,7 +6894,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->ndim = __pyx_v_ndim; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":226 * info.buf = PyArray_DATA(self) * info.ndim = ndim * if copy_shape: # <<<<<<<<<<<<<< @@ -6544,7 +6904,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = (__pyx_v_copy_shape != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":229 * # Allocate new buffer for strides and shape info. * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<< @@ -6553,7 +6913,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2))); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":230 * # This is allocated as one block, strides first. * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim # <<<<<<<<<<<<<< @@ -6562,7 +6922,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":231 * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) * info.shape = info.strides + ndim * for i in range(ndim): # <<<<<<<<<<<<<< @@ -6573,7 +6933,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { __pyx_v_i = __pyx_t_5; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":232 * info.shape = info.strides + ndim * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< @@ -6582,7 +6942,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":233 * for i in range(ndim): * info.strides[i] = PyArray_STRIDES(self)[i] * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< @@ -6595,7 +6955,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":235 * info.shape[i] = PyArray_DIMS(self)[i] * else: * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< @@ -6604,7 +6964,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":236 * else: * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< @@ -6615,7 +6975,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L11:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":237 * info.strides = PyArray_STRIDES(self) * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL # <<<<<<<<<<<<<< @@ -6624,7 +6984,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->suboffsets = NULL; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":238 * info.shape = PyArray_DIMS(self) * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< @@ -6633,7 +6993,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":239 * info.suboffsets = NULL * info.itemsize = PyArray_ITEMSIZE(self) * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< @@ -6642,7 +7002,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":242 * * cdef int t * cdef char* f = NULL # <<<<<<<<<<<<<< @@ -6651,7 +7011,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_f = NULL; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":243 * cdef int t * cdef char* f = NULL * cdef dtype descr = self.descr # <<<<<<<<<<<<<< @@ -6663,7 +7023,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":247 * cdef int offset * * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< @@ -6672,7 +7032,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":249 * cdef bint hasfields = PyDataType_HASFIELDS(descr) * * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< @@ -6690,7 +7050,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L15_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":251 * if not hasfields and not copy_shape: * # do not call releasebuffer * info.obj = None # <<<<<<<<<<<<<< @@ -6706,7 +7066,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":254 * else: * # need to call releasebuffer * info.obj = self # <<<<<<<<<<<<<< @@ -6721,7 +7081,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L14:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":256 * info.obj = self * * if not hasfields: # <<<<<<<<<<<<<< @@ -6731,7 +7091,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":257 * * if not hasfields: * t = descr.type_num # <<<<<<<<<<<<<< @@ -6741,7 +7101,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_4 = __pyx_v_descr->type_num; __pyx_v_t = __pyx_t_4; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":258 * if not hasfields: * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -6761,7 +7121,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } __pyx_L20_next_or:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":259 * t = descr.type_num * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -6779,7 +7139,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_L19_bool_binop_done:; if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -6793,7 +7153,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -6802,7 +7162,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ switch (__pyx_v_t) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":261 * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< @@ -6813,7 +7173,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_b; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":262 * raise ValueError(u"Non-native byte order not supported") * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< @@ -6824,7 +7184,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_B; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":263 * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< @@ -6835,7 +7195,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_h; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":264 * elif t == NPY_UBYTE: f = "B" * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< @@ -6846,7 +7206,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_H; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":265 * elif t == NPY_SHORT: f = "h" * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< @@ -6857,7 +7217,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_i; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":266 * elif t == NPY_USHORT: f = "H" * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< @@ -6868,7 +7228,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_I; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":267 * elif t == NPY_INT: f = "i" * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< @@ -6879,7 +7239,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_l; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":268 * elif t == NPY_UINT: f = "I" * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< @@ -6890,7 +7250,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_L; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":269 * elif t == NPY_LONG: f = "l" * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< @@ -6901,7 +7261,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_q; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":270 * elif t == NPY_ULONG: f = "L" * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< @@ -6912,7 +7272,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Q; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":271 * elif t == NPY_LONGLONG: f = "q" * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< @@ -6923,7 +7283,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_f; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":272 * elif t == NPY_ULONGLONG: f = "Q" * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< @@ -6934,7 +7294,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_d; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":273 * elif t == NPY_FLOAT: f = "f" * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< @@ -6945,7 +7305,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_g; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":274 * elif t == NPY_DOUBLE: f = "d" * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< @@ -6956,7 +7316,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zf; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":275 * elif t == NPY_LONGDOUBLE: f = "g" * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< @@ -6967,7 +7327,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zd; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":276 * elif t == NPY_CFLOAT: f = "Zf" * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< @@ -6978,7 +7338,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_v_f = __pyx_k_Zg; break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":277 * elif t == NPY_CDOUBLE: f = "Zd" * elif t == NPY_CLONGDOUBLE: f = "Zg" * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< @@ -6990,7 +7350,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; default: - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":279 * elif t == NPY_OBJECT: f = "O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -7016,7 +7376,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P break; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":280 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f # <<<<<<<<<<<<<< @@ -7025,7 +7385,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = __pyx_v_f; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":281 * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * info.format = f * return # <<<<<<<<<<<<<< @@ -7037,7 +7397,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":283 * return * else: * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< @@ -7046,7 +7406,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_info->format = ((char *)malloc(255)); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":284 * else: * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< @@ -7055,7 +7415,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ (__pyx_v_info->format[0]) = '^'; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":285 * info.format = stdlib.malloc(_buffer_format_string_len) * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 # <<<<<<<<<<<<<< @@ -7064,7 +7424,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P */ __pyx_v_offset = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":286 * info.format[0] = c'^' # Native data types, manual alignment * offset = 0 * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< @@ -7074,7 +7434,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 255), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_f = __pyx_t_7; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":289 * info.format + _buffer_format_string_len, * &offset) * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< @@ -7084,7 +7444,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P (__pyx_v_f[0]) = '\x00'; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":197 * # experimental exception made for __getbuffer__ and __releasebuffer__ * # -- the details of this may change. * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< @@ -7116,7 +7476,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7140,7 +7500,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s int __pyx_t_1; __Pyx_RefNannySetupContext("__releasebuffer__", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":292 * * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< @@ -7150,7 +7510,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":293 * def __releasebuffer__(ndarray self, Py_buffer* info): * if PyArray_HASFIELDS(self): * stdlib.free(info.format) # <<<<<<<<<<<<<< @@ -7162,7 +7522,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s } __pyx_L3:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":294 * if PyArray_HASFIELDS(self): * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< @@ -7172,7 +7532,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":295 * stdlib.free(info.format) * if sizeof(npy_intp) != sizeof(Py_ssize_t): * stdlib.free(info.strides) # <<<<<<<<<<<<<< @@ -7184,7 +7544,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s } __pyx_L4:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":291 * f[0] = c'\0' # Terminate format string * * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< @@ -7196,7 +7556,7 @@ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_s __Pyx_RefNannyFinishContext(); } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7213,7 +7573,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":772 * * cdef inline object PyArray_MultiIterNew1(a): * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< @@ -7227,7 +7587,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":771 * ctypedef npy_cdouble complex_t * * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< @@ -7246,7 +7606,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7263,7 +7623,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":775 * * cdef inline object PyArray_MultiIterNew2(a, b): * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< @@ -7277,7 +7637,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":774 * return PyArray_MultiIterNew(1, a) * * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< @@ -7296,7 +7656,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7313,7 +7673,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":778 * * cdef inline object PyArray_MultiIterNew3(a, b, c): * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< @@ -7327,7 +7687,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":777 * return PyArray_MultiIterNew(2, a, b) * * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< @@ -7346,7 +7706,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -7363,7 +7723,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":781 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":781 * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< @@ -7377,7 +7737,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":780 * return PyArray_MultiIterNew(3, a, b, c) * * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< @@ -7396,7 +7756,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -7413,7 +7773,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":784 * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< @@ -7427,7 +7787,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ __pyx_t_1 = 0; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":783 * return PyArray_MultiIterNew(4, a, b, c, d) * * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< @@ -7446,7 +7806,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__ return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -7478,7 +7838,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_util_dtypestring", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":793 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":793 * cdef int delta_offset * cdef tuple i * cdef int endian_detector = 1 # <<<<<<<<<<<<<< @@ -7487,7 +7847,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_endian_detector = 1; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":794 * cdef tuple i * cdef int endian_detector = 1 * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< @@ -7496,7 +7856,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -7518,7 +7878,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":798 * * for childname in descr.names: * fields = descr.fields[childname] # <<<<<<<<<<<<<< @@ -7535,7 +7895,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); __pyx_t_3 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":799 * for childname in descr.names: * fields = descr.fields[childname] * child, new_offset = fields # <<<<<<<<<<<<<< @@ -7574,7 +7934,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":801 * child, new_offset = fields * * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< @@ -7591,7 +7951,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -7605,7 +7965,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":804 * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") * * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< @@ -7625,7 +7985,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L8_next_or:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":805 * * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< @@ -7643,7 +8003,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_L7_bool_binop_done:; if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -7657,7 +8017,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":816 * * # Output padding bytes * while offset[0] < new_offset: # <<<<<<<<<<<<<< @@ -7673,7 +8033,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (!__pyx_t_6) break; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":817 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":817 * # Output padding bytes * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< @@ -7682,7 +8042,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ (__pyx_v_f[0]) = 120; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":818 * while offset[0] < new_offset: * f[0] = 120 # "x"; pad byte * f += 1 # <<<<<<<<<<<<<< @@ -7691,7 +8051,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx */ __pyx_v_f = (__pyx_v_f + 1); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":819 * f[0] = 120 # "x"; pad byte * f += 1 * offset[0] += 1 # <<<<<<<<<<<<<< @@ -7702,7 +8062,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":821 * offset[0] += 1 * * offset[0] += child.itemsize # <<<<<<<<<<<<<< @@ -7712,7 +8072,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_8 = 0; (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":823 * offset[0] += child.itemsize * * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< @@ -7722,7 +8082,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":824 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":824 * * if not PyDataType_HASFIELDS(child): * t = child.type_num # <<<<<<<<<<<<<< @@ -7734,7 +8094,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); __pyx_t_4 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":825 * if not PyDataType_HASFIELDS(child): * t = child.type_num * if end - f < 5: # <<<<<<<<<<<<<< @@ -7744,7 +8104,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); if (__pyx_t_6) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -7758,7 +8118,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":829 * * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< @@ -7776,7 +8136,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":830 * # Until ticket #99 is fixed, use integers to avoid warnings * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< @@ -7794,7 +8154,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":831 * if t == NPY_BYTE: f[0] = 98 #"b" * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< @@ -7812,7 +8172,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":832 * elif t == NPY_UBYTE: f[0] = 66 #"B" * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< @@ -7830,7 +8190,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":833 * elif t == NPY_SHORT: f[0] = 104 #"h" * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< @@ -7848,7 +8208,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":834 * elif t == NPY_USHORT: f[0] = 72 #"H" * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< @@ -7866,7 +8226,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":835 * elif t == NPY_INT: f[0] = 105 #"i" * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< @@ -7884,7 +8244,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":836 * elif t == NPY_UINT: f[0] = 73 #"I" * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< @@ -7902,7 +8262,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":837 * elif t == NPY_LONG: f[0] = 108 #"l" * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< @@ -7920,7 +8280,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":838 * elif t == NPY_ULONG: f[0] = 76 #"L" * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< @@ -7938,7 +8298,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":839 * elif t == NPY_LONGLONG: f[0] = 113 #"q" * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< @@ -7956,7 +8316,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":840 * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< @@ -7974,7 +8334,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":841 * elif t == NPY_FLOAT: f[0] = 102 #"f" * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< @@ -7992,7 +8352,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":842 * elif t == NPY_DOUBLE: f[0] = 100 #"d" * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< @@ -8012,7 +8372,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":843 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":843 * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< @@ -8032,7 +8392,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":844 * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< @@ -8052,7 +8412,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx goto __pyx_L15; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":845 * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< @@ -8071,7 +8431,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":847 * elif t == NPY_OBJECT: f[0] = 79 #"O" * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< @@ -8094,7 +8454,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L15:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":848 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":848 * else: * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) * f += 1 # <<<<<<<<<<<<<< @@ -8106,7 +8466,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":852 * # Cython ignores struct boundary information ("T{...}"), * # so don't output it * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< @@ -8118,7 +8478,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __pyx_L13:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":797 * cdef tuple fields * * for childname in descr.names: # <<<<<<<<<<<<<< @@ -8128,7 +8488,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":853 * # so don't output it * f = _util_dtypestring(child, f, end, offset) * return f # <<<<<<<<<<<<<< @@ -8138,7 +8498,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx __pyx_r = __pyx_v_f; goto __pyx_L0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":786 * return PyArray_MultiIterNew(5, a, b, c, d, e) * * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< @@ -8163,7 +8523,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx return __pyx_r; } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8178,7 +8538,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a int __pyx_t_2; __Pyx_RefNannySetupContext("set_array_base", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":971 * cdef inline void set_array_base(ndarray arr, object base): * cdef PyObject* baseptr * if base is None: # <<<<<<<<<<<<<< @@ -8189,7 +8549,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __pyx_t_2 = (__pyx_t_1 != 0); if (__pyx_t_2) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":972 * cdef PyObject* baseptr * if base is None: * baseptr = NULL # <<<<<<<<<<<<<< @@ -8201,7 +8561,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":974 * baseptr = NULL * else: * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< @@ -8210,7 +8570,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_INCREF(__pyx_v_base); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":975 * else: * Py_INCREF(base) # important to do this before decref below! * baseptr = base # <<<<<<<<<<<<<< @@ -8221,7 +8581,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a } __pyx_L3:; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":976 * Py_INCREF(base) # important to do this before decref below! * baseptr = base * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< @@ -8230,7 +8590,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ Py_XDECREF(__pyx_v_arr->base); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":977 * baseptr = base * Py_XDECREF(arr.base) * arr.base = baseptr # <<<<<<<<<<<<<< @@ -8239,7 +8599,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a */ __pyx_v_arr->base = __pyx_v_baseptr; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":969 * * * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< @@ -8251,7 +8611,7 @@ static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_a __Pyx_RefNannyFinishContext(); } -/* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 +/* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8265,7 +8625,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py int __pyx_t_1; __Pyx_RefNannySetupContext("get_array_base", 0); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":980 * * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: # <<<<<<<<<<<<<< @@ -8275,7 +8635,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); if (__pyx_t_1) { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":981 * cdef inline object get_array_base(ndarray arr): * if arr.base is NULL: * return None # <<<<<<<<<<<<<< @@ -8289,7 +8649,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py } /*else*/ { - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":983 * return None * else: * return arr.base # <<<<<<<<<<<<<< @@ -8300,7 +8660,7 @@ static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__py goto __pyx_L0; } - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -8346,7 +8706,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_REAL, __pyx_k_REAL, sizeof(__pyx_k_REAL), 0, 0, 1, 1}, {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, - {&__pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_k_Volumes_work_workspace_gensim_t, sizeof(__pyx_k_Volumes_work_workspace_gensim_t), 0, 0, 1, 0}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, {&__pyx_n_s_cbow_mean, __pyx_k_cbow_mean, sizeof(__pyx_k_cbow_mean), 0, 0, 1, 1}, @@ -8363,6 +8722,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_gensim_models_word2vec_inner, __pyx_k_gensim_models_word2vec_inner, sizeof(__pyx_k_gensim_models_word2vec_inner), 0, 0, 1, 1}, + {&__pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_k_home_ubuntu_src_gensim_bigdocve, sizeof(__pyx_k_home_ubuntu_src_gensim_bigdocve), 0, 0, 1, 0}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, {&__pyx_n_s_i, __pyx_k_i, sizeof(__pyx_k_i), 0, 0, 1, 1}, {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, @@ -8394,6 +8754,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, {&__pyx_n_s_reduced_windows, __pyx_k_reduced_windows, sizeof(__pyx_k_reduced_windows), 0, 0, 1, 1}, {&__pyx_n_s_result, __pyx_k_result, sizeof(__pyx_k_result), 0, 0, 1, 1}, + {&__pyx_n_s_sample, __pyx_k_sample, sizeof(__pyx_k_sample), 0, 0, 1, 1}, + {&__pyx_n_s_sample_int, __pyx_k_sample_int, sizeof(__pyx_k_sample_int), 0, 0, 1, 1}, {&__pyx_n_s_saxpy, __pyx_k_saxpy, sizeof(__pyx_k_saxpy), 0, 0, 1, 1}, {&__pyx_n_s_scipy_linalg_blas, __pyx_k_scipy_linalg_blas, sizeof(__pyx_k_scipy_linalg_blas), 0, 0, 1, 1}, {&__pyx_n_s_scopy, __pyx_k_scopy, sizeof(__pyx_k_scopy), 0, 0, 1, 1}, @@ -8410,9 +8772,12 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_cbow, __pyx_k_train_sentence_cbow, sizeof(__pyx_k_train_sentence_cbow), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_sg, __pyx_k_train_sentence_sg, sizeof(__pyx_k_train_sentence_sg), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, + {&__pyx_n_s_vlookup, __pyx_k_vlookup, sizeof(__pyx_k_vlookup), 0, 0, 1, 1}, + {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {&__pyx_n_s_word_locks, __pyx_k_word_locks, sizeof(__pyx_k_word_locks), 0, 0, 1, 1}, @@ -8424,7 +8789,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 304; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -8436,35 +8801,35 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "gensim/models/word2vec_inner.pyx":284 - * cum_table = (np.PyArray_DATA(model.cum_table)) + /* "gensim/models/word2vec_inner.pyx":293 * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "gensim/models/word2vec_inner.pyx":368 - * cum_table = (np.PyArray_DATA(model.cum_table)) + /* "gensim/models/word2vec_inner.pyx":383 * cum_table_len = len(model.cum_table) + * if negative or sample: * next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< @@ -8475,7 +8840,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__5); __Pyx_GIVEREF(__pyx_tuple__5); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< @@ -8486,7 +8851,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or * (descr.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -8497,7 +8862,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":802 * * if (end - f) - (new_offset - offset[0]) < 15: * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< @@ -8508,7 +8873,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__8); __Pyx_GIVEREF(__pyx_tuple__8); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and little_endian) or * (child.byteorder == c'<' and not little_endian)): * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< @@ -8519,7 +8884,7 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__9); __Pyx_GIVEREF(__pyx_tuple__9); - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num * if end - f < 5: * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< @@ -8530,65 +8895,65 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "gensim/models/word2vec_inner.pyx":247 + /* "gensim/models/word2vec_inner.pyx":253 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(29, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(32, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 29, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_sentence_sg, 247, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 32, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_train_sentence_sg, 253, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/word2vec_inner.pyx":329 + /* "gensim/models/word2vec_inner.pyx":341 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(32, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__13 = PyTuple_Pack(35, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_sample, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_cum_table, __pyx_n_s_cum_table_len, __pyx_n_s_next_random, __pyx_n_s_vlookup, __pyx_n_s_token, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 32, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_train_sentence_cbow, 329, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_train_sentence_cbow, 341, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/word2vec_inner.pyx":412 + /* "gensim/models/word2vec_inner.pyx":432 * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_tuple__15 = PyTuple_Pack(18, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_word); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__15 = PyTuple_Pack(18, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_word); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(3, 0, 18, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_score_sentence_sg, 412, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(3, 0, 18, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_score_sentence_sg, 432, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/word2vec_inner.pyx":486 + /* "gensim/models/word2vec_inner.pyx":506 * work[0] += f * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< * * cdef int cbow_mean = model.cbow_mean */ - __pyx_tuple__17 = PyTuple_Pack(21, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_word); if (unlikely(!__pyx_tuple__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__17 = PyTuple_Pack(21, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_word); if (unlikely(!__pyx_tuple__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__17); __Pyx_GIVEREF(__pyx_tuple__17); - __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(4, 0, 21, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_score_sentence_cbow, 486, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__18 = (PyObject*)__Pyx_PyCode_New(4, 0, 21, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__17, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_score_sentence_cbow, 506, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "gensim/models/word2vec_inner.pyx":576 + /* "gensim/models/word2vec_inner.pyx":596 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__19 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__19); __Pyx_GIVEREF(__pyx_tuple__19); - __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Volumes_work_workspace_gensim_t, __pyx_n_s_init, 576, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__20 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_home_ubuntu_src_gensim_bigdocve, __pyx_n_s_init, 596, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -8702,6 +9067,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (__Pyx_ExportFunction("our_dot_noblas", (void (*)(void))__pyx_f_6gensim_6models_14word2vec_inner_our_dot_noblas, "__pyx_t_6gensim_6models_14word2vec_inner_REAL_t (int const *, float const *, int const *, float const *, int const *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportFunction("our_saxpy_noblas", (void (*)(void))__pyx_f_6gensim_6models_14word2vec_inner_our_saxpy_noblas, "void (int const *, float const *, float const *, int const *, float *, int const *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} if (__Pyx_ExportFunction("bisect_left", (void (*)(void))__pyx_f_6gensim_6models_14word2vec_inner_bisect_left, "unsigned PY_LONG_LONG (__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (__Pyx_ExportFunction("random_int32", (void (*)(void))__pyx_f_6gensim_6models_14word2vec_inner_random_int32, "unsigned PY_LONG_LONG (unsigned PY_LONG_LONG *)") < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /*--- Type init code ---*/ /*--- Type import code ---*/ __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", @@ -8894,72 +9260,72 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) */ __pyx_v_6gensim_6models_14word2vec_inner_ONEF = ((__pyx_t_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "gensim/models/word2vec_inner.pyx":247 + /* "gensim/models/word2vec_inner.pyx":253 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 247; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 253; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":329 + /* "gensim/models/word2vec_inner.pyx":341 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":412 + /* "gensim/models/word2vec_inner.pyx":432 * * # Score is only implemented for hierarchical softmax * def score_sentence_sg(model, sentence, _work): # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_5score_sentence_sg, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_5score_sentence_sg, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 432; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":486 + /* "gensim/models/word2vec_inner.pyx":506 * work[0] += f * * def score_sentence_cbow(model, sentence, _work, _neu1): # <<<<<<<<<<<<<< * * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_7score_sentence_cbow, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_7score_sentence_cbow, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_score_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":576 + /* "gensim/models/word2vec_inner.pyx":596 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_9init, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_6gensim_6models_14word2vec_inner_9init, NULL, __pyx_n_s_gensim_models_word2vec_inner); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 596; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "gensim/models/word2vec_inner.pyx":617 + /* "gensim/models/word2vec_inner.pyx":637 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -8972,14 +9338,14 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "gensim/models/word2vec_inner.pyx":1 @@ -8992,7 +9358,7 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "../../../../../../../Users/kofola3/workspace/vew/gensim/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 + /* "../../../../miniconda/envs/gensim_cenv/lib/python2.7/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr * * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< @@ -10189,6 +10555,32 @@ static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_As_unsigned_PY_LONG_LONG( return (unsigned PY_LONG_LONG) -1; } +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_unsigned_PY_LONG_LONG(unsigned PY_LONG_LONG value) { + const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG) -1, const_zero = 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(unsigned PY_LONG_LONG) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(unsigned long long)) { + return PyLong_FromUnsignedLongLong((unsigned long long) value); + } + } else { + if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(unsigned PY_LONG_LONG) <= sizeof(long long)) { + return PyLong_FromLongLong((long long) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(unsigned PY_LONG_LONG), + little, !is_unsigned); + } +} + static CYTHON_INLINE npy_uint32 __Pyx_PyInt_As_npy_uint32(PyObject *x) { const npy_uint32 neg_one = (npy_uint32) -1, const_zero = 0; const int is_unsigned = neg_one > const_zero; diff --git a/gensim/models/word2vec_inner.pxd b/gensim/models/word2vec_inner.pxd index dbe02a3a73..04cca9e887 100644 --- a/gensim/models/word2vec_inner.pxd +++ b/gensim/models/word2vec_inner.pxd @@ -50,3 +50,5 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con # to support random draws from negative-sampling cum_table cdef unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, unsigned long long lo, unsigned long long hi) nogil + +cdef unsigned long long random_int32(unsigned long long *next_random) nogil diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index dd8bcff017..daca849b46 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -95,6 +95,12 @@ cdef inline unsigned long long bisect_left(np.uint32_t *a, unsigned long long x, lo = mid + 1 return lo +# this quick & dirty RNG apparently matches Java's (non-Secure)Random +# note this function side-effects next_random to set up the next number +cdef inline unsigned long long random_int32(unsigned long long *next_random) nogil: + cdef unsigned long long this_random = next_random[0] >> 16 + next_random[0] = (next_random[0] * 25214903917ULL + 11) & 281474976710655ULL + return this_random cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *cum_table, unsigned long long cum_table_len, @@ -247,6 +253,7 @@ cdef unsigned long long fast_sentence_cbow_neg( def train_sentence_sg(model, sentence, alpha, _work): cdef int hs = model.hs cdef int negative = model.negative + cdef int sample = (model.sample != 0) cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) @@ -272,6 +279,7 @@ def train_sentence_sg(model, sentence, alpha, _work): cdef REAL_t *syn1neg cdef np.uint32_t *cum_table cdef unsigned long long cum_table_len + # for sampling (negative and frequent-word downsampling) cdef unsigned long long next_random if hs: @@ -281,25 +289,31 @@ def train_sentence_sg(model, sentence, alpha, _work): syn1neg = (np.PyArray_DATA(model.syn1neg)) cum_table = (np.PyArray_DATA(model.cum_table)) cum_table_len = len(model.cum_table) + if negative or sample: next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL work = np.PyArray_DATA(_work) - sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - for i in range(sentence_len): - word = sentence[i] + vlookup = model.vocab + i = 0 + for token in sentence: + word = vlookup[token] if token in vlookup else None if word is None: - codelens[i] = 0 - else: - indexes[i] = word.index - if hs: - codelens[i] = len(word.code) - codes[i] = np.PyArray_DATA(word.code) - points[i] = np.PyArray_DATA(word.point) - else: - codelens[i] = 1 - result += 1 + continue # leaving i unchanged/shortening sentence + if sample and word.sample_int < random_int32(&next_random): + continue + indexes[i] = word.index + if hs: + codelens[i] = len(word.code) + codes[i] = np.PyArray_DATA(word.code) + points[i] = np.PyArray_DATA(word.point) + result += 1 + i += 1 + if i == MAX_SENTENCE_LEN: + break # TODO: log warning, tally overflow? + sentence_len = i + # single randint() call avoids a big thread-sync slowdown for i, item in enumerate(model.random.randint(0, window, sentence_len)): reduced_windows[i] = item @@ -307,8 +321,6 @@ def train_sentence_sg(model, sentence, alpha, _work): # release GIL & train on the sentence with nogil: for i in range(sentence_len): - if codelens[i] == 0: - continue j = i - window + reduced_windows[i] if j < 0: j = 0 @@ -316,7 +328,7 @@ def train_sentence_sg(model, sentence, alpha, _work): if k > sentence_len: k = sentence_len for j in range(j, k): - if j == i or codelens[j] == 0: + if j == i: continue if hs: fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) @@ -329,6 +341,7 @@ def train_sentence_sg(model, sentence, alpha, _work): def train_sentence_cbow(model, sentence, alpha, _work, _neu1): cdef int hs = model.hs cdef int negative = model.negative + cdef int sample = (model.sample != 0) cdef int cbow_mean = model.cbow_mean cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) @@ -356,6 +369,7 @@ def train_sentence_cbow(model, sentence, alpha, _work, _neu1): cdef REAL_t *syn1neg cdef np.uint32_t *cum_table cdef unsigned long long cum_table_len + # for sampling (negative or frequent-word downsampling) cdef unsigned long long next_random if hs: @@ -365,26 +379,32 @@ def train_sentence_cbow(model, sentence, alpha, _work, _neu1): syn1neg = (np.PyArray_DATA(model.syn1neg)) cum_table = (np.PyArray_DATA(model.cum_table)) cum_table_len = len(model.cum_table) + if negative or sample: next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL work = np.PyArray_DATA(_work) neu1 = np.PyArray_DATA(_neu1) - sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - for i in range(sentence_len): - word = sentence[i] + vlookup = model.vocab + i = 0 + for token in sentence: + word = vlookup[token] if token in vlookup else None if word is None: - codelens[i] = 0 - else: - indexes[i] = word.index - if hs: - codelens[i] = len(word.code) - codes[i] = np.PyArray_DATA(word.code) - points[i] = np.PyArray_DATA(word.point) - else: - codelens[i] = 1 - result += 1 + continue # leaving i unchanged/shortening sentence + if sample and word.sample_int < random_int32(&next_random): + continue + indexes[i] = word.index + if hs: + codelens[i] = len(word.code) + codes[i] = np.PyArray_DATA(word.code) + points[i] = np.PyArray_DATA(word.point) + result += 1 + i += 1 + if i == MAX_SENTENCE_LEN: + break # TODO: log warning, tally overflow? + sentence_len = i + # single randint() call avoids a big thread-sync slowdown for i, item in enumerate(model.random.randint(0, window, sentence_len)): reduced_windows[i] = item diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 74c44d14d2..31f9b3f5df 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -93,7 +93,9 @@ def test_int_doctags(self): def test_string_doctags(self): """Test doc2vec doctag alternatives""" - corpus = DocsLeeCorpus(True) + corpus = list(DocsLeeCorpus(True)) + # force duplicated tags + corpus = corpus[0:10] + corpus model = doc2vec.Doc2Vec(min_count=1) model.build_vocab(corpus) @@ -101,6 +103,7 @@ def test_string_doctags(self): self.assertEqual(model.docvecs[0].shape,(300,)) self.assertEqual(model.docvecs['_*0'].shape,(300,)) self.assertTrue(all(model.docvecs['_*0']==model.docvecs[0])) + self.assertTrue(max(d.index for d in model.docvecs.doctags.values()) < len(model.docvecs.doctag_syn0)) def test_empty_errors(self): # no input => "RuntimeError: you must first build vocabulary before training the model" diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index c955827dca..ddd8520ceb 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -89,6 +89,11 @@ def testPersistenceWord2VecFormatWithVocab(self): binary_model_with_vocab = word2vec.Word2Vec.load_word2vec_format(testfile(), testvocab, binary=True) self.assertEqual(model.vocab['human'].count, binary_model_with_vocab.vocab['human'].count) + def test_zero_workers_mode(self): + model = word2vec.Word2Vec(sentences, min_count=1) + model0 = word2vec.Word2Vec(sentences, min_count=1, workers=0) + self.models_equal(model,model0) + def testLargeMmap(self): """Test storing/loading the entire model.""" model = word2vec.Word2Vec(sentences, min_count=1)