From a93f8e6f9b375360975584e98055c14132537ecf Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sun, 1 Mar 2015 16:06:56 -0800 Subject: [PATCH 01/49] initial inference support --- gensim/models/doc2vec.py | 64 +++++++++++++++++++++++++++++++++++++++ gensim/models/word2vec.py | 16 +++++++--- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 97696b8974..f8a31f6ecc 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -83,6 +83,7 @@ def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=Tru return len([word for word in sentence if word is not None]) + def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): """ Update distributed memory model by training on a single sentence. @@ -119,6 +120,69 @@ def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_ return len([word for word in sentence if word is not None]) +def infer_vector_dbow(model, document, alpha=0.025, min_alpha=0.0001, steps=50): + """ + Infer a vector for given post-bulk training document, in the 'dbow' model. + + Document should be a list of tokens. + + No cythonized alternative yet. + """ + neg_labels = [] + if model.negative: + # precompute negative labels + neg_labels = zeros(model.negative + 1) + neg_labels[0] = 1.0 + + vector = model.seeded_vector(' '.join(document)) + sentence = next(model._prepare_sentences([LabeledSentence(document,[])]))[0] + + for i in range(steps): + for word in sentence: + if word is None: + continue # OOV word in the input sentence => skip + neu1e = train_sg_pair(model, word, vector, alpha, neg_labels, False, False) + vector += neu1e + alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha + + return vector + +def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): + """ + Infer a vector representation for the given post-training document, in the 'dm' model. 
+ + Document should be a list of tokens. + + No cythonized alternative yet. + """ + neg_labels = [] + if model.negative: + # precompute negative labels + neg_labels = zeros(model.negative + 1) + neg_labels[0] = 1. + + vector = model.seeded_vector(' '.join(document)) + sentence = next(model._prepare_sentences([LabeledSentence(document,[])]))[0] + + for i in range(steps): + + for pos, word in enumerate(sentence): + if word is None: + continue # OOV word in the input sentence => skip + reduced_window = random.randint(model.window) # `b` in the original doc2vec code + start = max(0, pos - model.window + reduced_window) + window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start) + word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] + l1 = np_sum(model.syn0[word2_indices], axis=0) + vector # 1 x layer1_size + if word2_indices and model.cbow_mean: + l1 /= (len(word2_indices) + 1) + neu1e = train_cbow_pair(model, word, None, l1, alpha, neg_labels, False, False) + vector += neu1e # learn input -> hidden + + alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha + + return vector + class LabeledSentence(object): """ diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 3ff49909d9..e986eb59a2 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -162,7 +162,10 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): def train_sg_pair(model, word, word2, alpha, labels, train_w1=True, train_w2=True): - l1 = model.syn0[word2.index] + if isinstance(word2, Vocab): + l1 = model.syn0[word2.index] + else: + l1 = word2 # passed-in candidate vector neu1e = zeros(l1.shape) if model.hs: @@ -188,7 +191,7 @@ def train_sg_pair(model, word, word2, alpha, labels, train_w1=True, train_w2=Tru model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if train_w2: - model.syn0[word2.index] += neu1e # learn input -> 
hidden + l1 += neu1e # learn input -> hidden (changes model.syn0[word2.index] if l1 is that) return neu1e @@ -522,15 +525,18 @@ def reset_weights(self): # randomize weights vector by vector, rather than materializing a huge random matrix in RAM at once for i in xrange(len(self.vocab)): # construct deterministic seed from word AND seed argument - # Note: Python's built in hash function can vary across versions of Python - random.seed(uint32(self.hashfxn(self.index2word[i] + str(self.seed)))) - self.syn0[i] = (random.rand(self.layer1_size) - 0.5) / self.layer1_size + self.syn0[i] = self.seeded_vector(self.index2word[i] + str(self.seed)) if self.hs: self.syn1 = zeros((len(self.vocab), self.layer1_size), dtype=REAL) if self.negative: self.syn1neg = zeros((len(self.vocab), self.layer1_size), dtype=REAL) self.syn0norm = None + def seeded_vector(self, seed_string): + """Create one 'random' vector (but deterministic by seed_string)""" + # Note: Python's built in hash function can vary across versions of Python + random.seed(uint32(self.hashfxn(seed_string))) + return (random.rand(self.layer1_size) - 0.5) / self.layer1_size def save_word2vec_format(self, fname, fvocab=None, binary=False): """ From 627604e9eac12c5848dcaad2cc6a2552878a278f Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sun, 8 Mar 2015 16:45:13 -0700 Subject: [PATCH 02/49] support for doc2vec dm_concat (concatenative PV-DM) model: null_word for padding; layer1_size potentially different than vector_size; parameter renames for clarity; one-time neg_lables precalc --- gensim/models/word2vec.py | 83 ++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index e986eb59a2..4e4596e8f6 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -109,12 +109,6 @@ def train_sentence_sg(model, sentence, alpha, work=None): will use the optimized version from word2vec_inner instead. 
""" - labels = [] - if model.negative: - # precompute negative labels - labels = zeros(model.negative + 1) - labels[0] = 1.0 - for pos, word in enumerate(sentence): if word is None: continue # OOV word in the input sentence => skip @@ -125,7 +119,7 @@ def train_sentence_sg(model, sentence, alpha, work=None): for pos2, word2 in enumerate(sentence[start : pos + model.window + 1 - reduced_window], start): # don't train on OOV words and on the `word` itself if word2 and not (pos2 == pos): - train_sg_pair(model, word, word2, alpha, labels) + train_sg_pair(model, word, word2, alpha) return len([word for word in sentence if word is not None]) @@ -140,12 +134,6 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): will use the optimized version from word2vec_inner instead. """ - labels = [] - if model.negative: - # precompute negative labels - labels = zeros(model.negative + 1) - labels[0] = 1. - for pos, word in enumerate(sentence): if word is None: continue # OOV word in the input sentence => skip @@ -153,15 +141,15 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): start = max(0, pos - model.window + reduced_window) window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x layer1_size + l1 = np_sum(model.syn0[word2_indices], axis=0) # 1 x vector_size if word2_indices and model.cbow_mean: l1 /= len(word2_indices) - train_cbow_pair(model, word, word2_indices, l1, alpha, labels) + train_cbow_pair(model, word, word2_indices, l1, alpha) return len([word for word in sentence if word is not None]) -def train_sg_pair(model, word, word2, alpha, labels, train_w1=True, train_w2=True): +def train_sg_pair(model, word, word2, alpha, learn_weights=True, learn_vectors=True): if isinstance(word2, Vocab): l1 = model.syn0[word2.index] else: @@ -186,23 +174,23 @@ 
def train_sg_pair(model, word, word2, alpha, labels, train_w1=True, train_w2=Tru word_indices.append(w) l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output - gb = (labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if train_w1: + gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate + if learn_weights: model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error - if train_w2: - l1 += neu1e # learn input -> hidden (changes model.syn0[word2.index] if l1 is that) + if learn_vectors: + l1 += neu1e # learn input -> hidden (changes model.syn0[word2.index], if that is l1) return neu1e -def train_cbow_pair(model, word, word2_indices, l1, alpha, labels, train_w1=True, train_w2=True): +def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_weights=True, learn_vectors=True): neu1e = zeros(l1.shape) if model.hs: l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if train_w1: + if learn_weights: model.syn1[word.point] += outer(ga, l1) # learn hidden -> output neu1e += dot(ga, l2a) # save error @@ -215,12 +203,12 @@ def train_cbow_pair(model, word, word2_indices, l1, alpha, labels, train_w1=True word_indices.append(w) l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size fb = 1. / (1. 
+ exp(-dot(l1, l2b.T))) # propagate hidden -> output - gb = (labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if train_w1: + gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate + if learn_weights: model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error - if train_w2: - model.syn0[word2_indices] += neu1e # learn input -> hidden, here for all words in the window separately + if learn_vectors: + model.syn0[input_word_indices] += neu1e # learn input -> hidden, here for all words in the window separately return neu1e @@ -248,7 +236,7 @@ class Word2Vec(utils.SaveLoad): """ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, sg=1, hs=1, negative=0, - cbow_mean=0, hashfxn=hash, iter=1): + cbow_mean=0, hashfxn=hash, iter=1, null_word=0): """ Initialize the model from an iterable of `sentences`. Each sentence is a list of words (unicode strings) that will be used for training. @@ -297,6 +285,7 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, self.index2word = [] # map from a word's matrix index (int) to word (string) self.sg = int(sg) self.table = None # for negative sampling --> this needs a lot of RAM! consider setting back to None before saving + self.vector_size = int(size) self.layer1_size = int(size) if size % 4 != 0: logger.warning("consider setting layer size to a multiple of 4 for greater performance") @@ -312,6 +301,7 @@ def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, self.cbow_mean = int(cbow_mean) self.hashfxn = hashfxn self.iter = iter + self.null_word = null_word if sentences is not None: if isinstance(sentences, GeneratorType): raise TypeError("You can't pass a generator as the sentences argument. 
Try an iterator.") @@ -408,6 +398,13 @@ def build_vocab(self, sentences): self.vocab[word] = v logger.info("total %i word types after removing those with count<%s" % (len(self.vocab), self.min_count)) + if self.null_word: + # create null pseudo-word for padding when using concatenative L1 (run-of-words) + # this word is only ever input – never predicted – so count, huffman-point doesn't matter + word, v = '\0', Vocab(count=1) + v.index = len(self.vocab) + self.index2word.append(word) + self.vocab[word] = v if self.hs: # add info about each word's Huffman encoding self.create_binary_tree() @@ -417,6 +414,7 @@ def build_vocab(self, sentences): # precalculate downsampling thresholds self.precalc_sampling() self.reset_weights() + sys.stderr.flush() @staticmethod def _vocab_from(sentences): @@ -450,6 +448,14 @@ def _get_job_words(self, alpha, work, job, neu1): else: return sum(train_sentence_cbow(self, sentence, alpha, work, neu1) for sentence in job) + def pretrain(self): +# if FAST_VERSION < 0: + self.neg_labels = [] + if self.negative > 0: + # precompute negative labels optimization for pure-python training + self.neg_labels = zeros(self.negative + 1) + self.neg_labels[0] = 1. + def train(self, sentences, total_words=None, word_count=0, chunksize=100): """ Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). @@ -459,6 +465,8 @@ def train(self, sentences, total_words=None, word_count=0, chunksize=100): if FAST_VERSION < 0: import warnings warnings.warn("C extension compilation failed, training will be slow. 
Install a C compiler and reinstall gensim for fast training.") + + self.pretrain() logger.info("training model with %i workers on %i vocabulary and %i features, " "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" % (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) @@ -495,6 +503,7 @@ def worker_train(): logger.info("PROGRESS: at %.2f%% words, alpha %.05f, %.0f words/s" % (100.0 * word_count[0] / total_words, alpha, word_count[0] / elapsed if elapsed else 0.0)) next_report[0] = elapsed + 1.0 # don't flood the log, wait at least a second between progress reports + sys.stderr.flush() workers = [threading.Thread(target=worker_train) for _ in xrange(self.workers)] for thread in workers: @@ -521,7 +530,7 @@ def worker_train(): def reset_weights(self): """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary.""" logger.info("resetting layer weights") - self.syn0 = empty((len(self.vocab), self.layer1_size), dtype=REAL) + self.syn0 = empty((len(self.vocab), self.vector_size), dtype=REAL) # randomize weights vector by vector, rather than materializing a huge random matrix in RAM at once for i in xrange(len(self.vocab)): # construct deterministic seed from word AND seed argument @@ -536,7 +545,7 @@ def seeded_vector(self, seed_string): """Create one 'random' vector (but deterministic by seed_string)""" # Note: Python's built in hash function can vary across versions of Python random.seed(uint32(self.hashfxn(seed_string))) - return (random.rand(self.layer1_size) - 0.5) / self.layer1_size + return (random.rand(self.vector_size) - 0.5) / self.vector_size def save_word2vec_format(self, fname, fvocab=None, binary=False): """ @@ -549,8 +558,8 @@ def save_word2vec_format(self, fname, fvocab=None, binary=False): with utils.smart_open(fvocab, 'wb') as vout: for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): 
vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count))) - logger.info("storing %sx%s projection weights into %s" % (len(self.vocab), self.layer1_size, fname)) - assert (len(self.vocab), self.layer1_size) == self.syn0.shape + logger.info("storing %sx%s projection weights into %s" % (len(self.vocab), self.vector_size, fname)) + assert (len(self.vocab), self.vector_size) == self.syn0.shape with utils.smart_open(fname, 'wb') as fout: fout.write(utils.to_utf8("%s %s\n" % self.syn0.shape)) # store in sorted order: most frequent words at the top @@ -588,11 +597,11 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True): logger.info("loading projection weights from %s" % (fname)) with utils.smart_open(fname) as fin: header = utils.to_unicode(fin.readline()) - vocab_size, layer1_size = map(int, header.split()) # throws for invalid file format - result = Word2Vec(size=layer1_size) - result.syn0 = zeros((vocab_size, layer1_size), dtype=REAL) + vocab_size, vector_size = map(int, header.split()) # throws for invalid file format + result = Word2Vec(size=vector_size) + result.syn0 = zeros((vocab_size, vector_size), dtype=REAL) if binary: - binary_len = dtype(REAL).itemsize * layer1_size + binary_len = dtype(REAL).itemsize * vector_size for line_no in xrange(vocab_size): # mixed text and binary: read text first, then binary word = [] @@ -615,7 +624,7 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True): else: for line_no, line in enumerate(fin): parts = utils.to_unicode(line).split() - if len(parts) != layer1_size + 1: + if len(parts) != vector_size + 1: raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) word, weights = parts[0], list(map(REAL, parts[1:])) if counts is None: @@ -916,7 +925,7 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar): def __str__(self): - return "Word2Vec(vocab=%s, size=%s, alpha=%s)" % (len(self.index2word), self.layer1_size, 
self.alpha) + return "Word2Vec(vocab=%s, size=%s, alpha=%s)" % (len(self.index2word), self.vector_size, self.alpha) def save(self, *args, **kwargs): From f1ad6b643c926ae493f223c48fc6029137324a50 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sun, 8 Mar 2015 16:47:07 -0700 Subject: [PATCH 03/49] pure-python doc2vec dm_concat (concatenative PV-DM) model: train_sentence_dm_concat, infer_vector_dm_concat methods --- gensim/models/doc2vec.py | 163 ++++++++++++++++++++++++++++++--------- 1 file changed, 128 insertions(+), 35 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index f8a31f6ecc..8aef920c68 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -42,7 +42,7 @@ except ImportError: from Queue import Queue -from numpy import zeros, random, sum as np_sum +from numpy import zeros, random, sum as np_sum, add as np_add, concatenate from six import string_types logger = logging.getLogger(__name__) @@ -67,23 +67,16 @@ def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=Tru will use the optimized version from doc2vec_inner instead. """ - neg_labels = [] - if model.negative: - # precompute negative labels - neg_labels = zeros(model.negative + 1) - neg_labels[0] = 1.0 - for label in lbls: if label is None: continue # OOV word in the input sentence => skip for word in sentence: if word is None: continue # OOV word in the input sentence => skip - train_sg_pair(model, word, label, alpha, neg_labels, train_words, train_lbls) + train_sg_pair(model, word, label, alpha, train_words, train_lbls) return len([word for word in sentence if word is not None]) - def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): """ Update distributed memory model by training on a single sentence. 
@@ -98,11 +91,6 @@ def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_ lbl_indices = [lbl.index for lbl in lbls if lbl is not None] lbl_sum = np_sum(model.syn0[lbl_indices], axis=0) lbl_len = len(lbl_indices) - neg_labels = [] - if model.negative: - # precompute negative labels - neg_labels = zeros(model.negative + 1) - neg_labels[0] = 1. for pos, word in enumerate(sentence): if word is None: @@ -114,39 +102,87 @@ def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_ l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum # 1 x layer1_size if word2_indices and model.cbow_mean: l1 /= (len(word2_indices) + lbl_len) - neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, neg_labels, train_words, train_words) + neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, train_words, train_words) if train_lbls: model.syn0[lbl_indices] += neu1e return len([word for word in sentence if word is not None]) + +def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): + """ + Update distributed memory model by training on a single sentence. + + The sentence is a list of Vocab objects (or None, where the corresponding + word is not in the vocabulary. Called internally from `Doc2Vec.train()`. + + This is the non-optimized, Python version. If you have a C compiler, gensim + will use the optimized version from doc2vec_inner instead. 
+ + """ + lbl_indices = [lbl.index for lbl in lbls if lbl is not None] + if len(lbl_indices) != model.dm_lbl_count: + return # skip doc without expected lbl(s) + + null_word = model.vocab['\0'] + pre_pad_count = int((model.window + 1) / 2) + post_pad_count = int(model.window / 2) + padded_sentence_indices = ( + (pre_pad_count * [null_word.index]) # pre-padding + + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words + + (post_pad_count * [null_word.index]) # post-padding + ) + + for pos in range(pre_pad_count, len(padded_sentence_indices) - post_pad_count): + l1_indices = ( + lbl_indices # doc vector(s) + + padded_sentence_indices[pos - pre_pad_count : pos] # preceding words + + padded_sentence_indices[pos + 1 : pos + 1 + post_pad_count] # following words + ) + word = model.vocab[model.index2word[padded_sentence_indices[pos]]] + l1 = model.syn0[l1_indices].ravel() # numpy advanced-indexing: copy; flatten to 1d + neu1e = train_cbow_pair(model, word, None, l1, alpha, True, False) + + if not train_lbls: + # trim lbl indices/errors + l1_indices = l1_indices[len(lbl_indices):] + neu1e = neu1e[len(lbl_indices) * model.vector_size:] + if not train_words: + # trim word-vector indices/errors + l1_indices = l1_indices[:-model.window] + neu1e = neu1e[:-model.window * model.vector_size] + if l1_indices: + # if indices left to train, do so + np_add.at(model.syn0, l1_indices, neu1e.reshape(len(l1_indices), model.vector_size)) + + return len(padded_sentence_indices) - pre_pad_count - post_pad_count + + def infer_vector_dbow(model, document, alpha=0.025, min_alpha=0.0001, steps=50): """ Infer a vector for given post-bulk training document, in the 'dbow' model. - Document should be a list of tokens. + Document should be a list of tokens. No cythonized alternative yet. 
""" - neg_labels = [] - if model.negative: - # precompute negative labels - neg_labels = zeros(model.negative + 1) - neg_labels[0] = 1.0 + if not hasattr(model, 'neg_labels'): + model.pretrain() vector = model.seeded_vector(' '.join(document)) - sentence = next(model._prepare_sentences([LabeledSentence(document,[])]))[0] + sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] for i in range(steps): for word in sentence: if word is None: continue # OOV word in the input sentence => skip - neu1e = train_sg_pair(model, word, vector, alpha, neg_labels, False, False) + neu1e = train_sg_pair(model, word, vector, alpha, False, False) vector += neu1e alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha return vector + def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): """ Infer a vector representation for the given post-training document, in the 'dm' model. @@ -155,17 +191,14 @@ def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): No cythonized alternative yet. """ - neg_labels = [] - if model.negative: - # precompute negative labels - neg_labels = zeros(model.negative + 1) - neg_labels[0] = 1. 
+ if not hasattr(model, 'neg_labels'): + model.pretrain() vector = model.seeded_vector(' '.join(document)) - sentence = next(model._prepare_sentences([LabeledSentence(document,[])]))[0] + sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] for i in range(steps): - + for pos, word in enumerate(sentence): if word is None: continue # OOV word in the input sentence => skip @@ -176,8 +209,50 @@ def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): l1 = np_sum(model.syn0[word2_indices], axis=0) + vector # 1 x layer1_size if word2_indices and model.cbow_mean: l1 /= (len(word2_indices) + 1) - neu1e = train_cbow_pair(model, word, None, l1, alpha, neg_labels, False, False) - vector += neu1e # learn input -> hidden + neu1e = train_cbow_pair(model, word, None, l1, alpha, False, False) + vector += neu1e # learn input -> hidden + + alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha + + return vector + + +def infer_vector_dm_concat(model, document, alpha=0.025, min_alpha=0.0001, steps=50): + """ + Infer a vector representation for the given post-training document, in the 'dm_concat' model. + + Document should be a list of tokens. + + No cythonized alternative yet. 
+ """ + if not hasattr(model, 'neg_labels'): + model.pretrain() + + vector = model.seeded_vector(' '.join(document)) + sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] + + null_word = model.vocab['\0'] + pre_pad_count = int((model.window + 1) / 2) + post_pad_count = int(model.window / 2) + padded_sentence_indices = ( + (pre_pad_count * [null_word.index]) # pre-padding + + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words + + (post_pad_count * [null_word.index]) # post-padding + ) + + for i in range(steps): + + for pos in range(pre_pad_count, len(padded_sentence_indices)-post_pad_count): + word = model.vocab[model.index2word[padded_sentence_indices[pos]]] + l1 = concatenate([ + [vector], # doc vector-in-training + model.syn0[padded_sentence_indices[pos - pre_pad_count : pos]], # preceding words + model.syn0[padded_sentence_indices[pos + 1 : pos + 1 + post_pad_count]], # following words + ]).ravel() + + neu1e = train_cbow_pair(model, word, None, l1, alpha, False, False) + + vector += neu1e[:model.vector_size] # train doc vector only alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha @@ -210,7 +285,7 @@ class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, - dm_mean=0, train_words=True, train_lbls=True, **kwargs): + dm_mean=0, dm_concat=0, dm_lbl_count=1, train_words=True, train_lbls=True, **kwargs): """ Initialize the model from an iterable of `sentences`. Each sentence is a LabeledSentence object that will be used for training. @@ -247,16 +322,32 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, `dm_mean` = if 0 (default), use the sum of the context word vectors. If 1, use the mean. Only applies when dm is used. 
+ `dm_concat` = if 1, use concatenation of context vectors rather than sum/average; + default is 0 (off). + + `dm_lbl_count` = expected constant number of sentence lbls per sentence, when using + dm_concat mode; default is 1. + """ Word2Vec.__init__(self, size=size, alpha=alpha, window=window, min_count=min_count, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, - sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, **kwargs) + sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, + null_word=dm_concat, **kwargs) self.train_words = train_words self.train_lbls = train_lbls + self.dm_concat = dm_concat + self.dm_lbl_count = dm_lbl_count if sentences is not None: self.build_vocab(sentences) self.train(sentences) + def reset_weights(self): + if self.dm_concat: + # expand l1 size to match concatenated lbls+words length + self.layer1_size = (self.dm_lbl_count + self.window) * self.vector_size + logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) + Word2Vec.reset_weights(self) + @staticmethod def _vocab_from(sentences): sentence_no, vocab = -1, {} @@ -271,7 +362,7 @@ def _vocab_from(sentences): if label in vocab: vocab[label].count += sentence_length else: - vocab[label] = Vocab(count=sentence_length) + vocab[label] = Vocab(count=sentence_length) # FIXME: doc-labels for short docs can be culled by min_count for word in sentence.words: total_words += 1 if word in vocab: @@ -293,6 +384,8 @@ def _prepare_sentences(self, sentences): def _get_job_words(self, alpha, work, job, neu1): if self.sg: return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, self.train_words, self.train_lbls) for sentence, lbls in job) + elif self.dm_concat: + return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1, self.train_words, self.train_lbls) for sentence, lbls in job) else: return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1, self.train_words, self.train_lbls) for sentence, lbls in job) From 
305ae2b7ebcadfac6ec1748d5ea3b85aabf143f4 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 11 Mar 2015 01:49:57 -0700 Subject: [PATCH 04/49] infer_vector() on Doc2Vec --- gensim/models/doc2vec.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 8aef920c68..5a5b931c76 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -389,8 +389,21 @@ def _get_job_words(self, alpha, work, job, neu1): else: return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1, self.train_words, self.train_lbls) for sentence, lbls in job) + def infer_vector(self, document, alpha=0.025, min_alpha=0.0001, steps=50): + """ + Infer a vector for given post-bulk training document. + + Document should be a list of tokens. + """ + if self.sg: + return infer_vector_dbow(self, document, alpha, min_alpha, steps) + elif self.dm_concat: + return infer_vector_dm_concat(self, document, alpha, min_alpha, steps) + else: + return infer_vector_dm(self, document, alpha, min_alpha, steps) + def __str__(self): - return "Doc2Vec(vocab=%s, size=%s, alpha=%s)" % (len(self.index2word), self.layer1_size, self.alpha) + return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors From 5aa04580c3829b423c9849be1ccce99fa65ca05a Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 11 Mar 2015 12:38:54 -0700 Subject: [PATCH 05/49] missed rename in sg path --- gensim/models/word2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 4e4596e8f6..e3b2e5b9e7 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -161,7 +161,7 @@ def train_sg_pair(model, word, word2, alpha, learn_weights=True, 
learn_vectors=T l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size fa = 1.0 / (1.0 + exp(-dot(l1, l2a.T))) # propagate hidden -> output ga = (1 - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if train_w1: + if learn_weights: model.syn1[word.point] += outer(ga, l1) # learn hidden -> output neu1e += dot(ga, l2a) # save error From 9f3d28bca0e73324a79164220859d1edbf28ab4a Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 14 Mar 2015 01:33:24 -0700 Subject: [PATCH 06/49] =?UTF-8?q?only=20swap=20dot/saxpy=20for=20detected?= =?UTF-8?q?=20blas=20=E2=80=93=20reducing=20code=20duplication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- gensim/models/doc2vec_inner.c | 5331 ++++++------------------------- gensim/models/doc2vec_inner.pyx | 539 +--- 2 files changed, 1111 insertions(+), 4759 deletions(-) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 48bdfbaf75..460be9f0d5 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -765,45 +765,27 @@ typedef double (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)(int c * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< * - * ctypedef void (*fast_sentence_dbow_hs_ptr) ( + * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x */ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)(int const *, float const *, float const *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":34 - * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil - * - * ctypedef void (*fast_sentence_dbow_hs_ptr) ( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t 
*syn1, const int size, - */ -typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs_ptr)(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); - -/* "trunk/gensim/models/doc2vec_inner.pyx":39 - * const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil +/* "trunk/gensim/models/doc2vec_inner.pyx":50 * - * ctypedef unsigned long long (*fast_sentence_dbow_neg_ptr) ( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ -typedef unsigned PY_LONG_LONG (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg_ptr)(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); - -/* "trunk/gensim/models/doc2vec_inner.pyx":45 - * unsigned long long next_random, int tw, int tl) nogil + * # function implementations swapped based on BLAS detected + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil * - * ctypedef void (*fast_sentence_dm_hs_ptr) ( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, 
int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs_ptr)(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); +typedef __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_our_dot_ptr)(int const *, float const *, int const *, float const *, int const *); /* "trunk/gensim/models/doc2vec_inner.pyx":51 - * REAL_t *work, int i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) nogil + * # function implementations swapped based on BLAS detected + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< * - * ctypedef unsigned long long (*fast_sentence_dm_neg_ptr) ( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * cdef our_dot_ptr our_dot */ -typedef unsigned PY_LONG_LONG (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg_ptr)(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); +typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); /* --- Runtime support code (head) --- */ #ifndef CYTHON_REFNANNY @@ -1160,25 +1142,19 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr __pyx_v_5trunk_6g static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot; static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_snrm2; static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal; -static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs; -static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg; -static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs; -static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg; static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[1000]; static int __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE; static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_hs(__pyx_t_5numpy_uint32_t const 
*, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); /*proto*/ -static void 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_our_dot_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot; +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_ptr __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy; +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned 
PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.doc2vec_inner" int __pyx_module_is_main_trunk__gensim__models__doc2vec_inner = 0; @@ -1395,193 +1371,192 @@ static PyObject *__pyx_codeobj__12; static PyObject *__pyx_codeobj__14; static PyObject *__pyx_codeobj__16; -/* "trunk/gensim/models/doc2vec_inner.pyx":76 - * cdef REAL_t ONEF = 1.0 +/* "trunk/gensim/models/doc2vec_inner.pyx":57 + * + * # for when fblas.sdot returns a double + * cdef REAL_t 
our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return dsdot(N, X, incX, Y, incY) * - * cdef void fast_sentence0_dbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; - /* "trunk/gensim/models/doc2vec_inner.pyx":82 - * - * cdef long long a, b - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef REAL_t f, g + /* "trunk/gensim/models/doc2vec_inner.pyx":58 + * # for when fblas.sdot returns a double + * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + * return dsdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< * + * # for 
when fblas.sdot returns a float */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); + __pyx_r = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); + goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":85 - * cdef REAL_t f, g + /* "trunk/gensim/models/doc2vec_inner.pyx":57 + * + * # for when fblas.sdot returns a double + * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return dsdot(N, X, incX, Y, incY) * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelen): - * row2 = word_point[b] * size */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":86 + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":61 + * + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return sdot(N, X, incX, Y, incY) * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelen): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) */ - __pyx_t_1 = __pyx_v_codelen; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":87 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelen): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float(int const *__pyx_v_N, float const *__pyx_v_X, int 
const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; + + /* "trunk/gensim/models/doc2vec_inner.pyx":62 + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + * return sdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< + * + * # for when no blas available */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); + __pyx_r = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); + goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":88 - * for b in range(codelen): - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue + /* "trunk/gensim/models/doc2vec_inner.pyx":61 + * + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return sdot(N, X, incX, Y, incY) + * */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); - /* "trunk/gensim/models/doc2vec_inner.pyx":89 - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":65 + * + * # for when no blas available + * cdef REAL_t our_dot_noblas(const int *N, 
const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * # not a true full dot()-implementation: just enough for our cases + * cdef int i */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":90 - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha +static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas(int const *__pyx_v_N, float const *__pyx_v_X, CYTHON_UNUSED int const *__pyx_v_incX, float const *__pyx_v_Y, CYTHON_UNUSED int const *__pyx_v_incY) { + int __pyx_v_i; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_a; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; + int __pyx_t_1; + + /* "trunk/gensim/models/doc2vec_inner.pyx":69 + * cdef int i + * cdef REAL_t a + * a = 0.0 # <<<<<<<<<<<<<< + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] */ - goto __pyx_L3_continue; - } + __pyx_v_a = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":91 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":70 + * cdef REAL_t a + * a = 0.0 + * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< + * a += X[i] * Y[i] + * return a */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); + __pyx_t_1 = 
(__pyx_v_N[0]); + for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { - /* "trunk/gensim/models/doc2vec_inner.pyx":92 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":71 + * a = 0.0 + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] # <<<<<<<<<<<<<< + * return a + * */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); + __pyx_v_a = (__pyx_v_a + ((__pyx_v_X[__pyx_v_i]) * (__pyx_v_Y[__pyx_v_i]))); + } - /* "trunk/gensim/models/doc2vec_inner.pyx":93 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":72 + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] + * return a # <<<<<<<<<<<<<< + * + * # for when no blas available */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_r = __pyx_v_a; + goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":94 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: + /* "trunk/gensim/models/doc2vec_inner.pyx":65 + * + * # for when no blas available + * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * # not a true full dot()-implementation: just enough for our cases + * cdef int i */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":95 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":75 + * + * # for when no blas available + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * cdef int i + * for i from 0 <= i < N[0] by 1: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L8; - } - __pyx_L8:; - __pyx_L3_continue:; - } - /* "trunk/gensim/models/doc2vec_inner.pyx":96 - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int const *__pyx_v_N, float const *__pyx_v_alpha, float const *__pyx_v_X, int const *__pyx_v_incX, float *__pyx_v_Y, int const *__pyx_v_incY) { + int __pyx_v_i; + int __pyx_t_1; + + /* "trunk/gensim/models/doc2vec_inner.pyx":77 + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: + * cdef int i + * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< + * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] * */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { + __pyx_t_1 = (__pyx_v_N[0]); + for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { - /* "trunk/gensim/models/doc2vec_inner.pyx":97 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: - * saxpy(&size, &ONEF, work, &ONE, 
&syn0[row1], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":78 + * cdef int i + * for i from 0 <= i < N[0] by 1: + * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L9; + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))]) = (((__pyx_v_alpha[0]) * (__pyx_v_X[(__pyx_v_i * (__pyx_v_incX[0]))])) + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))])); } - __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":76 - * cdef REAL_t ONEF = 1.0 + /* "trunk/gensim/models/doc2vec_inner.pyx":75 * - * cdef void fast_sentence0_dbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, + * # for when no blas available + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * cdef int i + * for i from 0 <= i < N[0] by 1: */ /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":100 +/* "trunk/gensim/models/doc2vec_inner.pyx":81 * * - * cdef void fast_sentence1_dbow_hs( # <<<<<<<<<<<<<< + * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, 
__pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_tw, int __pyx_v_tl) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_tw, int __pyx_v_tl) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1592,7 +1567,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":106 + /* "trunk/gensim/models/doc2vec_inner.pyx":87 * * cdef long long a, b * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1601,7 +1576,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":109 + /* "trunk/gensim/models/doc2vec_inner.pyx":90 * cdef REAL_t f, g * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1610,38 +1585,38 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":110 + /* "trunk/gensim/models/doc2vec_inner.pyx":91 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): # <<<<<<<<<<<<<< * row2 = word_point[b] * 
size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) */ __pyx_t_1 = __pyx_v_codelen; for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":111 + /* "trunk/gensim/models/doc2vec_inner.pyx":92 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":112 + /* "trunk/gensim/models/doc2vec_inner.pyx":93 * for b in range(codelen): * row2 = word_point[b] * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":113 + /* "trunk/gensim/models/doc2vec_inner.pyx":94 * row2 = word_point[b] * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ 
-1657,8 +1632,8 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":114 - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":95 + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1667,83 +1642,83 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":115 + /* "trunk/gensim/models/doc2vec_inner.pyx":96 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":116 + /* "trunk/gensim/models/doc2vec_inner.pyx":97 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":117 + /* "trunk/gensim/models/doc2vec_inner.pyx":98 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], 
&ONE) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":118 + /* "trunk/gensim/models/doc2vec_inner.pyx":99 * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) * if tl: */ __pyx_t_3 = (__pyx_v_tw != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":119 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":100 + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L8; } 
__pyx_L8:; __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":120 + /* "trunk/gensim/models/doc2vec_inner.pyx":101 * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) * if tl: # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) * */ __pyx_t_3 = (__pyx_v_tl != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":121 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":102 + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L9; } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":100 + /* "trunk/gensim/models/doc2vec_inner.pyx":81 * * - * cdef void fast_sentence1_dbow_hs( # <<<<<<<<<<<<<< + * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, */ @@ -1751,353 +1726,99 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_h /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":124 +/* 
"trunk/gensim/models/doc2vec_inner.pyx":105 * * - * cdef void fast_sentence2_dbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, + * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_b; +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_tw, int __pyx_v_tl) { PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; + unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; + __pyx_t_5numpy_uint32_t __pyx_v_target_index; + int __pyx_v_d; + unsigned PY_LONG_LONG __pyx_r; + long __pyx_t_1; + int __pyx_t_2; int __pyx_t_3; - PY_LONG_LONG __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; + int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":130 + /* "trunk/gensim/models/doc2vec_inner.pyx":112 * - * cdef long long a, b + * cdef long long a * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef REAL_t f, g - * + * cdef unsigned long long modulo = 281474976710655ULL + * cdef REAL_t f, g, label */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":133 - * cdef REAL_t f, g - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * for b in range(codelen): + /* "trunk/gensim/models/doc2vec_inner.pyx":113 + * cdef long long a + * cdef long long row1 = word2_index * size, row2 + * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< + * cdef REAL_t f, g, label + * cdef np.uint32_t target_index */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":134 + /* "trunk/gensim/models/doc2vec_inner.pyx":118 + * cdef int d * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * for b in range(codelen): - * row2 = word_point[b] * size + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * + * for d in range(negative+1): */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":135 - * for a in 
range(size): - * work[a] = 0.0 - * for b in range(codelen): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = 0.0 + /* "trunk/gensim/models/doc2vec_inner.pyx":120 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * + * for d in range(negative+1): # <<<<<<<<<<<<<< + * if d == 0: + * target_index = word_index */ - __pyx_t_1 = __pyx_v_codelen; + __pyx_t_1 = (__pyx_v_negative + 1); for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":136 - * work[a] = 0.0 - * for b in range(codelen): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); + __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":137 - * for b in range(codelen): - * row2 = word_point[b] * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":121 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); + __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":138 - * row2 = word_point[b] * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: + /* "trunk/gensim/models/doc2vec_inner.pyx":122 + * for d in range(negative+1): + * if d == 0: + * target_index = word_index # <<<<<<<<<<<<<< + * label = ONEF + * else: */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; + __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":139 - * f = 0.0 - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * 
continue + /* "trunk/gensim/models/doc2vec_inner.pyx":123 + * if d == 0: + * target_index = word_index + * label = ONEF # <<<<<<<<<<<<<< + * else: + * target_index = table[(next_random >> 16) % table_len] */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]) * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); + __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; + goto __pyx_L5; } + /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":140 - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_6 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_6) { - } else { - __pyx_t_5 = __pyx_t_6; - goto __pyx_L10_bool_binop_done; - } - __pyx_t_6 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_5 = __pyx_t_6; - __pyx_L10_bool_binop_done:; - if (__pyx_t_5) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":141 - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L5_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":142 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":143 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1[row2 + a] - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":144 - * f 
= EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1[row2 + a] - * if tw: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; - - /* "trunk/gensim/models/doc2vec_inner.pyx":145 - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - * work[a] += g * syn1[row2 + a] # <<<<<<<<<<<<<< - * if tw: - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) = ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":146 - * for a in range(size): - * work[a] += g * syn1[row2 + a] - * if tw: # <<<<<<<<<<<<<< - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] - */ - __pyx_t_5 = (__pyx_v_tw != 0); - if (__pyx_t_5) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":147 - * work[a] += g * syn1[row2 + a] - * if tw: - * for a in range(size): # <<<<<<<<<<<<<< - * syn1[row2 + a] += g * syn0[row1 + a] - * if tl: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; - - /* "trunk/gensim/models/doc2vec_inner.pyx":148 - * if tw: - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] # <<<<<<<<<<<<<< - * if tl: - * for a in range(size): - */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1[__pyx_t_7]) = ((__pyx_v_syn1[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]))); - } - goto __pyx_L14; - } - __pyx_L14:; - __pyx_L5_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":149 - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] - * if tl: # <<<<<<<<<<<<<< - * for a in range(size): - * syn0[row1 + a] += work[a] - */ - __pyx_t_5 = (__pyx_v_tl != 0); - if (__pyx_t_5) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":150 - * syn1[row2 + a] += g * syn0[row1 + 
a] - * if tl: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[row1 + a] += work[a] - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":151 - * if tl: - * for a in range(size): - * syn0[row1 + a] += work[a] # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_4 = (__pyx_v_row1 + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_4]) = ((__pyx_v_syn0[__pyx_t_4]) + (__pyx_v_work[__pyx_v_a])); - } - goto __pyx_L17; - } - __pyx_L17:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":124 - * - * - * cdef void fast_sentence2_dbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":154 - * - * - * cdef unsigned long long fast_sentence0_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - long __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "trunk/gensim/models/doc2vec_inner.pyx":161 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":162 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/doc2vec_inner.pyx":167 - * cdef int d - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":169 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_1 = (__pyx_v_negative + 1); - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_d = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":170 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":171 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":172 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L5; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":174 + /* "trunk/gensim/models/doc2vec_inner.pyx":125 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -2106,7 +1827,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":175 + /* "trunk/gensim/models/doc2vec_inner.pyx":126 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2115,7 +1836,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":176 + /* "trunk/gensim/models/doc2vec_inner.pyx":127 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2125,7 +1846,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":177 + /* "trunk/gensim/models/doc2vec_inner.pyx":128 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -2135,7 +1856,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* 
"trunk/gensim/models/doc2vec_inner.pyx":178 + /* "trunk/gensim/models/doc2vec_inner.pyx":129 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2146,27 +1867,27 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":180 + /* "trunk/gensim/models/doc2vec_inner.pyx":131 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":181 + /* "trunk/gensim/models/doc2vec_inner.pyx":132 * * row2 = target_index * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":182 + /* "trunk/gensim/models/doc2vec_inner.pyx":133 * row2 = target_index * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -2182,8 +1903,8 @@ 
static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":183 - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":134 + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -2192,2754 +1913,95 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":184 + /* "trunk/gensim/models/doc2vec_inner.pyx":135 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":185 + /* "trunk/gensim/models/doc2vec_inner.pyx":136 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if tw: */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":186 + /* "trunk/gensim/models/doc2vec_inner.pyx":137 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) */ - 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":187 + /* "trunk/gensim/models/doc2vec_inner.pyx":138 * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":188 - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L10; - } - __pyx_L10:; - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":189 - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":190 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # 
<<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":192 - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef unsigned long long fast_sentence1_dbow_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":154 - * - * - * cdef unsigned long long fast_sentence0_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":194 - * return next_random - * - * cdef unsigned long long fast_sentence1_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_tw, int __pyx_v_tl) { - 
PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - long __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "trunk/gensim/models/doc2vec_inner.pyx":201 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":202 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/doc2vec_inner.pyx":207 - * cdef int d - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":209 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * - * if d == 0: - */ - __pyx_t_1 = (__pyx_v_negative + 1); - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_d = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":211 - * for d in range(negative+1): - * - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":212 - * - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * 
label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/doc2vec_inner.pyx":213 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L5; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":215 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":216 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/doc2vec_inner.pyx":217 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":218 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":219 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - __pyx_L5:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":221 - * 
label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":222 - * - * row2 = target_index * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":223 - * row2 = target_index * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L8_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L8_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":224 - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":225 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":226 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":227 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - - /* "trunk/gensim/models/doc2vec_inner.pyx":228 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":229 - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L10; - } - __pyx_L10:; - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":230 - * if tw: - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) 
{ - - /* "trunk/gensim/models/doc2vec_inner.pyx":231 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":233 - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef unsigned long long fast_sentence2_dbow_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":194 - * return next_random - * - * cdef unsigned long long fast_sentence1_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":235 - * return next_random - * - * cdef unsigned long long fast_sentence2_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - long __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - - /* "trunk/gensim/models/doc2vec_inner.pyx":242 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":243 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/doc2vec_inner.pyx":248 - * cdef int d - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":249 - * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":251 - * work[a] = 0.0 - * - * for d in range(negative+1): # 
<<<<<<<<<<<<<< - * - * if d == 0: - */ - __pyx_t_3 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_3; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/doc2vec_inner.pyx":253 - * for d in range(negative+1): - * - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_4 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":254 - * - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/doc2vec_inner.pyx":255 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L7; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":257 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":258 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/doc2vec_inner.pyx":259 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_4) { - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":260 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L5_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":261 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - __pyx_L7:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":263 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":264 - * - * row2 = target_index * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":265 - * row2 = target_index * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":266 - * f = 0.0 - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]) * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":267 - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_6 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_6) { - } else { - __pyx_t_4 = 
__pyx_t_6; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_6 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_6; - __pyx_L12_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":268 - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L5_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":269 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * for a in range(size): - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":270 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] - */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":271 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1neg[row2 + a] - * if tw: - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":272 - * g = (label - f) * alpha - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * if tw: - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) = ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":273 - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] - * if tw: # <<<<<<<<<<<<<< - * for a in range(size): - * 
syn1neg[row2 + a] += g * syn0[row1 + a] - */ - __pyx_t_4 = (__pyx_v_tw != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":274 - * work[a] += g * syn1neg[row2 + a] - * if tw: - * for a in range(size): # <<<<<<<<<<<<<< - * syn1neg[row2 + a] += g * syn0[row1 + a] - * if tl: - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":275 - * if tw: - * for a in range(size): - * syn1neg[row2 + a] += g * syn0[row1 + a] # <<<<<<<<<<<<<< - * if tl: - * for a in range(size): - */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1neg[__pyx_t_7]) = ((__pyx_v_syn1neg[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]))); - } - goto __pyx_L16; - } - __pyx_L16:; - __pyx_L5_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":276 - * for a in range(size): - * syn1neg[row2 + a] += g * syn0[row1 + a] - * if tl: # <<<<<<<<<<<<<< - * for a in range(size): - * syn0[row1 + a] += work[a] - */ - __pyx_t_4 = (__pyx_v_tl != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":277 - * syn1neg[row2 + a] += g * syn0[row1 + a] - * if tl: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[row1 + a] += work[a] - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":278 - * if tl: - * for a in range(size): - * syn0[row1 + a] += work[a] # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_t_7 = (__pyx_v_row1 + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } - goto __pyx_L19; - } - __pyx_L19:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":280 - * syn0[row1 + a] += work[a] - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef void fast_sentence0_dm_hs( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":235 - * return next_random - * - * cdef unsigned long long fast_sentence2_dbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":282 - * return next_random - * - * cdef void fast_sentence0_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; - int __pyx_v_m; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - PY_LONG_LONG __pyx_t_5; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":293 - * cdef int m - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":294 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":295 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":296 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":297 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":299 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":300 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], 
&ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":301 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":302 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":303 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L8_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":305 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":306 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * - * if cbow_mean and count > (0.5): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * 
__pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L8_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":308 - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L12_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":309 - * - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/doc2vec_inner.pyx":310 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":312 - * sscal(&size, &inv_count, neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":313 - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, 
&ONE, &syn1[row2], &ONE) - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_b = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":314 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":315 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":316 - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L17_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L17_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":317 - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L14_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":318 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = 
EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":319 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":320 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - - /* "trunk/gensim/models/doc2vec_inner.pyx":321 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":322 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if tw: - * for m in range(j, k): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L19; - } - __pyx_L19:; - __pyx_L14_continue:; - } - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":323 - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":324 - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":325 - * if tw: - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L24_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L24_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":326 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - */ - goto __pyx_L21_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":328 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< - * if tl: - * for m in range(lbl_length): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L21_continue:; - } - goto __pyx_L20; - } - __pyx_L20:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":329 - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] 
* size], &ONE) - * if tl: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":330 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - * if tl: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":331 - * if tl: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":332 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - */ - goto __pyx_L27_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":334 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * - * cdef void fast_sentence1_dm_hs( - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L27_continue:; - } - goto __pyx_L26; - } - __pyx_L26:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":282 - * return next_random - * - * cdef void fast_sentence0_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* 
"trunk/gensim/models/doc2vec_inner.pyx":336 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - * - * cdef void fast_sentence1_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; - int __pyx_v_m; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - PY_LONG_LONG __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":347 - * cdef int m - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":348 - * - 
* memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":349 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":350 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":351 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":353 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":354 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":355 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":356 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":357 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L8_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":359 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":360 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * - * if cbow_mean and count > (0.5): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L8_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":362 - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, 
&ONE) - * - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count , neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L12_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":363 - * - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count , neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/doc2vec_inner.pyx":364 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count , neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":366 - * sscal(&size, &inv_count , neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":367 - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_b = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":368 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in 
range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":369 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":370 - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L17_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L17_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":371 - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L14_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":372 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); 
- - /* "trunk/gensim/models/doc2vec_inner.pyx":373 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":374 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - - /* "trunk/gensim/models/doc2vec_inner.pyx":375 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":376 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if tw: - * for m in range(j, k): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L19; - } - __pyx_L19:; - __pyx_L14_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":377 - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":378 - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":379 - * if tw: - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L24_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L24_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":380 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - */ - goto __pyx_L21_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":382 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * if tl: - * for m in range(lbl_length): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L21_continue:; - } - goto __pyx_L20; - } - __pyx_L20:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":383 - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * if tl: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":384 - * saxpy(&size, &ONEF, work, &ONE, 
&syn0[indexes[m]*size], &ONE) - * if tl: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":385 - * if tl: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":386 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - */ - goto __pyx_L27_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":388 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * - * cdef void fast_sentence2_dm_hs( - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L27_continue:; - } - goto __pyx_L26; - } - __pyx_L26:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":336 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - * - * cdef void fast_sentence1_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":390 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - * - * cdef void fast_sentence2_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, 
const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - int __pyx_v_m; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - PY_LONG_LONG __pyx_t_8; - - /* "trunk/gensim/models/doc2vec_inner.pyx":401 - * cdef int m - * - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] = 0.0 - * count = 0.0 - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":402 - * - * for a in range(size): - * neu1[a] = 0.0 # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - (__pyx_v_neu1[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":403 - * for a in range(size): - * neu1[a] = 0.0 - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":404 - * neu1[a] = 0.0 - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/doc2vec_inner.pyx":405 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L8_bool_binop_done; - } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L8_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":406 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L5_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":408 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":409 - * else: - * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[indexes[m] * size + a] - * for m in range(lbl_length): - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":410 - * count += ONEF - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] # <<<<<<<<<<<<<< - * for m in 
range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + (__pyx_v_syn0[(((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a)])); - } - } - __pyx_L5_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":411 - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/doc2vec_inner.pyx":412 - * neu1[a] += syn0[indexes[m] * size + a] - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":413 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L12_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":415 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] += syn0[lbl_indexes[m] * size + a] - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":416 - * else: - * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[lbl_indexes[m] * size + a] - * - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":417 - * count += ONEF - * for a in range(size): - * neu1[a] += syn0[lbl_indexes[m] * size + a] # <<<<<<<<<<<<<< - * - * if cbow_mean and count > (0.5): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + (__pyx_v_syn0[(((__pyx_v_lbl_indexes[__pyx_v_m]) * 
__pyx_v_size) + __pyx_v_a)])); - } - } - __pyx_L12_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":419 - * neu1[a] += syn0[lbl_indexes[m] * size + a] - * - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] /= count - */ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L18_bool_binop_done; - } - __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L18_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":420 - * - * if cbow_mean and count > (0.5): - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] /= count - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":421 - * if cbow_mean and count > (0.5): - * for a in range(size): - * neu1[a] /= count # <<<<<<<<<<<<<< - * - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) / __pyx_v_count); - } - goto __pyx_L17; - } - __pyx_L17:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":423 - * neu1[a] /= count - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * for b in range(codelens[i]): - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":424 - * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":425 - * for a in range(size): - * work[a] = 0.0 - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = 0.0 - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - 
for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":426 - * work[a] = 0.0 - * for b in range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":427 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":428 - * row2 = word_point[b] * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/doc2vec_inner.pyx":429 - * f = 0.0 - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_neu1[__pyx_v_a]) * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":430 - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_5 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L29_bool_binop_done; - } - __pyx_t_5 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L29_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":431 - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - 
word_code[b] - f) * alpha - */ - goto __pyx_L24_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":432 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":433 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1[row2 + a] - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":434 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1[row2 + a] - * if tw: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/doc2vec_inner.pyx":435 - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - * work[a] += g * syn1[row2 + a] # <<<<<<<<<<<<<< - * if tw: - * for a in range(size): - */ - __pyx_t_8 = __pyx_v_a; - (__pyx_v_work[__pyx_t_8]) = ((__pyx_v_work[__pyx_t_8]) + (__pyx_v_g * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":436 - * for a in range(size): - * work[a] += g * syn1[row2 + a] - * if tw: # <<<<<<<<<<<<<< - * for a in range(size): - * syn1[row2 + a] += g * neu1[a] - */ - __pyx_t_4 = (__pyx_v_tw != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":437 - * work[a] += g * syn1[row2 + a] - * if tw: - * for a in range(size): # <<<<<<<<<<<<<< - * syn1[row2 + a] += g * neu1[a] - * if tw: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < 
__pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/doc2vec_inner.pyx":438 - * if tw: - * for a in range(size): - * syn1[row2 + a] += g * neu1[a] # <<<<<<<<<<<<<< - * if tw: - * for m in range(j, k): - */ - __pyx_t_8 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1[__pyx_t_8]) = ((__pyx_v_syn1[__pyx_t_8]) + (__pyx_v_g * (__pyx_v_neu1[__pyx_v_a]))); - } - goto __pyx_L33; - } - __pyx_L33:; - __pyx_L24_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":439 - * for a in range(size): - * syn1[row2 + a] += g * neu1[a] - * if tw: # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_4 = (__pyx_v_tw != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":440 - * syn1[row2 + a] += g * neu1[a] - * if tw: - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/doc2vec_inner.pyx":441 - * if tw: - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L40_bool_binop_done; - } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L40_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":442 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * for a in range(size): - */ - goto __pyx_L37_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":444 - * continue - * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[indexes[m] * size + a] += work[a] - * if tl: - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":445 - * else: - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< - * if tl: - * for m in range(lbl_length): - */ - __pyx_t_7 = (((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } - } - __pyx_L37_continue:; - } - goto __pyx_L36; - } - __pyx_L36:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":446 - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] - * if tl: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_4 = (__pyx_v_tl != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":447 - * syn0[indexes[m] * size + a] += work[a] - * if tl: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/doc2vec_inner.pyx":448 - * if tl: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":449 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * for a in range(size): - */ - goto __pyx_L45_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":451 - * continue - * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[lbl_indexes[m] * size + a] += work[a] - * - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":452 - * else: - * for a in range(size): - * syn0[lbl_indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< - * - * cdef unsigned long long fast_sentence0_dm_neg( - */ - __pyx_t_7 = 
(((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } - } - __pyx_L45_continue:; - } - goto __pyx_L44; - } - __pyx_L44:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":390 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - * - * cdef void fast_sentence2_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/doc2vec_inner.pyx":454 - * syn0[lbl_indexes[m] * size + a] += work[a] - * - * cdef unsigned long long fast_sentence0_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - __pyx_t_5numpy_uint32_t __pyx_v_word_index; - int __pyx_v_d; - int __pyx_v_m; - unsigned PY_LONG_LONG __pyx_r; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - long __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":462 - * cdef long long a - * cdef long long row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label - * cdef np.uint32_t target_index, word_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/doc2vec_inner.pyx":467 - * cdef int d, m - * - * word_index = indexes[i] # <<<<<<<<<<<<<< - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":469 - * word_index = indexes[i] - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":470 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":471 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 
< __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":472 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":473 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":475 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":476 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":477 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":478 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":479 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L8_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":481 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":482 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L8_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":483 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L12_bool_binop_done:; - if (__pyx_t_3) { - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":484 - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/doc2vec_inner.pyx":485 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":487 - * sscal(&size, &inv_count, neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":489 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_5 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/doc2vec_inner.pyx":490 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":491 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/doc2vec_inner.pyx":492 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * 
else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L16; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":494 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":495 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/doc2vec_inner.pyx":496 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":497 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L14_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":498 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - __pyx_L16:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":500 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = (__pyx_v_target_index * 
__pyx_v_size); - - /* "trunk/gensim/models/doc2vec_inner.pyx":501 - * - * row2 = target_index * size - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":502 - * row2 = target_index * size - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":503 - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L14_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":504 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":505 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - */ - 
__pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/doc2vec_inner.pyx":506 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - - /* "trunk/gensim/models/doc2vec_inner.pyx":507 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":508 - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if tw: - * for m in range(j,k): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L21; - } - __pyx_L21:; - __pyx_L14_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":509 - * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: # <<<<<<<<<<<<<< - * for m in range(j,k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":510 - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: - * for m in range(j,k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - 
__pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":511 - * if tw: - * for m in range(j,k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L26_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L26_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":512 - * for m in range(j,k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - */ - goto __pyx_L23_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":514 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * if tl: - * for m in range(lbl_length): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L23_continue:; - } - goto __pyx_L22; - } - __pyx_L22:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":515 - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * if tl: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":516 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * if tl: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; + __pyx_t_3 = (__pyx_v_tw != 0); + if (__pyx_t_3) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":517 + /* "trunk/gensim/models/doc2vec_inner.pyx":139 + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if tw: + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if tl: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L10; + } + __pyx_L10:; + __pyx_L3_continue:; + } - /* "trunk/gensim/models/doc2vec_inner.pyx":518 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":140 + * if tw: + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * if tl: # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * */ - goto __pyx_L29_continue; - } - /*else*/ { + __pyx_t_3 = (__pyx_v_tl != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":520 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":141 + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * if tl: + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), 
(&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L29_continue:; - } - goto __pyx_L28; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L11; } - __pyx_L28:; + __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":522 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":143 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< * - * cdef unsigned long long fast_sentence1_dm_neg( + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":454 - * syn0[lbl_indexes[m] * size + a] += work[a] + /* "trunk/gensim/models/doc2vec_inner.pyx":105 * - * cdef unsigned long long fast_sentence0_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * + * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ /* function exit code */ @@ -4947,53 +2009,30 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":524 - * return next_random +/* "trunk/gensim/models/doc2vec_inner.pyx":146 * - * cdef unsigned long long fast_sentence1_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int 
lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * + * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int 
__pyx_v_cbow_mean, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { + PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - __pyx_t_5numpy_uint32_t __pyx_v_word_index; - int __pyx_v_d; int __pyx_v_m; - unsigned PY_LONG_LONG __pyx_r; int __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - long __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":532 - * cdef long long a - * cdef long long row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label - * cdef np.uint32_t target_index, word_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/doc2vec_inner.pyx":537 - * cdef int d, m - * - * word_index = indexes[i] # <<<<<<<<<<<<<< - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); + PY_LONG_LONG __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":539 - * word_index = indexes[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":157 + * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 @@ -5001,7 +2040,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":540 + /* "trunk/gensim/models/doc2vec_inner.pyx":158 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -5010,7 +2049,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":541 + /* "trunk/gensim/models/doc2vec_inner.pyx":159 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5021,7 +2060,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":542 + /* "trunk/gensim/models/doc2vec_inner.pyx":160 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -5039,7 +2078,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":543 + /* "trunk/gensim/models/doc2vec_inner.pyx":161 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -5050,30 +2089,30 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":545 + /* "trunk/gensim/models/doc2vec_inner.pyx":163 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * for m in range(lbl_length): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":546 + /* "trunk/gensim/models/doc2vec_inner.pyx":164 * else: * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * for m in range(lbl_length): * if lbl_codelens[m] == 0: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":547 + /* "trunk/gensim/models/doc2vec_inner.pyx":165 * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * for m in range(lbl_length): # <<<<<<<<<<<<<< * if lbl_codelens[m] == 0: * continue @@ -5082,8 +2121,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":548 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":166 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * for m in range(lbl_length): * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< * continue @@ -5092,7 +2131,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":549 + /* "trunk/gensim/models/doc2vec_inner.pyx":167 * for m in range(lbl_length): * if lbl_codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -5103,30 +2142,30 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":551 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":169 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":552 + /* "trunk/gensim/models/doc2vec_inner.pyx":170 * else: * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * * if cbow_mean and count > (0.5): - * inv_count = ONEF/count */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } __pyx_L8_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":553 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":172 + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count * sscal(&size, &inv_count, neu1, &ONE) @@ -5142,8 +2181,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L12_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":554 
- * saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":173 + * * if cbow_mean and count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< * sscal(&size, &inv_count, neu1, &ONE) @@ -5151,7 +2190,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/doc2vec_inner.pyx":555 + /* "trunk/gensim/models/doc2vec_inner.pyx":174 * if cbow_mean and count > (0.5): * inv_count = ONEF/count * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< @@ -5163,127 +2202,47 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":176 * sscal(&size, &inv_count, neu1, &ONE) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): + * for b in range(codelens[i]): + * row2 = word_point[b] * size */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":559 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_5 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/doc2vec_inner.pyx":560 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":561 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":562 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L16; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":564 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":565 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/doc2vec_inner.pyx":566 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":567 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L14_continue; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":568 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":177 * - * row2 = target_index * size + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): # <<<<<<<<<<<<<< + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) */ 
- __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } - __pyx_L16:; + __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { + __pyx_v_b = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":570 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":178 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): + * row2 = word_point[b] * size # <<<<<<<<<<<<<< + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); + __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":571 - * - * row2 = target_index * size - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":179 + * for b in range(codelens[i]): + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":572 - * row2 = target_index * size - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":180 + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], 
&ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -5292,88 +2251,88 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; + goto __pyx_L17_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; + __pyx_L17_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":573 - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":181 + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha + * g = (1 - word_code[b] - f) * alpha */ goto __pyx_L14_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":574 + /* "trunk/gensim/models/doc2vec_inner.pyx":182 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":575 + /* "trunk/gensim/models/doc2vec_inner.pyx":183 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); + __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":576 + /* "trunk/gensim/models/doc2vec_inner.pyx":184 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":577 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":185 + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * if tw: */ __pyx_t_3 = (__pyx_v_tw != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":578 - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":186 + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if tw: - * for m in range(j,k): + * for m in range(j, k): */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L21; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L19; } - __pyx_L21:; + __pyx_L19:; __pyx_L14_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":579 + /* "trunk/gensim/models/doc2vec_inner.pyx":187 * if tw: - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * if tw: # <<<<<<<<<<<<<< - * for m in range(j,k): + * for m in range(j, k): * if m == i or codelens[m] == 0: */ __pyx_t_3 = (__pyx_v_tw != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":580 - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":188 + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * if tw: - * for m in range(j,k): # <<<<<<<<<<<<<< + * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ @@ -5381,9 +2340,9 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":581 + /* "trunk/gensim/models/doc2vec_inner.pyx":189 * if tw: - * for m in range(j,k): + * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: @@ -5392,42 +2351,42 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L26_bool_binop_done; + goto __pyx_L24_bool_binop_done; } __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L26_bool_binop_done:; + __pyx_L24_bool_binop_done:; 
if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":582 - * for m in range(j,k): + /* "trunk/gensim/models/doc2vec_inner.pyx":190 + * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) */ - goto __pyx_L23_continue; + goto __pyx_L21_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":584 + /* "trunk/gensim/models/doc2vec_inner.pyx":192 * continue * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< * if tl: * for m in range(lbl_length): */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L23_continue:; + __pyx_L21_continue:; } - goto __pyx_L22; + goto __pyx_L20; } - __pyx_L22:; + __pyx_L20:; - /* "trunk/gensim/models/doc2vec_inner.pyx":585 + /* "trunk/gensim/models/doc2vec_inner.pyx":193 * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) * if tl: # <<<<<<<<<<<<<< * for m in range(lbl_length): * if lbl_codelens[m] == 0: @@ -5435,8 +2394,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = (__pyx_v_tl != 0); if 
(__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":586 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":194 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) * if tl: * for m in range(lbl_length): # <<<<<<<<<<<<<< * if lbl_codelens[m] == 0: @@ -5446,7 +2405,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":587 + /* "trunk/gensim/models/doc2vec_inner.pyx":195 * if tl: * for m in range(lbl_length): * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< @@ -5456,70 +2415,58 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":588 + /* "trunk/gensim/models/doc2vec_inner.pyx":196 * for m in range(lbl_length): * if lbl_codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) */ - goto __pyx_L29_continue; + goto __pyx_L27_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":590 + /* "trunk/gensim/models/doc2vec_inner.pyx":198 * continue * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * * - * return next_random */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L29_continue:; + __pyx_L27_continue:; } - goto __pyx_L28; + goto __pyx_L26; } - __pyx_L28:; + __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":592 - * saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - * - * return next_random # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":146 * - * cdef unsigned long long fast_sentence2_dm_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":524 - * return next_random * - * cdef unsigned long long fast_sentence1_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ /* function exit code */ - __pyx_L0:; - return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":594 - * return next_random +/* "trunk/gensim/models/doc2vec_inner.pyx":201 + * * - * cdef unsigned long long fast_sentence2_dm_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_neg(int const 
__pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { - PY_LONG_LONG __pyx_v_a; +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; + 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; __pyx_t_5numpy_uint32_t __pyx_v_target_index; __pyx_t_5numpy_uint32_t __pyx_v_word_index; @@ -5527,15 +2474,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast int __pyx_v_m; unsigned PY_LONG_LONG __pyx_r; int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; + int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - long __pyx_t_8; + long __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":602 + /* "trunk/gensim/models/doc2vec_inner.pyx":209 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -5544,271 +2488,217 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":607 + /* "trunk/gensim/models/doc2vec_inner.pyx":214 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< * - * for a in range(size): + * memset(neu1, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "trunk/gensim/models/doc2vec_inner.pyx":609 + /* "trunk/gensim/models/doc2vec_inner.pyx":216 * word_index = indexes[i] * - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] = 0.0 - * count = 0.0 - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":610 - * - * for a in range(size): - * neu1[a] = 0.0 # <<<<<<<<<<<<<< + * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): */ - (__pyx_v_neu1[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":611 - * for a in range(size): - * neu1[a] = 0.0 + /* "trunk/gensim/models/doc2vec_inner.pyx":217 + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i or codelens[m] == 0: */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":612 - * neu1[a] = 0.0 + /* "trunk/gensim/models/doc2vec_inner.pyx":218 + * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":613 + /* "trunk/gensim/models/doc2vec_inner.pyx":219 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L8_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L6_bool_binop_done; } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L8_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L6_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":614 + /* "trunk/gensim/models/doc2vec_inner.pyx":220 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L5_continue; + goto __pyx_L3_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":616 + /* "trunk/gensim/models/doc2vec_inner.pyx":222 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - 
* for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * for m in range(lbl_length): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":617 + /* "trunk/gensim/models/doc2vec_inner.pyx":223 * else: * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[indexes[m] * size + a] - * for m in range(lbl_length): - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":618 - * count += ONEF - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * for m in range(lbl_length): * if lbl_codelens[m] == 0: */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + (__pyx_v_syn0[(((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a)])); - } + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L5_continue:; + __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":619 - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":224 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * for m in range(lbl_length): # <<<<<<<<<<<<<< * if lbl_codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = 
__pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":620 - * neu1[a] += syn0[indexes[m] * size + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":225 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * for m in range(lbl_length): * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_4) { + __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":621 + /* "trunk/gensim/models/doc2vec_inner.pyx":226 * for m in range(lbl_length): * if lbl_codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L12_continue; + goto __pyx_L8_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":623 + /* "trunk/gensim/models/doc2vec_inner.pyx":228 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] += syn0[lbl_indexes[m] * size + a] + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * if cbow_mean and count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":624 + /* "trunk/gensim/models/doc2vec_inner.pyx":229 * else: * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[lbl_indexes[m] * size + a] - * if cbow_mean and count > (0.5): - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":625 - * count += ONEF - * for a in range(size): - * neu1[a] += syn0[lbl_indexes[m] * size + a] # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * if cbow_mean and count > (0.5): - * for a in range(size): + * inv_count = ONEF/count */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + 
(__pyx_v_syn0[(((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a)])); - } + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L12_continue:; + __pyx_L8_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":626 - * for a in range(size): - * neu1[a] += syn0[lbl_indexes[m] * size + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":230 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] /= count + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) */ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { + __pyx_t_4 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L18_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L12_bool_binop_done; } - __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L18_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L12_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":627 - * neu1[a] += syn0[lbl_indexes[m] * size + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":231 + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] /= count + * inv_count = ONEF/count # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) * */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - 
__pyx_v_a = __pyx_t_2; + __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/doc2vec_inner.pyx":628 + /* "trunk/gensim/models/doc2vec_inner.pyx":232 * if cbow_mean and count > (0.5): - * for a in range(size): - * neu1[a] /= count # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< * - * for a in range(size): + * memset(work, 0, size * cython.sizeof(REAL_t)) */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) / __pyx_v_count); - } - goto __pyx_L17; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L11; } - __pyx_L17:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":630 - * neu1[a] /= count - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":631 + /* "trunk/gensim/models/doc2vec_inner.pyx":234 + * sscal(&size, &inv_count, neu1, &ONE) * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * * for d in range(negative+1): */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":633 - * work[a] = 0.0 + /* "trunk/gensim/models/doc2vec_inner.pyx":236 + * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = word_index */ - __pyx_t_8 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_8; __pyx_t_1+=1) { + __pyx_t_5 = (__pyx_v_negative + 1); + for (__pyx_t_1 = 0; 
__pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":634 + /* "trunk/gensim/models/doc2vec_inner.pyx":237 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index * label = ONEF */ - __pyx_t_4 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_4) { + __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":635 + /* "trunk/gensim/models/doc2vec_inner.pyx":238 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -5817,7 +2707,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":636 + /* "trunk/gensim/models/doc2vec_inner.pyx":239 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -5825,11 +2715,11 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * target_index = table[(next_random >> 16) % table_len] */ __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L26; + goto __pyx_L16; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":638 + /* "trunk/gensim/models/doc2vec_inner.pyx":241 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -5838,7 +2728,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":639 + /* "trunk/gensim/models/doc2vec_inner.pyx":242 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -5847,27 +2737,27 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) 
+ 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":640 + /* "trunk/gensim/models/doc2vec_inner.pyx":243 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< * continue * label = 0.0 */ - __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_4) { + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":641 + /* "trunk/gensim/models/doc2vec_inner.pyx":244 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * */ - goto __pyx_L24_continue; + goto __pyx_L14_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":642 + /* "trunk/gensim/models/doc2vec_inner.pyx":245 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -5876,314 +2766,241 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); } - __pyx_L26:; + __pyx_L16:; - /* "trunk/gensim/models/doc2vec_inner.pyx":644 + /* "trunk/gensim/models/doc2vec_inner.pyx":247 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":645 + /* "trunk/gensim/models/doc2vec_inner.pyx":248 * * row2 = target_index * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":646 - * row2 = target_index * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += neu1[a] * syn1neg[row2 + 
a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":647 - * f = 0.0 - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] # <<<<<<<<<<<<<< + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_neu1[__pyx_v_a]) * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":648 - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":249 + * row2 = target_index * size + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] */ - __pyx_t_5 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L31_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L19_bool_binop_done; } - __pyx_t_5 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L31_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L19_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":649 - * f += neu1[a] * syn1neg[row2 + a] + /* "trunk/gensim/models/doc2vec_inner.pyx":250 + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha */ 
- goto __pyx_L24_continue; + goto __pyx_L14_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":650 + /* "trunk/gensim/models/doc2vec_inner.pyx":251 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = (label - f) * alpha - * for a in range(size): + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":651 + /* "trunk/gensim/models/doc2vec_inner.pyx":252 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if tw: */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":652 + /* "trunk/gensim/models/doc2vec_inner.pyx":253 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1neg[row2 + a] + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * if tw: + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":653 + /* "trunk/gensim/models/doc2vec_inner.pyx":254 * g = (label - f) * alpha - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * if tw: - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) 
= ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":654 - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if tw: # <<<<<<<<<<<<<< - * for a in range(size): - * syn1neg[row2 + a] += g * neu1[a] - */ - __pyx_t_4 = (__pyx_v_tw != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":655 - * work[a] += g * syn1neg[row2 + a] - * if tw: - * for a in range(size): # <<<<<<<<<<<<<< - * syn1neg[row2 + a] += g * neu1[a] - * + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if tw: */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_t_3 = (__pyx_v_tw != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":656 + /* "trunk/gensim/models/doc2vec_inner.pyx":255 + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if tw: - * for a in range(size): - * syn1neg[row2 + a] += g * neu1[a] # <<<<<<<<<<<<<< - * + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if tw: + * for m in range(j,k): */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1neg[__pyx_t_7]) = ((__pyx_v_syn1neg[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_neu1[__pyx_v_a]))); - } - goto __pyx_L35; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L21; } - __pyx_L35:; - __pyx_L24_continue:; + __pyx_L21:; + __pyx_L14_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":658 - * syn1neg[row2 + a] += g * neu1[a] - * + /* "trunk/gensim/models/doc2vec_inner.pyx":256 + * if tw: + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * if tw: # <<<<<<<<<<<<<< - * for m in range(j, k): + * for m in 
range(j,k): * if m == i or codelens[m] == 0: */ - __pyx_t_4 = (__pyx_v_tw != 0); - if (__pyx_t_4) { + __pyx_t_3 = (__pyx_v_tw != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":659 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":257 + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * if tw: - * for m in range(j, k): # <<<<<<<<<<<<<< + * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":660 + /* "trunk/gensim/models/doc2vec_inner.pyx":258 * if tw: - * for m in range(j, k): + * for m in range(j,k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L42_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L26_bool_binop_done; } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L42_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L26_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":661 - * for m in range(j, k): + /* "trunk/gensim/models/doc2vec_inner.pyx":259 + * for m in range(j,k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * for a in range(size): + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) */ - goto __pyx_L39_continue; + goto __pyx_L23_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":663 + /* "trunk/gensim/models/doc2vec_inner.pyx":261 * continue * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[indexes[m] * size + a] 
+= work[a] - * if tl: - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":664 - * else: - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< * if tl: * for m in range(lbl_length): */ - __pyx_t_7 = (((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L39_continue:; + __pyx_L23_continue:; } - goto __pyx_L38; + goto __pyx_L22; } - __pyx_L38:; + __pyx_L22:; - /* "trunk/gensim/models/doc2vec_inner.pyx":665 - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] + /* "trunk/gensim/models/doc2vec_inner.pyx":262 + * else: + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) * if tl: # <<<<<<<<<<<<<< * for m in range(lbl_length): * if lbl_codelens[m] == 0: */ - __pyx_t_4 = (__pyx_v_tl != 0); - if (__pyx_t_4) { + __pyx_t_3 = (__pyx_v_tl != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":666 - * syn0[indexes[m] * size + a] += work[a] + /* "trunk/gensim/models/doc2vec_inner.pyx":263 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) * if tl: * for m in range(lbl_length): # <<<<<<<<<<<<<< * if lbl_codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":667 + /* "trunk/gensim/models/doc2vec_inner.pyx":264 * if tl: * for m in range(lbl_length): * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_4) { + __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":668 + /* "trunk/gensim/models/doc2vec_inner.pyx":265 * for m in range(lbl_length): * if lbl_codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * for a in range(size): + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) */ - goto __pyx_L47_continue; + goto __pyx_L29_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":670 + /* "trunk/gensim/models/doc2vec_inner.pyx":267 * continue * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[lbl_indexes[m] * size + a] += work[a] - * - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":671 - * else: - * for a in range(size): - * syn0[lbl_indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_t_7 = (((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L47_continue:; + __pyx_L29_continue:; } - goto __pyx_L46; + goto __pyx_L28; } - __pyx_L46:; + __pyx_L28:; - /* "trunk/gensim/models/doc2vec_inner.pyx":673 - * 
syn0[lbl_indexes[m] * size + a] += work[a] + /* "trunk/gensim/models/doc2vec_inner.pyx":269 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":594 - * return next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":201 + * * - * cdef unsigned long long fast_sentence2_dm_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ @@ -6193,8 +3010,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":675 - * return next_random +/* "trunk/gensim/models/doc2vec_inner.pyx":272 + * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -6243,36 +3060,36 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} 
+ __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 
0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 7) { goto __pyx_L5_argtuple_error; @@ -6295,7 +3112,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -6363,102 +3180,102 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_dbow", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":676 + /* "trunk/gensim/models/doc2vec_inner.pyx":273 * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int tw = train_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 676; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 676; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":677 + /* "trunk/gensim/models/doc2vec_inner.pyx":274 * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int tw = train_words * cdef int tl = train_lbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 677; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 677; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":678 + /* "trunk/gensim/models/doc2vec_inner.pyx":275 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int tw = train_words # <<<<<<<<<<<<<< * cdef int tl = train_lbls * */ - 
__pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 678; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_tw = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":679 + /* "trunk/gensim/models/doc2vec_inner.pyx":276 * cdef int negative = model.negative * cdef int tw = train_words * cdef int tl = train_lbls # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 679; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_tl = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":681 + /* "trunk/gensim/models/doc2vec_inner.pyx":278 * cdef int tl = train_lbls * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t _alpha = alpha */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 681; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 681; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":683 + /* "trunk/gensim/models/doc2vec_inner.pyx":280 * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 683; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":684 + /* "trunk/gensim/models/doc2vec_inner.pyx":281 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 684; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 684; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 
= __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":693 + /* "trunk/gensim/models/doc2vec_inner.pyx":290 * cdef int sentence_len * cdef int lbl_length * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 693; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 693; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":696 + /* "trunk/gensim/models/doc2vec_inner.pyx":293 * * cdef int i, j * cdef long result = 0 # <<<<<<<<<<<<<< @@ -6467,7 +3284,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":709 + /* "trunk/gensim/models/doc2vec_inner.pyx":306 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -6477,23 +3294,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":307 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":712 + /* "trunk/gensim/models/doc2vec_inner.pyx":309 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6503,106 +3320,106 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":713 + /* "trunk/gensim/models/doc2vec_inner.pyx":310 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":714 + /* "trunk/gensim/models/doc2vec_inner.pyx":311 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 714; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 714; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":715 + /* "trunk/gensim/models/doc2vec_inner.pyx":312 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":716 + /* "trunk/gensim/models/doc2vec_inner.pyx":313 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 
__Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; 
- __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":719 + /* "trunk/gensim/models/doc2vec_inner.pyx":316 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 719; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":720 + /* "trunk/gensim/models/doc2vec_inner.pyx":317 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = 
PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -6611,14 +3428,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/doc2vec_inner.pyx":721 + /* "trunk/gensim/models/doc2vec_inner.pyx":318 * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_10 < __pyx_t_9) != 0)) { __pyx_t_5 = __pyx_t_10; @@ -6627,7 +3444,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_v_lbl_length = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":723 + /* "trunk/gensim/models/doc2vec_inner.pyx":320 * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6638,19 +3455,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":724 + /* "trunk/gensim/models/doc2vec_inner.pyx":321 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 724; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":725 + /* "trunk/gensim/models/doc2vec_inner.pyx":322 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -6661,7 +3478,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":726 + /* "trunk/gensim/models/doc2vec_inner.pyx":323 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -6673,20 +3490,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":728 + /* "trunk/gensim/models/doc2vec_inner.pyx":325 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 728; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 728; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":729 + /* "trunk/gensim/models/doc2vec_inner.pyx":326 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -6696,49 +3513,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":730 + /* "trunk/gensim/models/doc2vec_inner.pyx":327 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 730; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 730; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":731 + /* "trunk/gensim/models/doc2vec_inner.pyx":328 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 731; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 731; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":732 + /* "trunk/gensim/models/doc2vec_inner.pyx":329 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":734 + /* "trunk/gensim/models/doc2vec_inner.pyx":331 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -6749,7 +3566,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":735 + /* "trunk/gensim/models/doc2vec_inner.pyx":332 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6761,7 +3578,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L7:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":737 + /* "trunk/gensim/models/doc2vec_inner.pyx":334 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -6769,17 +3586,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * for i in range(lbl_length): */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, 
__pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_5 = 0; @@ -6793,7 +3610,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -6807,7 +3624,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = 
__Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -6815,9 +3632,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_5 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -6825,16 +3642,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if 
(unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -6843,7 +3660,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -6854,17 +3671,17 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":738 + /* "trunk/gensim/models/doc2vec_inner.pyx":335 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * for i in range(lbl_length): * word = lbls[i] */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":737 + /* "trunk/gensim/models/doc2vec_inner.pyx":334 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -6874,7 +3691,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":739 + /* "trunk/gensim/models/doc2vec_inner.pyx":336 * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item * for i in range(lbl_length): # <<<<<<<<<<<<<< @@ -6885,19 +3702,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":740 + /* "trunk/gensim/models/doc2vec_inner.pyx":337 * reduced_windows[i] = item * for i in range(lbl_length): * word = lbls[i] # <<<<<<<<<<<<<< * if word is None: * 
lbl_codelens[i] = 0 */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 740; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_7); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":741 + /* "trunk/gensim/models/doc2vec_inner.pyx":338 * for i in range(lbl_length): * word = lbls[i] * if word is None: # <<<<<<<<<<<<<< @@ -6908,7 +3725,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_t_12 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":742 + /* "trunk/gensim/models/doc2vec_inner.pyx":339 * word = lbls[i] * if word is None: * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< @@ -6920,20 +3737,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":744 + /* "trunk/gensim/models/doc2vec_inner.pyx":341 * lbl_codelens[i] = 0 * else: * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * lbl_codelens[i] = len(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 744; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 744; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":745 + /* "trunk/gensim/models/doc2vec_inner.pyx":342 * else: * lbl_indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -6943,23 +3760,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":746 + /* "trunk/gensim/models/doc2vec_inner.pyx":343 * lbl_indexes[i] = word.index * if hs: * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< * else: * lbl_codelens[i] = 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 746; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); goto __pyx_L14; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":748 + /* "trunk/gensim/models/doc2vec_inner.pyx":345 * lbl_codelens[i] = len(word.code) * else: * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< @@ -6970,7 +3787,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":749 + /* "trunk/gensim/models/doc2vec_inner.pyx":346 * else: * lbl_codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6982,7 +3799,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L13:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":752 + /* "trunk/gensim/models/doc2vec_inner.pyx":349 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6996,7 +3813,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":753 + /* "trunk/gensim/models/doc2vec_inner.pyx":350 * # release GIL & train on the sentence * with nogil: * for j in range(lbl_length): # <<<<<<<<<<<<<< @@ -7007,7 +3824,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_j = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":754 + /* "trunk/gensim/models/doc2vec_inner.pyx":351 * with nogil: * for j in range(lbl_length): * if lbl_codelens[j] == 0: # <<<<<<<<<<<<<< @@ -7017,7 +3834,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_j]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":755 + /* "trunk/gensim/models/doc2vec_inner.pyx":352 * for j in range(lbl_length): * if lbl_codelens[j] == 0: * continue # <<<<<<<<<<<<<< @@ -7027,7 +3844,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ goto __pyx_L18_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":756 + /* "trunk/gensim/models/doc2vec_inner.pyx":353 * if lbl_codelens[j] == 0: * continue * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -7038,7 +3855,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_i = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":757 + /* "trunk/gensim/models/doc2vec_inner.pyx":354 * continue * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -7048,7 +3865,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":758 + /* "trunk/gensim/models/doc2vec_inner.pyx":355 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -7058,7 +3875,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ goto __pyx_L21_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":759 + /* "trunk/gensim/models/doc2vec_inner.pyx":356 * if codelens[i] == 0: * continue * if hs: # <<<<<<<<<<<<<< @@ -7068,19 +3885,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":760 + /* "trunk/gensim/models/doc2vec_inner.pyx":357 * continue * if hs: * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) # <<<<<<<<<<<<<< * if negative: * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_tw, __pyx_v_tl); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), 
__pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_tw, __pyx_v_tl); goto __pyx_L24; } __pyx_L24:; - /* "trunk/gensim/models/doc2vec_inner.pyx":761 + /* "trunk/gensim/models/doc2vec_inner.pyx":358 * if hs: * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) * if negative: # <<<<<<<<<<<<<< @@ -7090,14 +3907,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":762 + /* "trunk/gensim/models/doc2vec_inner.pyx":359 * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) * if negative: * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) # <<<<<<<<<<<<<< * * return result */ - __pyx_v_next_random = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_tw, __pyx_v_tl); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_tw, __pyx_v_tl); goto __pyx_L25; } __pyx_L25:; @@ -7107,7 +3924,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } } - /* "trunk/gensim/models/doc2vec_inner.pyx":752 + /* "trunk/gensim/models/doc2vec_inner.pyx":349 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -7125,7 +3942,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } } - /* "trunk/gensim/models/doc2vec_inner.pyx":764 + /* "trunk/gensim/models/doc2vec_inner.pyx":361 * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) * * return result # <<<<<<<<<<<<<< @@ -7133,14 +3950,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 764; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":675 - * return next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":272 + * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -7165,7 +3982,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":767 +/* "trunk/gensim/models/doc2vec_inner.pyx":364 * * * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< @@ -7217,41 +4034,41 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; 
__pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; 
goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { goto __pyx_L5_argtuple_error; @@ -7276,7 +4093,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -7345,115 +4162,115 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence int 
__pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_dm", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":768 + /* "trunk/gensim/models/doc2vec_inner.pyx":365 * * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int tw = train_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 768; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 768; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":769 + /* "trunk/gensim/models/doc2vec_inner.pyx":366 * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int tw = train_words * cdef int tl = train_lbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 769; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":770 + /* "trunk/gensim/models/doc2vec_inner.pyx":367 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int tw = train_words # <<<<<<<<<<<<<< * cdef int tl = train_lbls * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 770; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_tw = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":771 + /* "trunk/gensim/models/doc2vec_inner.pyx":368 * cdef int negative = model.negative * cdef int tw = train_words * cdef int tl = train_lbls # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 771; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_tl = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":772 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":369 * cdef int tw = train_words * cdef int tl = train_lbls * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 772; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":774 + /* "trunk/gensim/models/doc2vec_inner.pyx":371 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 774; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":777 + /* "trunk/gensim/models/doc2vec_inner.pyx":374 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 777; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":778 + /* "trunk/gensim/models/doc2vec_inner.pyx":375 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 778; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 778; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) 
&& PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":787 + /* "trunk/gensim/models/doc2vec_inner.pyx":384 * cdef int sentence_len * cdef int lbl_length * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 787; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":790 + /* "trunk/gensim/models/doc2vec_inner.pyx":387 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -7462,7 +4279,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":805 + /* "trunk/gensim/models/doc2vec_inner.pyx":402 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -7472,23 +4289,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":806 + /* "trunk/gensim/models/doc2vec_inner.pyx":403 * * 
if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":808 + /* "trunk/gensim/models/doc2vec_inner.pyx":405 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -7498,116 +4315,116 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":809 + /* "trunk/gensim/models/doc2vec_inner.pyx":406 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 809; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 809; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":810 + /* "trunk/gensim/models/doc2vec_inner.pyx":407 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 810; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 810; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":811 + /* "trunk/gensim/models/doc2vec_inner.pyx":408 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 811; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":812 + /* "trunk/gensim/models/doc2vec_inner.pyx":409 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) 
&& PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":815 + /* "trunk/gensim/models/doc2vec_inner.pyx":412 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 815; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":816 + /* "trunk/gensim/models/doc2vec_inner.pyx":413 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 816; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 413; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":817 + /* "trunk/gensim/models/doc2vec_inner.pyx":414 * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 817; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -7616,7 +4433,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/doc2vec_inner.pyx":819 + /* "trunk/gensim/models/doc2vec_inner.pyx":416 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -7627,19 +4444,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":820 + /* "trunk/gensim/models/doc2vec_inner.pyx":417 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 820; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if 
(unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":821 + /* "trunk/gensim/models/doc2vec_inner.pyx":418 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -7650,7 +4467,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":822 + /* "trunk/gensim/models/doc2vec_inner.pyx":419 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -7662,20 +4479,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":824 + /* "trunk/gensim/models/doc2vec_inner.pyx":421 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 824; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 824; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":825 + /* "trunk/gensim/models/doc2vec_inner.pyx":422 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -7685,49 +4502,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":826 + /* "trunk/gensim/models/doc2vec_inner.pyx":423 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/doc2vec_inner.pyx":827 + /* "trunk/gensim/models/doc2vec_inner.pyx":424 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 827; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":828 + /* "trunk/gensim/models/doc2vec_inner.pyx":425 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 828; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":830 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":427 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -7738,7 +4555,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":831 + /* "trunk/gensim/models/doc2vec_inner.pyx":428 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -7750,7 +4567,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L7:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":833 + /* "trunk/gensim/models/doc2vec_inner.pyx":430 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -7758,17 +4575,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, 
__pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -7782,7 +4599,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -7796,7 +4613,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 
__Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -7804,9 +4621,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -7814,16 +4631,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; 
if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -7832,7 +4649,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 833; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -7843,17 +4660,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":834 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":431 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 834; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":833 + /* "trunk/gensim/models/doc2vec_inner.pyx":430 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -7863,14 +4680,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":836 + /* "trunk/gensim/models/doc2vec_inner.pyx":433 * reduced_windows[i] = item * * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< * for i in range(lbl_length): * word = lbls[i] */ - __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 836; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_10 < __pyx_t_9) != 0)) { __pyx_t_5 = __pyx_t_10; @@ -7879,7 +4696,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_v_lbl_length = ((int)__pyx_t_5); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":837 + /* "trunk/gensim/models/doc2vec_inner.pyx":434 * * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * for i in range(lbl_length): # <<<<<<<<<<<<<< @@ -7890,19 +4707,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":838 + /* "trunk/gensim/models/doc2vec_inner.pyx":435 * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * for i in range(lbl_length): * word = lbls[i] # <<<<<<<<<<<<<< * if word is None: * lbl_codelens[i] = 0 */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 838; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_7); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":839 + /* "trunk/gensim/models/doc2vec_inner.pyx":436 * for i in range(lbl_length): * word = lbls[i] * if word is None: # <<<<<<<<<<<<<< @@ -7913,7 +4730,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_t_12 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":840 + /* "trunk/gensim/models/doc2vec_inner.pyx":437 * word = lbls[i] * if word is None: * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< @@ -7925,20 +4742,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":842 + /* "trunk/gensim/models/doc2vec_inner.pyx":439 * lbl_codelens[i] = 0 * else: * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * lbl_codelens[i] 
= len(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 842; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":843 + /* "trunk/gensim/models/doc2vec_inner.pyx":440 * else: * lbl_indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -7948,23 +4765,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":844 + /* "trunk/gensim/models/doc2vec_inner.pyx":441 * lbl_indexes[i] = word.index * if hs: * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< * else: * lbl_codelens[i] = 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename 
= __pyx_f[0]; __pyx_lineno = 844; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); goto __pyx_L14; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":846 + /* "trunk/gensim/models/doc2vec_inner.pyx":443 * lbl_codelens[i] = len(word.code) * else: * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< @@ -7975,7 +4792,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":847 + /* "trunk/gensim/models/doc2vec_inner.pyx":444 * else: * lbl_codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -7987,7 +4804,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L13:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":850 + /* "trunk/gensim/models/doc2vec_inner.pyx":447 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -8001,7 +4818,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":851 + /* "trunk/gensim/models/doc2vec_inner.pyx":448 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -8012,7 +4829,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":852 + /* "trunk/gensim/models/doc2vec_inner.pyx":449 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -8022,7 +4839,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":853 + /* "trunk/gensim/models/doc2vec_inner.pyx":450 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -8032,7 +4849,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence goto __pyx_L18_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":854 + /* "trunk/gensim/models/doc2vec_inner.pyx":451 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -8041,7 +4858,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":855 + /* "trunk/gensim/models/doc2vec_inner.pyx":452 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -8051,7 +4868,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = ((__pyx_v_j < 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":856 + /* "trunk/gensim/models/doc2vec_inner.pyx":453 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -8063,7 +4880,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L21:; - /* "trunk/gensim/models/doc2vec_inner.pyx":857 + /* "trunk/gensim/models/doc2vec_inner.pyx":454 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -8072,7 +4889,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":858 + /* "trunk/gensim/models/doc2vec_inner.pyx":455 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -8082,7 +4899,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = ((__pyx_v_k > 
__pyx_v_sentence_len) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":859 + /* "trunk/gensim/models/doc2vec_inner.pyx":456 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -8094,7 +4911,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L22:; - /* "trunk/gensim/models/doc2vec_inner.pyx":860 + /* "trunk/gensim/models/doc2vec_inner.pyx":457 * if k > sentence_len: * k = sentence_len * if hs: # <<<<<<<<<<<<<< @@ -8104,19 +4921,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":861 + /* "trunk/gensim/models/doc2vec_inner.pyx":458 * k = sentence_len * if hs: * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, # <<<<<<<<<<<<<< * lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) * if negative: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); goto __pyx_L23; } __pyx_L23:; - /* "trunk/gensim/models/doc2vec_inner.pyx":863 + /* "trunk/gensim/models/doc2vec_inner.pyx":460 * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, 
size, indexes, * lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) * if negative: # <<<<<<<<<<<<<< @@ -8126,14 +4943,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":864 + /* "trunk/gensim/models/doc2vec_inner.pyx":461 * lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) * if negative: * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, # <<<<<<<<<<<<<< * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, * cbow_mean, next_random, lbl_length, tw, tl) */ - __pyx_v_next_random = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); goto __pyx_L24; } __pyx_L24:; @@ -8141,7 +4958,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":850 + /* "trunk/gensim/models/doc2vec_inner.pyx":447 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -8159,7 +4976,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } } - /* 
"trunk/gensim/models/doc2vec_inner.pyx":868 + /* "trunk/gensim/models/doc2vec_inner.pyx":465 * cbow_mean, next_random, lbl_length, tw, tl) * * return result # <<<<<<<<<<<<<< @@ -8167,13 +4984,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 868; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":767 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * * * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< @@ -8199,7 +5016,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":871 +/* "trunk/gensim/models/doc2vec_inner.pyx":468 * * * def init(): # <<<<<<<<<<<<<< @@ -8238,7 +5055,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":883 + /* "trunk/gensim/models/doc2vec_inner.pyx":478 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -8248,7 +5065,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":884 + /* "trunk/gensim/models/doc2vec_inner.pyx":479 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -8258,7 +5075,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_2[0] = ((float)0.01); __pyx_v_y = 
__pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":885 + /* "trunk/gensim/models/doc2vec_inner.pyx":480 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -8267,7 +5084,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/doc2vec_inner.pyx":886 + /* "trunk/gensim/models/doc2vec_inner.pyx":481 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -8276,7 +5093,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_size = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":891 + /* "trunk/gensim/models/doc2vec_inner.pyx":486 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -8286,7 +5103,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":892 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -8295,7 +5112,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/doc2vec_inner.pyx":893 + /* "trunk/gensim/models/doc2vec_inner.pyx":488 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -8305,7 +5122,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN 
(__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":896 + /* "trunk/gensim/models/doc2vec_inner.pyx":491 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -8314,67 +5131,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":897 + /* "trunk/gensim/models/doc2vec_inner.pyx":492 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< * if (abs(d_res - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence0_dbow_hs + * our_dot = our_dot_double */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/doc2vec_inner.pyx":898 + /* "trunk/gensim/models/doc2vec_inner.pyx":493 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs = fast_sentence0_dbow_hs - * fast_sentence_dbow_neg = fast_sentence0_dbow_neg + * our_dot = our_dot_double + * our_saxpy = saxpy */ __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":899 + /* "trunk/gensim/models/doc2vec_inner.pyx":494 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence0_dbow_hs # <<<<<<<<<<<<<< - * fast_sentence_dbow_neg = fast_sentence0_dbow_neg - * fast_sentence_dm_hs = fast_sentence0_dm_hs - */ - 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_hs; - - /* "trunk/gensim/models/doc2vec_inner.pyx":900 - * if (abs(d_res - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence0_dbow_hs - * fast_sentence_dbow_neg = fast_sentence0_dbow_neg # <<<<<<<<<<<<<< - * fast_sentence_dm_hs = fast_sentence0_dm_hs - * fast_sentence_dm_neg = fast_sentence0_dm_neg - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dbow_neg; - - /* "trunk/gensim/models/doc2vec_inner.pyx":901 - * fast_sentence_dbow_hs = fast_sentence0_dbow_hs - * fast_sentence_dbow_neg = fast_sentence0_dbow_neg - * fast_sentence_dm_hs = fast_sentence0_dm_hs # <<<<<<<<<<<<<< - * fast_sentence_dm_neg = fast_sentence0_dm_neg + * our_dot = our_dot_double # <<<<<<<<<<<<<< + * our_saxpy = saxpy * return 0 # double */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_hs; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double; - /* "trunk/gensim/models/doc2vec_inner.pyx":902 - * fast_sentence_dbow_neg = fast_sentence0_dbow_neg - * fast_sentence_dm_hs = fast_sentence0_dm_hs - * fast_sentence_dm_neg = fast_sentence0_dm_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":495 + * if (abs(d_res - expected) < 0.0001): + * our_dot = our_dot_double + * our_saxpy = saxpy # <<<<<<<<<<<<<< * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence0_dm_neg; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":903 - * fast_sentence_dm_hs = 
fast_sentence0_dm_hs - * fast_sentence_dm_neg = fast_sentence0_dm_neg + /* "trunk/gensim/models/doc2vec_inner.pyx":496 + * our_dot = our_dot_double + * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence1_dbow_hs + * our_dot = our_dot_float */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -8382,55 +5181,37 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":904 - * fast_sentence_dm_neg = fast_sentence0_dm_neg + /* "trunk/gensim/models/doc2vec_inner.pyx":497 + * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs = fast_sentence1_dbow_hs - * fast_sentence_dbow_neg = fast_sentence1_dbow_neg + * our_dot = our_dot_float + * our_saxpy = saxpy */ __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":905 + /* "trunk/gensim/models/doc2vec_inner.pyx":498 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence1_dbow_hs # <<<<<<<<<<<<<< - * fast_sentence_dbow_neg = fast_sentence1_dbow_neg - * fast_sentence_dm_hs = fast_sentence1_dm_hs - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_hs; - - /* "trunk/gensim/models/doc2vec_inner.pyx":906 - * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_dbow_hs = fast_sentence1_dbow_hs - * fast_sentence_dbow_neg = fast_sentence1_dbow_neg # <<<<<<<<<<<<<< - * fast_sentence_dm_hs = fast_sentence1_dm_hs - * fast_sentence_dm_neg = fast_sentence1_dm_neg - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dbow_neg; - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":907 - * fast_sentence_dbow_hs = fast_sentence1_dbow_hs - * fast_sentence_dbow_neg = fast_sentence1_dbow_neg - * fast_sentence_dm_hs = fast_sentence1_dm_hs # <<<<<<<<<<<<<< - * fast_sentence_dm_neg = fast_sentence1_dm_neg + * our_dot = our_dot_float # <<<<<<<<<<<<<< + * our_saxpy = saxpy * return 1 # float */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_hs; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float; - /* "trunk/gensim/models/doc2vec_inner.pyx":908 - * fast_sentence_dbow_neg = fast_sentence1_dbow_neg - * fast_sentence_dm_hs = fast_sentence1_dm_hs - * fast_sentence_dm_neg = fast_sentence1_dm_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":499 + * elif (abs(p_res[0] - expected) < 0.0001): + * our_dot = our_dot_float + * our_saxpy = saxpy # <<<<<<<<<<<<<< * return 1 # float * else: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence1_dm_neg; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":909 - * fast_sentence_dm_hs = fast_sentence1_dm_hs - * fast_sentence_dm_neg = fast_sentence1_dm_neg + /* "trunk/gensim/models/doc2vec_inner.pyx":500 + * our_dot = our_dot_float + * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< * else: * # neither => use cython loops, no BLAS @@ -8442,45 +5223,27 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":913 + /* "trunk/gensim/models/doc2vec_inner.pyx":504 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - * fast_sentence_dbow_hs = 
fast_sentence2_dbow_hs # <<<<<<<<<<<<<< - * fast_sentence_dbow_neg = fast_sentence2_dbow_neg - * fast_sentence_dm_hs = fast_sentence2_dm_hs - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_hs; - - /* "trunk/gensim/models/doc2vec_inner.pyx":914 - * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - * fast_sentence_dbow_hs = fast_sentence2_dbow_hs - * fast_sentence_dbow_neg = fast_sentence2_dbow_neg # <<<<<<<<<<<<<< - * fast_sentence_dm_hs = fast_sentence2_dm_hs - * fast_sentence_dm_neg = fast_sentence2_dm_neg - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dbow_neg; - - /* "trunk/gensim/models/doc2vec_inner.pyx":915 - * fast_sentence_dbow_hs = fast_sentence2_dbow_hs - * fast_sentence_dbow_neg = fast_sentence2_dbow_neg - * fast_sentence_dm_hs = fast_sentence2_dm_hs # <<<<<<<<<<<<<< - * fast_sentence_dm_neg = fast_sentence2_dm_neg + * our_dot = our_dot_noblas # <<<<<<<<<<<<<< + * our_saxpy = our_saxpy_noblas * return 2 */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_hs; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":916 - * fast_sentence_dbow_neg = fast_sentence2_dbow_neg - * fast_sentence_dm_hs = fast_sentence2_dm_hs - * fast_sentence_dm_neg = fast_sentence2_dm_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":505 + * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< * return 2 * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg = 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence2_dm_neg; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":917 - * fast_sentence_dm_hs = fast_sentence2_dm_hs - * fast_sentence_dm_neg = fast_sentence2_dm_neg + /* "trunk/gensim/models/doc2vec_inner.pyx":506 + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< * * FAST_VERSION = init() # initialize the module @@ -8491,7 +5254,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":871 + /* "trunk/gensim/models/doc2vec_inner.pyx":468 * * * def init(): # <<<<<<<<<<<<<< @@ -10617,8 +7380,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 737; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if 
(!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -10630,31 +7393,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":716 + /* "trunk/gensim/models/doc2vec_inner.pyx":313 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 716; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/doc2vec_inner.pyx":812 + /* "trunk/gensim/models/doc2vec_inner.pyx":409 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 812; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -10724,41 +7487,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "trunk/gensim/models/doc2vec_inner.pyx":675 - * return next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":272 + * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(35, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(35, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(7, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 675, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(7, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 272, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":767 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * * * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = 
model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(41, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__13 = PyTuple_Pack(41, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = 
(PyObject*)__Pyx_PyCode_New(8, 0, 41, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 767, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(8, 0, 41, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 364, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":871 + /* "trunk/gensim/models/doc2vec_inner.pyx":468 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 871, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 468, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -10924,115 +7687,115 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":57 - * int i, int j, int k, int cbow_mean, unsigned long long next_random, int lbl_length, int tw, int tl) nogil + /* "trunk/gensim/models/doc2vec_inner.pyx":34 + * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x # <<<<<<<<<<<<<< * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_scopy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_scopy); if (unlikely(!__pyx_t_2)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_scopy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_scopy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":58 + /* "trunk/gensim/models/doc2vec_inner.pyx":35 * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x # <<<<<<<<<<<<<< * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_saxpy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":59 + /* "trunk/gensim/models/doc2vec_inner.pyx":36 * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) # <<<<<<<<<<<<<< * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":60 + /* "trunk/gensim/models/doc2vec_inner.pyx":37 * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) # <<<<<<<<<<<<<< * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":61 + /* "trunk/gensim/models/doc2vec_inner.pyx":38 * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) # <<<<<<<<<<<<<< * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x - * cdef fast_sentence_dbow_hs_ptr fast_sentence_dbow_hs + * */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_snrm2 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":62 + /* "trunk/gensim/models/doc2vec_inner.pyx":39 * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x # <<<<<<<<<<<<<< - * cdef fast_sentence_dbow_hs_ptr fast_sentence_dbow_hs - * cdef fast_sentence_dbow_neg_ptr fast_sentence_dbow_neg + * + * DEF EXP_TABLE_SIZE = 1000 */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sscal); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sscal); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":73 + /* "trunk/gensim/models/doc2vec_inner.pyx":46 * cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -11041,57 +7804,57 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":74 + /* "trunk/gensim/models/doc2vec_inner.pyx":47 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< * - * cdef void fast_sentence0_dbow_hs( + * # function implementations swapped based on BLAS detected */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":675 - * return next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":272 + * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 675; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":767 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * * * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 767; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":871 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":468 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 871; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":919 + /* "trunk/gensim/models/doc2vec_inner.pyx":508 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -11104,14 +7867,14 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 919; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":1 diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 5fb564f7e9..156fa1c2db 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -31,39 +31,12 @@ ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, con ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil -ctypedef void (*fast_sentence_dbow_hs_ptr) ( - const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil - -ctypedef unsigned long long (*fast_sentence_dbow_neg_ptr) ( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, 
const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random, int tw, int tl) nogil - -ctypedef void (*fast_sentence_dm_hs_ptr) ( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, - REAL_t *work, int i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) nogil - -ctypedef unsigned long long (*fast_sentence_dm_neg_ptr) ( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random, int lbl_length, int tw, int tl) nogil - cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x -cdef fast_sentence_dbow_hs_ptr fast_sentence_dbow_hs -cdef fast_sentence_dbow_neg_ptr fast_sentence_dbow_neg -cdef fast_sentence_dm_hs_ptr fast_sentence_dm_hs -cdef fast_sentence_dm_neg_ptr fast_sentence_dm_neg DEF EXP_TABLE_SIZE = 1000 DEF MAX_EXP = 6 @@ -73,55 +46,39 @@ cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE cdef int ONE = 1 cdef REAL_t ONEF = 1.0 -cdef void fast_sentence0_dbow_hs( - const np.uint32_t *word_point, const 
np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil: - - cdef long long a, b - cdef long long row1 = word2_index * size, row2 - cdef REAL_t f, g - - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelen): - row2 = word_point[b] * size - f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if tw: - saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - if tl: - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) +# function implementations swapped based on BLAS detected +ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil +ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil +cdef our_dot_ptr our_dot +cdef our_saxpy_ptr our_saxpy -cdef void fast_sentence1_dbow_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil: +# for when fblas.sdot returns a double +cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + return dsdot(N, X, incX, Y, incY) - cdef long long a, b - cdef long long row1 = word2_index * size, row2 - cdef REAL_t f, g +# for when fblas.sdot returns a float +cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + return sdot(N, X, incX, Y, incY) - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelen): - row2 = word_point[b] * size - f = sdot(&size, &syn0[row1], &ONE, 
&syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if tw: - saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - if tl: - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) +# for when no blas available +cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + # not a true full dot()-implementation: just enough for our cases + cdef int i + cdef REAL_t a + a = 0.0 + for i from 0 <= i < N[0] by 1: + a += X[i] * Y[i] + return a + +# for when no blas available +cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: + cdef int i + for i from 0 <= i < N[0] by 1: + Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] -cdef void fast_sentence2_dbow_hs( +cdef void fast_sentence_dbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil: @@ -130,28 +87,22 @@ cdef void fast_sentence2_dbow_hs( cdef long long row1 = word2_index * size, row2 cdef REAL_t f, g - for a in range(size): - work[a] = 0.0 + memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelen): row2 = word_point[b] * size - f = 0.0 - for a in range(size): - f += syn0[row1 + a] * syn1[row2 + a] + f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha - for a in range(size): - work[a] += g * syn1[row2 + a] + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) if tw: - for a in range(size): - syn1[row2 + a] += g * syn0[row1 + a] + our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) if 
tl: - for a in range(size): - syn0[row1 + a] += work[a] + our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) -cdef unsigned long long fast_sentence0_dbow_neg( +cdef unsigned long long fast_sentence_dbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, @@ -178,109 +129,22 @@ cdef unsigned long long fast_sentence0_dbow_neg( label = 0.0 row2 = target_index * size - f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) if tw: - saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) if tl: - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) return next_random -cdef unsigned long long fast_sentence1_dbow_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random, int tw, int tl) nogil: - - cdef long long a - cdef long long row1 = word2_index * size, row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, label - cdef np.uint32_t target_index - cdef int d - - memset(work, 0, size * cython.sizeof(REAL_t)) - - for d in range(negative+1): - - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - 
label = 0.0 - - row2 = target_index * size - f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - if tw: - saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - if tl: - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - return next_random - -cdef unsigned long long fast_sentence2_dbow_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random, int tw, int tl) nogil: - - cdef long long a - cdef long long row1 = word2_index * size, row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, label - cdef np.uint32_t target_index - cdef int d - - for a in range(size): - work[a] = 0.0 - - for d in range(negative+1): - - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = 0.0 - for a in range(size): - f += syn0[row1 + a] * syn1neg[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - for a in range(size): - work[a] += g * syn1neg[row2 + a] - if tw: - for a in range(size): - syn1neg[row2 + a] += g * syn0[row1 + a] - if tl: - for a in range(size): - syn0[row1 + a] += work[a] - - return next_random - -cdef void fast_sentence0_dm_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], +cdef void fast_sentence_dm_hs( + const np.uint32_t *word_point, const np.uint8_t *word_code, int 
codelens[MAX_SENTENCE_LEN], int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) nogil: @@ -297,14 +161,14 @@ cdef void fast_sentence0_dm_hs( continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - + our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + if cbow_mean and count > (0.5): inv_count = ONEF/count sscal(&size, &inv_count, neu1, &ONE) @@ -312,147 +176,30 @@ cdef void fast_sentence0_dm_hs( memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelens[i]): row2 = word_point[b] * size - f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if tw: - saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - if tw: - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - if tl: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - -cdef void fast_sentence1_dm_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, int 
i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) nogil: - - cdef long long a, b - cdef long long row2 - cdef REAL_t f, g, count, inv_count - cdef int m - - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count , neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelens[i]): - row2 = word_point[b] * size - f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) + f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) if tw: - saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) if tw: for m in range(j, k): if m == i or codelens[m] == 0: continue else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) if tl: for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: - saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - -cdef void fast_sentence2_dm_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) 
nogil: - - cdef long long a, b - cdef long long row2 - cdef REAL_t f, g, count - cdef int m - - for a in range(size): - neu1[a] = 0.0 - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - for a in range(size): - neu1[a] += syn0[indexes[m] * size + a] - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - for a in range(size): - neu1[a] += syn0[lbl_indexes[m] * size + a] + our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - if cbow_mean and count > (0.5): - for a in range(size): - neu1[a] /= count - - for a in range(size): - work[a] = 0.0 - for b in range(codelens[i]): - row2 = word_point[b] * size - f = 0.0 - for a in range(size): - f += neu1[a] * syn1[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - for a in range(size): - work[a] += g * syn1[row2 + a] - if tw: - for a in range(size): - syn1[row2 + a] += g * neu1[a] - if tw: - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - for a in range(size): - syn0[indexes[m] * size + a] += work[a] - if tl: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - for a in range(size): - syn0[lbl_indexes[m] * size + a] += work[a] -cdef unsigned long long fast_sentence0_dm_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], +cdef unsigned long long fast_sentence_dm_neg( + const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, unsigned long long next_random, int lbl_length, int tw, int tl) nogil: @@ -473,13 
+220,13 @@ cdef unsigned long long fast_sentence0_dm_neg( continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) if cbow_mean and count > (0.5): inv_count = ONEF/count sscal(&size, &inv_count, neu1, &ONE) @@ -498,179 +245,29 @@ cdef unsigned long long fast_sentence0_dm_neg( label = 0.0 row2 = target_index * size - f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) if tw: - saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) if tw: for m in range(j,k): if m == i or codelens[m] == 0: continue else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) if tl: for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: - saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) return next_random -cdef unsigned long long fast_sentence1_dm_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random, int 
lbl_length, int tw, int tl) nogil: - - cdef long long a - cdef long long row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label - cdef np.uint32_t target_index, word_index - cdef int d, m - - word_index = indexes[i] - - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) - - for d in range(negative+1): - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - if tw: - saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - if tw: - for m in range(j,k): - if m == i or codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - if tl: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - - return next_random - -cdef unsigned long long fast_sentence2_dm_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - np.uint32_t 
indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random, int lbl_length, int tw, int tl) nogil: - - cdef long long a - cdef long long row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label - cdef np.uint32_t target_index, word_index - cdef int d, m - - word_index = indexes[i] - - for a in range(size): - neu1[a] = 0.0 - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - for a in range(size): - neu1[a] += syn0[indexes[m] * size + a] - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - for a in range(size): - neu1[a] += syn0[lbl_indexes[m] * size + a] - if cbow_mean and count > (0.5): - for a in range(size): - neu1[a] /= count - - for a in range(size): - work[a] = 0.0 - - for d in range(negative+1): - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = 0.0 - for a in range(size): - f += neu1[a] * syn1neg[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - for a in range(size): - work[a] += g * syn1neg[row2 + a] - if tw: - for a in range(size): - syn1neg[row2 + a] += g * neu1[a] - - if tw: - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - for a in range(size): - syn0[indexes[m] * size + a] += work[a] - if tl: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - for a in range(size): - syn0[lbl_indexes[m] * size + a] += work[a] - - return next_random def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): cdef int hs 
= model.hs @@ -874,10 +471,8 @@ def init(): into table EXP_TABLE. """ - global fast_sentence_dbow_hs - global fast_sentence_dbow_neg - global fast_sentence_dm_hs - global fast_sentence_dm_neg + global our_dot + global our_saxpy cdef int i cdef float *x = [10.0] @@ -896,24 +491,18 @@ def init(): d_res = dsdot(&size, x, &ONE, y, &ONE) p_res = &d_res if (abs(d_res - expected) < 0.0001): - fast_sentence_dbow_hs = fast_sentence0_dbow_hs - fast_sentence_dbow_neg = fast_sentence0_dbow_neg - fast_sentence_dm_hs = fast_sentence0_dm_hs - fast_sentence_dm_neg = fast_sentence0_dm_neg + our_dot = our_dot_double + our_saxpy = saxpy return 0 # double elif (abs(p_res[0] - expected) < 0.0001): - fast_sentence_dbow_hs = fast_sentence1_dbow_hs - fast_sentence_dbow_neg = fast_sentence1_dbow_neg - fast_sentence_dm_hs = fast_sentence1_dm_hs - fast_sentence_dm_neg = fast_sentence1_dm_neg + our_dot = our_dot_float + our_saxpy = saxpy return 1 # float else: # neither => use cython loops, no BLAS # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - fast_sentence_dbow_hs = fast_sentence2_dbow_hs - fast_sentence_dbow_neg = fast_sentence2_dbow_neg - fast_sentence_dm_hs = fast_sentence2_dm_hs - fast_sentence_dm_neg = fast_sentence2_dm_neg + our_dot = our_dot_noblas + our_saxpy = our_saxpy_noblas return 2 FAST_VERSION = init() # initialize the module From ff4cb98b176f6025d1c06765aff9fa94d9e17bca Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 6 May 2015 14:12:08 -0700 Subject: [PATCH 07/49] rename for clarity --- gensim/models/word2vec.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index e3b2e5b9e7..11f0fe29a1 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -149,7 +149,7 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): return len([word for word in sentence if word is not None]) -def train_sg_pair(model, 
word, word2, alpha, learn_weights=True, learn_vectors=True): +def train_sg_pair(model, word, word2, alpha, learn_hidden=True, learn_vectors=True): if isinstance(word2, Vocab): l1 = model.syn0[word2.index] else: @@ -161,7 +161,7 @@ def train_sg_pair(model, word, word2, alpha, learn_weights=True, learn_vectors=T l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size fa = 1.0 / (1.0 + exp(-dot(l1, l2a.T))) # propagate hidden -> output ga = (1 - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if learn_weights: + if learn_hidden: model.syn1[word.point] += outer(ga, l1) # learn hidden -> output neu1e += dot(ga, l2a) # save error @@ -175,7 +175,7 @@ def train_sg_pair(model, word, word2, alpha, learn_weights=True, learn_vectors=T l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if learn_weights: + if learn_hidden: model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if learn_vectors: @@ -183,14 +183,14 @@ def train_sg_pair(model, word, word2, alpha, learn_weights=True, learn_vectors=T return neu1e -def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_weights=True, learn_vectors=True): +def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_hidden=True, learn_vectors=True): neu1e = zeros(l1.shape) if model.hs: l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size fa = 1. / (1. + exp(-dot(l1, l2a.T))) # propagate hidden -> output ga = (1. 
- word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if learn_weights: + if learn_hidden: model.syn1[word.point] += outer(ga, l1) # learn hidden -> output neu1e += dot(ga, l2a) # save error @@ -204,7 +204,7 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_weights=Tr l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size fb = 1. / (1. + exp(-dot(l1, l2b.T))) # propagate hidden -> output gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if learn_weights: + if learn_hidden: model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if learn_vectors: From d851078206fcda9e3c99a813e673b0bbfdf11d03 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Thu, 7 May 2015 18:22:06 -0700 Subject: [PATCH 08/49] merge pre-trained vectors; optionally lock syn0 indexes --- gensim/models/word2vec.py | 59 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 11f0fe29a1..97bef01991 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -81,7 +81,7 @@ from numpy import exp, dot, zeros, outer, random, dtype, float32 as REAL,\ uint32, seterr, array, uint8, vstack, argsort, fromstring, sqrt, newaxis,\ - ndarray, empty, sum as np_sum, prod + ndarray, empty, sum as np_sum, prod, ones, repeat as np_repeat logger = logging.getLogger("gensim.models.word2vec") @@ -152,8 +152,10 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): def train_sg_pair(model, word, word2, alpha, learn_hidden=True, learn_vectors=True): if isinstance(word2, Vocab): l1 = model.syn0[word2.index] + lock_factor = model.syn0locks[word2.index] else: l1 = word2 # passed-in candidate vector + lock_factor = 1.0 neu1e = zeros(l1.shape) if model.hs: @@ -179,7 +181,7 @@ def train_sg_pair(model, word, word2, alpha, 
learn_hidden=True, learn_vectors=Tr model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if learn_vectors: - l1 += neu1e # learn input -> hidden (changes model.syn0[word2.index], if that is l1) + l1 += neu1e * lock_factor # learn input -> hidden (changes model.syn0[word2.index], if that is l1) return neu1e @@ -208,7 +210,9 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_hidden=Tru model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if learn_vectors: - model.syn0[input_word_indices] += neu1e # learn input -> hidden, here for all words in the window separately + # learn input -> hidden, here for all words in the window separately + l = len(input_word_indices) + model.syn0[input_word_indices] += np_repeat(neu1e,l).reshape(l,model.vector_size) * model.syn0locks[input_word_indices][:,None] return neu1e @@ -327,7 +331,7 @@ def make_table(self, table_size=100000000, power=0.75): # compute sum of all power (Z in paper) train_words_pow = float(sum([self.vocab[word].count**power for word in self.vocab])) - # go through the whole table and fill it up with the word indexes proportional to a word's count**power + # go through the whole table and fill it up with the word indices proportional to a word's count**power widx = 0 # normalize count^0.75 by Z d1 = self.vocab[self.index2word[widx]].count**power / train_words_pow @@ -541,6 +545,8 @@ def reset_weights(self): self.syn1neg = zeros((len(self.vocab), self.layer1_size), dtype=REAL) self.syn0norm = None + self.syn0locks = ones(len(self.vocab), dtype=REAL) # zeros suppress training vectors + def seeded_vector(self, seed_string): """Create one 'random' vector (but deterministic by seed_string)""" # Note: Python's built in hash function can vary across versions of Python @@ -640,6 +646,51 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True): result.init_sims(norm_only) 
return result + def merge_word2vec_format(self, fname, binary=False): + """ + Merge the input-hidden weight matrix from the original C word2vec-tool format, + where it overlaps with the current vocabulary. + + `binary` is a boolean indicating whether the data is in binary word2vec format. + """ + counts = None + overlap_count = 0 + logger.info("loading projection weights from %s" % (fname)) + with utils.smart_open(fname) as fin: + header = utils.to_unicode(fin.readline()) + vocab_size, vector_size = map(int, header.split()) # throws for invalid file format + if not vector_size == self.vector_size: + logger.error("incompatible vector sizes") + # TOCONSIDER: maybe truncation/smaller vectors still useful enough to merge? + return # TODO raise ValueError()? + if binary: + binary_len = dtype(REAL).itemsize * vector_size + for line_no in xrange(vocab_size): + # mixed text and binary: read text first, then binary + word = [] + while True: + ch = fin.read(1) + if ch == b' ': + break + if ch != b'\n': # ignore newlines in front of words (some binary files have newline, some don't) + word.append(ch) + word = utils.to_unicode(b''.join(word)) + weights = fromstring(fin.read(binary_len), dtype=REAL) + if word in self.vocab: + overlap_count += 1 + self.syn0[self.vocab[word].index] = weights + self.syn0locks[self.vocab[word].index] = 0.0 # lock it + else: + for line_no, line in enumerate(fin): + parts = utils.to_unicode(line).split() + if len(parts) != vector_size + 1: + raise ValueError("invalid vector on line %s (is this really the text format?)" % (line_no)) + word, weights = parts[0], list(map(REAL, parts[1:])) + if word in self.vocab: + overlap_count += 1 + self.syn0[self.vocab[word].index] = weights + logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.syn0.shape, fname)) + def most_similar(self, positive=[], negative=[], topn=10): """ From 0a8bff56698c3f5a6669684cfeb5d752b348f1aa Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Thu, 7 May 2015 
18:26:13 -0700 Subject: [PATCH 09/49] dbow_words option; parameter name/doc cleanup; expect cython dm_concat --- gensim/models/doc2vec.py | 78 +++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 5a5b931c76..8996a70b86 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -42,38 +42,45 @@ except ImportError: from Queue import Queue -from numpy import zeros, random, sum as np_sum, add as np_add, concatenate +from numpy import zeros, random, sum as np_sum, add as np_add, concatenate, repeat as np_repeat from six import string_types logger = logging.getLogger(__name__) from gensim import utils # utility fnc for pickling, common scipy operations etc -from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair +from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_sentence_sg try: - from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, FAST_VERSION + from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat, FAST_VERSION except: # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=True, train_lbls=True): + def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=False, train_lbls=True): """ Update distributed bag of words model by training on a single sentence. The sentence is a list of Vocab objects (or None, where the corresponding word is not in the vocabulary. Called internally from `Doc2Vec.train()`. + If train_words is True, simultaneously train word-to-word (not just doc-to-word) + examples, exactly as per Word2Vec skip-gram training. (Without this option, + word vectors are neither consulted nor updated during DBOW doc vector training.) 
+ This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from doc2vec_inner instead. """ - for label in lbls: - if label is None: - continue # OOV word in the input sentence => skip - for word in sentence: - if word is None: + if train_words: + train_sentence_sg(model, sentence, alpha, work) + if train_lbls: + for label in lbls: + if label is None: continue # OOV word in the input sentence => skip - train_sg_pair(model, word, label, alpha, train_words, train_lbls) + for word in sentence: + if word is None: + continue # OOV word in the input sentence => skip + train_sg_pair(model, word, label, alpha) return len([word for word in sentence if word is not None]) @@ -104,12 +111,12 @@ def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_ l1 /= (len(word2_indices) + lbl_len) neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, train_words, train_words) if train_lbls: - model.syn0[lbl_indices] += neu1e + model.syn0[lbl_indices] += neu1e * model.syn0locks[lbl_indices] return len([word for word in sentence if word is not None]) -def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): + def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): """ Update distributed memory model by training on a single sentence. 
@@ -122,11 +129,11 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, """ lbl_indices = [lbl.index for lbl in lbls if lbl is not None] if len(lbl_indices) != model.dm_lbl_count: - return # skip doc without expected lbl(s) + return 0 # skip doc without expected lbl(s) null_word = model.vocab['\0'] - pre_pad_count = int((model.window + 1) / 2) - post_pad_count = int(model.window / 2) + pre_pad_count = model.window + post_pad_count = model.window padded_sentence_indices = ( (pre_pad_count * [null_word.index]) # pre-padding + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words @@ -143,6 +150,8 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, l1 = model.syn0[l1_indices].ravel() # numpy advanced-indexing: copy; flatten to 1d neu1e = train_cbow_pair(model, word, None, l1, alpha, True, False) + neu1e = neu1e * np_repeat(model.syn0locks[l1_indices], model.vector_size) # respect any locks + if not train_lbls: # trim lbl indices/errors l1_indices = l1_indices[len(lbl_indices):] @@ -158,7 +167,7 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, return len(padded_sentence_indices) - pre_pad_count - post_pad_count -def infer_vector_dbow(model, document, alpha=0.025, min_alpha=0.0001, steps=50): +def infer_vector_dbow(model, document, alpha=0.1, min_alpha=0.0001, steps=5): """ Infer a vector for given post-bulk training document, in the 'dbow' model. @@ -183,7 +192,7 @@ def infer_vector_dbow(model, document, alpha=0.025, min_alpha=0.0001, steps=50): return vector -def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): +def infer_vector_dm(model, document, alpha=0.1, min_alpha=0.0001, steps=5): """ Infer a vector representation for the given post-training document, in the 'dm' model. 
@@ -217,7 +226,7 @@ def infer_vector_dm(model, document, alpha=0.025, min_alpha=0.0001, steps=50): return vector -def infer_vector_dm_concat(model, document, alpha=0.025, min_alpha=0.0001, steps=50): +def infer_vector_dm_concat(model, document, alpha=0.1, min_alpha=0.0001, steps=5): """ Infer a vector representation for the given post-training document, in the 'dm_concat' model. @@ -232,8 +241,8 @@ def infer_vector_dm_concat(model, document, alpha=0.025, min_alpha=0.0001, steps sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] null_word = model.vocab['\0'] - pre_pad_count = int((model.window + 1) / 2) - post_pad_count = int(model.window / 2) + pre_pad_count = model.window + post_pad_count = model.window padded_sentence_indices = ( (pre_pad_count * [null_word.index]) # pre-padding + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words @@ -285,7 +294,7 @@ class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, - dm_mean=0, dm_concat=0, dm_lbl_count=1, train_words=True, train_lbls=True, **kwargs): + dbow_words=0, dm_mean=0, dm_concat=0, dm_lbl_count=1, **kwargs): """ Initialize the model from an iterable of `sentences`. Each sentence is a LabeledSentence object that will be used for training. @@ -296,8 +305,8 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it in some other way. - `dm` defines the training algorithm. By default (`dm=1`), distributed memory is used. - Otherwise, `dbow` is employed. + `dm` defines the training algorithm. By default (`dm=1`), 'distributed memory' (PV-DM) is used. 
+ Otherwise, `distributed bag of words` (PV-DBOW) is employed. `size` is the dimensionality of the feature vectors. @@ -305,7 +314,8 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, `alpha` is the initial learning rate (will linearly drop to zero as training progresses). - `seed` = for the random number generator. + `seed` = for the random number generator. Only runs with a single worker will be + deterministically reproducible because of the ordering randomness in multi-threaded runs. `min_count` = ignore all words with total frequency lower than this. @@ -320,21 +330,25 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, specifies how many "noise words" should be drawn (usually between 5-20). `dm_mean` = if 0 (default), use the sum of the context word vectors. If 1, use the mean. - Only applies when dm is used. + Only applies when dm is used in non-concatenative mode. `dm_concat` = if 1, use concatenation of context vectors rather than sum/average; - default is 0 (off). + default is 0 (off). Note concatenation results in a much-larger model, as the input + is no longer the size of one (sampled or arithmatically combined) word vector, but the + size of the label(s) and all words in the context strung together. `dm_lbl_count` = expected constant number of sentence lbls per sentence, when using dm_concat mode; default is 1. + `dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW + doc-vector training; default is 0 (faster training of doc-vectors only. 
+ """ Word2Vec.__init__(self, size=size, alpha=alpha, window=window, min_count=min_count, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, null_word=dm_concat, **kwargs) - self.train_words = train_words - self.train_lbls = train_lbls + self.dbow_words = dbow_words self.dm_concat = dm_concat self.dm_lbl_count = dm_lbl_count if sentences is not None: @@ -344,7 +358,7 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, def reset_weights(self): if self.dm_concat: # expand l1 size to match concatenated lbls+words length - self.layer1_size = (self.dm_lbl_count + self.window) * self.vector_size + self.layer1_size = (self.dm_lbl_count + (2 * self.window)) * self.vector_size logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) Word2Vec.reset_weights(self) @@ -383,11 +397,11 @@ def _prepare_sentences(self, sentences): def _get_job_words(self, alpha, work, job, neu1): if self.sg: - return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, self.train_words, self.train_lbls) for sentence, lbls in job) + return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, self.dbow_words, True) for sentence, lbls in job) elif self.dm_concat: - return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1, self.train_words, self.train_lbls) for sentence, lbls in job) + return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1, True, True) for sentence, lbls in job) else: - return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1, self.train_words, self.train_lbls) for sentence, lbls in job) + return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1, True, True) for sentence, lbls in job) def infer_vector(self, document, alpha=0.025, min_alpha=0.0001, steps=50): """ From eb04a73f31019b424447554ffc93788f233560f1 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Thu, 7 May 2015 18:35:45 -0700 
Subject: [PATCH 10/49] cythonized dm_concat; dbow word cotraining; syn0locks support; parameter name cleanup --- gensim/models/doc2vec_inner.c | 4887 +++++++++++++++++++++---------- gensim/models/doc2vec_inner.pyx | 382 ++- 2 files changed, 3610 insertions(+), 1659 deletions(-) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 460be9f0d5..8bc1db5caa 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -1151,10 +1151,12 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float(int const *, float const *, int const *, float const *, int const *); /*proto*/ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, int, int, int); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, unsigned PY_LONG_LONG, int, int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int const , unsigned PY_LONG_LONG, int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.doc2vec_inner" int __pyx_module_is_main_trunk__gensim__models__doc2vec_inner = 0; @@ -1164,8 +1166,9 @@ static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls); /* proto */ -static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__train_words, PyObject *__pyx_v__train_lbls); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__learn_words, PyObject *__pyx_v__learn_lbls); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ static char __pyx_k_B[] = "B"; @@ -1183,12 +1186,15 @@ static char __pyx_k_i[] = "i"; static char __pyx_k_j[] = "j"; static char __pyx_k_k[] = "k"; static char __pyx_k_l[] = "l"; +static char __pyx_k_m[] = "m"; +static char __pyx_k_n[] = "n"; static char __pyx_k_q[] = "q"; static char __pyx_k_x[] = "x"; static char __pyx_k_y[] = "y"; static char __pyx_k_Zd[] = "Zd"; static char __pyx_k_Zf[] = "Zf"; static char __pyx_k_Zg[] = "Zg"; +static char __pyx_k__5[] = "\000"; static char __pyx_k_hs[] = "hs"; static char __pyx_k_np[] = "np"; static char __pyx_k_tl[] = "tl"; @@ -1209,6 +1215,7 @@ static char __pyx_k_word[] = "word"; static char __pyx_k_work[] = "_work"; static char __pyx_k_alpha[] = "alpha"; static char __pyx_k_codes[] = "codes"; +static char __pyx_k_count[] = "count"; static char 
__pyx_k_d_res[] = "d_res"; static char __pyx_k_fblas[] = "fblas"; static char __pyx_k_index[] = "index"; @@ -1222,6 +1229,7 @@ static char __pyx_k_scopy[] = "scopy"; static char __pyx_k_snrm2[] = "snrm2"; static char __pyx_k_sscal[] = "sscal"; static char __pyx_k_table[] = "table"; +static char __pyx_k_vocab[] = "vocab"; static char __pyx_k_import[] = "__import__"; static char __pyx_k_neu1_2[] = "neu1"; static char __pyx_k_points[] = "points"; @@ -1241,24 +1249,39 @@ static char __pyx_k_negative[] = "negative"; static char __pyx_k_sentence[] = "sentence"; static char __pyx_k_cbow_mean[] = "cbow_mean"; static char __pyx_k_enumerate[] = "enumerate"; +static char __pyx_k_inv_count[] = "inv_count"; static char __pyx_k_lbl_codes[] = "lbl_codes"; +static char __pyx_k_syn0locks[] = "syn0locks"; static char __pyx_k_table_len[] = "table_len"; static char __pyx_k_ValueError[] = "ValueError"; static char __pyx_k_lbl_length[] = "lbl_length"; static char __pyx_k_lbl_points[] = "lbl_points"; +static char __pyx_k_learn_lbls[] = "_learn_lbls"; static char __pyx_k_train_lbls[] = "train_lbls"; static char __pyx_k_layer1_size[] = "layer1_size"; static char __pyx_k_lbl_indexes[] = "lbl_indexes"; +static char __pyx_k_learn_words[] = "_learn_words"; static char __pyx_k_next_random[] = "next_random"; static char __pyx_k_train_words[] = "train_words"; +static char __pyx_k_vector_size[] = "vector_size"; static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static char __pyx_k_RuntimeError[] = "RuntimeError"; +static char __pyx_k_dm_lbl_count[] = "dm_lbl_count"; static char __pyx_k_lbl_codelens[] = "lbl_codelens"; +static char __pyx_k_learn_hidden[] = "learn_hidden"; +static char __pyx_k_learn_lbls_2[] = "learn_lbls"; static char __pyx_k_sentence_len[] = "sentence_len"; +static char __pyx_k_train_lbls_2[] = "_train_lbls"; +static char __pyx_k_learn_words_2[] = "learn_words"; +static char __pyx_k_train_words_2[] = "_train_words"; +static char __pyx_k_window_indexes[] = "window_indexes"; 
+static char __pyx_k_null_word_index[] = "null_word_index"; static char __pyx_k_reduced_windows[] = "reduced_windows"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static char __pyx_k_train_sentence_dm[] = "train_sentence_dm"; +static char __pyx_k_expected_lbl_length[] = "expected_lbl_length"; static char __pyx_k_train_sentence_dbow[] = "train_sentence_dbow"; +static char __pyx_k_train_sentence_dm_concat[] = "train_sentence_dm_concat"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; static char __pyx_k_Users_scratch_Documents_dev2015[] = "/Users/scratch/Documents/dev2015/gensim_venv/src/trunk/gensim/models/doc2vec_inner.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; @@ -1275,16 +1298,20 @@ static PyObject *__pyx_n_s_REAL; static PyObject *__pyx_n_s_RuntimeError; static PyObject *__pyx_kp_s_Users_scratch_Documents_dev2015; static PyObject *__pyx_n_s_ValueError; +static PyObject *__pyx_kp_s__5; static PyObject *__pyx_n_s_alpha; static PyObject *__pyx_n_s_alpha_2; static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; static PyObject *__pyx_n_s_codes; +static PyObject *__pyx_n_s_count; static PyObject *__pyx_n_s_cpointer; static PyObject *__pyx_n_s_d_res; +static PyObject *__pyx_n_s_dm_lbl_count; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; +static PyObject *__pyx_n_s_expected_lbl_length; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_hs; @@ -1293,6 +1320,7 @@ static PyObject *__pyx_n_s_import; static PyObject *__pyx_n_s_index; static PyObject *__pyx_n_s_indexes; static PyObject *__pyx_n_s_init; +static PyObject *__pyx_n_s_inv_count; static PyObject *__pyx_n_s_item; static PyObject *__pyx_n_s_j; static PyObject *__pyx_n_s_k; @@ -1303,8 +1331,15 @@ static PyObject *__pyx_n_s_lbl_indexes; static PyObject *__pyx_n_s_lbl_length; static 
PyObject *__pyx_n_s_lbl_points; static PyObject *__pyx_n_s_lbls; +static PyObject *__pyx_n_s_learn_hidden; +static PyObject *__pyx_n_s_learn_lbls; +static PyObject *__pyx_n_s_learn_lbls_2; +static PyObject *__pyx_n_s_learn_words; +static PyObject *__pyx_n_s_learn_words_2; +static PyObject *__pyx_n_s_m; static PyObject *__pyx_n_s_main; static PyObject *__pyx_n_s_model; +static PyObject *__pyx_n_s_n; static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; static PyObject *__pyx_n_s_negative; @@ -1312,6 +1347,7 @@ static PyObject *__pyx_n_s_neu1; static PyObject *__pyx_n_s_neu1_2; static PyObject *__pyx_n_s_next_random; static PyObject *__pyx_n_s_np; +static PyObject *__pyx_n_s_null_word_index; static PyObject *__pyx_n_s_numpy; static PyObject *__pyx_n_s_p_res; static PyObject *__pyx_n_s_point; @@ -1331,6 +1367,7 @@ static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_snrm2; static PyObject *__pyx_n_s_sscal; static PyObject *__pyx_n_s_syn0; +static PyObject *__pyx_n_s_syn0locks; static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_table; @@ -1338,13 +1375,19 @@ static PyObject *__pyx_n_s_table_len; static PyObject *__pyx_n_s_test; static PyObject *__pyx_n_s_tl; static PyObject *__pyx_n_s_train_lbls; +static PyObject *__pyx_n_s_train_lbls_2; static PyObject *__pyx_n_s_train_sentence_dbow; static PyObject *__pyx_n_s_train_sentence_dm; +static PyObject *__pyx_n_s_train_sentence_dm_concat; static PyObject *__pyx_n_s_train_words; +static PyObject *__pyx_n_s_train_words_2; static PyObject *__pyx_n_s_trunk_gensim_models_doc2vec_inne; static PyObject *__pyx_n_s_tw; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; +static PyObject *__pyx_n_s_vector_size; +static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; +static PyObject *__pyx_n_s_window_indexes; static PyObject *__pyx_n_s_word; static PyObject *__pyx_n_s_work; static PyObject 
*__pyx_n_s_work_2; @@ -1358,18 +1401,22 @@ static PyObject *__pyx_tuple_; static PyObject *__pyx_tuple__2; static PyObject *__pyx_tuple__3; static PyObject *__pyx_tuple__4; -static PyObject *__pyx_tuple__5; static PyObject *__pyx_tuple__6; static PyObject *__pyx_tuple__7; static PyObject *__pyx_tuple__8; static PyObject *__pyx_tuple__9; static PyObject *__pyx_tuple__10; static PyObject *__pyx_tuple__11; +static PyObject *__pyx_tuple__12; static PyObject *__pyx_tuple__13; -static PyObject *__pyx_tuple__15; -static PyObject *__pyx_codeobj__12; -static PyObject *__pyx_codeobj__14; -static PyObject *__pyx_codeobj__16; +static PyObject *__pyx_tuple__14; +static PyObject *__pyx_tuple__16; +static PyObject *__pyx_tuple__18; +static PyObject *__pyx_tuple__20; +static PyObject *__pyx_codeobj__15; +static PyObject *__pyx_codeobj__17; +static PyObject *__pyx_codeobj__19; +static PyObject *__pyx_codeobj__21; /* "trunk/gensim/models/doc2vec_inner.pyx":57 * @@ -1556,7 +1603,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_tw, int __pyx_v_tl) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_train_hidden, int __pyx_v_train_inputs, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1567,7 +1614,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":87 + /* "trunk/gensim/models/doc2vec_inner.pyx":88 * * cdef long long a, b * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1576,7 +1623,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":90 + /* "trunk/gensim/models/doc2vec_inner.pyx":91 * cdef REAL_t f, g * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1585,7 +1632,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":91 + /* "trunk/gensim/models/doc2vec_inner.pyx":92 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): # <<<<<<<<<<<<<< @@ -1596,7 +1643,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":92 + /* "trunk/gensim/models/doc2vec_inner.pyx":93 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -1605,7 +1652,7 @@ static void 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":93 + /* "trunk/gensim/models/doc2vec_inner.pyx":94 * for b in range(codelen): * row2 = word_point[b] * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -1614,7 +1661,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":94 + /* "trunk/gensim/models/doc2vec_inner.pyx":95 * row2 = word_point[b] * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -1632,7 +1679,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":95 + /* "trunk/gensim/models/doc2vec_inner.pyx":96 * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -1642,7 +1689,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":96 + /* "trunk/gensim/models/doc2vec_inner.pyx":97 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -1651,40 +1698,40 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":97 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":98 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: + * if train_hidden: */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":98 + /* "trunk/gensim/models/doc2vec_inner.pyx":99 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":99 + /* "trunk/gensim/models/doc2vec_inner.pyx":100 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< + * if train_hidden: # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: + * if train_inputs: */ - __pyx_t_3 = (__pyx_v_tw != 0); + __pyx_t_3 = (__pyx_v_train_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":100 + /* "trunk/gensim/models/doc2vec_inner.pyx":101 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if tl: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * if train_inputs: + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L8; @@ -1693,24 +1740,24 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":101 - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":102 + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * if train_inputs: # <<<<<<<<<<<<<< + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) * */ - __pyx_t_3 = (__pyx_v_tl != 0); + __pyx_t_3 = (__pyx_v_train_inputs != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":102 + /* "trunk/gensim/models/doc2vec_inner.pyx":103 * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if tl: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * if train_inputs: + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L9; } __pyx_L9:; @@ -1726,7 +1773,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":105 +/* "trunk/gensim/models/doc2vec_inner.pyx":106 * * * cdef unsigned long long 
fast_sentence_dbow_neg( # <<<<<<<<<<<<<< @@ -1734,7 +1781,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_tw, int __pyx_v_tl) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_train_hidden, int __pyx_v_train_inputs, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; @@ -1749,7 +1796,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":112 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":113 * * cdef long long a * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1758,7 +1805,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":113 + /* "trunk/gensim/models/doc2vec_inner.pyx":114 * cdef long long a * cdef long long row1 = word2_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -1767,7 +1814,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":118 + /* "trunk/gensim/models/doc2vec_inner.pyx":119 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1776,7 +1823,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":120 + /* "trunk/gensim/models/doc2vec_inner.pyx":121 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -1787,7 +1834,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":121 + /* "trunk/gensim/models/doc2vec_inner.pyx":122 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -1797,7 +1844,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":122 + /* "trunk/gensim/models/doc2vec_inner.pyx":123 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -1806,7 +1853,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":123 + /* "trunk/gensim/models/doc2vec_inner.pyx":124 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -1818,7 +1865,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":125 + /* "trunk/gensim/models/doc2vec_inner.pyx":126 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -1827,7 +1874,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":126 + /* "trunk/gensim/models/doc2vec_inner.pyx":127 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -1836,7 +1883,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":127 + /* "trunk/gensim/models/doc2vec_inner.pyx":128 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -1846,7 +1893,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":128 + /* "trunk/gensim/models/doc2vec_inner.pyx":129 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< @@ -1856,7 +1903,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":129 + /* "trunk/gensim/models/doc2vec_inner.pyx":130 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -1867,7 +1914,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":131 + /* "trunk/gensim/models/doc2vec_inner.pyx":132 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -1876,7 +1923,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":132 + /* "trunk/gensim/models/doc2vec_inner.pyx":133 * * row2 = target_index * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -1885,7 +1932,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":133 + /* "trunk/gensim/models/doc2vec_inner.pyx":134 * row2 = target_index * size * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -1903,7 +1950,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":134 + /* "trunk/gensim/models/doc2vec_inner.pyx":135 * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -1913,7 +1960,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto 
__pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":135 + /* "trunk/gensim/models/doc2vec_inner.pyx":136 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -1922,40 +1969,40 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":136 + /* "trunk/gensim/models/doc2vec_inner.pyx":137 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: + * if train_hidden: */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":137 + /* "trunk/gensim/models/doc2vec_inner.pyx":138 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":138 + /* "trunk/gensim/models/doc2vec_inner.pyx":139 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< + * if train_hidden: # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: + * if train_inputs: */ - __pyx_t_3 = (__pyx_v_tw != 0); + __pyx_t_3 = (__pyx_v_train_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":139 + /* "trunk/gensim/models/doc2vec_inner.pyx":140 * 
our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if tl: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * if train_inputs: + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L10; @@ -1964,30 +2011,30 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":140 - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":141 + * if train_hidden: * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * if train_inputs: # <<<<<<<<<<<<<< + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) * */ - __pyx_t_3 = (__pyx_v_tl != 0); + __pyx_t_3 = (__pyx_v_train_inputs != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":141 + /* "trunk/gensim/models/doc2vec_inner.pyx":142 * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if tl: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * if train_inputs: + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), 
(&(__pyx_v_syn0locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L11; } __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":143 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":144 + * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -1996,7 +2043,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":105 + /* "trunk/gensim/models/doc2vec_inner.pyx":106 * * * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< @@ -2009,7 +2056,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":146 +/* "trunk/gensim/models/doc2vec_inner.pyx":147 * * * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< @@ -2017,213 +2064,40 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_lbl_length, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; int __pyx_v_m; int __pyx_t_1; - int __pyx_t_2; + PY_LONG_LONG __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - PY_LONG_LONG __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":157 - * cdef int m - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":158 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or 
codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":159 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":160 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":161 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":163 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":164 - * else: - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":165 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":166 - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":167 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L8_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":169 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":170 - * else: - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * - * if cbow_mean and count > (0.5): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L8_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":172 - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - 
__pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L12_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":173 - * - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/doc2vec_inner.pyx":174 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; + int __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":176 - * sscal(&size, &inv_count, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":160 * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< * for b in range(codelens[i]): * row2 = word_point[b] * size */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":177 - * - * memset(work, 0, size * cython.sizeof(REAL_t)) + /* "trunk/gensim/models/doc2vec_inner.pyx":161 + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error * for b in range(codelens[i]): # <<<<<<<<<<<<<< * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) */ __pyx_t_1 = 
(__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_b = __pyx_t_5; + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":178 - * memset(work, 0, size * cython.sizeof(REAL_t)) + /* "trunk/gensim/models/doc2vec_inner.pyx":162 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) @@ -2231,7 +2105,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":179 + /* "trunk/gensim/models/doc2vec_inner.pyx":163 * for b in range(codelens[i]): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2240,7 +2114,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":180 + /* "trunk/gensim/models/doc2vec_inner.pyx":164 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2251,24 +2125,24 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L17_bool_binop_done; + goto __pyx_L6_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L17_bool_binop_done:; + __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":181 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":165 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha */ - goto __pyx_L14_continue; + goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":182 + /* "trunk/gensim/models/doc2vec_inner.pyx":166 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2277,71 +2151,71 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":183 + /* "trunk/gensim/models/doc2vec_inner.pyx":167 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: + * if learn_hidden: */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":184 + /* "trunk/gensim/models/doc2vec_inner.pyx":168 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: + * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":185 + /* "trunk/gensim/models/doc2vec_inner.pyx":169 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< + * if 
learn_hidden: # <<<<<<<<<<<<<< * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: + * if learn_words: */ - __pyx_t_3 = (__pyx_v_tw != 0); + __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":186 + /* "trunk/gensim/models/doc2vec_inner.pyx":170 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if tw: + * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if tw: + * if learn_words: * for m in range(j, k): */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L19; + goto __pyx_L8; } - __pyx_L19:; - __pyx_L14_continue:; + __pyx_L8:; + __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":187 - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":171 + * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: # <<<<<<<<<<<<<< + * if learn_words: # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i or codelens[m] == 0: */ - __pyx_t_3 = (__pyx_v_tw != 0); + __pyx_t_3 = (__pyx_v_learn_words != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":188 + /* "trunk/gensim/models/doc2vec_inner.pyx":172 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if tw: + * if learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; + for (__pyx_t_5 = __pyx_v_j; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":189 - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":173 + * if learn_words: * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue @@ -2351,62 +2225,62 
@@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L24_bool_binop_done; + goto __pyx_L13_bool_binop_done; } __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L24_bool_binop_done:; + __pyx_L13_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":190 + /* "trunk/gensim/models/doc2vec_inner.pyx":174 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) */ - goto __pyx_L21_continue; + goto __pyx_L10_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":192 + /* "trunk/gensim/models/doc2vec_inner.pyx":176 * continue * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< - * if tl: + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< + * if learn_lbls: * for m in range(lbl_length): */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L21_continue:; + __pyx_L10_continue:; } - goto __pyx_L20; + goto __pyx_L9; } - __pyx_L20:; + __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":193 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":177 * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - * if tl: # <<<<<<<<<<<<<< + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) + * if learn_lbls: # <<<<<<<<<<<<<< * for m in range(lbl_length): * if lbl_codelens[m] == 0: */ - __pyx_t_3 = (__pyx_v_tl != 0); + __pyx_t_3 = (__pyx_v_learn_lbls != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":194 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - * if tl: + /* "trunk/gensim/models/doc2vec_inner.pyx":178 + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) + * if learn_lbls: * for m in range(lbl_length): # <<<<<<<<<<<<<< * if lbl_codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":195 - * if tl: + /* "trunk/gensim/models/doc2vec_inner.pyx":179 + * if learn_lbls: * for m in range(lbl_length): * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< * continue @@ -2415,33 +2289,33 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":196 + /* "trunk/gensim/models/doc2vec_inner.pyx":180 * for m in range(lbl_length): * if lbl_codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) */ - goto __pyx_L27_continue; + goto __pyx_L16_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":198 + /* "trunk/gensim/models/doc2vec_inner.pyx":182 * continue * else: - * our_saxpy(&size, &ONEF, work, &ONE, 
&syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_lbl_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L27_continue:; + __pyx_L16_continue:; } - goto __pyx_L26; + goto __pyx_L15; } - __pyx_L26:; + __pyx_L15:; - /* "trunk/gensim/models/doc2vec_inner.pyx":146 + /* "trunk/gensim/models/doc2vec_inner.pyx":147 * * * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< @@ -2452,7 +2326,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":201 +/* "trunk/gensim/models/doc2vec_inner.pyx":185 * * * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< @@ -2460,26 +2334,24 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_tw, int __pyx_v_tl) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, __pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; __pyx_t_5numpy_uint32_t __pyx_v_target_index; __pyx_t_5numpy_uint32_t __pyx_v_word_index; int __pyx_v_d; 
int __pyx_v_m; unsigned PY_LONG_LONG __pyx_r; - int __pyx_t_1; + long __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - long __pyx_t_5; + int __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":209 + /* "trunk/gensim/models/doc2vec_inner.pyx":194 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -2488,208 +2360,37 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":214 - * cdef int d, m - * - * word_index = indexes[i] # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":200 * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":216 + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< * word_index = indexes[i] - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":217 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":218 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":219 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # 
<<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":220 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":222 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":223 - * else: - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":224 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":225 - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - 
* else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":226 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L8_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":228 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - - /* "trunk/gensim/models/doc2vec_inner.pyx":229 - * else: - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L8_continue:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":230 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L12_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":231 - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # 
<<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/doc2vec_inner.pyx":232 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) + * for d in range(negative+1): */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L11; - } - __pyx_L11:; + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":234 - * sscal(&size, &inv_count, neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * + /* "trunk/gensim/models/doc2vec_inner.pyx":201 + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * word_index = indexes[i] # <<<<<<<<<<<<<< * for d in range(negative+1): + * if d == 0: */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "trunk/gensim/models/doc2vec_inner.pyx":236 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * + /* "trunk/gensim/models/doc2vec_inner.pyx":202 + * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * word_index = indexes[i] * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = word_index */ - __pyx_t_5 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; + __pyx_t_1 = (__pyx_v_negative + 1); + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":237 - * + /* 
"trunk/gensim/models/doc2vec_inner.pyx":203 + * word_index = indexes[i] * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index @@ -2698,7 +2399,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":238 + /* "trunk/gensim/models/doc2vec_inner.pyx":204 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2707,7 +2408,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":239 + /* "trunk/gensim/models/doc2vec_inner.pyx":205 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2715,11 +2416,11 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * target_index = table[(next_random >> 16) % table_len] */ __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; - goto __pyx_L16; + goto __pyx_L5; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":241 + /* "trunk/gensim/models/doc2vec_inner.pyx":207 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -2728,7 +2429,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":242 + /* "trunk/gensim/models/doc2vec_inner.pyx":208 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2737,7 +2438,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":243 + /* "trunk/gensim/models/doc2vec_inner.pyx":209 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2747,17 +2448,17 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":244 + /* "trunk/gensim/models/doc2vec_inner.pyx":210 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * */ - goto __pyx_L14_continue; + goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":245 + /* "trunk/gensim/models/doc2vec_inner.pyx":211 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2766,9 +2467,9 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); } - __pyx_L16:; + __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":247 + /* "trunk/gensim/models/doc2vec_inner.pyx":213 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2777,7 +2478,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":248 + /* "trunk/gensim/models/doc2vec_inner.pyx":214 * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2786,7 +2487,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); 
- /* "trunk/gensim/models/doc2vec_inner.pyx":249 + /* "trunk/gensim/models/doc2vec_inner.pyx":215 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2797,24 +2498,24 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; + goto __pyx_L8_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; + __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":250 + /* "trunk/gensim/models/doc2vec_inner.pyx":216 * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha */ - goto __pyx_L14_continue; + goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":251 + /* "trunk/gensim/models/doc2vec_inner.pyx":217 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2823,225 +2524,1925 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":252 + /* "trunk/gensim/models/doc2vec_inner.pyx":218 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: + * if learn_hidden: */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":253 + /* "trunk/gensim/models/doc2vec_inner.pyx":219 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, 
&syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if tw: + * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":254 - * g = (label - f) * alpha - * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: # <<<<<<<<<<<<<< - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: + /* "trunk/gensim/models/doc2vec_inner.pyx":220 + * g = (label - f) * alpha + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if learn_hidden: # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_words: + */ + __pyx_t_3 = (__pyx_v_learn_hidden != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":221 + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if learn_words: + * for m in range(j,k): + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L10; + } + __pyx_L10:; + __pyx_L3_continue:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":222 + * if learn_hidden: + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_words: # <<<<<<<<<<<<<< + * for m in range(j,k): + * if m == i or codelens[m] == 0: + */ + __pyx_t_3 = (__pyx_v_learn_words != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":223 + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_words: + * for m in range(j,k): # <<<<<<<<<<<<<< + * if m == i or 
codelens[m] == 0: + * continue + */ + __pyx_t_2 = __pyx_v_k; + for (__pyx_t_5 = __pyx_v_j; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; + + /* "trunk/gensim/models/doc2vec_inner.pyx":224 + * if learn_words: + * for m in range(j,k): + * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_4) { + } else { + __pyx_t_3 = __pyx_t_4; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L15_bool_binop_done:; + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":225 + * for m in range(j,k): + * if m == i or codelens[m] == 0: + * continue # <<<<<<<<<<<<<< + * else: + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) + */ + goto __pyx_L12_continue; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":227 + * continue + * else: + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * if learn_lbls: + * for m in range(lbl_length): + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + __pyx_L12_continue:; + } + goto __pyx_L11; + } + __pyx_L11:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":228 + * else: + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) + * if learn_lbls: # <<<<<<<<<<<<<< + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: + */ + __pyx_t_3 = (__pyx_v_learn_lbls != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":229 + * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) + * if learn_lbls: + * for m in 
range(lbl_length): # <<<<<<<<<<<<<< + * if lbl_codelens[m] == 0: + * continue + */ + __pyx_t_2 = __pyx_v_lbl_length; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; + + /* "trunk/gensim/models/doc2vec_inner.pyx":230 + * if learn_lbls: + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< + * continue + * else: + */ + __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":231 + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: + * continue # <<<<<<<<<<<<<< + * else: + * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + */ + goto __pyx_L18_continue; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":233 + * continue + * else: + * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * + * return next_random + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_lbl_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + __pyx_L18_continue:; + } + goto __pyx_L17; + } + __pyx_L17:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":235 + * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + * + * return next_random # <<<<<<<<<<<<<< + * + * cdef void fast_sentence_dmc_hs( + */ + __pyx_r = __pyx_v_next_random; + goto __pyx_L0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":185 + * + * + * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], + * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + */ + + 
/* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":237 + * return next_random + * + * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int layer1_size, const int vector_size, + */ + +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_layer1_size, int const __pyx_v_vector_size, __pyx_t_5numpy_uint32_t const *__pyx_v_window_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_lbl_length, int const __pyx_v_window, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { + PY_LONG_LONG __pyx_v_b; + PY_LONG_LONG __pyx_v_row2; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; + int __pyx_v_m; + int __pyx_t_1; + PY_LONG_LONG __pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + long __pyx_t_6; + + /* "trunk/gensim/models/doc2vec_inner.pyx":250 + * + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< + * for b in range(word_code_len): + * row2 = word_point[b] * layer1_size + */ + memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + + /* 
"trunk/gensim/models/doc2vec_inner.pyx":251 + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * for b in range(word_code_len): # <<<<<<<<<<<<<< + * row2 = word_point[b] * layer1_size + * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) + */ + __pyx_t_1 = __pyx_v_word_code_len; + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_b = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":252 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * for b in range(word_code_len): + * row2 = word_point[b] * layer1_size # <<<<<<<<<<<<<< + * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: + */ + __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_layer1_size); + + /* "trunk/gensim/models/doc2vec_inner.pyx":253 + * for b in range(word_code_len): + * row2 = word_point[b] * layer1_size + * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue + */ + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + + /* "trunk/gensim/models/doc2vec_inner.pyx":254 + * row2 = word_point[b] * layer1_size + * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); + if (!__pyx_t_4) { + } else { + __pyx_t_3 = __pyx_t_4; + goto __pyx_L6_bool_binop_done; + } + __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L6_bool_binop_done:; + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":255 + * f = 
our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha + */ + goto __pyx_L3_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":256 + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) + */ + __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); + + /* "trunk/gensim/models/doc2vec_inner.pyx":257 + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< + * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) + * if learn_hidden: + */ + __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); + + /* "trunk/gensim/models/doc2vec_inner.pyx":258 + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + + /* "trunk/gensim/models/doc2vec_inner.pyx":259 + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) + * if learn_hidden: # <<<<<<<<<<<<<< + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) + * if learn_lbls: + */ + __pyx_t_3 = (__pyx_v_learn_hidden != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":260 
+ * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * if learn_lbls: + * for m in range(lbl_length): + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L8; + } + __pyx_L8:; + __pyx_L3_continue:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":261 + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) + * if learn_lbls: # <<<<<<<<<<<<<< + * for m in range(lbl_length): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + */ + __pyx_t_3 = (__pyx_v_learn_lbls != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":262 + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) + * if learn_lbls: + * for m in range(lbl_length): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + * &syn0[window_indexes[m] * vector_size], &ONE) + */ + __pyx_t_1 = __pyx_v_lbl_length; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; + + /* "trunk/gensim/models/doc2vec_inner.pyx":263 + * if learn_lbls: + * for m in range(lbl_length): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, # <<<<<<<<<<<<<< + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L9; + } + __pyx_L9:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":265 + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: # <<<<<<<<<<<<<< + * for m in range(lbl_length, lbl_length + (2 * window)): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + */ + __pyx_t_3 = (__pyx_v_learn_words != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":266 + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: + * for m in range(lbl_length, lbl_length + (2 * window)): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + * &syn0[window_indexes[m] * vector_size], &ONE) + */ + __pyx_t_6 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); + for (__pyx_t_1 = __pyx_v_lbl_length; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { + __pyx_v_m = __pyx_t_1; + + /* "trunk/gensim/models/doc2vec_inner.pyx":267 + * if learn_words: + * for m in range(lbl_length, lbl_length + (2 * window)): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, # <<<<<<<<<<<<<< + * &syn0[window_indexes[m] * vector_size], &ONE) + * + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L12; + } + __pyx_L12:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":237 + * return next_random + * + * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + * REAL_t *neu1, REAL_t 
*syn0, REAL_t *syn1, const int layer1_size, const int vector_size, + */ + + /* function exit code */ +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":271 + * + * + * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, + */ + +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_layer1_size, int const __pyx_v_vector_size, __pyx_t_5numpy_uint32_t *__pyx_v_window_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_predict_word_index, int const __pyx_v_lbl_length, int const __pyx_v_window, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { + PY_LONG_LONG __pyx_v_row2; + unsigned PY_LONG_LONG __pyx_v_modulo; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; + __pyx_t_5numpy_uint32_t __pyx_v_target_index; + int __pyx_v_d; + int __pyx_v_m; + unsigned PY_LONG_LONG __pyx_r; + long __pyx_t_1; + int __pyx_t_2; + int __pyx_t_3; + int __pyx_t_4; + int __pyx_t_5; + + /* "trunk/gensim/models/doc2vec_inner.pyx":281 + * cdef long long a + * cdef long long row2 + * cdef unsigned long long modulo = 281474976710655ULL # 
<<<<<<<<<<<<<< + * cdef REAL_t f, g, label + * cdef np.uint32_t target_index + */ + __pyx_v_modulo = 281474976710655ULL; + + /* "trunk/gensim/models/doc2vec_inner.pyx":287 + * + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< + * for d in range(negative+1): + * if d == 0: + */ + memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":288 + * # l1 already composed by caller, passed in as neu1 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * for d in range(negative+1): # <<<<<<<<<<<<<< + * if d == 0: + * target_index = predict_word_index + */ + __pyx_t_1 = (__pyx_v_negative + 1); + for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_d = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":289 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = predict_word_index + * label = ONEF + */ + __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":290 + * for d in range(negative+1): + * if d == 0: + * target_index = predict_word_index # <<<<<<<<<<<<<< + * label = ONEF + * else: + */ + __pyx_v_target_index = __pyx_v_predict_word_index; + + /* "trunk/gensim/models/doc2vec_inner.pyx":291 + * if d == 0: + * target_index = predict_word_index + * label = ONEF # <<<<<<<<<<<<<< + * else: + * target_index = table[(next_random >> 16) % table_len] + */ + __pyx_v_label = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF; + goto __pyx_L5; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":293 + * label = ONEF + * else: + * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< + * next_random = (next_random * 
25214903917ULL + 11) & modulo + * if target_index == predict_word_index: + */ + __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); + + /* "trunk/gensim/models/doc2vec_inner.pyx":294 + * else: + * target_index = table[(next_random >> 16) % table_len] + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == predict_word_index: + * continue + */ + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); + + /* "trunk/gensim/models/doc2vec_inner.pyx":295 + * target_index = table[(next_random >> 16) % table_len] + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == predict_word_index: # <<<<<<<<<<<<<< + * continue + * label = 0.0 + */ + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_predict_word_index) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":296 + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == predict_word_index: + * continue # <<<<<<<<<<<<<< + * label = 0.0 + * + */ + goto __pyx_L3_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":297 + * if target_index == predict_word_index: + * continue + * label = 0.0 # <<<<<<<<<<<<<< + * + * row2 = target_index * layer1_size + */ + __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); + } + __pyx_L5:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":299 + * label = 0.0 + * + * row2 = target_index * layer1_size # <<<<<<<<<<<<<< + * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: + */ + __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_layer1_size); + + /* "trunk/gensim/models/doc2vec_inner.pyx":300 + * + * row2 = target_index * layer1_size + * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue + */ + __pyx_v_f = 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + + /* "trunk/gensim/models/doc2vec_inner.pyx":301 + * row2 = target_index * layer1_size + * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + */ + __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); + if (!__pyx_t_4) { + } else { + __pyx_t_3 = __pyx_t_4; + goto __pyx_L8_bool_binop_done; + } + __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L8_bool_binop_done:; + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":302 + * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue # <<<<<<<<<<<<<< + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha + */ + goto __pyx_L3_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":303 + * if f <= -MAX_EXP or f >= MAX_EXP: + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< + * g = (label - f) * alpha + * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) + */ + __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); + + /* "trunk/gensim/models/doc2vec_inner.pyx":304 + * continue + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha # <<<<<<<<<<<<<< + * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if learn_hidden: + */ + __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); + + /* "trunk/gensim/models/doc2vec_inner.pyx":305 + * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + * g = (label - f) * alpha + * our_saxpy(&layer1_size, 
&g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + + /* "trunk/gensim/models/doc2vec_inner.pyx":306 + * g = (label - f) * alpha + * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if learn_hidden: # <<<<<<<<<<<<<< + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_lbls: + */ + __pyx_t_3 = (__pyx_v_learn_hidden != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":307 + * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if learn_lbls: + * for m in range(lbl_length): + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L10; + } + __pyx_L10:; + __pyx_L3_continue:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":308 + * if learn_hidden: + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_lbls: # <<<<<<<<<<<<<< + * for m in range(lbl_length): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + */ + __pyx_t_3 = (__pyx_v_learn_lbls != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":309 + * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * if learn_lbls: + * for m in range(lbl_length): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + * &syn0[window_indexes[m] 
* vector_size], &ONE) + */ + __pyx_t_2 = __pyx_v_lbl_length; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { + __pyx_v_m = __pyx_t_5; + + /* "trunk/gensim/models/doc2vec_inner.pyx":310 + * if learn_lbls: + * for m in range(lbl_length): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, # <<<<<<<<<<<<<< + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L11; + } + __pyx_L11:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":312 + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: # <<<<<<<<<<<<<< + * for m in range(lbl_length, lbl_length + (2 * window)): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + */ + __pyx_t_3 = (__pyx_v_learn_words != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":313 + * &syn0[window_indexes[m] * vector_size], &ONE) + * if learn_words: + * for m in range(lbl_length, lbl_length + (2 * window)): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + * &syn0[window_indexes[m] * vector_size], &ONE) + */ + __pyx_t_1 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); + for (__pyx_t_2 = __pyx_v_lbl_length; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":314 + * if learn_words: + * for m in range(lbl_length, lbl_length + (2 * window)): + * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], 
&work[m*vector_size], &ONE, # <<<<<<<<<<<<<< + * &syn0[window_indexes[m] * vector_size], &ONE) + * + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L14; + } + __pyx_L14:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":317 + * &syn0[window_indexes[m] * vector_size], &ONE) + * + * return next_random # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = __pyx_v_next_random; + goto __pyx_L0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":271 + * + * + * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, + */ + + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/doc2vec_inner.pyx":320 + * + * + * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow = {"train_sentence_dbow", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyObject *__pyx_v_model = 0; + PyObject *__pyx_v_sentence = 0; + PyObject 
*__pyx_v_lbls = 0; + PyObject *__pyx_v_alpha = 0; + PyObject *__pyx_v__work = 0; + PyObject *__pyx_v_train_words = 0; + PyObject *__pyx_v_train_lbls = 0; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_train_lbls,0}; + PyObject* values[7] = {0,0,0,0,0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch (pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 2: + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 3: + if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) 
kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 4: + if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 5: + if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 6: + if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + } else if (PyTuple_GET_SIZE(__pyx_args) != 7) { + goto __pyx_L5_argtuple_error; + } else { + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + } + __pyx_v_model = values[0]; + __pyx_v_sentence = values[1]; + __pyx_v_lbls = values[2]; + __pyx_v_alpha = values[3]; + __pyx_v__work = values[4]; + __pyx_v_train_words = values[5]; + __pyx_v_train_lbls = values[6]; + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + 
__Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __pyx_L3_error:; + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v_train_words, __pyx_v_train_lbls); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls) { + int __pyx_v_hs; + int __pyx_v_negative; + int __pyx_v_tw; + int __pyx_v_tl; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; + int __pyx_v_size; + int __pyx_v_codelens[10000]; + int __pyx_v_lbl_codelens[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_lbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; + int __pyx_v_sentence_len; + int __pyx_v_lbl_length; + int __pyx_v_window; + int __pyx_v_i; + int __pyx_v_j; + long __pyx_v_result; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1; + __pyx_t_5numpy_uint32_t *__pyx_v_points[10000]; + __pyx_t_5numpy_uint8_t *__pyx_v_codes[10000]; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg; + __pyx_t_5numpy_uint32_t *__pyx_v_table; + unsigned 
PY_LONG_LONG __pyx_v_table_len; + unsigned PY_LONG_LONG __pyx_v_next_random; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; + PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_item = NULL; + long __pyx_v_k; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + int __pyx_t_2; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; + int __pyx_t_4; + Py_ssize_t __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + PyObject *__pyx_t_7 = NULL; + unsigned PY_LONG_LONG __pyx_t_8; + long __pyx_t_9; + Py_ssize_t __pyx_t_10; + int __pyx_t_11; + int __pyx_t_12; + __pyx_t_5numpy_uint32_t __pyx_t_13; + PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_15 = NULL; + PyObject *__pyx_t_16 = NULL; + PyObject *(*__pyx_t_17)(PyObject *); + int __pyx_t_18; + int __pyx_t_19; + int __pyx_lineno = 0; + const char *__pyx_filename = NULL; + int __pyx_clineno = 0; + __Pyx_RefNannySetupContext("train_sentence_dbow", 0); + + /* "trunk/gensim/models/doc2vec_inner.pyx":321 + * + * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + * cdef int hs = model.hs # <<<<<<<<<<<<<< + * cdef int negative = model.negative + * cdef int tw = train_words + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_hs = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":322 + * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + * cdef int hs = model.hs + * cdef int negative = model.negative # <<<<<<<<<<<<<< + * cdef int tw = train_words + * cdef int tl = 
train_lbls + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_negative = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":323 + * cdef int hs = model.hs + * cdef int negative = model.negative + * cdef int tw = train_words # <<<<<<<<<<<<<< + * cdef int tl = train_lbls + * + */ + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_tw = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":324 + * cdef int negative = model.negative + * cdef int tw = train_words + * cdef int tl = train_lbls # <<<<<<<<<<<<<< + * + * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + */ + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_tl = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":326 + * cdef int tl = train_lbls + * + * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< + * cdef REAL_t *work + * cdef REAL_t _alpha = alpha + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":328 + * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + * cdef REAL_t *work + * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< + * cdef int size = model.layer1_size + * + */ + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__alpha = __pyx_t_3; + + /* "trunk/gensim/models/doc2vec_inner.pyx":329 + * cdef REAL_t *work + * cdef REAL_t _alpha = alpha + * cdef int size = model.layer1_size # <<<<<<<<<<<<<< + * + * cdef int codelens[MAX_SENTENCE_LEN] + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_size = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":338 + * cdef int sentence_len + * cdef int lbl_length + * cdef int window = model.window # <<<<<<<<<<<<<< + * + * cdef int i, j + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_window = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":341 + * + * cdef int i, j + * cdef long result = 0 # <<<<<<<<<<<<<< + * + * # For hierarchical softmax + */ + __pyx_v_result = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":357 + * cdef REAL_t *syn0locks + * + * if hs: # <<<<<<<<<<<<<< + * syn1 = (np.PyArray_DATA(model.syn1)) + * + */ + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":358 + * + * if hs: + * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< + * + * if negative: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + goto __pyx_L3; + } + __pyx_L3:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":360 + * syn1 = (np.PyArray_DATA(model.syn1)) + * + * if negative: # <<<<<<<<<<<<<< + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * table = (np.PyArray_DATA(model.table)) + */ + __pyx_t_4 = (__pyx_v_negative != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":361 + * + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< + * table = (np.PyArray_DATA(model.table)) + * table_len = len(model.table) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if 
(!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":362 + * if negative: + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< + * table_len = len(model.table) + * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":363 + * syn1neg = (np.PyArray_DATA(model.syn1neg)) + * table = (np.PyArray_DATA(model.table)) + * table_len = len(model.table) # <<<<<<<<<<<<<< + * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_table_len = __pyx_t_5; + + /* 
"trunk/gensim/models/doc2vec_inner.pyx":364 + * table = (np.PyArray_DATA(model.table)) + * table_len = len(model.table) + * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + 
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_v_next_random = __pyx_t_8; + goto __pyx_L4; + } + __pyx_L4:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":367 + * + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + */ + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":368 + * # convert Python structures to primitive types, so we can release the GIL + * work = np.PyArray_DATA(_work) + * 
sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * + */ + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = 10000; + if (((__pyx_t_5 < __pyx_t_9) != 0)) { + __pyx_t_10 = __pyx_t_5; + } else { + __pyx_t_10 = __pyx_t_9; + } + __pyx_v_sentence_len = ((int)__pyx_t_10); + + /* "trunk/gensim/models/doc2vec_inner.pyx":369 + * work = np.PyArray_DATA(_work) + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< + * + * syn0locks = np.PyArray_DATA(model.syn0locks) + */ + __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = 10000; + if (((__pyx_t_10 < __pyx_t_9) != 0)) { + __pyx_t_5 = __pyx_t_10; + } else { + __pyx_t_5 = __pyx_t_9; + } + __pyx_v_lbl_length = ((int)__pyx_t_5); + + /* "trunk/gensim/models/doc2vec_inner.pyx":371 + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * + * syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< + * + * for i in range(sentence_len): + */ + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":373 + * syn0locks = 
np.PyArray_DATA(model.syn0locks) + * + * for i in range(sentence_len): # <<<<<<<<<<<<<< + * word = sentence[i] + * if word is None: + */ + __pyx_t_2 = __pyx_v_sentence_len; + for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { + __pyx_v_i = __pyx_t_11; + + /* "trunk/gensim/models/doc2vec_inner.pyx":374 + * + * for i in range(sentence_len): + * word = sentence[i] # <<<<<<<<<<<<<< + * if word is None: + * codelens[i] = 0 + */ + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_6); + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); + __pyx_t_6 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":375 + * for i in range(sentence_len): + * word = sentence[i] + * if word is None: # <<<<<<<<<<<<<< + * codelens[i] = 0 + * else: + */ + __pyx_t_4 = (__pyx_v_word == Py_None); + __pyx_t_12 = (__pyx_t_4 != 0); + if (__pyx_t_12) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":376 + * word = sentence[i] + * if word is None: + * codelens[i] = 0 # <<<<<<<<<<<<<< + * else: + * indexes[i] = word.index + */ + (__pyx_v_codelens[__pyx_v_i]) = 0; + goto __pyx_L7; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":378 + * codelens[i] = 0 + * else: + * indexes[i] = word.index # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(word.code) + */ + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + + /* 
"trunk/gensim/models/doc2vec_inner.pyx":379 + * else: + * indexes[i] = word.index + * if hs: # <<<<<<<<<<<<<< + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) + */ + __pyx_t_12 = (__pyx_v_hs != 0); + if (__pyx_t_12) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":380 + * indexes[i] = word.index + * if hs: + * codelens[i] = len(word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) + */ + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); + + /* "trunk/gensim/models/doc2vec_inner.pyx":381 + * if hs: + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(word.point) + * else: + */ + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":382 + * codelens[i] = len(word.code) + * codes[i] = np.PyArray_DATA(word.code) + * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * else: + * codelens[i] = 1 + */ + __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + goto __pyx_L8; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":384 + * points[i] = np.PyArray_DATA(word.point) + * else: + * codelens[i] = 1 # <<<<<<<<<<<<<< + * result += 1 + * # single randint() call avoids a big thread-sync slowdown + */ + (__pyx_v_codelens[__pyx_v_i]) = 1; + } + __pyx_L8:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":385 + * else: + * codelens[i] = 1 + * result += 1 # <<<<<<<<<<<<<< + * # single randint() call avoids a big thread-sync slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + */ + __pyx_v_result = (__pyx_v_result + 1); + } + __pyx_L7:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":387 + * result += 1 + * # single randint() call avoids a big thread-sync slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item + * for i in range(lbl_length): + */ + __pyx_t_2 = 0; + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_14); + __pyx_t_15 = NULL; + __pyx_t_5 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { + __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_7); + if (likely(__pyx_t_15)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); + __Pyx_INCREF(__pyx_t_15); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_7, function); + __pyx_t_5 = 1; + } + } + __pyx_t_16 = PyTuple_New(3+__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_16); + if (__pyx_t_15) { + PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + } + __Pyx_INCREF(__pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_5, __pyx_int_0); + __Pyx_GIVEREF(__pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_5, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_5, __pyx_t_14); + __Pyx_GIVEREF(__pyx_t_14); + __pyx_t_1 = 0; + __pyx_t_14 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + if 
(likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { + __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_5 = 0; + __pyx_t_17 = NULL; + } else { + __pyx_t_5 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + for (;;) { + if (likely(!__pyx_t_17)) { + if (likely(PyList_CheckExact(__pyx_t_7))) { + if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_7)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_7)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } + } else { + __pyx_t_6 = __pyx_t_17(__pyx_t_7); + if (unlikely(!__pyx_t_6)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename 
= __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_6); + } + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_6); + __pyx_t_6 = 0; + __pyx_v_i = __pyx_t_2; + __pyx_t_2 = (__pyx_t_2 + 1); + + /* "trunk/gensim/models/doc2vec_inner.pyx":388 + * # single randint() call avoids a big thread-sync slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * reduced_windows[i] = item # <<<<<<<<<<<<<< + * for i in range(lbl_length): + * word = lbls[i] + */ + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 388; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + + /* "trunk/gensim/models/doc2vec_inner.pyx":387 + * result += 1 + * # single randint() call avoids a big thread-sync slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item + * for i in range(lbl_length): + */ + } + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":389 + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * reduced_windows[i] = item + * for i in range(lbl_length): # <<<<<<<<<<<<<< + * word = lbls[i] + * if word is None: + */ + __pyx_t_2 = __pyx_v_lbl_length; + for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { + __pyx_v_i = __pyx_t_11; + + /* "trunk/gensim/models/doc2vec_inner.pyx":390 + * reduced_windows[i] = item + * for i in range(lbl_length): + * word = lbls[i] # <<<<<<<<<<<<<< + * if word is None: + * lbl_codelens[i] = 0 + */ + __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_7); + 
__Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); + __pyx_t_7 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":391 + * for i in range(lbl_length): + * word = lbls[i] + * if word is None: # <<<<<<<<<<<<<< + * lbl_codelens[i] = 0 + * else: + */ + __pyx_t_12 = (__pyx_v_word == Py_None); + __pyx_t_4 = (__pyx_t_12 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":392 + * word = lbls[i] + * if word is None: + * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< + * else: + * lbl_indexes[i] = word.index + */ + (__pyx_v_lbl_codelens[__pyx_v_i]) = 0; + goto __pyx_L13; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":394 + * lbl_codelens[i] = 0 + * else: + * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< + * if hs: + * lbl_codelens[i] = len(word.code) + */ + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; + + /* "trunk/gensim/models/doc2vec_inner.pyx":395 + * else: + * lbl_indexes[i] = word.index + * if hs: # <<<<<<<<<<<<<< + * lbl_codelens[i] = len(word.code) + * else: + */ + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":396 + * lbl_indexes[i] = word.index + * if hs: + * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< + * else: + * lbl_codelens[i] = 1 + */ + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_5 = PyObject_Length(__pyx_t_7); if 
(unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); + goto __pyx_L14; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":398 + * lbl_codelens[i] = len(word.code) + * else: + * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< + * result += 1 + * + */ + (__pyx_v_lbl_codelens[__pyx_v_i]) = 1; + } + __pyx_L14:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":399 + * else: + * lbl_codelens[i] = 1 + * result += 1 # <<<<<<<<<<<<<< + * + * # release GIL & train on the sentence + */ + __pyx_v_result = (__pyx_v_result + 1); + } + __pyx_L13:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":402 + * + * # release GIL & train on the sentence + * with nogil: # <<<<<<<<<<<<<< + * for i in range(sentence_len): + * if codelens[i] == 0: + */ + { + #ifdef WITH_THREAD + PyThreadState *_save; + Py_UNBLOCK_THREADS + #endif + /*try:*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":403 + * # release GIL & train on the sentence + * with nogil: + * for i in range(sentence_len): # <<<<<<<<<<<<<< + * if codelens[i] == 0: + * continue */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { + __pyx_t_2 = __pyx_v_sentence_len; + for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { + __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":255 - * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if tw: - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if tw: - * for m in range(j,k): + /* "trunk/gensim/models/doc2vec_inner.pyx":404 + * with nogil: + * for i in range(sentence_len): + * if codelens[i] == 0: # <<<<<<<<<<<<<< + * continue + * if tw: # simultaneous skip-gram wordvec-training */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), 
(&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L21; - } - __pyx_L21:; - __pyx_L14_continue:; - } + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":256 - * if tw: - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: # <<<<<<<<<<<<<< - * for m in range(j,k): - * if m == i or codelens[m] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":405 + * for i in range(sentence_len): + * if codelens[i] == 0: + * continue # <<<<<<<<<<<<<< + * if tw: # simultaneous skip-gram wordvec-training + * j = i - window + reduced_windows[i] */ - __pyx_t_3 = (__pyx_v_tw != 0); - if (__pyx_t_3) { + goto __pyx_L18_continue; + } - /* "trunk/gensim/models/doc2vec_inner.pyx":257 - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if tw: - * for m in range(j,k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":406 + * if codelens[i] == 0: * continue + * if tw: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< + * j = i - window + reduced_windows[i] + * if j < 0: */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; + __pyx_t_4 = (__pyx_v_tw != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":258 - * if tw: - * for m in range(j,k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":407 * continue - * else: + * if tw: # simultaneous skip-gram wordvec-training + * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< + * if j < 0: + * j = 0 */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L26_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L26_bool_binop_done:; - if (__pyx_t_3) { + __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + 
(__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":259 - * for m in range(j,k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":408 + * if tw: # simultaneous skip-gram wordvec-training + * j = i - window + reduced_windows[i] + * if j < 0: # <<<<<<<<<<<<<< + * j = 0 + * k = i + window + 1 - reduced_windows[i] */ - goto __pyx_L23_continue; - } - /*else*/ { + __pyx_t_4 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":261 - * continue - * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * if tl: - * for m in range(lbl_length): + /* "trunk/gensim/models/doc2vec_inner.pyx":409 + * j = i - window + reduced_windows[i] + * if j < 0: + * j = 0 # <<<<<<<<<<<<<< + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L23_continue:; - } - goto __pyx_L22; - } - __pyx_L22:; + __pyx_v_j = 0; + goto __pyx_L22; + } + __pyx_L22:; - /* "trunk/gensim/models/doc2vec_inner.pyx":262 - * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * if tl: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":410 + * if j < 0: + * j = 0 + * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< + * if k > sentence_len: + * k = sentence_len */ - __pyx_t_3 = (__pyx_v_tl != 0); - if (__pyx_t_3) { + __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":263 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * if tl: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue + /* "trunk/gensim/models/doc2vec_inner.pyx":411 + * j = 0 + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: # <<<<<<<<<<<<<< + * k = sentence_len + * for j in range(j, k): */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; + __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":264 - * if tl: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: + /* "trunk/gensim/models/doc2vec_inner.pyx":412 + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: + * k = sentence_len # <<<<<<<<<<<<<< + * for j in range(j, k): + * if j == i or codelens[j] == 0: */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { + __pyx_v_k = __pyx_v_sentence_len; + goto __pyx_L23; + } + __pyx_L23:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":413 + * if k > sentence_len: + * k = sentence_len + * for j in range(j, k): # <<<<<<<<<<<<<< + * if j == i or codelens[j] == 0: + * continue + */ + __pyx_t_9 = __pyx_v_k; + for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_9; __pyx_t_18+=1) { + __pyx_v_j = __pyx_t_18; + + /* "trunk/gensim/models/doc2vec_inner.pyx":414 + * k = sentence_len + * for j in range(j, k): + * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + __pyx_t_12 = ((__pyx_v_j == __pyx_v_i) != 0); + if (!__pyx_t_12) { + } else { + __pyx_t_4 = __pyx_t_12; + goto __pyx_L27_bool_binop_done; + } + __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); + __pyx_t_4 = __pyx_t_12; + __pyx_L27_bool_binop_done:; + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":415 + * for j in range(j, k): + * if 
j == i or codelens[j] == 0: + * continue # <<<<<<<<<<<<<< + * if hs: + * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + */ + goto __pyx_L24_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":416 + * if j == i or codelens[j] == 0: + * continue + * if hs: # <<<<<<<<<<<<<< + * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], + */ + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":418 + * if hs: + * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], # <<<<<<<<<<<<<< + * _alpha, work, 1, 1, syn0locks) + * if negative: + */ + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, 1, 1, __pyx_v_syn0locks); + goto __pyx_L29; + } + __pyx_L29:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":420 + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], + * _alpha, work, 1, 1, syn0locks) + * if negative: # <<<<<<<<<<<<<< + * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, + */ + __pyx_t_4 = (__pyx_v_negative != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":422 + * if negative: + * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, # <<<<<<<<<<<<<< + * indexes[i], indexes[j], _alpha, work, next_random, + * 1, 1, syn0locks) + */ + __pyx_v_next_random = 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, 1, 1, __pyx_v_syn0locks); + goto __pyx_L30; + } + __pyx_L30:; + __pyx_L24_continue:; + } + goto __pyx_L21; + } + __pyx_L21:; - /* "trunk/gensim/models/doc2vec_inner.pyx":265 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":426 + * 1, 1, syn0locks) + * + * if tl: # docvec-training # <<<<<<<<<<<<<< + * for j in range(lbl_length): + * if lbl_codelens[j] == 0: */ - goto __pyx_L29_continue; + __pyx_t_4 = (__pyx_v_tl != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":427 + * + * if tl: # docvec-training + * for j in range(lbl_length): # <<<<<<<<<<<<<< + * if lbl_codelens[j] == 0: + * continue + */ + __pyx_t_18 = __pyx_v_lbl_length; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_j = __pyx_t_19; + + /* "trunk/gensim/models/doc2vec_inner.pyx":428 + * if tl: # docvec-training + * for j in range(lbl_length): + * if lbl_codelens[j] == 0: # <<<<<<<<<<<<<< + * continue + * if hs: + */ + __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_j]) == 0) != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":429 + * for j in range(lbl_length): + * if lbl_codelens[j] == 0: + * continue # <<<<<<<<<<<<<< + * if hs: + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], + */ + goto __pyx_L32_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":430 + * if lbl_codelens[j] == 0: + * continue + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], + * _alpha, work, 1, 1, syn0locks) + 
*/ + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":431 + * continue + * if hs: + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], # <<<<<<<<<<<<<< + * _alpha, work, 1, 1, syn0locks) + * if negative: + */ + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, 1, 1, __pyx_v_syn0locks); + goto __pyx_L35; + } + __pyx_L35:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":433 + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], + * _alpha, work, 1, 1, syn0locks) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, + * indexes[i], lbl_indexes[j], _alpha, work, next_random, + */ + __pyx_t_4 = (__pyx_v_negative != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":434 + * _alpha, work, 1, 1, syn0locks) + * if negative: + * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, # <<<<<<<<<<<<<< + * indexes[i], lbl_indexes[j], _alpha, work, next_random, + * 1, 1, syn0locks) + */ + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, 1, 1, __pyx_v_syn0locks); + goto __pyx_L36; + } + __pyx_L36:; + __pyx_L32_continue:; + } + goto __pyx_L31; + } + __pyx_L31:; + __pyx_L18_continue:; + } } - /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":267 - * continue - * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + /* 
"trunk/gensim/models/doc2vec_inner.pyx":402 * - * return next_random + * # release GIL & train on the sentence + * with nogil: # <<<<<<<<<<<<<< + * for i in range(sentence_len): + * if codelens[i] == 0: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + /*finally:*/ { + /*normal exit:*/{ + #ifdef WITH_THREAD + Py_BLOCK_THREADS + #endif + goto __pyx_L17; + } + __pyx_L17:; } - __pyx_L29_continue:; - } - goto __pyx_L28; } - __pyx_L28:; - /* "trunk/gensim/models/doc2vec_inner.pyx":269 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":438 + * 1, 1, syn0locks) * - * return next_random # <<<<<<<<<<<<<< + * return result # <<<<<<<<<<<<<< * * */ - __pyx_r = __pyx_v_next_random; + __Pyx_XDECREF(__pyx_r); + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 438; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_r = __pyx_t_7; + __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":201 + /* "trunk/gensim/models/doc2vec_inner.pyx":320 * * - * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative */ /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_7); + 
__Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_15); + __Pyx_XDECREF(__pyx_t_16); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; __pyx_L0:; + __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_item); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":272 +/* "trunk/gensim/models/doc2vec_inner.pyx":441 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow = {"train_sentence_dbow", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm = {"train_sentence_dm", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_sentence = 0; PyObject *__pyx_v_lbls = 0; PyObject *__pyx_v_alpha = 0; 
PyObject *__pyx_v__work = 0; - PyObject *__pyx_v_train_words = 0; - PyObject *__pyx_v_train_lbls = 0; + PyObject *__pyx_v__neu1 = 0; + PyObject *__pyx_v__train_words = 0; + PyObject *__pyx_v__train_lbls = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); + __Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_train_lbls,0}; - PyObject* values[7] = {0,0,0,0,0,0,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_train_words_2,&__pyx_n_s_train_lbls_2,0}; + PyObject* values[8] = {0,0,0,0,0,0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); @@ -3060,38 +4461,43 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 2); {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: - if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; + if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; + if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words_2)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + 
__Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + } + case 7: + if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls_2)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - } else if (PyTuple_GET_SIZE(__pyx_args) != 7) { + } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { goto __pyx_L5_argtuple_error; } else { values[0] = PyTuple_GET_ITEM(__pyx_args, 0); @@ -3101,37 +4507,44 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence values[4] = PyTuple_GET_ITEM(__pyx_args, 4); values[5] = PyTuple_GET_ITEM(__pyx_args, 5); values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + values[7] = PyTuple_GET_ITEM(__pyx_args, 7); } __pyx_v_model = values[0]; __pyx_v_sentence = values[1]; __pyx_v_lbls = values[2]; __pyx_v_alpha = values[3]; __pyx_v__work = values[4]; - __pyx_v_train_words = values[5]; - __pyx_v_train_lbls = values[6]; + __pyx_v__neu1 = values[5]; + __pyx_v__train_words = values[6]; + __pyx_v__train_lbls = values[7]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 
1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v_train_words, __pyx_v_train_lbls); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1, __pyx_v__train_words, __pyx_v__train_lbls); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__train_words, PyObject *__pyx_v__train_lbls) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v_tw; - int __pyx_v_tl; + int __pyx_v_learn_words; + int __pyx_v_learn_lbls; + int __pyx_v_learn_hidden; + int __pyx_v_cbow_mean; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[10000]; @@ -3144,6 +4557,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; + int __pyx_v_k; + int __pyx_v_m; long __pyx_v_result; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1; __pyx_t_5numpy_uint32_t *__pyx_v_points[10000]; @@ -3152,6 +4567,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5numpy_uint32_t *__pyx_v_table; unsigned PY_LONG_LONG __pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; PyObject *__pyx_v_word = NULL; PyObject *__pyx_v_item = NULL; PyObject *__pyx_r = NULL; @@ -3178,113 +4594,135 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("train_sentence_dbow", 0); + __Pyx_RefNannySetupContext("train_sentence_dm", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":273 + /* "trunk/gensim/models/doc2vec_inner.pyx":442 * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int tw = train_words + * cdef int learn_words = _train_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":274 - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":443 + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int tw = train_words - * cdef int tl = train_lbls + * cdef int learn_words = _train_words + * cdef int learn_lbls = _train_lbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":275 + /* "trunk/gensim/models/doc2vec_inner.pyx":444 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int tw = train_words # <<<<<<<<<<<<<< - * cdef int tl = train_lbls - * + * cdef int learn_words = _train_words # <<<<<<<<<<<<<< + * cdef int learn_lbls = _train_lbls + * cdef int learn_hidden = True */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tw = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":276 + /* "trunk/gensim/models/doc2vec_inner.pyx":445 * cdef int negative = model.negative - * cdef int tw = train_words - * cdef int tl = train_lbls # <<<<<<<<<<<<<< + * cdef int learn_words = _train_words + * cdef int learn_lbls = _train_lbls # <<<<<<<<<<<<<< + * cdef int learn_hidden = True + * cdef int cbow_mean = model.cbow_mean + */ + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_learn_lbls = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":446 + * cdef int learn_words = _train_words + * cdef int learn_lbls = _train_lbls + * cdef int learn_hidden = True # <<<<<<<<<<<<<< + * cdef int cbow_mean = model.cbow_mean + * cdef REAL_t count, inv_count + */ + __pyx_v_learn_hidden = 1; + + /* "trunk/gensim/models/doc2vec_inner.pyx":447 + * cdef int learn_lbls = _train_lbls + * cdef int learn_hidden = True + * cdef int 
cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< + * cdef REAL_t count, inv_count * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tl = __pyx_t_2; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":278 - * cdef int tl = train_lbls + /* "trunk/gensim/models/doc2vec_inner.pyx":450 + * cdef REAL_t count, inv_count * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work - * cdef REAL_t _alpha = alpha + * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":280 - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + /* "trunk/gensim/models/doc2vec_inner.pyx":453 * cdef REAL_t *work + * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":281 - * cdef REAL_t *work + /* "trunk/gensim/models/doc2vec_inner.pyx":454 + * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":290 + /* "trunk/gensim/models/doc2vec_inner.pyx":463 * cdef int sentence_len * cdef int lbl_length * cdef int window = model.window # <<<<<<<<<<<<<< * - * cdef int i, j + * cdef int i, j, k, m */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":293 + /* "trunk/gensim/models/doc2vec_inner.pyx":466 * - * cdef int i, j + * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< * * # For hierarchical softmax */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":306 + /* "trunk/gensim/models/doc2vec_inner.pyx":481 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -3294,23 +4732,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":307 + /* "trunk/gensim/models/doc2vec_inner.pyx":482 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":309 + /* "trunk/gensim/models/doc2vec_inner.pyx":484 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3320,106 +4758,116 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":310 + /* "trunk/gensim/models/doc2vec_inner.pyx":485 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":311 + /* "trunk/gensim/models/doc2vec_inner.pyx":486 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) - * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) + * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":312 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< - * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) + * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 312; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":313 + /* "trunk/gensim/models/doc2vec_inner.pyx":488 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) - * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< + * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":316 + /* "trunk/gensim/models/doc2vec_inner.pyx":491 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< + * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 491; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":317 + /* "trunk/gensim/models/doc2vec_inner.pyx":492 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) + * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * + */ + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, 
__pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":493 + * work = np.PyArray_DATA(_work) + * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * + * syn0locks = np.PyArray_DATA(model.syn0locks) */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -3428,24 +4876,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/doc2vec_inner.pyx":318 - * work = np.PyArray_DATA(_work) + /* "trunk/gensim/models/doc2vec_inner.pyx":495 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< + * + * syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_10 < __pyx_t_9) != 0)) { - __pyx_t_5 = __pyx_t_10; - } else { - __pyx_t_5 = __pyx_t_9; - } - __pyx_v_lbl_length = ((int)__pyx_t_5); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 495; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 495; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":320 - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + /* "trunk/gensim/models/doc2vec_inner.pyx":497 + * syn0locks = np.PyArray_DATA(model.syn0locks) * * for i in range(sentence_len): # <<<<<<<<<<<<<< * word = sentence[i] @@ -3455,19 +4900,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":321 + /* "trunk/gensim/models/doc2vec_inner.pyx":498 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":322 + /* "trunk/gensim/models/doc2vec_inner.pyx":499 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -3478,7 +4923,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_12 = (__pyx_t_4 != 0); if 
(__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":323 + /* "trunk/gensim/models/doc2vec_inner.pyx":500 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3490,20 +4935,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":325 + /* "trunk/gensim/models/doc2vec_inner.pyx":502 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":326 + /* "trunk/gensim/models/doc2vec_inner.pyx":503 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -3513,49 +4958,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":327 + /* "trunk/gensim/models/doc2vec_inner.pyx":504 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = 
np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 504; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 504; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/doc2vec_inner.pyx":328 + /* "trunk/gensim/models/doc2vec_inner.pyx":505 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 505; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 505; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":329 + /* "trunk/gensim/models/doc2vec_inner.pyx":506 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":331 + /* "trunk/gensim/models/doc2vec_inner.pyx":508 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -3566,7 +5011,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":332 + /* "trunk/gensim/models/doc2vec_inner.pyx":509 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3578,28 +5023,28 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L7:; } - /* 
"trunk/gensim/models/doc2vec_inner.pyx":334 + /* "trunk/gensim/models/doc2vec_inner.pyx":511 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(lbl_length): + * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; - __pyx_t_5 = 0; + __pyx_t_10 = 0; if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_7); if (likely(__pyx_t_15)) { @@ -3607,51 +5052,51 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_INCREF(__pyx_t_15); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_7, function); - __pyx_t_5 = 1; + __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_5, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_10, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_5, __pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_10, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_5, __pyx_t_14); + PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_10, __pyx_t_14); __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { - __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_5 = 0; + __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { if (likely(!__pyx_t_17)) { if (likely(PyList_CheckExact(__pyx_t_7))) { - if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_7)) break; + if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = 
PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_7)) break; + if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -3660,7 +5105,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -3671,29 +5116,45 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_i = __pyx_t_2; __pyx_t_2 = 
(__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":335 + /* "trunk/gensim/models/doc2vec_inner.pyx":512 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< - * for i in range(lbl_length): - * word = lbls[i] + * + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":334 + /* "trunk/gensim/models/doc2vec_inner.pyx":511 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(lbl_length): + * */ } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":336 - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + /* "trunk/gensim/models/doc2vec_inner.pyx":514 * reduced_windows[i] = item + * + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< + * for i in range(lbl_length): + * word = lbls[i] + */ + __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 514; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = 10000; + if (((__pyx_t_10 < __pyx_t_9) != 0)) { + __pyx_t_5 = __pyx_t_10; + } else { + __pyx_t_5 = __pyx_t_9; + } + __pyx_v_lbl_length = ((int)__pyx_t_5); + + /* "trunk/gensim/models/doc2vec_inner.pyx":515 + * + * 
lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * for i in range(lbl_length): # <<<<<<<<<<<<<< * word = lbls[i] * if word is None: @@ -3702,19 +5163,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":337 - * reduced_windows[i] = item + /* "trunk/gensim/models/doc2vec_inner.pyx":516 + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * for i in range(lbl_length): * word = lbls[i] # <<<<<<<<<<<<<< * if word is None: * lbl_codelens[i] = 0 */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 516; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_7); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":338 + /* "trunk/gensim/models/doc2vec_inner.pyx":517 * for i in range(lbl_length): * word = lbls[i] * if word is None: # <<<<<<<<<<<<<< @@ -3725,7 +5186,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_t_12 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":339 + /* "trunk/gensim/models/doc2vec_inner.pyx":518 * word = lbls[i] * if word is None: * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3737,20 +5198,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":341 + /* "trunk/gensim/models/doc2vec_inner.pyx":520 * lbl_codelens[i] = 0 * else: * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * lbl_codelens[i] = len(word.code) */ - 
__pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":342 + /* "trunk/gensim/models/doc2vec_inner.pyx":521 * else: * lbl_indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -3760,23 +5221,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":343 + /* "trunk/gensim/models/doc2vec_inner.pyx":522 * lbl_indexes[i] = word.index * if hs: * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< * else: * lbl_codelens[i] = 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); goto __pyx_L14; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":345 + /* "trunk/gensim/models/doc2vec_inner.pyx":524 * lbl_codelens[i] = len(word.code) * else: * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< @@ -3787,7 +5248,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":346 + /* "trunk/gensim/models/doc2vec_inner.pyx":525 * else: * lbl_codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3799,12 +5260,12 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L13:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":349 + /* "trunk/gensim/models/doc2vec_inner.pyx":528 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: + * for i in range(sentence_len): + * if codelens[i] == 0: */ { #ifdef WITH_THREAD @@ -3813,123 +5274,323 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":350 + /* "trunk/gensim/models/doc2vec_inner.pyx":529 * # release GIL & train on the sentence * with nogil: - * for j in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[j] == 0: + * for i in range(sentence_len): # <<<<<<<<<<<<<< + * if codelens[i] == 0: * continue */ - __pyx_t_2 = __pyx_v_lbl_length; + __pyx_t_2 = __pyx_v_sentence_len; for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_j = __pyx_t_11; + __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":351 + /* "trunk/gensim/models/doc2vec_inner.pyx":530 * with nogil: - * 
for j in range(lbl_length): - * if lbl_codelens[j] == 0: # <<<<<<<<<<<<<< + * for i in range(sentence_len): + * if codelens[i] == 0: # <<<<<<<<<<<<<< * continue - * for i in range(sentence_len): + * j = i - window + reduced_windows[i] */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_j]) == 0) != 0); + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":352 - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":531 + * for i in range(sentence_len): + * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< - * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window + reduced_windows[i] + * if j < 0: */ goto __pyx_L18_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":353 - * if lbl_codelens[j] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":532 + * if codelens[i] == 0: * continue - * for i in range(sentence_len): # <<<<<<<<<<<<<< - * if codelens[i] == 0: - * continue + * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< + * if j < 0: + * j = 0 */ - __pyx_t_18 = __pyx_v_sentence_len; - for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { - __pyx_v_i = __pyx_t_19; + __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":354 + /* "trunk/gensim/models/doc2vec_inner.pyx":533 * continue - * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< + * j = i - window + reduced_windows[i] + * if j < 0: # <<<<<<<<<<<<<< + * j = 0 + * k = i + window + 1 - reduced_windows[i] + */ + __pyx_t_4 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":534 + * j = i - window + reduced_windows[i] + * if j < 0: + * j = 0 # <<<<<<<<<<<<<< + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: + */ + __pyx_v_j = 0; + goto __pyx_L21; + } + __pyx_L21:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":535 + 
* if j < 0: + * j = 0 + * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< + * if k > sentence_len: + * k = sentence_len + */ + __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); + + /* "trunk/gensim/models/doc2vec_inner.pyx":536 + * j = 0 + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: # <<<<<<<<<<<<<< + * k = sentence_len + * + */ + __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":537 + * k = i + window + 1 - reduced_windows[i] + * if k > sentence_len: + * k = sentence_len # <<<<<<<<<<<<<< + * + * # compose l1 (in neu1) + */ + __pyx_v_k = __pyx_v_sentence_len; + goto __pyx_L22; + } + __pyx_L22:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":540 + * + * # compose l1 (in neu1) + * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * count = 0.0 + * for m in range(j, k): + */ + memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":541 + * # compose l1 (in neu1) + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 # <<<<<<<<<<<<<< + * for m in range(j, k): + * if m == i or codelens[m] == 0: + */ + __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); + + /* "trunk/gensim/models/doc2vec_inner.pyx":542 + * memset(neu1, 0, size * cython.sizeof(REAL_t)) + * count = 0.0 + * for m in range(j, k): # <<<<<<<<<<<<<< + * if m == i or codelens[m] == 0: + * continue + */ + __pyx_t_18 = __pyx_v_k; + for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; + + /* "trunk/gensim/models/doc2vec_inner.pyx":543 + * count = 0.0 + * for m in range(j, k): + * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue - * if hs: + * else: */ - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); + __pyx_t_12 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_12) 
{ + } else { + __pyx_t_4 = __pyx_t_12; + goto __pyx_L26_bool_binop_done; + } + __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_4 = __pyx_t_12; + __pyx_L26_bool_binop_done:; if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":355 - * for i in range(sentence_len): - * if codelens[i] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":544 + * for m in range(j, k): + * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< - * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) + * else: + * count += ONEF + */ + goto __pyx_L23_continue; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":546 + * continue + * else: + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * for m in range(lbl_length): + */ + __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); + + /* "trunk/gensim/models/doc2vec_inner.pyx":547 + * else: + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: */ - goto __pyx_L21_continue; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } + __pyx_L23_continue:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":548 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * for m in range(lbl_length): # <<<<<<<<<<<<<< + * if lbl_codelens[m] == 0: + * continue + */ + __pyx_t_18 = __pyx_v_lbl_length; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":356 - * if 
codelens[i] == 0: + /* "trunk/gensim/models/doc2vec_inner.pyx":549 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) - * if negative: + * else: */ - __pyx_t_4 = (__pyx_v_hs != 0); + __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":357 + /* "trunk/gensim/models/doc2vec_inner.pyx":550 + * for m in range(lbl_length): + * if lbl_codelens[m] == 0: + * continue # <<<<<<<<<<<<<< + * else: + * count += ONEF + */ + goto __pyx_L28_continue; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":552 * continue - * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) # <<<<<<<<<<<<<< - * if negative: - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) + * else: + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * if cbow_mean and count > (0.5): + */ + __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); + + /* "trunk/gensim/models/doc2vec_inner.pyx":553 + * else: + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * if cbow_mean and count > (0.5): + * inv_count = ONEF/count */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_tw, __pyx_v_tl); - goto __pyx_L24; + 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L24:; + __pyx_L28_continue:; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":554 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + */ + __pyx_t_12 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_12) { + } else { + __pyx_t_4 = __pyx_t_12; + goto __pyx_L32_bool_binop_done; + } + __pyx_t_12 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); + __pyx_t_4 = __pyx_t_12; + __pyx_L32_bool_binop_done:; + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":358 - * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) + /* "trunk/gensim/models/doc2vec_inner.pyx":555 + * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + * if cbow_mean and count > (0.5): + * inv_count = ONEF/count # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/doc2vec_inner.pyx":359 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) - * if negative: - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":556 + * if cbow_mean and count > (0.5): + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< * - * return result + * if hs: */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_tw, __pyx_v_tl); - goto __pyx_L25; - } - __pyx_L25:; - __pyx_L21_continue:; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L31; } + __pyx_L31:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":558 + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
+ * + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, + * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, + */ + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":559 + * + * if hs: + * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, # <<<<<<<<<<<<<< + * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, + * learn_hidden, learn_lbls, learn_words, syn0locks) + */ + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_lbl_length, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); + goto __pyx_L34; + } + __pyx_L34:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":562 + * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, + * learn_hidden, learn_lbls, learn_words, syn0locks) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, + * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, + */ + __pyx_t_4 = (__pyx_v_negative != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":563 + * learn_hidden, learn_lbls, learn_words, syn0locks) + * if negative: + * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, # <<<<<<<<<<<<<< + * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, + * next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) + */ + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, 
__pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_next_random, __pyx_v_lbl_length, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); + goto __pyx_L35; + } + __pyx_L35:; __pyx_L18_continue:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":349 + /* "trunk/gensim/models/doc2vec_inner.pyx":528 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: + * for i in range(sentence_len): + * if codelens[i] == 0: */ /*finally:*/ { /*normal exit:*/{ @@ -3942,24 +5603,24 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } } - /* "trunk/gensim/models/doc2vec_inner.pyx":361 - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) + /* "trunk/gensim/models/doc2vec_inner.pyx":567 + * next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":272 + /* "trunk/gensim/models/doc2vec_inner.pyx":441 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative 
= model.negative */ @@ -3972,7 +5633,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_XDECREF(__pyx_t_14); __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_16); - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); @@ -3982,34 +5643,34 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":364 +/* "trunk/gensim/models/doc2vec_inner.pyx":570 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm = {"train_sentence_dm", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat = {"train_sentence_dm_concat", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, 
METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_sentence = 0; PyObject *__pyx_v_lbls = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v__work = 0; PyObject *__pyx_v__neu1 = 0; - PyObject *__pyx_v_train_words = 0; - PyObject *__pyx_v_train_lbls = 0; + PyObject *__pyx_v__learn_words = 0; + PyObject *__pyx_v__learn_lbls = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); + __Pyx_RefNannySetupContext("train_sentence_dm_concat (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_train_words,&__pyx_n_s_train_lbls,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_words,&__pyx_n_s_learn_lbls,0}; PyObject* values[8] = {0,0,0,0,0,0,0,0}; if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; @@ -4034,41 +5695,41 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 5: if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; + if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_words)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 6); {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 7: - if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; + if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_lbls)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { goto __pyx_L5_argtuple_error; @@ -4088,47 +5749,50 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __pyx_v_alpha = values[3]; __pyx_v__work = values[4]; __pyx_v__neu1 = values[5]; - __pyx_v_train_words = values[6]; - __pyx_v_train_lbls = values[7]; + __pyx_v__learn_words = values[6]; + __pyx_v__learn_lbls = values[7]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; - 
__Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1, __pyx_v_train_words, __pyx_v_train_lbls); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1, __pyx_v__learn_words, __pyx_v__learn_lbls); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__learn_words, PyObject *__pyx_v__learn_lbls) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v_tw; - int __pyx_v_tl; - int __pyx_v_cbow_mean; + int __pyx_v_learn_hidden; + int __pyx_v_learn_lbls; + int __pyx_v_learn_words; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; 
- int __pyx_v_size; + int __pyx_v_layer1_size; + int __pyx_v_vector_size; int __pyx_v_codelens[10000]; - int __pyx_v_lbl_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_lbl_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_window_indexes[10000]; int __pyx_v_sentence_len; int __pyx_v_lbl_length; int __pyx_v_window; + int __pyx_v_expected_lbl_length; int __pyx_v_i; int __pyx_v_j; int __pyx_v_k; + int __pyx_v_m; + int __pyx_v_n; long __pyx_v_result; + int __pyx_v_null_word_index; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1; __pyx_t_5numpy_uint32_t *__pyx_v_points[10000]; __pyx_t_5numpy_uint8_t *__pyx_v_codes[10000]; @@ -4136,306 +5800,406 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5numpy_uint32_t *__pyx_v_table; unsigned PY_LONG_LONG __pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; PyObject *__pyx_v_word = NULL; - PyObject *__pyx_v_item = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; - int __pyx_t_4; + PyObject *__pyx_t_4 = NULL; Py_ssize_t __pyx_t_5; - PyObject *__pyx_t_6 = NULL; - PyObject *__pyx_t_7 = NULL; - unsigned PY_LONG_LONG __pyx_t_8; - long __pyx_t_9; - Py_ssize_t __pyx_t_10; + long __pyx_t_6; + Py_ssize_t __pyx_t_7; + int __pyx_t_8; + PyObject *__pyx_t_9 = NULL; + unsigned PY_LONG_LONG __pyx_t_10; int __pyx_t_11; int __pyx_t_12; __pyx_t_5numpy_uint32_t __pyx_t_13; - PyObject *__pyx_t_14 = NULL; - PyObject *__pyx_t_15 = NULL; - PyObject *__pyx_t_16 = NULL; - PyObject *(*__pyx_t_17)(PyObject *); + int __pyx_t_14; + int __pyx_t_15; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("train_sentence_dm", 0); + 
__Pyx_RefNannySetupContext("train_sentence_dm_concat", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":365 + /* "trunk/gensim/models/doc2vec_inner.pyx":571 * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): + * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int tw = train_words + * cdef int learn_hidden = True */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 571; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 571; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":366 - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":572 + * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int tw = train_words - * cdef int tl = train_lbls + * cdef int learn_hidden = True + * cdef int learn_lbls = _learn_lbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":367 + /* "trunk/gensim/models/doc2vec_inner.pyx":573 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int tw = train_words # <<<<<<<<<<<<<< - * cdef int tl = train_lbls - * cdef int cbow_mean = model.cbow_mean + * cdef int learn_hidden = True # <<<<<<<<<<<<<< + * cdef int learn_lbls = _learn_lbls + * cdef int learn_words = _learn_words */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tw = __pyx_t_2; + __pyx_v_learn_hidden = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":368 + /* "trunk/gensim/models/doc2vec_inner.pyx":574 * cdef int negative = model.negative - * cdef int tw = train_words - * cdef int tl = train_lbls # <<<<<<<<<<<<<< - * cdef int cbow_mean = model.cbow_mean + * cdef int learn_hidden = True + * cdef int learn_lbls = _learn_lbls # <<<<<<<<<<<<<< + * cdef int learn_words = _learn_words * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tl = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__learn_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_learn_lbls = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":369 - * cdef int tw = train_words - * cdef int tl = train_lbls - * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":575 + * cdef int learn_hidden = True + * cdef int learn_lbls = _learn_lbls + * cdef int learn_words = _learn_words # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_cbow_mean = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":371 - * cdef int cbow_mean = model.cbow_mean + /* "trunk/gensim/models/doc2vec_inner.pyx":577 + * cdef int learn_words = _learn_words * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":374 + /* "trunk/gensim/models/doc2vec_inner.pyx":580 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< - * cdef int size = model.layer1_size - * + * cdef int layer1_size = model.layer1_size + * cdef int vector_size = model.vector_size */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":375 + /* "trunk/gensim/models/doc2vec_inner.pyx":581 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha - * cdef int size = model.layer1_size # <<<<<<<<<<<<<< + * cdef int layer1_size = model.layer1_size # <<<<<<<<<<<<<< + * cdef int vector_size = model.vector_size + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_layer1_size = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":582 + * cdef REAL_t _alpha = alpha + * cdef int layer1_size = model.layer1_size + * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_size = __pyx_t_2; + __pyx_v_vector_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":384 + /* "trunk/gensim/models/doc2vec_inner.pyx":590 * cdef int sentence_len * cdef int lbl_length * cdef int window = model.window # <<<<<<<<<<<<<< + * cdef int expected_lbl_length = model.dm_lbl_count + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_window = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":591 + * cdef int lbl_length + * cdef int window = model.window + * cdef int expected_lbl_length = model.dm_lbl_count # <<<<<<<<<<<<<< * - * cdef int i, j, k + * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_window = __pyx_t_2; + __pyx_v_expected_lbl_length = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":387 + /* "trunk/gensim/models/doc2vec_inner.pyx":594 * - * cdef int i, j, k + * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< + * cdef int null_word_index = model.vocab['\0'].index * - * # For hierarchical softmax */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":402 + /* "trunk/gensim/models/doc2vec_inner.pyx":595 + * cdef int i, j, k, m, n + * cdef long result = 0 + * cdef int null_word_index = model.vocab['\0'].index # 
<<<<<<<<<<<<<< + * + * # For hierarchical softmax + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_null_word_index = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":610 * cdef unsigned long long next_random * + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< + * if lbl_length != expected_lbl_length: + * return 0 # skip doc without expected nmber of lbls + */ + __pyx_t_5 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 10000; + if (((__pyx_t_5 < __pyx_t_6) != 0)) { + __pyx_t_7 = __pyx_t_5; + } else { + __pyx_t_7 = __pyx_t_6; + } + __pyx_v_lbl_length = ((int)__pyx_t_7); + + /* "trunk/gensim/models/doc2vec_inner.pyx":611 + * + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * if lbl_length != expected_lbl_length: # <<<<<<<<<<<<<< + * return 0 # skip doc without expected nmber of lbls + * + */ + __pyx_t_8 = ((__pyx_v_lbl_length != __pyx_v_expected_lbl_length) != 0); + if 
(__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":612 + * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * if lbl_length != expected_lbl_length: + * return 0 # skip doc without expected nmber of lbls # <<<<<<<<<<<<<< + * + * if hs: + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_0); + __pyx_r = __pyx_int_0; + goto __pyx_L0; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":614 + * return 0 # skip doc without expected nmber of lbls + * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) * */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_8 = (__pyx_v_hs != 0); + if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":403 + /* "trunk/gensim/models/doc2vec_inner.pyx":615 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L3; + goto __pyx_L4; } - __pyx_L3:; + __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":405 + /* "trunk/gensim/models/doc2vec_inner.pyx":617 * syn1 = 
(np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_8 = (__pyx_v_negative != 0); + if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":406 + /* "trunk/gensim/models/doc2vec_inner.pyx":618 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":407 + /* "trunk/gensim/models/doc2vec_inner.pyx":619 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":408 + /* "trunk/gensim/models/doc2vec_inner.pyx":620 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 620; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 620; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_table_len = __pyx_t_5; + __pyx_v_table_len = __pyx_t_7; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":409 + /* "trunk/gensim/models/doc2vec_inner.pyx":621 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if 
(unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_9 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_9); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_next_random = __pyx_t_8; - goto __pyx_L4; + __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; + __pyx_t_10 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_10 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_v_next_random = __pyx_t_10; + goto __pyx_L5; } - __pyx_L4:; + __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":412 + /* "trunk/gensim/models/doc2vec_inner.pyx":624 * * # convert Python 
structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":413 + /* "trunk/gensim/models/doc2vec_inner.pyx":625 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * + * # optional locking of some vactors against backprop-learnind */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 413; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":414 - * work = np.PyArray_DATA(_work) - * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":628 + * + * # optional locking of some vactors against backprop-learnind + * 
syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< + * + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + */ + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":630 + * syn0locks = np.PyArray_DATA(model.syn0locks) * + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< + * j = 0 * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_5 < __pyx_t_9) != 0)) { - __pyx_t_10 = __pyx_t_5; + __pyx_t_7 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 10000; + if (((__pyx_t_7 < __pyx_t_6) != 0)) { + __pyx_t_5 = __pyx_t_7; } else { - __pyx_t_10 = __pyx_t_9; + __pyx_t_5 = __pyx_t_6; } - __pyx_v_sentence_len = ((int)__pyx_t_10); + __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":416 - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + /* "trunk/gensim/models/doc2vec_inner.pyx":631 * + * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * j = 0 # <<<<<<<<<<<<<< + * for i in range(sentence_len): + * word = sentence[i] + */ + __pyx_v_j = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":632 + * 
sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< * word = sentence[i] * if word is None: @@ -4444,261 +6208,149 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":417 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":633 + * j = 0 * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: - * codelens[i] = 0 + * # shrink sentence to leave out word */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); - __pyx_t_6 = 0; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 633; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_4); + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); + __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":418 + /* "trunk/gensim/models/doc2vec_inner.pyx":634 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< - * codelens[i] = 0 - * else: + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 */ - __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_4 != 0); + __pyx_t_8 = (__pyx_v_word == Py_None); + __pyx_t_12 = (__pyx_t_8 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":419 - * word = sentence[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":636 * if word is None: - * codelens[i] = 0 # <<<<<<<<<<<<<< + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + * continue # 
leaving j unchanged * else: - * indexes[i] = word.index */ - (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L7; + __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + + /* "trunk/gensim/models/doc2vec_inner.pyx":637 + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 + * continue # leaving j unchanged # <<<<<<<<<<<<<< + * else: + * indexes[j] = word.index + */ + goto __pyx_L6_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":421 - * codelens[i] = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":639 + * continue # leaving j unchanged * else: - * indexes[i] = word.index # <<<<<<<<<<<<<< + * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: - * codelens[i] = len(word.code) + * codelens[j] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 421; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":422 + /* "trunk/gensim/models/doc2vec_inner.pyx":640 * else: - * indexes[i] = word.index + * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< - 
* codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) */ __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/doc2vec_inner.pyx":423 - * indexes[i] = word.index + /* "trunk/gensim/models/doc2vec_inner.pyx":641 + * indexes[j] = word.index * if hs: - * codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) + * codelens[j] = len(word.code) # <<<<<<<<<<<<<< + * codes[j] = np.PyArray_DATA(word.code) + * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":424 + /* "trunk/gensim/models/doc2vec_inner.pyx":642 * if hs: - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(word.point) + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[j] = np.PyArray_DATA(word.point) * 
else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 424; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":425 - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":643 + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) + * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: - * codelens[i] = 1 + * codelens[j] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L8; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 643; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 643; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + goto __pyx_L9; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":427 - * points[i] = np.PyArray_DATA(word.point) - * else: - * codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 - * # single randint() call avoids a big thread-sync slowdown - */ - (__pyx_v_codelens[__pyx_v_i]) = 1; - } - __pyx_L8:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":428 + /* "trunk/gensim/models/doc2vec_inner.pyx":645 + * points[j] = np.PyArray_DATA(word.point) * else: - * codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): - */ - __pyx_v_result = (__pyx_v_result + 1); - } - __pyx_L7:; - } - - /* "trunk/gensim/models/doc2vec_inner.pyx":430 + * codelens[j] = 1 # <<<<<<<<<<<<<< * result += 1 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< - * reduced_windows[i] = item - * + * j = j + 1 */ - __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = NULL; - __pyx_t_10 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_7); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); - __Pyx_INCREF(__pyx_t_15); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_7, function); - __pyx_t_10 = 1; - } - } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; - } - __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_10, __pyx_int_0); - __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_10, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_10, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); - __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_6 = 
__Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { - __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; - __pyx_t_17 = NULL; - } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif - } else { - if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - #endif - } - } else { - __pyx_t_6 = __pyx_t_17(__pyx_t_7); - if (unlikely(!__pyx_t_6)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 430; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - break; + (__pyx_v_codelens[__pyx_v_j]) = 1; } - __Pyx_GOTREF(__pyx_t_6); - } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_6); - __pyx_t_6 = 0; - __pyx_v_i = __pyx_t_2; - __pyx_t_2 = (__pyx_t_2 + 1); - - /* "trunk/gensim/models/doc2vec_inner.pyx":431 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): - * reduced_windows[i] = item # <<<<<<<<<<<<<< - * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 431; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":430 - * result += 1 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< - * reduced_windows[i] = item + /* "trunk/gensim/models/doc2vec_inner.pyx":646 + * else: + * codelens[j] = 1 + * result += 1 # <<<<<<<<<<<<<< + * j = j + 1 * */ - } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":433 - * reduced_windows[i] = item + /* "trunk/gensim/models/doc2vec_inner.pyx":647 + * codelens[j] = 1 + * result += 1 + * j = j + 1 # <<<<<<<<<<<<<< * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< * for i in range(lbl_length): - * word = lbls[i] */ - __pyx_t_10 = 
PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 433; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_10 < __pyx_t_9) != 0)) { - __pyx_t_5 = __pyx_t_10; - } else { - __pyx_t_5 = __pyx_t_9; + __pyx_v_j = (__pyx_v_j + 1); + } + __pyx_L6_continue:; } - __pyx_v_lbl_length = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":434 + /* "trunk/gensim/models/doc2vec_inner.pyx":649 + * j = j + 1 * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) * for i in range(lbl_length): # <<<<<<<<<<<<<< * word = lbls[i] * if word is None: @@ -4707,109 +6359,73 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":435 - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + /* "trunk/gensim/models/doc2vec_inner.pyx":650 + * * for i in range(lbl_length): * word = lbls[i] # <<<<<<<<<<<<<< * if word is None: - * lbl_codelens[i] = 0 + * # no support for missing lbls where expected; skip sentence */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 435; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 650; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_4); + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); + __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":436 + /* "trunk/gensim/models/doc2vec_inner.pyx":651 * for i in range(lbl_length): * word = lbls[i] * if word is None: # <<<<<<<<<<<<<< - * lbl_codelens[i] = 0 - * 
else: + * # no support for missing lbls where expected; skip sentence + * return 0 */ __pyx_t_12 = (__pyx_v_word == Py_None); - __pyx_t_4 = (__pyx_t_12 != 0); - if (__pyx_t_4) { + __pyx_t_8 = (__pyx_t_12 != 0); + if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":437 - * word = lbls[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":653 * if word is None: - * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< + * # no support for missing lbls where expected; skip sentence + * return 0 # <<<<<<<<<<<<<< * else: - * lbl_indexes[i] = word.index + * window_indexes[i] = word.index */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 0; - goto __pyx_L13; + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(__pyx_int_0); + __pyx_r = __pyx_int_0; + goto __pyx_L0; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":439 - * lbl_codelens[i] = 0 - * else: - * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< - * if hs: - * lbl_codelens[i] = len(word.code) - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 439; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; - - /* "trunk/gensim/models/doc2vec_inner.pyx":440 + /* "trunk/gensim/models/doc2vec_inner.pyx":655 + * return 0 * else: - * lbl_indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * lbl_codelens[i] = len(word.code) - * else: - */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":441 - * lbl_indexes[i] = word.index - * if hs: - * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * else: - * lbl_codelens[i] = 1 - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, 
__pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - goto __pyx_L14; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":443 - * lbl_codelens[i] = len(word.code) - * else: - * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< + * window_indexes[i] = word.index # <<<<<<<<<<<<<< * result += 1 * */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 1; - } - __pyx_L14:; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + (__pyx_v_window_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":444 - * else: - * lbl_codelens[i] = 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":656 + * else: + * window_indexes[i] = word.index * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ __pyx_v_result = (__pyx_v_result + 1); } - __pyx_L13:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":447 + /* "trunk/gensim/models/doc2vec_inner.pyx":659 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window # negative OK: will pad with null word */ { #ifdef WITH_THREAD @@ -4818,182 +6434,239 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":448 + /* "trunk/gensim/models/doc2vec_inner.pyx":660 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< - * if codelens[i] == 0: - * continue + * j = i - window # negative OK: will pad with null word + * k = i + window + 1 # past sentence end OK: will pad with null word */ __pyx_t_2 = __pyx_v_sentence_len; for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/doc2vec_inner.pyx":449 + /* "trunk/gensim/models/doc2vec_inner.pyx":661 * with nogil: * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * j = i - window + reduced_windows[i] + * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< + * k = i + window + 1 # past sentence end OK: will pad with null word + * */ - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); - if (__pyx_t_4) { + __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "trunk/gensim/models/doc2vec_inner.pyx":450 + /* "trunk/gensim/models/doc2vec_inner.pyx":662 * for i in range(sentence_len): - * if codelens[i] == 0: - * continue # <<<<<<<<<<<<<< - * j = i - window + reduced_windows[i] - * if j < 0: + * j = i - window # negative OK: will pad with null word + * k = i + window + 1 # past sentence end OK: will pad with null word # <<<<<<<<<<<<<< + * + * # compose l1 & clear work */ - goto __pyx_L18_continue; - } + __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":451 - * if codelens[i] == 0: - * continue - * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< - * if j < 0: - * j = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":665 + * + * # compose l1 & clear work + * n = lbl_length # <<<<<<<<<<<<<< + * for m in range(j, k): + * if m == i: */ - __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + 
(__pyx_v_reduced_windows[__pyx_v_i])); + __pyx_v_n = __pyx_v_lbl_length; - /* "trunk/gensim/models/doc2vec_inner.pyx":452 - * continue - * j = i - window + reduced_windows[i] - * if j < 0: # <<<<<<<<<<<<<< - * j = 0 - * k = i + window + 1 - reduced_windows[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":666 + * # compose l1 & clear work + * n = lbl_length + * for m in range(j, k): # <<<<<<<<<<<<<< + * if m == i: + * continue */ - __pyx_t_4 = ((__pyx_v_j < 0) != 0); - if (__pyx_t_4) { + __pyx_t_14 = __pyx_v_k; + for (__pyx_t_15 = __pyx_v_j; __pyx_t_15 < __pyx_t_14; __pyx_t_15+=1) { + __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":453 - * j = i - window + reduced_windows[i] - * if j < 0: - * j = 0 # <<<<<<<<<<<<<< - * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: + /* "trunk/gensim/models/doc2vec_inner.pyx":667 + * n = lbl_length + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * if m < 0 or m >= sentence_len: */ - __pyx_v_j = 0; - goto __pyx_L21; + __pyx_t_8 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":668 + * for m in range(j, k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * if m < 0 or m >= sentence_len: + * window_indexes[n] = null_word_index + */ + goto __pyx_L18_continue; + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":669 + * if m == i: + * continue + * if m < 0 or m >= sentence_len: # <<<<<<<<<<<<<< + * window_indexes[n] = null_word_index + * else: + */ + __pyx_t_12 = ((__pyx_v_m < 0) != 0); + if (!__pyx_t_12) { + } else { + __pyx_t_8 = __pyx_t_12; + goto __pyx_L22_bool_binop_done; + } + __pyx_t_12 = ((__pyx_v_m >= __pyx_v_sentence_len) != 0); + __pyx_t_8 = __pyx_t_12; + __pyx_L22_bool_binop_done:; + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":670 + * continue + * if m < 0 or m >= sentence_len: + * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< + * else: + * window_indexes[n] = indexes[m] + */ + 
(__pyx_v_window_indexes[__pyx_v_n]) = __pyx_v_null_word_index; + goto __pyx_L21; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":672 + * window_indexes[n] = null_word_index + * else: + * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< + * n = n + 1 + * for m in range(lbl_length + (2 * window)): + */ + (__pyx_v_window_indexes[__pyx_v_n]) = (__pyx_v_indexes[__pyx_v_m]); + } + __pyx_L21:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":673 + * else: + * window_indexes[n] = indexes[m] + * n = n + 1 # <<<<<<<<<<<<<< + * for m in range(lbl_length + (2 * window)): + * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) + */ + __pyx_v_n = (__pyx_v_n + 1); + __pyx_L18_continue:; } - __pyx_L21:; - /* "trunk/gensim/models/doc2vec_inner.pyx":454 - * if j < 0: - * j = 0 - * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< - * if k > sentence_len: - * k = sentence_len + /* "trunk/gensim/models/doc2vec_inner.pyx":674 + * window_indexes[n] = indexes[m] + * n = n + 1 + * for m in range(lbl_length + (2 * window)): # <<<<<<<<<<<<<< + * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) */ - __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); + __pyx_t_6 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); + for (__pyx_t_14 = 0; __pyx_t_14 < __pyx_t_6; __pyx_t_14+=1) { + __pyx_v_m = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":455 - * j = 0 - * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len - * if hs: + /* "trunk/gensim/models/doc2vec_inner.pyx":675 + * n = n + 1 + * for m in range(lbl_length + (2 * window)): + * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + * */ 
- __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); - if (__pyx_t_4) { + memcpy((&(__pyx_v_neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + } - /* "trunk/gensim/models/doc2vec_inner.pyx":456 - * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: - * k = sentence_len # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":676 + * for m in range(lbl_length + (2 * window)): + * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * * if hs: - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, */ - __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L22; - } - __pyx_L22:; + memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":457 - * if k > sentence_len: - * k = sentence_len + /* "trunk/gensim/models/doc2vec_inner.pyx":678 + * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, - * lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) + * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, + * layer1_size, vector_size, window_indexes, _alpha, */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_8 = (__pyx_v_hs != 0); + if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":458 - * k = sentence_len + /* "trunk/gensim/models/doc2vec_inner.pyx":679 + * * if hs: - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, # <<<<<<<<<<<<<< - * lbl_indexes, _alpha, work, i, j, k, 
cbow_mean, lbl_length, tw, tl) - * if negative: + * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, # <<<<<<<<<<<<<< + * layer1_size, vector_size, window_indexes, _alpha, + * work, lbl_length, window, */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); - goto __pyx_L23; + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v_window_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_lbl_length, __pyx_v_window, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); + goto __pyx_L26; } - __pyx_L23:; + __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":460 - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, - * lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) + /* "trunk/gensim/models/doc2vec_inner.pyx":683 + * work, lbl_length, window, + * learn_hidden, learn_lbls, learn_words, syn0locks) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, - * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, + * next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, + * layer1_size, vector_size, window_indexes, _alpha, */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_8 = (__pyx_v_negative != 0); + if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":461 - * lbl_indexes, _alpha, work, i, j, 
k, cbow_mean, lbl_length, tw, tl) + /* "trunk/gensim/models/doc2vec_inner.pyx":684 + * learn_hidden, learn_lbls, learn_words, syn0locks) * if negative: - * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, # <<<<<<<<<<<<<< - * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, - * cbow_mean, next_random, lbl_length, tw, tl) + * next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, # <<<<<<<<<<<<<< + * layer1_size, vector_size, window_indexes, _alpha, + * work, indexes[i], lbl_length, window, */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_lbl_length, __pyx_v_tw, __pyx_v_tl); - goto __pyx_L24; + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v_window_indexes, __pyx_v__alpha, __pyx_v_work, (__pyx_v_indexes[__pyx_v_i]), __pyx_v_lbl_length, __pyx_v_window, __pyx_v_next_random, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); + goto __pyx_L27; } - __pyx_L24:; - __pyx_L18_continue:; + __pyx_L27:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":447 + /* "trunk/gensim/models/doc2vec_inner.pyx":659 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window # negative OK: will pad with null word */ /*finally:*/ { /*normal exit:*/{ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L17; + goto __pyx_L15; } - __pyx_L17:; + __pyx_L15:; } 
} - /* "trunk/gensim/models/doc2vec_inner.pyx":465 - * cbow_mean, next_random, lbl_length, tw, tl) + /* "trunk/gensim/models/doc2vec_inner.pyx":689 + * next_random, learn_hidden, learn_lbls, learn_words, syn0locks) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; + __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 689; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_r = __pyx_t_4; + __pyx_t_4 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":570 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ @@ -5001,22 +6674,18 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_14); - __Pyx_XDECREF(__pyx_t_15); - __Pyx_XDECREF(__pyx_t_16); - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_9); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); - __Pyx_XDECREF(__pyx_v_item); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* 
"trunk/gensim/models/doc2vec_inner.pyx":468 +/* "trunk/gensim/models/doc2vec_inner.pyx":692 * * * def init(): # <<<<<<<<<<<<<< @@ -5025,21 +6694,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ -static char __pyx_doc_5trunk_6gensim_6models_13doc2vec_inner_4init[] = "\n Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized\n into table EXP_TABLE.\n\n "; -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5init = {"init", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5init, METH_NOARGS, __pyx_doc_5trunk_6gensim_6models_13doc2vec_inner_4init}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_7init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused); /*proto*/ +static char __pyx_doc_5trunk_6gensim_6models_13doc2vec_inner_6init[] = "\n Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized\n into table EXP_TABLE.\n\n "; +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init = {"init", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_7init, METH_NOARGS, __pyx_doc_5trunk_6gensim_6models_13doc2vec_inner_6init}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_7init(PyObject *__pyx_self, CYTHON_UNUSED PyObject *unused) { PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("init (wrapper)", 0); - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(__pyx_self); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(__pyx_self); /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UNUSED PyObject *__pyx_self) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self) { int __pyx_v_i; float *__pyx_v_x; float *__pyx_v_y; @@ -5055,7 +6724,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":478 + /* "trunk/gensim/models/doc2vec_inner.pyx":702 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -5065,7 +6734,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":479 + /* "trunk/gensim/models/doc2vec_inner.pyx":703 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -5075,7 +6744,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":480 + /* "trunk/gensim/models/doc2vec_inner.pyx":704 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -5084,7 +6753,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/doc2vec_inner.pyx":481 + /* "trunk/gensim/models/doc2vec_inner.pyx":705 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -5093,7 +6762,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_size = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":486 + /* "trunk/gensim/models/doc2vec_inner.pyx":710 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -5103,7 +6772,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN for (__pyx_t_3 = 
0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":487 + /* "trunk/gensim/models/doc2vec_inner.pyx":711 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -5112,7 +6781,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/doc2vec_inner.pyx":488 + /* "trunk/gensim/models/doc2vec_inner.pyx":712 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -5122,7 +6791,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":491 + /* "trunk/gensim/models/doc2vec_inner.pyx":715 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -5131,7 +6800,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":492 + /* "trunk/gensim/models/doc2vec_inner.pyx":716 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, 
&ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -5140,7 +6809,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/doc2vec_inner.pyx":493 + /* "trunk/gensim/models/doc2vec_inner.pyx":717 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -5150,7 +6819,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":494 + /* "trunk/gensim/models/doc2vec_inner.pyx":718 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -5159,7 +6828,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double; - /* "trunk/gensim/models/doc2vec_inner.pyx":495 + /* "trunk/gensim/models/doc2vec_inner.pyx":719 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -5168,7 +6837,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":496 + /* "trunk/gensim/models/doc2vec_inner.pyx":720 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -5181,7 +6850,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":497 + /* "trunk/gensim/models/doc2vec_inner.pyx":721 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -5191,7 +6860,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":498 + /* "trunk/gensim/models/doc2vec_inner.pyx":722 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -5200,7 +6869,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float; - /* "trunk/gensim/models/doc2vec_inner.pyx":499 + /* "trunk/gensim/models/doc2vec_inner.pyx":723 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -5209,7 +6878,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":500 + /* "trunk/gensim/models/doc2vec_inner.pyx":724 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -5223,7 +6892,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":504 + /* "trunk/gensim/models/doc2vec_inner.pyx":728 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -5232,7 +6901,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":505 + /* "trunk/gensim/models/doc2vec_inner.pyx":729 * # actually, the BLAS is so messed up we'll probably have segfaulted above and 
never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -5241,7 +6910,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":506 + /* "trunk/gensim/models/doc2vec_inner.pyx":730 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -5254,7 +6923,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":468 + /* "trunk/gensim/models/doc2vec_inner.pyx":692 * * * def init(): # <<<<<<<<<<<<<< @@ -5425,7 +7094,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5465,7 +7134,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * * info.buf = PyArray_DATA(self) */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -5742,7 +7411,7 @@ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, P * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -6554,7 +8223,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -6606,7 +8275,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__12, NULL); if 
(unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_3); __Pyx_Raise(__pyx_t_3, 0, 0, 0); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -6707,7 +8376,7 @@ static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_Raise(__pyx_t_4, 0, 0, 0); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; @@ -7302,16 +8971,20 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, {&__pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_k_Users_scratch_Documents_dev2015, sizeof(__pyx_k_Users_scratch_Documents_dev2015), 0, 0, 1, 0}, {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, + {&__pyx_kp_s__5, __pyx_k__5, sizeof(__pyx_k__5), 0, 0, 1, 0}, {&__pyx_n_s_alpha, __pyx_k_alpha, sizeof(__pyx_k_alpha), 0, 0, 1, 1}, {&__pyx_n_s_alpha_2, __pyx_k_alpha_2, sizeof(__pyx_k_alpha_2), 0, 0, 1, 1}, {&__pyx_n_s_cbow_mean, __pyx_k_cbow_mean, sizeof(__pyx_k_cbow_mean), 0, 0, 1, 1}, {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, + {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, {&__pyx_n_s_cpointer, __pyx_k_cpointer, sizeof(__pyx_k_cpointer), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, 
sizeof(__pyx_k_d_res), 0, 0, 1, 1}, + {&__pyx_n_s_dm_lbl_count, __pyx_k_dm_lbl_count, sizeof(__pyx_k_dm_lbl_count), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, + {&__pyx_n_s_expected_lbl_length, __pyx_k_expected_lbl_length, sizeof(__pyx_k_expected_lbl_length), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, @@ -7320,6 +8993,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_index, __pyx_k_index, sizeof(__pyx_k_index), 0, 0, 1, 1}, {&__pyx_n_s_indexes, __pyx_k_indexes, sizeof(__pyx_k_indexes), 0, 0, 1, 1}, {&__pyx_n_s_init, __pyx_k_init, sizeof(__pyx_k_init), 0, 0, 1, 1}, + {&__pyx_n_s_inv_count, __pyx_k_inv_count, sizeof(__pyx_k_inv_count), 0, 0, 1, 1}, {&__pyx_n_s_item, __pyx_k_item, sizeof(__pyx_k_item), 0, 0, 1, 1}, {&__pyx_n_s_j, __pyx_k_j, sizeof(__pyx_k_j), 0, 0, 1, 1}, {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, @@ -7330,8 +9004,15 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_lbl_length, __pyx_k_lbl_length, sizeof(__pyx_k_lbl_length), 0, 0, 1, 1}, {&__pyx_n_s_lbl_points, __pyx_k_lbl_points, sizeof(__pyx_k_lbl_points), 0, 0, 1, 1}, {&__pyx_n_s_lbls, __pyx_k_lbls, sizeof(__pyx_k_lbls), 0, 0, 1, 1}, + {&__pyx_n_s_learn_hidden, __pyx_k_learn_hidden, sizeof(__pyx_k_learn_hidden), 0, 0, 1, 1}, + {&__pyx_n_s_learn_lbls, __pyx_k_learn_lbls, sizeof(__pyx_k_learn_lbls), 0, 0, 1, 1}, + {&__pyx_n_s_learn_lbls_2, __pyx_k_learn_lbls_2, sizeof(__pyx_k_learn_lbls_2), 0, 0, 1, 1}, + {&__pyx_n_s_learn_words, __pyx_k_learn_words, sizeof(__pyx_k_learn_words), 0, 0, 1, 1}, + {&__pyx_n_s_learn_words_2, __pyx_k_learn_words_2, sizeof(__pyx_k_learn_words_2), 0, 0, 1, 1}, + {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1}, 
{&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, {&__pyx_n_s_model, __pyx_k_model, sizeof(__pyx_k_model), 0, 0, 1, 1}, + {&__pyx_n_s_n, __pyx_k_n, sizeof(__pyx_k_n), 0, 0, 1, 1}, {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, {&__pyx_n_s_negative, __pyx_k_negative, sizeof(__pyx_k_negative), 0, 0, 1, 1}, @@ -7339,6 +9020,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_neu1_2, __pyx_k_neu1_2, sizeof(__pyx_k_neu1_2), 0, 0, 1, 1}, {&__pyx_n_s_next_random, __pyx_k_next_random, sizeof(__pyx_k_next_random), 0, 0, 1, 1}, {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_null_word_index, __pyx_k_null_word_index, sizeof(__pyx_k_null_word_index), 0, 0, 1, 1}, {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, {&__pyx_n_s_p_res, __pyx_k_p_res, sizeof(__pyx_k_p_res), 0, 0, 1, 1}, {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, @@ -7358,6 +9040,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_snrm2, __pyx_k_snrm2, sizeof(__pyx_k_snrm2), 0, 0, 1, 1}, {&__pyx_n_s_sscal, __pyx_k_sscal, sizeof(__pyx_k_sscal), 0, 0, 1, 1}, {&__pyx_n_s_syn0, __pyx_k_syn0, sizeof(__pyx_k_syn0), 0, 0, 1, 1}, + {&__pyx_n_s_syn0locks, __pyx_k_syn0locks, sizeof(__pyx_k_syn0locks), 0, 0, 1, 1}, {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_table, __pyx_k_table, sizeof(__pyx_k_table), 0, 0, 1, 1}, @@ -7365,13 +9048,19 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, {&__pyx_n_s_tl, __pyx_k_tl, sizeof(__pyx_k_tl), 0, 0, 1, 1}, {&__pyx_n_s_train_lbls, __pyx_k_train_lbls, 
sizeof(__pyx_k_train_lbls), 0, 0, 1, 1}, + {&__pyx_n_s_train_lbls_2, __pyx_k_train_lbls_2, sizeof(__pyx_k_train_lbls_2), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dbow, __pyx_k_train_sentence_dbow, sizeof(__pyx_k_train_sentence_dbow), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dm, __pyx_k_train_sentence_dm, sizeof(__pyx_k_train_sentence_dm), 0, 0, 1, 1}, + {&__pyx_n_s_train_sentence_dm_concat, __pyx_k_train_sentence_dm_concat, sizeof(__pyx_k_train_sentence_dm_concat), 0, 0, 1, 1}, {&__pyx_n_s_train_words, __pyx_k_train_words, sizeof(__pyx_k_train_words), 0, 0, 1, 1}, + {&__pyx_n_s_train_words_2, __pyx_k_train_words_2, sizeof(__pyx_k_train_words_2), 0, 0, 1, 1}, {&__pyx_n_s_trunk_gensim_models_doc2vec_inne, __pyx_k_trunk_gensim_models_doc2vec_inne, sizeof(__pyx_k_trunk_gensim_models_doc2vec_inne), 0, 0, 1, 1}, {&__pyx_n_s_tw, __pyx_k_tw, sizeof(__pyx_k_tw), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, + {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, + {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, + {&__pyx_n_s_window_indexes, __pyx_k_window_indexes, sizeof(__pyx_k_window_indexes), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, {&__pyx_n_s_work, __pyx_k_work, sizeof(__pyx_k_work), 0, 0, 1, 1}, {&__pyx_n_s_work_2, __pyx_k_work_2, sizeof(__pyx_k_work_2), 0, 0, 1, 1}, @@ -7380,8 +9069,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -7393,34 +9082,48 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":313 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/doc2vec_inner.pyx":409 + /* "trunk/gensim/models/doc2vec_inner.pyx":488 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); + /* "trunk/gensim/models/doc2vec_inner.pyx":621 + * table = (np.PyArray_DATA(model.table)) + * table_len = len(model.table) + * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< + * + * # convert Python structures to primitive types, so we can release the GIL + */ + __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":218 * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): @@ -7428,9 +9131,9 @@ static int __Pyx_InitCachedConstants(void) { * * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) */ - __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__5)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__5); - __Pyx_GIVEREF(__pyx_tuple__5); + __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__8); + __Pyx_GIVEREF(__pyx_tuple__8); /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":222 * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) @@ -7439,9 +9142,9 @@ static int __Pyx_InitCachedConstants(void) { * * info.buf = PyArray_DATA(self) */ - __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__6); - __Pyx_GIVEREF(__pyx_tuple__6); + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if 
(unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 222; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":260 * if ((descr.byteorder == c'>' and little_endian) or @@ -7450,9 +9153,9 @@ static int __Pyx_InitCachedConstants(void) { * if t == NPY_BYTE: f = "b" * elif t == NPY_UBYTE: f = "B" */ - __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__7); - __Pyx_GIVEREF(__pyx_tuple__7); + __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__10)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 260; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__10); + __Pyx_GIVEREF(__pyx_tuple__10); /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":802 * @@ -7461,9 +9164,9 @@ static int __Pyx_InitCachedConstants(void) { * * if ((child.byteorder == c'>' and little_endian) or */ - __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__8)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__8); - __Pyx_GIVEREF(__pyx_tuple__8); + __pyx_tuple__11 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":806 * if ((child.byteorder == c'>' and 
little_endian) or @@ -7472,9 +9175,9 @@ static int __Pyx_InitCachedConstants(void) { * # One could encode it in the format string and have Cython * # complain instead, BUT: < and > in format strings also imply */ - __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__9)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__9); - __Pyx_GIVEREF(__pyx_tuple__9); + __pyx_tuple__12 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__12)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 806; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__12); + __Pyx_GIVEREF(__pyx_tuple__12); /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":826 * t = child.type_num @@ -7483,45 +9186,57 @@ static int __Pyx_InitCachedConstants(void) { * * # Until ticket #99 is fixed, use integers to avoid warnings */ - __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__10)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__10); - __Pyx_GIVEREF(__pyx_tuple__10); + __pyx_tuple__13 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 826; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__13); + __Pyx_GIVEREF(__pyx_tuple__13); - /* "trunk/gensim/models/doc2vec_inner.pyx":272 + /* "trunk/gensim/models/doc2vec_inner.pyx":320 * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(35, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, 
__pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__11); - __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(7, 0, 35, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 272, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(37, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__14); + __Pyx_GIVEREF(__pyx_tuple__14); + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(7, 0, 37, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 320, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":441 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(41, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__13); - __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(8, 0, 41, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, 
__pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 364, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_train_words_2, __pyx_n_s_train_lbls_2, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_words_2, __pyx_n_s_learn_lbls_2, __pyx_n_s_learn_hidden, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__16); + __Pyx_GIVEREF(__pyx_tuple__16); + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(8, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 441, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":468 + /* "trunk/gensim/models/doc2vec_inner.pyx":570 + * + * + * def train_sentence_dm_concat(model, sentence, lbls, alpha, 
_work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_tuple__18 = PyTuple_Pack(45, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_words, __pyx_n_s_learn_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_hidden, __pyx_n_s_learn_lbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_expected_lbl_length, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__18); + __Pyx_GIVEREF(__pyx_tuple__18); + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(8, 0, 45, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 570, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + + /* "trunk/gensim/models/doc2vec_inner.pyx":692 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if 
(unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_tuple__15); - __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 468, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_tuple__20); + __Pyx_GIVEREF(__pyx_tuple__20); + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 692, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7813,48 +9528,60 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":272 + /* "trunk/gensim/models/doc2vec_inner.pyx":320 * * * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":441 + * + * + * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":570 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":468 + /* "trunk/gensim/models/doc2vec_inner.pyx":692 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":508 + /* "trunk/gensim/models/doc2vec_inner.pyx":732 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -7867,14 +9594,14 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 508; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":1 diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 156fa1c2db..88caea2259 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -12,7 +12,7 @@ import numpy as np cimport numpy as np from libc.math cimport exp -from libc.string cimport memset +from libc.string cimport memset, memcpy cdef extern from "voidptr.h": void* PyCObject_AsVoidPtr(object obj) @@ -81,7 +81,8 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con cdef void fast_sentence_dbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int tw, int tl) nogil: + const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int train_hidden, int train_inputs, + REAL_t *syn0locks) nogil: cdef long long a, b cdef long long row1 = word2_index * size, row2 @@ -96,17 +97,17 @@ cdef void fast_sentence_dbow_hs( f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if tw: + if train_hidden: our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - if tl: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + if train_inputs: + our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) cdef unsigned long long fast_sentence_dbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random, int tw, int tl) nogil: + unsigned long long next_random, int train_hidden, int train_inputs, REAL_t *syn0locks) nogil: cdef long long a cdef long long row1 = word2_index * size, 
row2 @@ -135,10 +136,10 @@ cdef unsigned long long fast_sentence_dbow_neg( f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - if tw: + if train_hidden: our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - if tl: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + if train_inputs: + our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) return next_random @@ -146,34 +147,17 @@ cdef unsigned long long fast_sentence_dbow_neg( cdef void fast_sentence_dm_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, int i, int j, int k, int cbow_mean, int lbl_length, int tw, int tl) nogil: + const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], + const REAL_t alpha, REAL_t *work, int i, int j, int k, int lbl_length, int learn_hidden, + int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: cdef long long a, b cdef long long row2 cdef REAL_t f, g, count, inv_count cdef int m - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) + # l1 already composed by caller, passed in as neu1 + memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error for b in 
range(codelens[i]): row2 = word_point[b] * size f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) @@ -182,27 +166,28 @@ cdef void fast_sentence_dm_hs( f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if tw: + if learn_hidden: our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - if tw: + if learn_words: for m in range(j, k): if m == i or codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - if tl: + our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) + if learn_lbls: for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) cdef unsigned long long fast_sentence_dm_neg( const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random, int lbl_length, int tw, int tl) nogil: + int i, int j, int k, unsigned long long next_random, int lbl_length, int learn_hidden, int learn_lbls, + int learn_words, REAL_t *syn0locks) nogil: cdef long long a cdef long long row2 @@ -211,28 +196,9 @@ cdef unsigned long long fast_sentence_dm_neg( cdef np.uint32_t target_index, word_index cdef int d, m + # l1 already composed by caller, passed in as neu1 + memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error word_index = indexes[i] - - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - 
our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - count += ONEF - our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) - for d in range(negative+1): if d == 0: target_index = word_index @@ -251,20 +217,102 @@ cdef unsigned long long fast_sentence_dm_neg( f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - if tw: + if learn_hidden: our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - if tw: + if learn_words: for m in range(j,k): if m == i or codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - if tl: + our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) + if learn_lbls: for m in range(lbl_length): if lbl_codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) + + return next_random + +cdef void fast_sentence_dmc_hs( + const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int layer1_size, const int vector_size, + const np.uint32_t window_indexes[MAX_SENTENCE_LEN], + const REAL_t alpha, REAL_t *work, const int lbl_length, const int window, + int learn_hidden, int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: + + cdef long long a, b + cdef long long row2 + cdef REAL_t f, g + cdef int m + + # l1 already composed by caller, passed in as neu1 + memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + for b in range(word_code_len): + row2 = word_point[b] * layer1_size + f = 
our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) + if f <= -MAX_EXP or f >= MAX_EXP: + continue + f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (1 - word_code[b] - f) * alpha + our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) + if learn_hidden: + our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) + if learn_lbls: + for m in range(lbl_length): + our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + &syn0[window_indexes[m] * vector_size], &ONE) + if learn_words: + for m in range(lbl_length, lbl_length + (2 * window)): + our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + &syn0[window_indexes[m] * vector_size], &ONE) + + +cdef unsigned long long fast_sentence_dmc_neg( + const int negative, np.uint32_t *table, unsigned long long table_len, + REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, + np.uint32_t window_indexes[MAX_SENTENCE_LEN], + const REAL_t alpha, REAL_t *work, const int predict_word_index, + const int lbl_length, const int window, unsigned long long next_random, + int learn_hidden, int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: + + cdef long long a + cdef long long row2 + cdef unsigned long long modulo = 281474976710655ULL + cdef REAL_t f, g, label + cdef np.uint32_t target_index + cdef int d, m + + # l1 already composed by caller, passed in as neu1 + memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + for d in range(negative+1): + if d == 0: + target_index = predict_word_index + label = ONEF + else: + target_index = table[(next_random >> 16) % table_len] + next_random = (next_random * 25214903917ULL + 11) & modulo + if target_index == predict_word_index: + continue + label = 0.0 + + row2 = target_index * layer1_size + f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) + if f <= -MAX_EXP or f >= MAX_EXP: + continue + f = EXP_TABLE[((f + MAX_EXP) * 
(EXP_TABLE_SIZE / MAX_EXP / 2))] + g = (label - f) * alpha + our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) + if learn_hidden: + our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + if learn_lbls: + for m in range(lbl_length): + our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, + &syn0[window_indexes[m] * vector_size], &ONE) + if learn_words: + for m in range(lbl_length, lbl_length + (2 * window)): + our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, + &syn0[window_indexes[m] * vector_size], &ONE) return next_random @@ -303,6 +351,9 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ cdef unsigned long long table_len cdef unsigned long long next_random + # lock some of syn0 against training + cdef REAL_t *syn0locks + if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -317,6 +368,8 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + syn0locks = np.PyArray_DATA(model.syn0locks) + for i in range(sentence_len): word = sentence[i] if word is None: @@ -347,26 +400,52 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ # release GIL & train on the sentence with nogil: - for j in range(lbl_length): - if lbl_codelens[j] == 0: + for i in range(sentence_len): + if codelens[i] == 0: continue - for i in range(sentence_len): - if codelens[i] == 0: - continue - if hs: - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], _alpha, work, tw, tl) - if negative: - next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], lbl_indexes[j], _alpha, work, next_random, tw, tl) + if tw: # simultaneous skip-gram wordvec-training + j = i - window + reduced_windows[i] + if j < 0: + j = 0 + k = i + window + 1 - reduced_windows[i] + if k > 
sentence_len: + k = sentence_len + for j in range(j, k): + if j == i or codelens[j] == 0: + continue + if hs: + # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], + _alpha, work, 1, 1, syn0locks) + if negative: + # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose + next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, + indexes[i], indexes[j], _alpha, work, next_random, + 1, 1, syn0locks) + + if tl: # docvec-training + for j in range(lbl_length): + if lbl_codelens[j] == 0: + continue + if hs: + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], + _alpha, work, 1, 1, syn0locks) + if negative: + next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, + indexes[i], lbl_indexes[j], _alpha, work, next_random, + 1, 1, syn0locks) return result -def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, train_lbls): +def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): cdef int hs = model.hs cdef int negative = model.negative - cdef int tw = train_words - cdef int tl = train_lbls + cdef int learn_words = _train_words + cdef int learn_lbls = _train_lbls + cdef int learn_hidden = True cdef int cbow_mean = model.cbow_mean + cdef REAL_t count, inv_count cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) cdef REAL_t *work @@ -383,7 +462,7 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, t cdef int lbl_length cdef int window = model.window - cdef int i, j, k + cdef int i, j, k, m cdef long result = 0 # For hierarchical softmax @@ -413,6 +492,8 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, t neu1 = np.PyArray_DATA(_neu1) sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + syn0locks = 
np.PyArray_DATA(model.syn0locks) + for i in range(sentence_len): word = sentence[i] if word is None: @@ -454,13 +535,156 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, train_words, t k = i + window + 1 - reduced_windows[i] if k > sentence_len: k = sentence_len + + # compose l1 (in neu1) + memset(neu1, 0, size * cython.sizeof(REAL_t)) + count = 0.0 + for m in range(j, k): + if m == i or codelens[m] == 0: + continue + else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + for m in range(lbl_length): + if lbl_codelens[m] == 0: + continue + else: + count += ONEF + our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + if cbow_mean and count > (0.5): + inv_count = ONEF/count + sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + if hs: - fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, size, indexes, - lbl_indexes, _alpha, work, i, j, k, cbow_mean, lbl_length, tw, tl) + fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, + size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, + learn_hidden, learn_lbls, learn_words, syn0locks) if negative: next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, - cbow_mean, next_random, lbl_length, tw, tl) + next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) + + return result + + +def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): + cdef int hs = model.hs + cdef int negative = model.negative + cdef int learn_hidden = True + cdef int learn_lbls = _learn_lbls + cdef int learn_words = _learn_words + + cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + cdef REAL_t *work + cdef REAL_t *neu1 + cdef REAL_t _alpha = alpha + cdef int layer1_size = model.layer1_size + cdef int vector_size = 
model.vector_size + + cdef int codelens[MAX_SENTENCE_LEN] + cdef int lbl_codelens[MAX_SENTENCE_LEN] + cdef np.uint32_t indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] + cdef int sentence_len + cdef int lbl_length + cdef int window = model.window + cdef int expected_lbl_length = model.dm_lbl_count + + cdef int i, j, k, m, n + cdef long result = 0 + cdef int null_word_index = model.vocab['\0'].index + + # For hierarchical softmax + cdef REAL_t *syn1 + cdef np.uint32_t *points[MAX_SENTENCE_LEN] + cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + cdef np.uint32_t *lbl_points[MAX_SENTENCE_LEN] + cdef np.uint8_t *lbl_codes[MAX_SENTENCE_LEN] + + # For negative sampling + cdef REAL_t *syn1neg + cdef np.uint32_t *table + cdef unsigned long long table_len + cdef unsigned long long next_random + + lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + if lbl_length != expected_lbl_length: + return 0 # skip doc without expected nmber of lbls + + if hs: + syn1 = (np.PyArray_DATA(model.syn1)) + + if negative: + syn1neg = (np.PyArray_DATA(model.syn1neg)) + table = (np.PyArray_DATA(model.table)) + table_len = len(model.table) + next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) + + # convert Python structures to primitive types, so we can release the GIL + work = np.PyArray_DATA(_work) + neu1 = np.PyArray_DATA(_neu1) + + # optional locking of some vactors against backprop-learnind + syn0locks = np.PyArray_DATA(model.syn0locks) + + sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + j = 0 + for i in range(sentence_len): + word = sentence[i] + if word is None: + # shrink sentence to leave out word + sentence_len = sentence_len - 1 + continue # leaving j unchanged + else: + indexes[j] = word.index + if hs: + codelens[j] = len(word.code) + codes[j] = np.PyArray_DATA(word.code) + points[j] = np.PyArray_DATA(word.point) + else: + codelens[j] = 1 + result += 1 + j = j + 1 + + for i in range(lbl_length): + word = lbls[i] + if word is None: + # 
no support for missing lbls where expected; skip sentence + return 0 + else: + window_indexes[i] = word.index + result += 1 + + # release GIL & train on the sentence + with nogil: + for i in range(sentence_len): + j = i - window # negative OK: will pad with null word + k = i + window + 1 # past sentence end OK: will pad with null word + + # compose l1 & clear work + n = lbl_length + for m in range(j, k): + if m == i: + continue + if m < 0 or m >= sentence_len: + window_indexes[n] = null_word_index + else: + window_indexes[n] = indexes[m] + n = n + 1 + for m in range(lbl_length + (2 * window)): + memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) + memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + + if hs: + fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, + layer1_size, vector_size, window_indexes, _alpha, + work, lbl_length, window, + learn_hidden, learn_lbls, learn_words, syn0locks) + if negative: + next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, + layer1_size, vector_size, window_indexes, _alpha, + work, indexes[i], lbl_length, window, + next_random, learn_hidden, learn_lbls, learn_words, syn0locks) return result From bc6287b21825a35f05da46fc6755d7f36c2c59ba Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 9 May 2015 03:28:00 -0700 Subject: [PATCH 11/49] parameters to support doc2vec inference modes --- gensim/models/word2vec.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 97bef01991..a180456e2b 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -149,30 +149,33 @@ def train_sentence_cbow(model, sentence, alpha, work=None, neu1=None): return len([word for word in sentence if word is not None]) -def train_sg_pair(model, word, word2, alpha, learn_hidden=True, learn_vectors=True): - if isinstance(word2, 
Vocab): - l1 = model.syn0[word2.index] - lock_factor = model.syn0locks[word2.index] - else: - l1 = word2 # passed-in candidate vector - lock_factor = 1.0 +def train_sg_pair(model, predict_word, context_token, alpha, learn_vectors=True, learn_hidden=True, + context_vectors=None, context_locks=None): + if context_vectors is None: + context_vectors = model.syn0 + if context_locks is None: + context_locks = model.syn0locks + + l1 = context_vectors[context_token.index] + lock_factor = context_locks[context_token.index] + neu1e = zeros(l1.shape) if model.hs: # work on the entire tree at once, to push as much work into numpy's C routines as possible (performance) - l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size + l2a = deepcopy(model.syn1[predict_word.point]) # 2d matrix, codelen x layer1_size fa = 1.0 / (1.0 + exp(-dot(l1, l2a.T))) # propagate hidden -> output - ga = (1 - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate + ga = (1 - predict_word.code - fa) * alpha # vector of error gradients multiplied by the learning rate if learn_hidden: - model.syn1[word.point] += outer(ga, l1) # learn hidden -> output + model.syn1[predict_word.point] += outer(ga, l1) # learn hidden -> output neu1e += dot(ga, l2a) # save error if model.negative: # use this word (label = 1) + `negative` other random words not from this sentence (label = 0) - word_indices = [word.index] + word_indices = [predict_word.index] while len(word_indices) < model.negative + 1: w = model.table[random.randint(model.table.shape[0])] - if w != word.index: + if w != predict_word.index: word_indices.append(w) l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size fb = 1. / (1. 
+ exp(-dot(l1, l2b.T))) # propagate hidden -> output @@ -181,11 +184,11 @@ def train_sg_pair(model, word, word2, alpha, learn_hidden=True, learn_vectors=Tr model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output neu1e += dot(gb, l2b) # save error if learn_vectors: - l1 += neu1e * lock_factor # learn input -> hidden (changes model.syn0[word2.index], if that is l1) + l1 += neu1e * lock_factor # learn input -> hidden (mutates model.syn0[word2.index], if that is l1) return neu1e -def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_hidden=True, learn_vectors=True): +def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=True, learn_hidden=True): neu1e = zeros(l1.shape) if model.hs: From de9eafb1c85ad9922db508f935d0ec1eca41b330 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 9 May 2015 03:30:02 -0700 Subject: [PATCH 12/49] train_sentence_* refactoring, parameterization to support inference via shared paths --- gensim/models/doc2vec.py | 299 +- gensim/models/doc2vec_inner.c | 5620 +++++++++++++++++-------------- gensim/models/doc2vec_inner.pyx | 502 +-- 3 files changed, 3500 insertions(+), 2921 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 8996a70b86..b01bf163a7 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -42,21 +42,25 @@ except ImportError: from Queue import Queue -from numpy import zeros, random, sum as np_sum, add as np_add, concatenate, repeat as np_repeat +from numpy import zeros, random, sum as np_sum, add as np_add, concatenate,\ + repeat as np_repeat, array, float32 as REAL, empty, ones from six import string_types logger = logging.getLogger(__name__) -from gensim import utils # utility fnc for pickling, common scipy operations etc +from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_sentence_sg try: - from 
gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat, FAST_VERSION + from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat,\ + FAST_VERSION except: # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=False, train_lbls=True): + def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ Update distributed bag of words model by training on a single sentence. @@ -67,24 +71,31 @@ def train_sentence_dbow(model, sentence, lbls, alpha, work=None, train_words=Fal examples, exactly as per Word2Vec skip-gram training. (Without this option, word vectors are neither consulted nor updated during DBOW doc vector training.) + If learn_words is True, training examples will cause word vectors to be + updated. If learn_hidden is True, training examples will update the internal + hidden layer weights. + This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from doc2vec_inner instead. 
""" - if train_words: - train_sentence_sg(model, sentence, alpha, work) - if train_lbls: - for label in lbls: - if label is None: + if train_words and learn_words: + train_sentence_sg(model, word_vocabs, alpha, work) + for doclbl in doclbl_vocabs: + if doclbl is None: + continue # OOV token => skip + for word in word_vocabs: + if word is None: continue # OOV word in the input sentence => skip - for word in sentence: - if word is None: - continue # OOV word in the input sentence => skip - train_sg_pair(model, word, label, alpha) + train_sg_pair(model, word, doclbl, alpha, learn_vectors=learn_doclbls, + learn_hidden=learn_hidden, context_vectors=doclbl_vectors, + context_locks=doclbl_locks) - return len([word for word in sentence if word is not None]) + return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): + def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ Update distributed memory model by training on a single sentence. @@ -95,30 +106,46 @@ def train_sentence_dm(model, sentence, lbls, alpha, work=None, neu1=None, train_ will use the optimized version from doc2vec_inner instead. 
""" - lbl_indices = [lbl.index for lbl in lbls if lbl is not None] - lbl_sum = np_sum(model.syn0[lbl_indices], axis=0) - lbl_len = len(lbl_indices) - - for pos, word in enumerate(sentence): + if word_vectors is None: + word_vectors = model.syn0 + if word_locks is None: + word_locks = model.syn0locks + if doclbl_vectors is None: + doclbl_vectors = model.syn0 + if doclbl_locks is None: + doclbl_locks = model.syn0locks + + doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] + doclbl_sum = np_sum(doclbl_vectors[doclbl_indices], axis=0) + doclbl_len = len(doclbl_indices) + + for pos, word in enumerate(word_vocabs): if word is None: continue # OOV word in the input sentence => skip reduced_window = random.randint(model.window) # `b` in the original doc2vec code start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start) + window_pos = enumerate(word_vocabs[start : pos + model.window + 1 - reduced_window], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.syn0[word2_indices], axis=0) + lbl_sum # 1 x layer1_size + l1 = np_sum(word_vectors[word2_indices], axis=0) + doclbl_sum # 1 x layer1_size if word2_indices and model.cbow_mean: - l1 /= (len(word2_indices) + lbl_len) - neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, train_words, train_words) - if train_lbls: - model.syn0[lbl_indices] += neu1e * model.syn0locks[lbl_indices] + l1 /= (len(word2_indices) + doclbl_len) + neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, learn_vectors=False, learn_hidden=True) + if learn_doclbls: + doclbl_vectors[doclbl_indices] += \ + neu1e * np_repeat(doclbl_locks[doclbl_indices],model.vector_size).reshape(-1,model.vector_size) + if learn_words: + word_vectors[word2_indices] += \ + neu1e * np_repeat(word_locks[word2_indices],model.vector_size).reshape(-1,model.vector_size) - 
return len([word for word in sentence if word is not None]) + return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, train_words=True, train_lbls=True): + def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ - Update distributed memory model by training on a single sentence. + Update distributed memory model by training on a single sentence, using a + concatenation of the context window word vectors (rather than a sum or average). The sentence is a list of Vocab objects (or None, where the corresponding word is not in the vocabulary. Called internally from `Doc2Vec.train()`. @@ -127,147 +154,53 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, work=None, neu1=None, will use the optimized version from doc2vec_inner instead. 
""" - lbl_indices = [lbl.index for lbl in lbls if lbl is not None] - if len(lbl_indices) != model.dm_lbl_count: - return 0 # skip doc without expected lbl(s) + if word_vectors is None: + word_vectors = model.syn0 + if word_locks is None: + word_locks = model.syn0locks + if doclbl_vectors is None: + doclbl_vectors = model.syn0 + if doclbl_locks is None: + doclbl_locks = model.syn0locks + + doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] + doclbl_len = len(doclbl_indices) + if doclbl_len != model.dm_lbl_count: + return 0 # skip doc without expected doclbl(s) null_word = model.vocab['\0'] pre_pad_count = model.window post_pad_count = model.window padded_sentence_indices = ( (pre_pad_count * [null_word.index]) # pre-padding - + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words + + [word.index for word in word_vocabs if word is not None] # elide out-of-Vocabulary words + (post_pad_count * [null_word.index]) # post-padding ) for pos in range(pre_pad_count, len(padded_sentence_indices) - post_pad_count): - l1_indices = ( - lbl_indices # doc vector(s) - + padded_sentence_indices[pos - pre_pad_count : pos] # preceding words + word_context_indices = ( + padded_sentence_indices[pos - pre_pad_count : pos] # preceding words + padded_sentence_indices[pos + 1 : pos + 1 + post_pad_count] # following words ) - word = model.vocab[model.index2word[padded_sentence_indices[pos]]] - l1 = model.syn0[l1_indices].ravel() # numpy advanced-indexing: copy; flatten to 1d - neu1e = train_cbow_pair(model, word, None, l1, alpha, True, False) - - neu1e = neu1e * np_repeat(model.syn0locks[l1_indices], model.vector_size) # respect any locks - - if not train_lbls: - # trim lbl indices/errors - l1_indices = l1_indices[len(lbl_indices):] - neu1e = neu1e[len(lbl_indices) * model.vector_size:] - if not train_words: - # trim word-vector indices/errors - l1_indices = l1_indices[:-model.window] - neu1e = neu1e[:-model.window * 
model.vector_size] - if l1_indices: - # if indices left to train, do so - np_add.at(model.syn0, l1_indices, neu1e.reshape(len(l1_indices), model.vector_size)) + word_context_len = len(word_context_indices) + predict_word = model.vocab[model.index2word[padded_sentence_indices[pos]]] + # numpy advanced-indexing copies; concatenate, flatten to 1d + l1 = concatenate((doclbl_vectors[doclbl_indices], word_vectors[word_context_indices])).ravel() + neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, learn_hidden=learn_hidden, learn_vectors=False) + + # filter by locks and shape for addition to source vectors + e_locks = concatenate((doclbl_locks[doclbl_indices], word_locks[word_context_indices])) + neu1e_r = (neu1e.reshape(-1,model.vector_size) + * np_repeat(e_locks,model.vector_size).reshape(-1,model.vector_size)) + + if learn_doclbls: + np_add.at(doclbl_vectors, doclbl_indices, neu1e_r[:doclbl_len]) + if learn_words: + np_add.at(word_vectors, word_context_indices, neu1e_r[doclbl_len:]) return len(padded_sentence_indices) - pre_pad_count - post_pad_count -def infer_vector_dbow(model, document, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Infer a vector for given post-bulk training document, in the 'dbow' model. - - Document should be a list of tokens. - - No cythonized alternative yet. - """ - if not hasattr(model, 'neg_labels'): - model.pretrain() - - vector = model.seeded_vector(' '.join(document)) - sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] - - for i in range(steps): - for word in sentence: - if word is None: - continue # OOV word in the input sentence => skip - neu1e = train_sg_pair(model, word, vector, alpha, False, False) - vector += neu1e - alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - - return vector - - -def infer_vector_dm(model, document, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Infer a vector representation for the given post-training document, in the 'dm' model. 
- - Document should be a list of tokens. - - No cythonized alternative yet. - """ - if not hasattr(model, 'neg_labels'): - model.pretrain() - - vector = model.seeded_vector(' '.join(document)) - sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] - - for i in range(steps): - - for pos, word in enumerate(sentence): - if word is None: - continue # OOV word in the input sentence => skip - reduced_window = random.randint(model.window) # `b` in the original doc2vec code - start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(sentence[start : pos + model.window + 1 - reduced_window], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.syn0[word2_indices], axis=0) + vector # 1 x layer1_size - if word2_indices and model.cbow_mean: - l1 /= (len(word2_indices) + 1) - neu1e = train_cbow_pair(model, word, None, l1, alpha, False, False) - vector += neu1e # learn input -> hidden - - alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - - return vector - - -def infer_vector_dm_concat(model, document, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Infer a vector representation for the given post-training document, in the 'dm_concat' model. - - Document should be a list of tokens. - - No cythonized alternative yet. 
- """ - if not hasattr(model, 'neg_labels'): - model.pretrain() - - vector = model.seeded_vector(' '.join(document)) - sentence = next(model._prepare_sentences([LabeledSentence(document, [])]))[0] - - null_word = model.vocab['\0'] - pre_pad_count = model.window - post_pad_count = model.window - padded_sentence_indices = ( - (pre_pad_count * [null_word.index]) # pre-padding - + [word.index for word in sentence if word is not None] # elide out-of-Vocabulary words - + (post_pad_count * [null_word.index]) # post-padding - ) - - for i in range(steps): - - for pos in range(pre_pad_count, len(padded_sentence_indices)-post_pad_count): - word = model.vocab[model.index2word[padded_sentence_indices[pos]]] - l1 = concatenate([ - [vector], # doc vector-in-training - model.syn0[padded_sentence_indices[pos - pre_pad_count : pos]], # preceding words - model.syn0[padded_sentence_indices[pos + 1 : pos + 1 + post_pad_count]], # following words - ]).ravel() - - neu1e = train_cbow_pair(model, word, None, l1, alpha, False, False) - - vector += neu1e[:model.vector_size] # train doc vector only - - alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - - return vector - - class LabeledSentence(object): """ A single labeled sentence = text item. 
@@ -390,31 +323,59 @@ def _vocab_from(sentences): def _prepare_sentences(self, sentences): for sentence in sentences: # avoid calling random_sample() where prob >= 1, to speed things up a little: - sampled = [self.vocab[word] for word in sentence.words - if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or - self.vocab[word].sample_probability >= random.random_sample())] - yield (sampled, [self.vocab[word] for word in sentence.labels if word in self.vocab]) + yield (self._tokens_to_vocabs(sentence.words), self._tokens_to_vocabs(sentence.labels, sample=False)) + + def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): + if source_dict is None: + source_dict = self.vocab + if sample: + return [source_dict[token] for token in tokens if token in source_dict + and (source_dict[token].sample_probability >= 1.0 or + source_dict[token].sample_probability >= random.random_sample())] + else: + return [source_dict[token] for token in tokens if token in source_dict] def _get_job_words(self, alpha, work, job, neu1): if self.sg: - return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, self.dbow_words, True) for sentence, lbls in job) + return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, train_words=self.dbow_words) + for sentence, lbls in job) elif self.dm_concat: - return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1, True, True) for sentence, lbls in job) + return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1) for sentence, lbls in job) else: - return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1, True, True) for sentence, lbls in job) + return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1) for sentence, lbls in job) - def infer_vector(self, document, alpha=0.025, min_alpha=0.0001, steps=50): + def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): """ Infer a vector for given post-bulk training document. 
- Document should be a list of tokens. + Document should be a list of (word) tokens. """ - if self.sg: - return infer_vector_dbow(self, document, alpha, min_alpha, steps) - elif self.dm_concat: - return infer_vector_dm_concat(self, document, alpha, min_alpha, steps) - else: - return infer_vector_dm(self, document, alpha, min_alpha, steps) + doclbl_vectors = empty((1, self.vector_size), dtype=REAL) + doclbl_vectors[0] = self.seeded_vector(' '.join(document)) + doclbl_locks = ones(1, dtype=REAL) + doclbl_vocabs = [Vocab(index=0) for doclbl in doclbl_vectors] + word_vocabs = self._tokens_to_vocabs(document) + + work = zeros(self.layer1_size, dtype=REAL) + if not self.sg: + neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) + + for i in range(steps): + if self.sg: + train_sentence_dbow(self, word_vocabs, doclbl_vocabs, alpha, work, + learn_words=False, learn_hidden=False, + doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + elif self.dm_concat: + train_sentence_dm_concat(self, word_vocabs, doclbl_vocabs, alpha, work, neu1, + learn_words=False, learn_hidden=False, + doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + else: + train_sentence_dm(self, word_vocabs, doclbl_vocabs, alpha, work, neu1, + learn_words=False, learn_hidden=False, + doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha + + return doclbl_vectors[0] def __str__(self): return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 8bc1db5caa..7eff9ea096 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -648,7 +648,7 @@ typedef npy_double __pyx_t_5numpy_double_t; */ typedef npy_longdouble __pyx_t_5numpy_longdouble_t; -/* "trunk/gensim/models/doc2vec_inner.pyx":23 +/* "trunk/gensim/models/doc2vec_inner.pyx":24 * * REAL = np.float32 * 
ctypedef np.float32_t REAL_t # <<<<<<<<<<<<<< @@ -715,7 +715,7 @@ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; */ typedef npy_cdouble __pyx_t_5numpy_complex_t; -/* "trunk/gensim/models/doc2vec_inner.pyx":27 +/* "trunk/gensim/models/doc2vec_inner.pyx":28 * DEF MAX_SENTENCE_LEN = 10000 * * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -724,7 +724,7 @@ typedef npy_cdouble __pyx_t_5numpy_complex_t; */ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_scopy_ptr)(int const *, float const *, int const *, float *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":28 +/* "trunk/gensim/models/doc2vec_inner.pyx":29 * * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -733,7 +733,7 @@ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_scopy_ptr)(int con */ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":29 +/* "trunk/gensim/models/doc2vec_inner.pyx":30 * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -742,7 +742,7 @@ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_saxpy_ptr)(int con */ typedef float (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr)(int const *, float const *, int const *, float const *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":30 
+/* "trunk/gensim/models/doc2vec_inner.pyx":31 * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -751,7 +751,7 @@ typedef float (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr)(int con */ typedef double (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr)(int const *, float const *, int const *, float const *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":31 +/* "trunk/gensim/models/doc2vec_inner.pyx":32 * ctypedef float (*sdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< @@ -760,7 +760,7 @@ typedef double (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr)(int c */ typedef double (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)(int const *, float const *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":32 +/* "trunk/gensim/models/doc2vec_inner.pyx":33 * ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< @@ -769,7 +769,7 @@ typedef double (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)(int c */ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)(int const *, float const *, float const *, int const *); -/* 
"trunk/gensim/models/doc2vec_inner.pyx":50 +/* "trunk/gensim/models/doc2vec_inner.pyx":51 * * # function implementations swapped based on BLAS detected * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -778,7 +778,7 @@ typedef void (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)(int con */ typedef __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t (*__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_our_dot_ptr)(int const *, float const *, int const *, float const *, int const *); -/* "trunk/gensim/models/doc2vec_inner.pyx":51 +/* "trunk/gensim/models/doc2vec_inner.pyx":52 * # function implementations swapped based on BLAS detected * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< @@ -907,6 +907,12 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, int wraparound, int boundscheck); +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); @@ -940,12 +946,6 @@ static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -static CYTHON_INLINE PyObject* 
__Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); #else @@ -1153,10 +1153,10 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, int, int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, int *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, int, unsigned PY_LONG_LONG, int, int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , __pyx_t_5numpy_uint32_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int const , unsigned PY_LONG_LONG, int, int, int, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.doc2vec_inner" int __pyx_module_is_main_trunk__gensim__models__doc2vec_inner = 0; @@ -1165,9 +1165,9 @@ static PyObject 
*__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__train_words, PyObject *__pyx_v__train_lbls); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__learn_words, PyObject *__pyx_v__learn_lbls); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject 
*__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ @@ -1197,26 +1197,24 @@ static char __pyx_k_Zg[] = "Zg"; static char __pyx_k__5[] = "\000"; static char __pyx_k_hs[] = "hs"; static char __pyx_k_np[] = "np"; -static char __pyx_k_tl[] = "tl"; -static char __pyx_k_tw[] = "tw"; static char __pyx_k_REAL[] = "REAL"; static char __pyx_k_code[] = "code"; static char __pyx_k_init[] = "init"; static char __pyx_k_item[] = "item"; -static char __pyx_k_lbls[] = "lbls"; static char __pyx_k_main[] = "__main__"; -static char __pyx_k_neu1[] = "_neu1"; +static char __pyx_k_neu1[] = "neu1"; static char __pyx_k_sdot[] = "sdot"; static char __pyx_k_size[] = "size"; static char __pyx_k_syn0[] = "syn0"; static char __pyx_k_syn1[] = "syn1"; static char __pyx_k_test[] = "__test__"; static char __pyx_k_word[] = "word"; -static char __pyx_k_work[] = "_work"; +static char __pyx_k_work[] = "work"; static char __pyx_k_alpha[] = 
"alpha"; static char __pyx_k_codes[] = "codes"; static char __pyx_k_count[] = "count"; static char __pyx_k_d_res[] = "d_res"; +static char __pyx_k_dtype[] = "dtype"; static char __pyx_k_fblas[] = "fblas"; static char __pyx_k_index[] = "index"; static char __pyx_k_model[] = "model"; @@ -1229,14 +1227,16 @@ static char __pyx_k_scopy[] = "scopy"; static char __pyx_k_snrm2[] = "snrm2"; static char __pyx_k_sscal[] = "sscal"; static char __pyx_k_table[] = "table"; +static char __pyx_k_token[] = "token"; static char __pyx_k_vocab[] = "vocab"; +static char __pyx_k_zeros[] = "zeros"; static char __pyx_k_import[] = "__import__"; -static char __pyx_k_neu1_2[] = "neu1"; +static char __pyx_k_neu1_2[] = "_neu1"; static char __pyx_k_points[] = "points"; static char __pyx_k_random[] = "random"; static char __pyx_k_result[] = "result"; static char __pyx_k_window[] = "window"; -static char __pyx_k_work_2[] = "work"; +static char __pyx_k_work_2[] = "_work"; static char __pyx_k_alpha_2[] = "_alpha"; static char __pyx_k_float32[] = "float32"; static char __pyx_k_indexes[] = "indexes"; @@ -1246,40 +1246,47 @@ static char __pyx_k_codelens[] = "codelens"; static char __pyx_k_cpointer[] = "_cpointer"; static char __pyx_k_expected[] = "expected"; static char __pyx_k_negative[] = "negative"; -static char __pyx_k_sentence[] = "sentence"; static char __pyx_k_cbow_mean[] = "cbow_mean"; static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_inv_count[] = "inv_count"; -static char __pyx_k_lbl_codes[] = "lbl_codes"; static char __pyx_k_syn0locks[] = "syn0locks"; static char __pyx_k_table_len[] = "table_len"; static char __pyx_k_ValueError[] = "ValueError"; -static char __pyx_k_lbl_length[] = "lbl_length"; -static char __pyx_k_lbl_points[] = "lbl_points"; -static char __pyx_k_learn_lbls[] = "_learn_lbls"; -static char __pyx_k_train_lbls[] = "train_lbls"; +static char __pyx_k_doclbl_len[] = "doclbl_len"; +static char __pyx_k_word_locks[] = "word_locks"; static char __pyx_k_layer1_size[] 
= "layer1_size"; -static char __pyx_k_lbl_indexes[] = "lbl_indexes"; -static char __pyx_k_learn_words[] = "_learn_words"; +static char __pyx_k_learn_words[] = "learn_words"; static char __pyx_k_next_random[] = "next_random"; static char __pyx_k_train_words[] = "train_words"; static char __pyx_k_vector_size[] = "vector_size"; +static char __pyx_k_word_vocabs[] = "word_vocabs"; static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static char __pyx_k_RuntimeError[] = "RuntimeError"; static char __pyx_k_dm_lbl_count[] = "dm_lbl_count"; -static char __pyx_k_lbl_codelens[] = "lbl_codelens"; +static char __pyx_k_doclbl_locks[] = "doclbl_locks"; static char __pyx_k_learn_hidden[] = "learn_hidden"; -static char __pyx_k_learn_lbls_2[] = "learn_lbls"; +static char __pyx_k_predict_word[] = "predict_word"; static char __pyx_k_sentence_len[] = "sentence_len"; -static char __pyx_k_train_lbls_2[] = "_train_lbls"; -static char __pyx_k_learn_words_2[] = "learn_words"; +static char __pyx_k_word_locks_2[] = "_word_locks"; +static char __pyx_k_word_vectors[] = "word_vectors"; +static char __pyx_k_context_token[] = "context_token"; +static char __pyx_k_doclbl_vocabs[] = "doclbl_vocabs"; +static char __pyx_k_learn_doclbls[] = "learn_doclbls"; +static char __pyx_k_learn_words_2[] = "_learn_words"; static char __pyx_k_train_words_2[] = "_train_words"; +static char __pyx_k_doclbl_indexes[] = "doclbl_indexes"; +static char __pyx_k_doclbl_locks_2[] = "_doclbl_locks"; +static char __pyx_k_doclbl_vectors[] = "doclbl_vectors"; +static char __pyx_k_learn_hidden_2[] = "_learn_hidden"; static char __pyx_k_window_indexes[] = "window_indexes"; +static char __pyx_k_word_vectors_2[] = "_word_vectors"; +static char __pyx_k_learn_doclbls_2[] = "_learn_doclbls"; static char __pyx_k_null_word_index[] = "null_word_index"; static char __pyx_k_reduced_windows[] = "reduced_windows"; +static char __pyx_k_doclbl_vectors_2[] = "_doclbl_vectors"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; 
static char __pyx_k_train_sentence_dm[] = "train_sentence_dm"; -static char __pyx_k_expected_lbl_length[] = "expected_lbl_length"; +static char __pyx_k_expected_doclbl_len[] = "expected_doclbl_len"; static char __pyx_k_train_sentence_dbow[] = "train_sentence_dbow"; static char __pyx_k_train_sentence_dm_concat[] = "train_sentence_dm_concat"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; @@ -1305,13 +1312,22 @@ static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; static PyObject *__pyx_n_s_codes; +static PyObject *__pyx_n_s_context_token; static PyObject *__pyx_n_s_count; static PyObject *__pyx_n_s_cpointer; static PyObject *__pyx_n_s_d_res; static PyObject *__pyx_n_s_dm_lbl_count; +static PyObject *__pyx_n_s_doclbl_indexes; +static PyObject *__pyx_n_s_doclbl_len; +static PyObject *__pyx_n_s_doclbl_locks; +static PyObject *__pyx_n_s_doclbl_locks_2; +static PyObject *__pyx_n_s_doclbl_vectors; +static PyObject *__pyx_n_s_doclbl_vectors_2; +static PyObject *__pyx_n_s_doclbl_vocabs; +static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; -static PyObject *__pyx_n_s_expected_lbl_length; +static PyObject *__pyx_n_s_expected_doclbl_len; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_hs; @@ -1325,15 +1341,10 @@ static PyObject *__pyx_n_s_item; static PyObject *__pyx_n_s_j; static PyObject *__pyx_n_s_k; static PyObject *__pyx_n_s_layer1_size; -static PyObject *__pyx_n_s_lbl_codelens; -static PyObject *__pyx_n_s_lbl_codes; -static PyObject *__pyx_n_s_lbl_indexes; -static PyObject *__pyx_n_s_lbl_length; -static PyObject *__pyx_n_s_lbl_points; -static PyObject *__pyx_n_s_lbls; +static PyObject *__pyx_n_s_learn_doclbls; +static PyObject *__pyx_n_s_learn_doclbls_2; static PyObject *__pyx_n_s_learn_hidden; -static PyObject *__pyx_n_s_learn_lbls; -static PyObject *__pyx_n_s_learn_lbls_2; 
+static PyObject *__pyx_n_s_learn_hidden_2; static PyObject *__pyx_n_s_learn_words; static PyObject *__pyx_n_s_learn_words_2; static PyObject *__pyx_n_s_m; @@ -1352,6 +1363,7 @@ static PyObject *__pyx_n_s_numpy; static PyObject *__pyx_n_s_p_res; static PyObject *__pyx_n_s_point; static PyObject *__pyx_n_s_points; +static PyObject *__pyx_n_s_predict_word; static PyObject *__pyx_n_s_randint; static PyObject *__pyx_n_s_random; static PyObject *__pyx_n_s_range; @@ -1361,7 +1373,6 @@ static PyObject *__pyx_n_s_saxpy; static PyObject *__pyx_n_s_scipy_linalg_blas; static PyObject *__pyx_n_s_scopy; static PyObject *__pyx_n_s_sdot; -static PyObject *__pyx_n_s_sentence; static PyObject *__pyx_n_s_sentence_len; static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_snrm2; @@ -1373,26 +1384,29 @@ static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_table; static PyObject *__pyx_n_s_table_len; static PyObject *__pyx_n_s_test; -static PyObject *__pyx_n_s_tl; -static PyObject *__pyx_n_s_train_lbls; -static PyObject *__pyx_n_s_train_lbls_2; +static PyObject *__pyx_n_s_token; static PyObject *__pyx_n_s_train_sentence_dbow; static PyObject *__pyx_n_s_train_sentence_dm; static PyObject *__pyx_n_s_train_sentence_dm_concat; static PyObject *__pyx_n_s_train_words; static PyObject *__pyx_n_s_train_words_2; static PyObject *__pyx_n_s_trunk_gensim_models_doc2vec_inne; -static PyObject *__pyx_n_s_tw; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; static PyObject *__pyx_n_s_vector_size; static PyObject *__pyx_n_s_vocab; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_window_indexes; static PyObject *__pyx_n_s_word; +static PyObject *__pyx_n_s_word_locks; +static PyObject *__pyx_n_s_word_locks_2; +static PyObject *__pyx_n_s_word_vectors; +static PyObject *__pyx_n_s_word_vectors_2; +static PyObject *__pyx_n_s_word_vocabs; static PyObject *__pyx_n_s_work; static PyObject *__pyx_n_s_work_2; static PyObject *__pyx_n_s_x; static PyObject *__pyx_n_s_y; 
+static PyObject *__pyx_n_s_zeros; static PyObject *__pyx_int_0; static PyObject *__pyx_int_1; static PyObject *__pyx_int_2; @@ -1418,7 +1432,7 @@ static PyObject *__pyx_codeobj__17; static PyObject *__pyx_codeobj__19; static PyObject *__pyx_codeobj__21; -/* "trunk/gensim/models/doc2vec_inner.pyx":57 +/* "trunk/gensim/models/doc2vec_inner.pyx":58 * * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1429,7 +1443,7 @@ static PyObject *__pyx_codeobj__21; static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; - /* "trunk/gensim/models/doc2vec_inner.pyx":58 + /* "trunk/gensim/models/doc2vec_inner.pyx":59 * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: * return dsdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< @@ -1439,7 +1453,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_r = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":57 + /* "trunk/gensim/models/doc2vec_inner.pyx":58 * * # for when fblas.sdot returns a double * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1452,7 +1466,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":61 +/* "trunk/gensim/models/doc2vec_inner.pyx":62 * * # for when 
fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1463,7 +1477,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; - /* "trunk/gensim/models/doc2vec_inner.pyx":62 + /* "trunk/gensim/models/doc2vec_inner.pyx":63 * # for when fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: * return sdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< @@ -1473,7 +1487,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_r = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":61 + /* "trunk/gensim/models/doc2vec_inner.pyx":62 * * # for when fblas.sdot returns a float * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1486,7 +1500,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":65 +/* "trunk/gensim/models/doc2vec_inner.pyx":66 * * # for when no blas available * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1500,7 +1514,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_r; int 
__pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":69 + /* "trunk/gensim/models/doc2vec_inner.pyx":70 * cdef int i * cdef REAL_t a * a = 0.0 # <<<<<<<<<<<<<< @@ -1509,7 +1523,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen */ __pyx_v_a = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":70 + /* "trunk/gensim/models/doc2vec_inner.pyx":71 * cdef REAL_t a * a = 0.0 * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< @@ -1519,7 +1533,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_t_1 = (__pyx_v_N[0]); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { - /* "trunk/gensim/models/doc2vec_inner.pyx":71 + /* "trunk/gensim/models/doc2vec_inner.pyx":72 * a = 0.0 * for i from 0 <= i < N[0] by 1: * a += X[i] * Y[i] # <<<<<<<<<<<<<< @@ -1529,7 +1543,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_v_a = (__pyx_v_a + ((__pyx_v_X[__pyx_v_i]) * (__pyx_v_Y[__pyx_v_i]))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":72 + /* "trunk/gensim/models/doc2vec_inner.pyx":73 * for i from 0 <= i < N[0] by 1: * a += X[i] * Y[i] * return a # <<<<<<<<<<<<<< @@ -1539,7 +1553,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen __pyx_r = __pyx_v_a; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":65 + /* "trunk/gensim/models/doc2vec_inner.pyx":66 * * # for when no blas available * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1552,7 +1566,7 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":75 +/* "trunk/gensim/models/doc2vec_inner.pyx":76 * * # for when no blas available * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const 
int *incY) nogil: # <<<<<<<<<<<<<< @@ -1564,7 +1578,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int int __pyx_v_i; int __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":77 + /* "trunk/gensim/models/doc2vec_inner.pyx":78 * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: * cdef int i * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< @@ -1574,7 +1588,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int __pyx_t_1 = (__pyx_v_N[0]); for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { - /* "trunk/gensim/models/doc2vec_inner.pyx":78 + /* "trunk/gensim/models/doc2vec_inner.pyx":79 * cdef int i * for i from 0 <= i < N[0] by 1: * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] # <<<<<<<<<<<<<< @@ -1584,7 +1598,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))]) = (((__pyx_v_alpha[0]) * (__pyx_v_X[(__pyx_v_i * (__pyx_v_incX[0]))])) + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))])); } - /* "trunk/gensim/models/doc2vec_inner.pyx":75 + /* "trunk/gensim/models/doc2vec_inner.pyx":76 * * # for when no blas available * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< @@ -1595,15 +1609,15 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":81 +/* "trunk/gensim/models/doc2vec_inner.pyx":82 * * * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, + * REAL_t *context_vectors, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t 
const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_train_hidden, int __pyx_v_train_inputs, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1614,16 +1628,16 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":88 + /* "trunk/gensim/models/doc2vec_inner.pyx":89 * * cdef long long a, b - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< + * cdef long long row1 = context_index * size, row2 # <<<<<<<<<<<<<< * cdef REAL_t f, g * */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); + __pyx_v_row1 = (__pyx_v_context_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":91 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":92 * cdef REAL_t f, g * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1632,38 +1646,38 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":92 + /* "trunk/gensim/models/doc2vec_inner.pyx":93 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): # <<<<<<<<<<<<<< * row2 = word_point[b] * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) */ __pyx_t_1 = __pyx_v_codelen; for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":93 + /* "trunk/gensim/models/doc2vec_inner.pyx":94 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":94 + /* "trunk/gensim/models/doc2vec_inner.pyx":95 * for b in range(codelen): * row2 = word_point[b] * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), 
(&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":95 + /* "trunk/gensim/models/doc2vec_inner.pyx":96 * row2 = word_point[b] * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1679,8 +1693,8 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":96 - * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":97 + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1689,7 +1703,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":97 + /* "trunk/gensim/models/doc2vec_inner.pyx":98 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -1698,90 +1712,90 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":98 + /* "trunk/gensim/models/doc2vec_inner.pyx":99 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if train_hidden: + * if learn_hidden: */ __pyx_v_g = (((1 - 
(__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":99 + /* "trunk/gensim/models/doc2vec_inner.pyx":100 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":100 + /* "trunk/gensim/models/doc2vec_inner.pyx":101 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if train_hidden: # <<<<<<<<<<<<<< - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if train_inputs: + * if learn_hidden: # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) + * if learn_context: */ - __pyx_t_3 = (__pyx_v_train_hidden != 0); + __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":101 + /* "trunk/gensim/models/doc2vec_inner.pyx":102 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if train_inputs: - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * if learn_context: + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), 
(&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L8; } __pyx_L8:; __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":102 - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if train_inputs: # <<<<<<<<<<<<<< - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":103 + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) + * if learn_context: # <<<<<<<<<<<<<< + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) * */ - __pyx_t_3 = (__pyx_v_train_inputs != 0); + __pyx_t_3 = (__pyx_v_learn_context != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":103 - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if train_inputs: - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":104 + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) + * if learn_context: + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), 
(&(__pyx_v_context_locks[__pyx_v_context_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L9; } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":81 + /* "trunk/gensim/models/doc2vec_inner.pyx":82 * * * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, + * REAL_t *context_vectors, REAL_t *syn1, const int size, */ /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":106 +/* "trunk/gensim/models/doc2vec_inner.pyx":107 * * * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, + * REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_train_hidden, int __pyx_v_train_inputs, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned 
PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; @@ -1796,25 +1810,25 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/doc2vec_inner.pyx":113 + /* "trunk/gensim/models/doc2vec_inner.pyx":114 * * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< + * cdef long long row1 = context_index * size, row2 # <<<<<<<<<<<<<< * cdef unsigned long long modulo = 281474976710655ULL * cdef REAL_t f, g, label */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); + __pyx_v_row1 = (__pyx_v_context_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":114 + /* "trunk/gensim/models/doc2vec_inner.pyx":115 * cdef long long a - * cdef long long row1 = word2_index * size, row2 + * cdef long long row1 = context_index * size, row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< * cdef REAL_t f, g, label * cdef np.uint32_t target_index */ __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":119 + /* "trunk/gensim/models/doc2vec_inner.pyx":120 * cdef int d * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1823,7 +1837,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ 
memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":121 + /* "trunk/gensim/models/doc2vec_inner.pyx":122 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -1834,7 +1848,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":122 + /* "trunk/gensim/models/doc2vec_inner.pyx":123 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -1844,7 +1858,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":123 + /* "trunk/gensim/models/doc2vec_inner.pyx":124 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -1853,7 +1867,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":124 + /* "trunk/gensim/models/doc2vec_inner.pyx":125 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -1865,7 +1879,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":126 + /* "trunk/gensim/models/doc2vec_inner.pyx":127 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -1874,7 +1888,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":127 + /* "trunk/gensim/models/doc2vec_inner.pyx":128 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = 
(next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -1883,7 +1897,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":128 + /* "trunk/gensim/models/doc2vec_inner.pyx":129 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -1893,48 +1907,48 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":129 + /* "trunk/gensim/models/doc2vec_inner.pyx":130 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 - * + * row2 = target_index * size */ goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":130 + /* "trunk/gensim/models/doc2vec_inner.pyx":131 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< - * * row2 = target_index * size + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) */ __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); } __pyx_L5:; /* "trunk/gensim/models/doc2vec_inner.pyx":132 + * continue * label = 0.0 - * * row2 = target_index * size # <<<<<<<<<<<<<< - * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); /* "trunk/gensim/models/doc2vec_inner.pyx":133 - * + * label = 0.0 * row2 = target_index * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &context_vectors[row1], &ONE, 
&syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); /* "trunk/gensim/models/doc2vec_inner.pyx":134 * row2 = target_index * size - * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1951,7 +1965,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast if (__pyx_t_3) { /* "trunk/gensim/models/doc2vec_inner.pyx":135 - * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1974,7 +1988,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if train_hidden: + * if learn_hidden: */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); @@ -1982,29 +1996,29 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * 
our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); /* "trunk/gensim/models/doc2vec_inner.pyx":139 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if train_hidden: # <<<<<<<<<<<<<< - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if train_inputs: + * if learn_hidden: # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) + * if learn_context: */ - __pyx_t_3 = (__pyx_v_train_hidden != 0); + __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { /* "trunk/gensim/models/doc2vec_inner.pyx":140 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if train_inputs: - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * if learn_context: + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_context_vectors[__pyx_v_row1])), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L10; } __pyx_L10:; @@ -2012,29 +2026,29 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /* "trunk/gensim/models/doc2vec_inner.pyx":141 - * if train_hidden: - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if train_inputs: # <<<<<<<<<<<<<< - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + * if learn_hidden: + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) + * if learn_context: # <<<<<<<<<<<<<< + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) * */ - __pyx_t_3 = (__pyx_v_train_inputs != 0); + __pyx_t_3 = (__pyx_v_learn_context != 0); if (__pyx_t_3) { /* "trunk/gensim/models/doc2vec_inner.pyx":142 - * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if train_inputs: - * our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) + * if learn_context: + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_context_locks[__pyx_v_context_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_context_vectors[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L11; } __pyx_L11:; /* "trunk/gensim/models/doc2vec_inner.pyx":144 - * 
our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -2043,12 +2057,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":106 + /* "trunk/gensim/models/doc2vec_inner.pyx":107 * * * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, + * REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ /* function exit code */ @@ -2060,53 +2074,42 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * * * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, + * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5numpy_uint32_t const *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_lbl_length, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - int __pyx_v_m; int __pyx_t_1; PY_LONG_LONG __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":160 - * - * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":161 + /* "trunk/gensim/models/doc2vec_inner.pyx":158 * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - * for b in range(codelens[i]): # <<<<<<<<<<<<<< + * # work (also passed in) will accumulate l1 error + * for b in range(word_code_len): # <<<<<<<<<<<<<< * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); + __pyx_t_1 = 
__pyx_v_word_code_len; for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":162 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - * for b in range(codelens[i]): + /* "trunk/gensim/models/doc2vec_inner.pyx":159 + * # work (also passed in) will accumulate l1 error + * for b in range(word_code_len): * row2 = word_point[b] * size # <<<<<<<<<<<<<< * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":163 - * for b in range(codelens[i]): + /* "trunk/gensim/models/doc2vec_inner.pyx":160 + * for b in range(word_code_len): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: @@ -2114,7 +2117,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":164 + /* "trunk/gensim/models/doc2vec_inner.pyx":161 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2132,7 +2135,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":165 + /* "trunk/gensim/models/doc2vec_inner.pyx":162 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2142,7 +2145,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ goto __pyx_L3_continue; } - /* 
"trunk/gensim/models/doc2vec_inner.pyx":166 + /* "trunk/gensim/models/doc2vec_inner.pyx":163 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2151,7 +2154,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":167 + /* "trunk/gensim/models/doc2vec_inner.pyx":164 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2160,7 +2163,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":168 + /* "trunk/gensim/models/doc2vec_inner.pyx":165 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2169,22 +2172,22 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":169 + /* "trunk/gensim/models/doc2vec_inner.pyx":166 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_words: + * */ __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":170 + /* "trunk/gensim/models/doc2vec_inner.pyx":167 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * if 
learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if learn_words: - * for m in range(j, k): + * + * */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L8; @@ -2193,224 +2196,81 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":171 - * if learn_hidden: - * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_words: # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_learn_words != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":172 - * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_words: - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_5 = __pyx_v_j; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":173 - * if learn_words: - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L13_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L13_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":174 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) - */ - goto __pyx_L10_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":176 - * continue - * else: - * our_saxpy(&size, 
&syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< - * if learn_lbls: - * for m in range(lbl_length): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L10_continue:; - } - goto __pyx_L9; - } - __pyx_L9:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":177 - * else: - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) - * if learn_lbls: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_learn_lbls != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":178 - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) - * if learn_lbls: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":179 - * if learn_lbls: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":180 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - */ - goto __pyx_L16_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":182 - * continue - * else: - * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * - * - */ - 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_lbl_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L16_continue:; - } - goto __pyx_L15; - } - __pyx_L15:; - /* "trunk/gensim/models/doc2vec_inner.pyx":147 * * * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, + * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":185 +/* "trunk/gensim/models/doc2vec_inner.pyx":170 * * * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, int *__pyx_v_lbl_codelens, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t *__pyx_v_indexes, 
__pyx_t_5numpy_uint32_t *__pyx_v_lbl_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_lbl_length, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; __pyx_t_5numpy_uint32_t __pyx_v_target_index; - __pyx_t_5numpy_uint32_t __pyx_v_word_index; int __pyx_v_d; - int __pyx_v_m; unsigned PY_LONG_LONG __pyx_r; long __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":194 - * cdef long long a + /* "trunk/gensim/models/doc2vec_inner.pyx":176 + * * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label - * cdef np.uint32_t target_index, word_index + * cdef REAL_t f, g, label + * cdef np.uint32_t target_index */ __pyx_v_modulo = 281474976710655ULL; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":200 - * - * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< - * word_index = indexes[i] - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":201 + /* "trunk/gensim/models/doc2vec_inner.pyx":183 * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - * word_index = indexes[i] # <<<<<<<<<<<<<< - * for d in range(negative+1): - * if d == 0: - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - - /* "trunk/gensim/models/doc2vec_inner.pyx":202 - * memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - * word_index = indexes[i] + * # work (also passsed in) will accumulate l1 error for outside application * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: - * target_index = word_index + * target_index = predict_word_index */ __pyx_t_1 = (__pyx_v_negative + 1); for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":203 - * word_index = indexes[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":184 + * # work (also passsed in) will accumulate l1 error for outside application * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index + * target_index = predict_word_index * label = ONEF */ __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":204 + /* "trunk/gensim/models/doc2vec_inner.pyx":185 * for d in range(negative+1): * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< + * target_index = predict_word_index # <<<<<<<<<<<<<< * label = ONEF * else: */ - __pyx_v_target_index = __pyx_v_word_index; + __pyx_v_target_index = 
__pyx_v_predict_word_index; - /* "trunk/gensim/models/doc2vec_inner.pyx":205 + /* "trunk/gensim/models/doc2vec_inner.pyx":186 * if d == 0: - * target_index = word_index + * target_index = predict_word_index * label = ONEF # <<<<<<<<<<<<<< * else: * target_index = table[(next_random >> 16) % table_len] @@ -2420,37 +2280,37 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":207 + /* "trunk/gensim/models/doc2vec_inner.pyx":188 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: + * if target_index == predict_word_index: */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":208 + /* "trunk/gensim/models/doc2vec_inner.pyx":189 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: + * if target_index == predict_word_index: * continue */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":209 + /* "trunk/gensim/models/doc2vec_inner.pyx":190 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< + * if target_index == predict_word_index: # <<<<<<<<<<<<<< * continue * label = 0.0 */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_predict_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":210 + /* "trunk/gensim/models/doc2vec_inner.pyx":191 * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == 
word_index: + * if target_index == predict_word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * @@ -2458,8 +2318,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":211 - * if target_index == word_index: + /* "trunk/gensim/models/doc2vec_inner.pyx":192 + * if target_index == predict_word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< * @@ -2469,7 +2329,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":213 + /* "trunk/gensim/models/doc2vec_inner.pyx":194 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2478,7 +2338,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":214 + /* "trunk/gensim/models/doc2vec_inner.pyx":195 * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2487,7 +2347,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":215 + /* "trunk/gensim/models/doc2vec_inner.pyx":196 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2505,7 +2365,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":216 + /* "trunk/gensim/models/doc2vec_inner.pyx":197 * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= 
MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2515,7 +2375,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":217 + /* "trunk/gensim/models/doc2vec_inner.pyx":198 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2524,7 +2384,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":218 + /* "trunk/gensim/models/doc2vec_inner.pyx":199 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2533,7 +2393,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":219 + /* "trunk/gensim/models/doc2vec_inner.pyx":200 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2542,22 +2402,22 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":220 + /* "trunk/gensim/models/doc2vec_inner.pyx":201 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_words: + * */ __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":221 + /* "trunk/gensim/models/doc2vec_inner.pyx":202 * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if learn_words: - * for m in range(j,k): + * + * return next_random */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L10; @@ -2566,130 +2426,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":222 - * if learn_hidden: - * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_words: # <<<<<<<<<<<<<< - * for m in range(j,k): - * if m == i or codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_learn_words != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":223 + /* "trunk/gensim/models/doc2vec_inner.pyx":204 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_words: - * for m in range(j,k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_2 = __pyx_v_k; - for (__pyx_t_5 = __pyx_v_j; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":224 - * if learn_words: - * for m in range(j,k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L15_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L15_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":225 - * for m in range(j,k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * 
our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) - */ - goto __pyx_L12_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":227 - * continue - * else: - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * if learn_lbls: - * for m in range(lbl_length): - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L12_continue:; - } - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":228 - * else: - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) - * if learn_lbls: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - */ - __pyx_t_3 = (__pyx_v_learn_lbls != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":229 - * our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) - * if learn_lbls: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue - */ - __pyx_t_2 = __pyx_v_lbl_length; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":230 - * if learn_lbls: - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_3 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":231 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) - */ - goto __pyx_L18_continue; - } - /*else*/ { - - /* 
"trunk/gensim/models/doc2vec_inner.pyx":233 - * continue - * else: - * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_syn0locks[(__pyx_v_lbl_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L18_continue:; - } - goto __pyx_L17; - } - __pyx_L17:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":235 - * our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -2698,12 +2436,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":185 + /* "trunk/gensim/models/doc2vec_inner.pyx":170 * * * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ /* function exit code */ @@ -2711,39 +2449,27 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":237 +/* "trunk/gensim/models/doc2vec_inner.pyx":206 * return next_random * * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int layer1_size, 
const int vector_size, + * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_layer1_size, int const __pyx_v_vector_size, __pyx_t_5numpy_uint32_t const *__pyx_v_window_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_lbl_length, int const __pyx_v_window, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_g; - int __pyx_v_m; int __pyx_t_1; PY_LONG_LONG __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - long __pyx_t_6; - /* "trunk/gensim/models/doc2vec_inner.pyx":250 - * - * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, 
layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< - * for b in range(word_code_len): - * row2 = word_point[b] * layer1_size - */ - memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":251 + /* "trunk/gensim/models/doc2vec_inner.pyx":218 * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * # work accumulates net l1 error; eventually applied by caller * for b in range(word_code_len): # <<<<<<<<<<<<<< * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) @@ -2752,8 +2478,8 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":252 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + /* "trunk/gensim/models/doc2vec_inner.pyx":219 + * # work accumulates net l1 error; eventually applied by caller * for b in range(word_code_len): * row2 = word_point[b] * layer1_size # <<<<<<<<<<<<<< * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) @@ -2761,7 +2487,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_layer1_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":253 + /* "trunk/gensim/models/doc2vec_inner.pyx":220 * for b in range(word_code_len): * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2770,7 +2496,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":254 + /* "trunk/gensim/models/doc2vec_inner.pyx":221 * row2 = word_point[b] * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2788,7 +2514,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":255 + /* "trunk/gensim/models/doc2vec_inner.pyx":222 * f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -2798,7 +2524,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":256 + /* "trunk/gensim/models/doc2vec_inner.pyx":223 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2807,7 +2533,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":257 + /* "trunk/gensim/models/doc2vec_inner.pyx":224 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2816,7 +2542,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":258 + /* "trunk/gensim/models/doc2vec_inner.pyx":225 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, 
work, &ONE) # <<<<<<<<<<<<<< @@ -2825,22 +2551,22 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":259 + /* "trunk/gensim/models/doc2vec_inner.pyx":226 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_lbls: + * */ __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":260 + /* "trunk/gensim/models/doc2vec_inner.pyx":227 * our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if learn_lbls: - * for m in range(lbl_length): + * + * */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L8; @@ -2849,94 +2575,26 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":261 - * if learn_hidden: - * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_lbls: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - */ - __pyx_t_3 = (__pyx_v_learn_lbls != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":262 - * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) - * if learn_lbls: - * for m in range(lbl_length): 
# <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - */ - __pyx_t_1 = __pyx_v_lbl_length; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":263 - * if learn_lbls: - * for m in range(lbl_length): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, # <<<<<<<<<<<<<< - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - goto __pyx_L9; - } - __pyx_L9:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":265 - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: # <<<<<<<<<<<<<< - * for m in range(lbl_length, lbl_length + (2 * window)): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - */ - __pyx_t_3 = (__pyx_v_learn_words != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":266 - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: - * for m in range(lbl_length, lbl_length + (2 * window)): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - */ - __pyx_t_6 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); - for (__pyx_t_1 = __pyx_v_lbl_length; __pyx_t_1 < __pyx_t_6; __pyx_t_1+=1) { - __pyx_v_m = __pyx_t_1; - - /* "trunk/gensim/models/doc2vec_inner.pyx":267 - * if 
learn_words: - * for m in range(lbl_length, lbl_length + (2 * window)): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, # <<<<<<<<<<<<<< - * &syn0[window_indexes[m] * vector_size], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - goto __pyx_L12; - } - __pyx_L12:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":237 + /* "trunk/gensim/models/doc2vec_inner.pyx":206 * return next_random * * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int layer1_size, const int vector_size, + * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ /* function exit code */ } -/* "trunk/gensim/models/doc2vec_inner.pyx":271 +/* "trunk/gensim/models/doc2vec_inner.pyx":230 * * * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, + * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_layer1_size, int const __pyx_v_vector_size, __pyx_t_5numpy_uint32_t *__pyx_v_window_indexes, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_predict_word_index, int const __pyx_v_lbl_length, int const __pyx_v_window, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_learn_hidden, int __pyx_v_learn_lbls, int __pyx_v_learn_words, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; @@ -2944,15 +2602,13 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_label; __pyx_t_5numpy_uint32_t __pyx_v_target_index; int __pyx_v_d; - int __pyx_v_m; unsigned PY_LONG_LONG __pyx_r; long __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - /* "trunk/gensim/models/doc2vec_inner.pyx":281 + /* "trunk/gensim/models/doc2vec_inner.pyx":237 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ 
-2961,18 +2617,9 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/doc2vec_inner.pyx":287 - * - * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error # <<<<<<<<<<<<<< - * for d in range(negative+1): - * if d == 0: - */ - memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/doc2vec_inner.pyx":288 + /* "trunk/gensim/models/doc2vec_inner.pyx":244 * # l1 already composed by caller, passed in as neu1 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + * # work accumulates net l1 error; eventually applied by caller * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = predict_word_index @@ -2981,8 +2628,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":289 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + /* "trunk/gensim/models/doc2vec_inner.pyx":245 + * # work accumulates net l1 error; eventually applied by caller * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = predict_word_index @@ -2991,7 +2638,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":290 + /* "trunk/gensim/models/doc2vec_inner.pyx":246 * for d in range(negative+1): * if d == 0: * target_index = predict_word_index # <<<<<<<<<<<<<< @@ -3000,7 +2647,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = __pyx_v_predict_word_index; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":291 + /* "trunk/gensim/models/doc2vec_inner.pyx":247 * if d == 0: * target_index = predict_word_index * label = ONEF # <<<<<<<<<<<<<< @@ -3012,7 +2659,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":293 + /* "trunk/gensim/models/doc2vec_inner.pyx":249 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -3021,7 +2668,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/doc2vec_inner.pyx":294 + /* "trunk/gensim/models/doc2vec_inner.pyx":250 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -3030,7 +2677,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/doc2vec_inner.pyx":295 + /* "trunk/gensim/models/doc2vec_inner.pyx":251 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: # <<<<<<<<<<<<<< @@ -3040,7 +2687,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_predict_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":296 + /* "trunk/gensim/models/doc2vec_inner.pyx":252 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == predict_word_index: * continue # <<<<<<<<<<<<<< @@ -3050,7 +2697,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* 
"trunk/gensim/models/doc2vec_inner.pyx":297 + /* "trunk/gensim/models/doc2vec_inner.pyx":253 * if target_index == predict_word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -3061,7 +2708,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":299 + /* "trunk/gensim/models/doc2vec_inner.pyx":255 * label = 0.0 * * row2 = target_index * layer1_size # <<<<<<<<<<<<<< @@ -3070,7 +2717,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_layer1_size); - /* "trunk/gensim/models/doc2vec_inner.pyx":300 + /* "trunk/gensim/models/doc2vec_inner.pyx":256 * * row2 = target_index * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -3079,7 +2726,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot((&__pyx_v_layer1_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":301 + /* "trunk/gensim/models/doc2vec_inner.pyx":257 * row2 = target_index * layer1_size * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -3097,7 +2744,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":302 + /* "trunk/gensim/models/doc2vec_inner.pyx":258 * f = our_dot(&layer1_size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< @@ -3107,7 +2754,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast goto __pyx_L3_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":303 + 
/* "trunk/gensim/models/doc2vec_inner.pyx":259 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -3116,7 +2763,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/doc2vec_inner.pyx":304 + /* "trunk/gensim/models/doc2vec_inner.pyx":260 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -3125,7 +2772,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/doc2vec_inner.pyx":305 + /* "trunk/gensim/models/doc2vec_inner.pyx":261 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -3134,22 +2781,22 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":306 + /* "trunk/gensim/models/doc2vec_inner.pyx":262 * g = (label - f) * alpha * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: # <<<<<<<<<<<<<< * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_lbls: + * */ __pyx_t_3 = (__pyx_v_learn_hidden != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/doc2vec_inner.pyx":307 + /* "trunk/gensim/models/doc2vec_inner.pyx":263 * our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) * if learn_hidden: * our_saxpy(&layer1_size, &g, neu1, 
&ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if learn_lbls: - * for m in range(lbl_length): + * + * return next_random */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_layer1_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L10; @@ -3158,76 +2805,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_L3_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":308 - * if learn_hidden: - * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_lbls: # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - */ - __pyx_t_3 = (__pyx_v_learn_lbls != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":309 + /* "trunk/gensim/models/doc2vec_inner.pyx":265 * our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * if learn_lbls: - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - */ - __pyx_t_2 = __pyx_v_lbl_length; - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_2; __pyx_t_5+=1) { - __pyx_v_m = __pyx_t_5; - - /* "trunk/gensim/models/doc2vec_inner.pyx":310 - * if learn_lbls: - * for m in range(lbl_length): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, # <<<<<<<<<<<<<< - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - goto __pyx_L11; - } - __pyx_L11:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":312 - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: # <<<<<<<<<<<<<< - * for m in range(lbl_length, lbl_length + (2 * window)): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - */ - __pyx_t_3 = (__pyx_v_learn_words != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":313 - * &syn0[window_indexes[m] * vector_size], &ONE) - * if learn_words: - * for m in range(lbl_length, lbl_length + (2 * window)): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - * &syn0[window_indexes[m] * vector_size], &ONE) - */ - __pyx_t_1 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); - for (__pyx_t_2 = __pyx_v_lbl_length; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":314 - * if learn_words: - * for m in range(lbl_length, lbl_length + (2 * window)): - * our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, # <<<<<<<<<<<<<< - * &syn0[window_indexes[m] * vector_size], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v_syn0locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v_work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - goto __pyx_L14; - } - __pyx_L14:; - - /* "trunk/gensim/models/doc2vec_inner.pyx":317 - * &syn0[window_indexes[m] * vector_size], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -3236,12 +2815,12 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":271 + /* "trunk/gensim/models/doc2vec_inner.pyx":230 * * * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, + * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ /* function exit code */ @@ -3249,12 +2828,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":320 +/* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* Python wrapper */ @@ -3262,12 +2841,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow = {"train_sentence_dbow", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_sentence = 0; - PyObject *__pyx_v_lbls = 0; + PyObject *__pyx_v_word_vocabs = 0; + PyObject 
*__pyx_v_doclbl_vocabs = 0; PyObject *__pyx_v_alpha = 0; - PyObject *__pyx_v__work = 0; + PyObject *__pyx_v_work = 0; PyObject *__pyx_v_train_words = 0; - PyObject *__pyx_v_train_lbls = 0; + PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_words = 0; + PyObject *__pyx_v_learn_hidden = 0; + PyObject *__pyx_v_word_vectors = 0; + PyObject *__pyx_v_word_locks = 0; + PyObject *__pyx_v_doclbl_vectors = 0; + PyObject *__pyx_v_doclbl_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -3275,12 +2860,43 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_train_lbls,0}; - PyObject* values[7] = {0,0,0,0,0,0,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; + values[4] = ((PyObject *)Py_None); + + /* "trunk/gensim/models/doc2vec_inner.pyx":269 + * + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * cdef int hs = model.hs + */ + values[5] = ((PyObject *)Py_False); + values[6] = ((PyObject *)Py_True); + values[7] = ((PyObject *)Py_True); + values[8] = ((PyObject *)Py_True); + + /* "trunk/gensim/models/doc2vec_inner.pyx":270 + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, 
work=None, + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + values[9] = ((PyObject *)Py_None); + values[10] = ((PyObject *)Py_None); + values[11] = ((PyObject *)Py_None); + values[12] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); @@ -3297,89 +2913,145 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; else { - 
__Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: - if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work); + if (value) { values[4] = value; kw_args--; } } case 5: - if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words); + if (value) { values[5] = value; kw_args--; } } case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + if (value) { values[6] = value; kw_args--; } + } + 
case 7: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_words); + if (value) { values[7] = value; kw_args--; } + } + case 8: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_hidden); + if (value) { values[8] = value; kw_args--; } + } + case 9: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vectors); + if (value) { values[9] = value; kw_args--; } + } + case 10: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_locks); + if (value) { values[10] = value; kw_args--; } + } + case 11: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + if (value) { values[11] = value; kw_args--; } + } + case 12: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + if (value) { values[12] = value; kw_args--; } } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - } else if (PyTuple_GET_SIZE(__pyx_args) != 7) { - goto __pyx_L5_argtuple_error; } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - values[5] = PyTuple_GET_ITEM(__pyx_args, 5); - values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] 
= PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } } __pyx_v_model = values[0]; - __pyx_v_sentence = values[1]; - __pyx_v_lbls = values[2]; + __pyx_v_word_vocabs = values[1]; + __pyx_v_doclbl_vocabs = values[2]; __pyx_v_alpha = values[3]; - __pyx_v__work = values[4]; + __pyx_v_work = values[4]; __pyx_v_train_words = values[5]; - __pyx_v_train_lbls = values[6]; + __pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_words = values[7]; + __pyx_v_learn_hidden = values[8]; + __pyx_v_word_vectors = values[9]; + __pyx_v_word_locks = values[10]; + __pyx_v_doclbl_vectors = values[11]; + __pyx_v_doclbl_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 1, 7, 7, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, 
__pyx_v_alpha, __pyx_v__work, __pyx_v_train_words, __pyx_v_train_lbls); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + + /* "trunk/gensim/models/doc2vec_inner.pyx":268 + * + * + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + */ /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_train_lbls) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v_tw; - int __pyx_v_tl; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; + int __pyx_v__train_words; + int __pyx_v__learn_words; + int __pyx_v__learn_hidden; + int __pyx_v__learn_doclbls; + 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[10000]; - int __pyx_v_lbl_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_lbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; - int __pyx_v_lbl_length; + int __pyx_v_doclbl_len; int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; @@ -3391,9 +3063,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5numpy_uint32_t *__pyx_v_table; unsigned PY_LONG_LONG __pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; - PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_predict_word = NULL; PyObject *__pyx_v_item = NULL; + PyObject *__pyx_v_context_token = NULL; long __pyx_v_k; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -3401,16 +3073,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_t_2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; int __pyx_t_4; - Py_ssize_t __pyx_t_5; - PyObject *__pyx_t_6 = NULL; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; PyObject *__pyx_t_7 = NULL; - unsigned PY_LONG_LONG __pyx_t_8; - long __pyx_t_9; - Py_ssize_t __pyx_t_10; - int __pyx_t_11; - int __pyx_t_12; - __pyx_t_5numpy_uint32_t __pyx_t_13; - PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *__pyx_t_10 = 
NULL; + long __pyx_t_11; + Py_ssize_t __pyx_t_12; + int __pyx_t_13; + __pyx_t_5numpy_uint32_t __pyx_t_14; PyObject *__pyx_t_15 = NULL; PyObject *__pyx_t_16 = NULL; PyObject *(*__pyx_t_17)(PyObject *); @@ -3420,103 +3092,115 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_dbow", 0); + __Pyx_INCREF(__pyx_v_work); + __Pyx_INCREF(__pyx_v_word_vectors); + __Pyx_INCREF(__pyx_v_word_locks); + __Pyx_INCREF(__pyx_v_doclbl_vectors); + __Pyx_INCREF(__pyx_v_doclbl_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":321 - * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":271 + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int tw = train_words + * cdef int _train_words = train_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 271; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":322 - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":272 + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int tw = train_words - * cdef int tl = train_lbls + * cdef int _train_words = train_words + * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":323 + /* "trunk/gensim/models/doc2vec_inner.pyx":273 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int tw = train_words # <<<<<<<<<<<<<< - * cdef int tl = train_lbls - * + * cdef int _train_words = train_words # <<<<<<<<<<<<<< + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tw = 
__pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 273; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__train_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":324 + /* "trunk/gensim/models/doc2vec_inner.pyx":274 * cdef int negative = model.negative - * cdef int tw = train_words - * cdef int tl = train_lbls # <<<<<<<<<<<<<< + * cdef int _train_words = train_words + * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< + * cdef int _learn_hidden = learn_hidden + * cdef int _learn_doclbls = learn_doclbls + */ + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_words = __pyx_t_2; + + /* "trunk/gensim/models/doc2vec_inner.pyx":275 + * cdef int _train_words = train_words + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< + * cdef int _learn_doclbls = learn_doclbls * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_tl = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":326 - * cdef int tl = train_lbls + /* "trunk/gensim/models/doc2vec_inner.pyx":276 + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden + * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< * - * cdef REAL_t *syn0 = 
(np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< - * cdef REAL_t *work - * cdef REAL_t _alpha = alpha + * cdef REAL_t *_word_vectors */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doclbls = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":328 - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) - * cdef REAL_t *work + /* "trunk/gensim/models/doc2vec_inner.pyx":283 + * cdef REAL_t *_doclbl_locks + * cdef REAL_t *_work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":329 - * cdef REAL_t *work + /* "trunk/gensim/models/doc2vec_inner.pyx":284 + * cdef REAL_t *_work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int 
codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":338 + /* "trunk/gensim/models/doc2vec_inner.pyx":292 * cdef int sentence_len - * cdef int lbl_length + * cdef int doclbl_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":341 + /* "trunk/gensim/models/doc2vec_inner.pyx":295 * * cdef int i, j * cdef long result = 0 # <<<<<<<<<<<<<< @@ -3525,8 +3209,152 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":357 - * cdef REAL_t *syn0locks + /* "trunk/gensim/models/doc2vec_inner.pyx":308 + * cdef unsigned long long next_random + * + * if word_vectors is None: # <<<<<<<<<<<<<< + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + */ + __pyx_t_4 = (__pyx_v_word_vectors == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":309 + * + * if word_vectors is None: + * word_vectors = model.syn0 # <<<<<<<<<<<<<< + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L3; + } + __pyx_L3:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":310 + * if word_vectors is None: + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< + * + * if doclbl_vectors is None: + */ + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":312 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * + * if doclbl_vectors is None: # 
<<<<<<<<<<<<<< + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + */ + __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":313 + * + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L4; + } + __pyx_L4:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":314 + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * + * if word_locks is None: + */ + if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":316 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * + * if word_locks is None: # <<<<<<<<<<<<<< + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) + */ + __pyx_t_4 = (__pyx_v_word_locks == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":317 + * + * if word_locks is None: + * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * _word_locks = (np.PyArray_DATA(word_locks)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L5; + } + __pyx_L5:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":318 + * if word_locks is None: + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< + * + * if doclbl_locks is None: + */ + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":320 + * _word_locks = (np.PyArray_DATA(word_locks)) + * + * if doclbl_locks is None: # <<<<<<<<<<<<<< + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + */ + __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":321 + * + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L6; + } + __pyx_L6:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":322 + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * + * if hs: + */ + if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":324 + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -3535,23 +3363,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":358 + /* "trunk/gensim/models/doc2vec_inner.pyx":325 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L3; + goto __pyx_L7; } - __pyx_L3:; + __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":360 + /* "trunk/gensim/models/doc2vec_inner.pyx":327 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3561,256 +3389,334 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":361 + /* "trunk/gensim/models/doc2vec_inner.pyx":328 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":362 + /* "trunk/gensim/models/doc2vec_inner.pyx":329 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 362; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":363 + /* "trunk/gensim/models/doc2vec_inner.pyx":330 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_table_len = __pyx_t_5; + __pyx_v_table_len = __pyx_t_6; - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":331 * table = (np.PyArray_DATA(model.table)) * 
table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_7); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_7, 
__pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_next_random = __pyx_t_8; - goto __pyx_L4; + __pyx_v_next_random = __pyx_t_9; + goto __pyx_L8; } - __pyx_L4:; + __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":367 + /* "trunk/gensim/models/doc2vec_inner.pyx":334 * * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * if work is None: # <<<<<<<<<<<<<< + * work = 
zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + __pyx_t_4 = (__pyx_v_work == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":368 + /* "trunk/gensim/models/doc2vec_inner.pyx":335 * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< + * _work = np.PyArray_DATA(work) + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_5 < __pyx_t_9) != 0)) { - __pyx_t_10 = __pyx_t_5; - } else { - __pyx_t_10 = __pyx_t_9; + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); + 
__Pyx_GIVEREF(__pyx_t_8); + __pyx_t_8 = 0; + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); + __pyx_t_10 = 0; + goto __pyx_L9; } - __pyx_v_sentence_len = ((int)__pyx_t_10); + __pyx_L9:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":336 + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + */ + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":369 - * work = np.PyArray_DATA(_work) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< + /* 
"trunk/gensim/models/doc2vec_inner.pyx":337 + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) * - * syn0locks = np.PyArray_DATA(model.syn0locks) */ - __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_10 < __pyx_t_9) != 0)) { - __pyx_t_5 = __pyx_t_10; + __pyx_t_6 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = 10000; + if (((__pyx_t_6 < __pyx_t_11) != 0)) { + __pyx_t_12 = __pyx_t_6; } else { - __pyx_t_5 = __pyx_t_9; + __pyx_t_12 = __pyx_t_11; } - __pyx_v_lbl_length = ((int)__pyx_t_5); + __pyx_v_sentence_len = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":371 - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * - * syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":338 + * _work = np.PyArray_DATA(work) + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - 
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = 10000; + if (((__pyx_t_12 < __pyx_t_11) != 0)) { + __pyx_t_6 = __pyx_t_12; + } else { + __pyx_t_6 = __pyx_t_11; + } + __pyx_v_doclbl_len = ((int)__pyx_t_6); - /* "trunk/gensim/models/doc2vec_inner.pyx":373 - * syn0locks = np.PyArray_DATA(model.syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":340 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< - * word = sentence[i] - * if word is None: + * predict_word = word_vocabs[i] + * if predict_word is None: */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":374 + /* "trunk/gensim/models/doc2vec_inner.pyx":341 * * for i in range(sentence_len): - * word = sentence[i] # <<<<<<<<<<<<<< - * if word is None: + * predict_word = word_vocabs[i] # <<<<<<<<<<<<<< + * if predict_word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); - __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_10); + __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_10); + __pyx_t_10 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":375 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":342 * for i in range(sentence_len): - * word = sentence[i] - * if word is None: # <<<<<<<<<<<<<< + * predict_word = word_vocabs[i] + * if predict_word is None: # <<<<<<<<<<<<<< * codelens[i] = 0 * else: */ - __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_4 != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_v_predict_word == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":376 - * word = sentence[i] - * if word is None: + /* "trunk/gensim/models/doc2vec_inner.pyx":343 + * predict_word = word_vocabs[i] + * if predict_word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< * else: - * indexes[i] = word.index + * indexes[i] = predict_word.index */ (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L7; + goto __pyx_L12; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":378 + /* "trunk/gensim/models/doc2vec_inner.pyx":345 * codelens[i] = 0 * else: - * indexes[i] = word.index # <<<<<<<<<<<<<< + * indexes[i] = predict_word.index # <<<<<<<<<<<<<< + * reduced_windows[i] = np.random.randint(window) * if hs: - * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 378; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == 
(npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":379 + /* "trunk/gensim/models/doc2vec_inner.pyx":346 * else: - * indexes[i] = word.index + * indexes[i] = predict_word.index + * reduced_windows[i] = np.random.randint(window) # <<<<<<<<<<<<<< + * if hs: + * codelens[i] = len(predict_word.code) + */ + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_7 = NULL; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_8))) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_8); + if (likely(__pyx_t_7)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); + __Pyx_INCREF(__pyx_t_7); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_8, function); + } + } + if (!__pyx_t_7) { + __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_GOTREF(__pyx_t_10); + } else { + __pyx_t_15 = PyTuple_New(1+1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = NULL; + PyTuple_SET_ITEM(__pyx_t_15, 0+1, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_15, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + } + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; + + /* "trunk/gensim/models/doc2vec_inner.pyx":347 + * indexes[i] = predict_word.index + * reduced_windows[i] = np.random.randint(window) * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":380 - * indexes[i] = word.index + /* "trunk/gensim/models/doc2vec_inner.pyx":348 + * reduced_windows[i] = np.random.randint(window) * if hs: - * codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) + * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) */ - 
__pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 380; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_5); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_6 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_6); - /* "trunk/gensim/models/doc2vec_inner.pyx":381 + /* "trunk/gensim/models/doc2vec_inner.pyx":349 * if hs: - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(word.point) + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< + * points[i] = np.PyArray_DATA(predict_word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_10 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":382 - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":350 + * codelens[i] = len(predict_word.code) + * codes[i] = np.PyArray_DATA(predict_word.code) + * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L8; + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + goto __pyx_L13; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":384 - * points[i] = np.PyArray_DATA(word.point) + /* "trunk/gensim/models/doc2vec_inner.pyx":352 + * points[i] = np.PyArray_DATA(predict_word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< * result += 1 @@ -3818,9 +3724,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ (__pyx_v_codelens[__pyx_v_i]) = 1; } - __pyx_L8:; + __pyx_L13:; - /* "trunk/gensim/models/doc2vec_inner.pyx":385 + /* "trunk/gensim/models/doc2vec_inner.pyx":353 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3829,231 +3735,169 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_result = (__pyx_v_result + 1); } - __pyx_L7:; + __pyx_L12:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":387 + /* "trunk/gensim/models/doc2vec_inner.pyx":355 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(lbl_length): + * for i in range(doclbl_len): */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_15 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_15, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; + __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_15); + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); - __pyx_t_15 = NULL; - __pyx_t_5 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_7); - if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); - __Pyx_INCREF(__pyx_t_15); + __pyx_t_7 = NULL; + __pyx_t_6 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && 
likely(PyMethod_Check(__pyx_t_8))) { + __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_8); + if (likely(__pyx_t_7)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); + __Pyx_INCREF(__pyx_t_7); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_7, function); - __pyx_t_5 = 1; + __Pyx_DECREF_SET(__pyx_t_8, function); + __pyx_t_6 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_5); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_15) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + if (__pyx_t_7) { + PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_5, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_6, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_5, __pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_6, __pyx_t_15); + __Pyx_GIVEREF(__pyx_t_15); + PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_6, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_5, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); + __pyx_t_15 = 0; __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if 
(likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { - __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_5 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + if (likely(PyList_CheckExact(__pyx_t_10)) || PyTuple_CheckExact(__pyx_t_10)) { + __pyx_t_8 = __pyx_t_10; __Pyx_INCREF(__pyx_t_8); __pyx_t_6 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_5 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; for (;;) { if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - if (__pyx_t_5 >= PyList_GET_SIZE(__pyx_t_7)) break; + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_10); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if 
(unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_5 >= PyTuple_GET_SIZE(__pyx_t_7)) break; + if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_5); __Pyx_INCREF(__pyx_t_6); __pyx_t_5++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_10); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_5); __pyx_t_5++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_6 = __pyx_t_17(__pyx_t_7); - if (unlikely(!__pyx_t_6)) { + __pyx_t_10 = __pyx_t_17(__pyx_t_8); + if (unlikely(!__pyx_t_10)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } - __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_10); } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_item, 
__pyx_t_10); + __pyx_t_10 = 0; __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":388 + /* "trunk/gensim/models/doc2vec_inner.pyx":356 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< - * for i in range(lbl_length): - * word = lbls[i] + * for i in range(doclbl_len): + * context_token = doclbl_vocabs[i] */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 388; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":387 + /* "trunk/gensim/models/doc2vec_inner.pyx":355 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(lbl_length): + * for i in range(doclbl_len): */ } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":389 + /* "trunk/gensim/models/doc2vec_inner.pyx":357 * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item - * for i in range(lbl_length): # <<<<<<<<<<<<<< - * word = lbls[i] - * if word is None: + * for i in range(doclbl_len): # <<<<<<<<<<<<<< + * context_token = doclbl_vocabs[i] + * doclbl_indexes[i] = context_token.index */ - __pyx_t_2 = __pyx_v_lbl_length; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i 
= __pyx_t_11; + __pyx_t_2 = __pyx_v_doclbl_len; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":390 + /* "trunk/gensim/models/doc2vec_inner.pyx":358 * reduced_windows[i] = item - * for i in range(lbl_length): - * word = lbls[i] # <<<<<<<<<<<<<< - * if word is None: - * lbl_codelens[i] = 0 - */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":391 - * for i in range(lbl_length): - * word = lbls[i] - * if word is None: # <<<<<<<<<<<<<< - * lbl_codelens[i] = 0 - * else: - */ - __pyx_t_12 = (__pyx_v_word == Py_None); - __pyx_t_4 = (__pyx_t_12 != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":392 - * word = lbls[i] - * if word is None: - * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< - * else: - * lbl_indexes[i] = word.index - */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 0; - goto __pyx_L13; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":394 - * lbl_codelens[i] = 0 - * else: - * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< - * if hs: - * lbl_codelens[i] = len(word.code) - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; - - /* "trunk/gensim/models/doc2vec_inner.pyx":395 - 
* else: - * lbl_indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * lbl_codelens[i] = len(word.code) - * else: - */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":396 - * lbl_indexes[i] = word.index - * if hs: - * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * else: - * lbl_codelens[i] = 1 - */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - goto __pyx_L14; - } - /*else*/ { + * for i in range(doclbl_len): + * context_token = doclbl_vocabs[i] # <<<<<<<<<<<<<< + * doclbl_indexes[i] = context_token.index + * result += 1 + */ + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_8); + __Pyx_XDECREF_SET(__pyx_v_context_token, __pyx_t_8); + __pyx_t_8 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":398 - * lbl_codelens[i] = len(word.code) - * else: - * lbl_codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":359 + * for i in range(doclbl_len): + * context_token = doclbl_vocabs[i] + * doclbl_indexes[i] = context_token.index # <<<<<<<<<<<<<< + * result += 1 * */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 1; - } - __pyx_L14:; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_context_token, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__Pyx_GOTREF(__pyx_t_8); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + (__pyx_v_doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":399 - * else: - * lbl_codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":360 + * context_token = doclbl_vocabs[i] + * doclbl_indexes[i] = context_token.index + * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_v_result = (__pyx_v_result + 1); - } - __pyx_L13:; + __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":402 + /* "trunk/gensim/models/doc2vec_inner.pyx":363 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4067,7 +3911,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":403 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4075,50 +3919,50 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * continue */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":404 + /* "trunk/gensim/models/doc2vec_inner.pyx":365 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< * continue - * if tw: # simultaneous skip-gram wordvec-training + * if _train_words: # simultaneous skip-gram wordvec-training */ __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":405 + /* "trunk/gensim/models/doc2vec_inner.pyx":366 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< - * if tw: # simultaneous skip-gram wordvec-training + * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] */ - goto __pyx_L18_continue; + goto __pyx_L21_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":406 + /* "trunk/gensim/models/doc2vec_inner.pyx":367 * if codelens[i] == 0: * continue - * if tw: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< + * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < 0: */ - __pyx_t_4 = (__pyx_v_tw != 0); + __pyx_t_4 = (__pyx_v__train_words != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":407 + /* "trunk/gensim/models/doc2vec_inner.pyx":368 * continue - * if tw: # simultaneous skip-gram wordvec-training + * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< * if j < 0: * j = 0 */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":408 - * if tw: # simultaneous skip-gram wordvec-training + /* "trunk/gensim/models/doc2vec_inner.pyx":369 + * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 @@ -4127,7 +3971,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_j < 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":409 + /* "trunk/gensim/models/doc2vec_inner.pyx":370 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -4135,11 +3979,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L22; + goto 
__pyx_L25; } - __pyx_L22:; + __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":410 + /* "trunk/gensim/models/doc2vec_inner.pyx":371 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4148,7 +3992,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":411 + /* "trunk/gensim/models/doc2vec_inner.pyx":372 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -4158,7 +4002,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":412 + /* "trunk/gensim/models/doc2vec_inner.pyx":373 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -4166,192 +4010,158 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if j == i or codelens[j] == 0: */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L23; + goto __pyx_L26; } - __pyx_L23:; + __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":413 + /* "trunk/gensim/models/doc2vec_inner.pyx":374 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< * if j == i or codelens[j] == 0: * continue */ - __pyx_t_9 = __pyx_v_k; - for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_9; __pyx_t_18+=1) { + __pyx_t_11 = __pyx_v_k; + for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_11; __pyx_t_18+=1) { __pyx_v_j = __pyx_t_18; - /* "trunk/gensim/models/doc2vec_inner.pyx":414 + /* "trunk/gensim/models/doc2vec_inner.pyx":375 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< * continue * if hs: */ - __pyx_t_12 = ((__pyx_v_j == __pyx_v_i) != 0); - if (!__pyx_t_12) { + __pyx_t_5 = ((__pyx_v_j == __pyx_v_i) != 0); + 
if (!__pyx_t_5) { } else { - __pyx_t_4 = __pyx_t_12; - goto __pyx_L27_bool_binop_done; + __pyx_t_4 = __pyx_t_5; + goto __pyx_L30_bool_binop_done; } - __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); - __pyx_t_4 = __pyx_t_12; - __pyx_L27_bool_binop_done:; + __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); + __pyx_t_4 = __pyx_t_5; + __pyx_L30_bool_binop_done:; if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":415 + /* "trunk/gensim/models/doc2vec_inner.pyx":376 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose */ - goto __pyx_L24_continue; + goto __pyx_L27_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":416 + /* "trunk/gensim/models/doc2vec_inner.pyx":377 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], */ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":418 + /* "trunk/gensim/models/doc2vec_inner.pyx":379 * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], # <<<<<<<<<<<<<< - * _alpha, work, 1, 1, syn0locks) + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< + * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, 
__pyx_v_work, 1, 1, __pyx_v_syn0locks); - goto __pyx_L29; + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); + goto __pyx_L32; } - __pyx_L29:; + __pyx_L32:; - /* "trunk/gensim/models/doc2vec_inner.pyx":420 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], - * _alpha, work, 1, 1, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":381 + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], + * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, */ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":422 + /* "trunk/gensim/models/doc2vec_inner.pyx":383 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, # <<<<<<<<<<<<<< - * indexes[i], indexes[j], _alpha, work, next_random, - * 1, 1, syn0locks) + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< + * indexes[i], indexes[j], _alpha, _work, next_random, + * _learn_words, _learn_hidden, _word_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, 
(__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, 1, 1, __pyx_v_syn0locks); - goto __pyx_L30; + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); + goto __pyx_L33; } - __pyx_L30:; - __pyx_L24_continue:; + __pyx_L33:; + __pyx_L27_continue:; } - goto __pyx_L21; + goto __pyx_L24; } - __pyx_L21:; + __pyx_L24:; - /* "trunk/gensim/models/doc2vec_inner.pyx":426 - * 1, 1, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":388 * - * if tl: # docvec-training # <<<<<<<<<<<<<< - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: - */ - __pyx_t_4 = (__pyx_v_tl != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":427 - * - * if tl: # docvec-training - * for j in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[j] == 0: - * continue - */ - __pyx_t_18 = __pyx_v_lbl_length; - for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { - __pyx_v_j = __pyx_t_19; - - /* "trunk/gensim/models/doc2vec_inner.pyx":428 - * if tl: # docvec-training - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: # <<<<<<<<<<<<<< - * continue - * if hs: + * # docvec-training + * for j in range(doclbl_len): # <<<<<<<<<<<<<< + * if hs: + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_j]) == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":429 - * for j in range(lbl_length): - * if lbl_codelens[j] == 0: - * continue # <<<<<<<<<<<<<< - * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], - */ - 
goto __pyx_L32_continue; - } + __pyx_t_18 = __pyx_v_doclbl_len; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":430 - * if lbl_codelens[j] == 0: - * continue - * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], - * _alpha, work, 1, 1, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":389 + * # docvec-training + * for j in range(doclbl_len): + * if hs: # <<<<<<<<<<<<<< + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_4 = (__pyx_v_hs != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":431 - * continue - * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], # <<<<<<<<<<<<<< - * _alpha, work, 1, 1, syn0locks) - * if negative: + /* "trunk/gensim/models/doc2vec_inner.pyx":390 + * for j in range(doclbl_len): + * if hs: + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], # <<<<<<<<<<<<<< + * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, 1, 1, __pyx_v_syn0locks); - goto __pyx_L35; - } - __pyx_L35:; + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doclbl_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doclbls, 
__pyx_v__learn_hidden, __pyx_v__doclbl_locks); + goto __pyx_L36; + } + __pyx_L36:; - /* "trunk/gensim/models/doc2vec_inner.pyx":433 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], - * _alpha, work, 1, 1, syn0locks) - * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, - * indexes[i], lbl_indexes[j], _alpha, work, next_random, + /* "trunk/gensim/models/doc2vec_inner.pyx":392 + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * if negative: # <<<<<<<<<<<<<< + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, + * indexes[i], doclbl_indexes[j], _alpha, _work, next_random, */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_4 = (__pyx_v_negative != 0); + if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":434 - * _alpha, work, 1, 1, syn0locks) - * if negative: - * next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, # <<<<<<<<<<<<<< - * indexes[i], lbl_indexes[j], _alpha, work, next_random, - * 1, 1, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":393 + * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * if negative: + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, # <<<<<<<<<<<<<< + * indexes[i], doclbl_indexes[j], _alpha, _work, next_random, + * _learn_doclbls, _learn_hidden, _doclbl_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_lbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, 1, 1, __pyx_v_syn0locks); - goto __pyx_L36; - } - 
__pyx_L36:; - __pyx_L32_continue:; + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doclbl_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); + goto __pyx_L37; } - goto __pyx_L31; + __pyx_L37:; } - __pyx_L31:; - __pyx_L18_continue:; + __pyx_L21_continue:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":402 + /* "trunk/gensim/models/doc2vec_inner.pyx":363 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4363,58 +4173,64 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L17; + goto __pyx_L20; } - __pyx_L17:; + __pyx_L20:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":438 - * 1, 1, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":397 + * _learn_doclbls, _learn_hidden, _doclbl_locks) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 438; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_r = __pyx_t_8; + __pyx_t_8 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":320 + /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dbow(model, word_vocabs, 
doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; - __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_predict_word); __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_context_token); + __Pyx_XDECREF(__pyx_v_work); + __Pyx_XDECREF(__pyx_v_word_vectors); + __Pyx_XDECREF(__pyx_v_word_locks); + __Pyx_XDECREF(__pyx_v_doclbl_vectors); + __Pyx_XDECREF(__pyx_v_doclbl_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":441 +/* "trunk/gensim/models/doc2vec_inner.pyx":400 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* Python wrapper */ @@ -4422,13 +4238,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm = {"train_sentence_dm", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject 
*__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_sentence = 0; - PyObject *__pyx_v_lbls = 0; + PyObject *__pyx_v_word_vocabs = 0; + PyObject *__pyx_v_doclbl_vocabs = 0; PyObject *__pyx_v_alpha = 0; - PyObject *__pyx_v__work = 0; - PyObject *__pyx_v__neu1 = 0; - PyObject *__pyx_v__train_words = 0; - PyObject *__pyx_v__train_lbls = 0; + PyObject *__pyx_v_work = 0; + PyObject *__pyx_v_neu1 = 0; + PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_words = 0; + PyObject *__pyx_v_learn_hidden = 0; + PyObject *__pyx_v_word_vectors = 0; + PyObject *__pyx_v_word_locks = 0; + PyObject *__pyx_v_doclbl_vectors = 0; + PyObject *__pyx_v_doclbl_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -4436,12 +4257,42 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_train_words_2,&__pyx_n_s_train_lbls_2,0}; - PyObject* values[8] = {0,0,0,0,0,0,0,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; + values[4] = ((PyObject *)Py_None); + values[5] = ((PyObject *)Py_None); + + /* "trunk/gensim/models/doc2vec_inner.pyx":401 + * + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * 
word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * cdef int hs = model.hs + */ + values[6] = ((PyObject *)Py_True); + values[7] = ((PyObject *)Py_True); + values[8] = ((PyObject *)Py_True); + + /* "trunk/gensim/models/doc2vec_inner.pyx":402 + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + values[9] = ((PyObject *)Py_None); + values[10] = ((PyObject *)Py_None); + values[11] = ((PyObject *)Py_None); + values[12] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); @@ -4459,101 +4310,148 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: - if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work); + if (value) { values[4] = value; kw_args--; } } case 5: - if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1); + if (value) { values[5] = value; kw_args--; } } case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_words_2)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 6); 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + if (value) { values[6] = value; kw_args--; } } case 7: - if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_train_lbls_2)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_words); + if (value) { values[7] = value; kw_args--; } + } + case 8: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_hidden); + if (value) { values[8] = value; kw_args--; } + } + case 9: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vectors); + if (value) { values[9] = value; kw_args--; } + } + case 10: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_locks); + if (value) { values[10] = value; kw_args--; } + } + case 11: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + if (value) { values[11] = value; kw_args--; } + } + case 12: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + if (value) { values[12] = value; kw_args--; } } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { - goto __pyx_L5_argtuple_error; } else { - 
values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - values[5] = PyTuple_GET_ITEM(__pyx_args, 5); - values[6] = PyTuple_GET_ITEM(__pyx_args, 6); - values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } } __pyx_v_model = values[0]; - __pyx_v_sentence = values[1]; - __pyx_v_lbls = values[2]; + __pyx_v_word_vocabs = values[1]; + __pyx_v_doclbl_vocabs = values[2]; __pyx_v_alpha = values[3]; - __pyx_v__work = values[4]; - __pyx_v__neu1 = values[5]; - __pyx_v__train_words = values[6]; - __pyx_v__train_lbls = values[7]; + __pyx_v_work = values[4]; + __pyx_v_neu1 = values[5]; + __pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_words = values[7]; + __pyx_v_learn_hidden = values[8]; + __pyx_v_word_vectors = values[9]; + __pyx_v_word_locks = values[10]; + __pyx_v_doclbl_vectors = values[11]; + __pyx_v_doclbl_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 441; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1, __pyx_v__train_words, __pyx_v__train_lbls); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + + /* "trunk/gensim/models/doc2vec_inner.pyx":400 + * + * + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + */ /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__train_words, PyObject *__pyx_v__train_lbls) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, 
PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v_learn_words; - int __pyx_v_learn_lbls; - int __pyx_v_learn_hidden; + int __pyx_v__learn_doclbls; + int __pyx_v__learn_words; + int __pyx_v__learn_hidden; int __pyx_v_cbow_mean; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__neu1; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[10000]; - int __pyx_v_lbl_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_lbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; - int __pyx_v_lbl_length; + int __pyx_v_doclbl_len; int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; @@ -4567,25 +4465,25 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5numpy_uint32_t *__pyx_v_table; unsigned PY_LONG_LONG 
__pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; PyObject *__pyx_v_word = NULL; PyObject *__pyx_v_item = NULL; + PyObject *__pyx_v_token = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; int __pyx_t_2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; int __pyx_t_4; - Py_ssize_t __pyx_t_5; - PyObject *__pyx_t_6 = NULL; + int __pyx_t_5; + Py_ssize_t __pyx_t_6; PyObject *__pyx_t_7 = NULL; - unsigned PY_LONG_LONG __pyx_t_8; - long __pyx_t_9; - Py_ssize_t __pyx_t_10; - int __pyx_t_11; - int __pyx_t_12; - __pyx_t_5numpy_uint32_t __pyx_t_13; - PyObject *__pyx_t_14 = NULL; + PyObject *__pyx_t_8 = NULL; + unsigned PY_LONG_LONG __pyx_t_9; + PyObject *__pyx_t_10 = NULL; + long __pyx_t_11; + Py_ssize_t __pyx_t_12; + int __pyx_t_13; + __pyx_t_5numpy_uint32_t __pyx_t_14; PyObject *__pyx_t_15 = NULL; PyObject *__pyx_t_16 = NULL; PyObject *(*__pyx_t_17)(PyObject *); @@ -4595,125 +4493,119 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_dm", 0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":442 - * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): + __Pyx_INCREF(__pyx_v_work); + __Pyx_INCREF(__pyx_v_neu1); + __Pyx_INCREF(__pyx_v_word_vectors); + __Pyx_INCREF(__pyx_v_word_locks); + __Pyx_INCREF(__pyx_v_doclbl_vectors); + __Pyx_INCREF(__pyx_v_doclbl_locks); + + /* "trunk/gensim/models/doc2vec_inner.pyx":403 + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int learn_words = _train_words + * cdef int _learn_doclbls = learn_doclbls */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":443 - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":404 + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int learn_words = _train_words - * cdef int learn_lbls = _train_lbls + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":444 + /* "trunk/gensim/models/doc2vec_inner.pyx":405 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int learn_words = _train_words # <<<<<<<<<<<<<< - * cdef int learn_lbls = _train_lbls - * cdef int learn_hidden = True + * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__train_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_learn_words = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doclbls = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":445 + /* "trunk/gensim/models/doc2vec_inner.pyx":406 * cdef int negative = model.negative - * cdef int learn_words = _train_words - * cdef int learn_lbls = _train_lbls # <<<<<<<<<<<<<< - * cdef int learn_hidden = True + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< + * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__train_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_learn_lbls = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == 
(int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":446 - * cdef int learn_words = _train_words - * cdef int learn_lbls = _train_lbls - * cdef int learn_hidden = True # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":407 + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count */ - __pyx_v_learn_hidden = 1; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":447 - * cdef int learn_lbls = _train_lbls - * cdef int learn_hidden = True + /* "trunk/gensim/models/doc2vec_inner.pyx":408 + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * cdef REAL_t count, inv_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":450 - * cdef REAL_t count, inv_count - * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< - * cdef REAL_t *work - * cdef REAL_t *neu1 - */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":453 - * cdef REAL_t *work - * cdef REAL_t *neu1 + /* "trunk/gensim/models/doc2vec_inner.pyx":417 + * cdef REAL_t *_work + * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":454 - * cdef REAL_t *neu1 + /* "trunk/gensim/models/doc2vec_inner.pyx":418 + * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":463 + /* "trunk/gensim/models/doc2vec_inner.pyx":426 * cdef int sentence_len - * cdef int lbl_length + * cdef int doclbl_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k, m */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":466 + /* "trunk/gensim/models/doc2vec_inner.pyx":429 * * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< @@ -4722,8 +4614,152 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":481 - * cdef unsigned long long next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":443 + * + * # default vectors, locks from syn0 + * if word_vectors is None: # <<<<<<<<<<<<<< + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + */ + __pyx_t_4 = (__pyx_v_word_vectors == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":444 + * # default vectors, locks from syn0 + * if word_vectors is None: + * word_vectors = model.syn0 # <<<<<<<<<<<<<< + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L3; + } + __pyx_L3:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":445 + * if word_vectors is None: + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 + */ + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":446 + * word_vectors = model.syn0 + * _word_vectors = 
(np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: # <<<<<<<<<<<<<< + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + */ + __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":447 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is None: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L4; + } + __pyx_L4:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":448 + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * if word_locks is None: + * word_locks = model.syn0locks + */ + if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":449 + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is None: # <<<<<<<<<<<<<< + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) + */ + __pyx_t_4 = (__pyx_v_word_locks == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":450 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is 
None: + * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L5; + } + __pyx_L5:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":451 + * if word_locks is None: + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks + */ + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":452 + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: # <<<<<<<<<<<<<< + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + */ + __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":453 + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L6; 
+ } + __pyx_L6:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":454 + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * + * if hs: + */ + if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":456 + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -4732,23 +4768,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":482 + /* "trunk/gensim/models/doc2vec_inner.pyx":457 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - goto __pyx_L3; + goto __pyx_L7; } - __pyx_L3:; + __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":484 + /* "trunk/gensim/models/doc2vec_inner.pyx":459 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -4758,564 +4794,623 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":485 + /* "trunk/gensim/models/doc2vec_inner.pyx":460 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":486 + /* "trunk/gensim/models/doc2vec_inner.pyx":461 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = 
(2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":487 + /* "trunk/gensim/models/doc2vec_inner.pyx":462 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_6 
== -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_table_len = __pyx_t_5; + __pyx_v_table_len = __pyx_t_6; - /* "trunk/gensim/models/doc2vec_inner.pyx":488 + /* "trunk/gensim/models/doc2vec_inner.pyx":463 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_7); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); 
__Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_v_next_random = __pyx_t_8; - goto __pyx_L4; + __pyx_v_next_random = __pyx_t_9; + goto __pyx_L8; } - __pyx_L4:; + __pyx_L8:; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":491 + /* "trunk/gensim/models/doc2vec_inner.pyx":466 * * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * if work is None: # <<<<<<<<<<<<<< + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 491; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + __pyx_t_4 = (__pyx_v_work == Py_None); + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":492 + /* "trunk/gensim/models/doc2vec_inner.pyx":467 * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) - * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - * + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< + * _work = np.PyArray_DATA(work) + * if neu1 is None: */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); + __Pyx_GIVEREF(__pyx_t_8); + __pyx_t_8 = 0; + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); + __pyx_t_10 = 0; + goto __pyx_L9; + } + __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":493 - * work = np.PyArray_DATA(_work) - * neu1 = np.PyArray_DATA(_neu1) - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":468 + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) + */ + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":469 + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) + * if neu1 is None: # <<<<<<<<<<<<<< + * neu1 = zeros(model.layer1_size, dtype=REAL) + * _neu1 = np.PyArray_DATA(neu1) + */ + __pyx_t_5 = (__pyx_v_neu1 == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":470 + * _work = np.PyArray_DATA(work) + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< + * _neu1 = np.PyArray_DATA(neu1) * - * syn0locks = np.PyArray_DATA(model.syn0locks) */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_5 < __pyx_t_9) != 0)) { - __pyx_t_10 = __pyx_t_5; - } else { - __pyx_t_10 = __pyx_t_9; + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); + __Pyx_GIVEREF(__pyx_t_8); + __pyx_t_8 = 0; + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + 
__pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_7) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_7); + __pyx_t_7 = 0; + goto __pyx_L10; } - __pyx_v_sentence_len = ((int)__pyx_t_10); + __pyx_L10:; - /* "trunk/gensim/models/doc2vec_inner.pyx":495 - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + /* "trunk/gensim/models/doc2vec_inner.pyx":471 + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) + * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + */ + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":473 + * _neu1 = np.PyArray_DATA(neu1) * + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< + * j = 0 * for i in range(sentence_len): */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
495; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 495; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = 10000; + if (((__pyx_t_6 < __pyx_t_11) != 0)) { + __pyx_t_12 = __pyx_t_6; + } else { + __pyx_t_12 = __pyx_t_11; + } + __pyx_v_sentence_len = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":497 - * syn0locks = np.PyArray_DATA(model.syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":474 * + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * j = 0 # <<<<<<<<<<<<<< + * for i in range(sentence_len): + * word = word_vocabs[i] + */ + __pyx_v_j = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":475 + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< - * word = sentence[i] + * word = word_vocabs[i] * if word is None: */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":498 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":476 + * j = 0 * for i in range(sentence_len): - * word = sentence[i] # <<<<<<<<<<<<<< + * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: - * codelens[i] = 0 + * # shrink sentence to leave out word */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if 
(unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 498; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_6); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); - __pyx_t_6 = 0; + __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 476; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_7); + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); + __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":499 + /* "trunk/gensim/models/doc2vec_inner.pyx":477 * for i in range(sentence_len): - * word = sentence[i] + * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< - * codelens[i] = 0 - * else: + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 */ __pyx_t_4 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_4 != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_t_4 != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":500 - * word = sentence[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":479 * if word is None: - * codelens[i] = 0 # <<<<<<<<<<<<<< + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + * continue # leaving j unchanged * else: - * indexes[i] = word.index */ - (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L7; + __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + + /* "trunk/gensim/models/doc2vec_inner.pyx":480 + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 + * continue # leaving j unchanged # <<<<<<<<<<<<<< + * else: + * indexes[j] = word.index + */ + goto __pyx_L11_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":502 - * codelens[i] = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":482 + * continue # leaving j unchanged * else: - * indexes[i] = word.index # <<<<<<<<<<<<<< + * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: - * codelens[i] 
= len(word.code) + * codelens[j] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 502; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":503 + /* "trunk/gensim/models/doc2vec_inner.pyx":483 * else: - * indexes[i] = word.index + * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":504 - * indexes[i] = word.index + /* "trunk/gensim/models/doc2vec_inner.pyx":484 + * indexes[j] = word.index * if hs: - * codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) + * codelens[j] = len(word.code) # <<<<<<<<<<<<<< + * codes[j] = np.PyArray_DATA(word.code) + * points[j] = np.PyArray_DATA(word.point) */ - 
__pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 504; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 504; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + __pyx_t_12 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":505 + /* "trunk/gensim/models/doc2vec_inner.pyx":485 * if hs: - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< - * points[i] = np.PyArray_DATA(word.point) - * else: + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< + * points[j] = np.PyArray_DATA(word.point) + * result += 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 505; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 505; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_7 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":506 - * codelens[i] = len(word.code) - * codes[i] = np.PyArray_DATA(word.code) - * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< - * else: - * codelens[i] = 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":486 + * codelens[j] = len(word.code) + * codes[j] = np.PyArray_DATA(word.code) + * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< + * result += 1 + * j = j + 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 506; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - goto __pyx_L8; + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); + if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + goto __pyx_L14; } - /*else*/ { + __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":508 - * points[i] = np.PyArray_DATA(word.point) - * else: - * codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 + * codes[j] = np.PyArray_DATA(word.code) + * points[j] = np.PyArray_DATA(word.point) + * result += 1 # <<<<<<<<<<<<<< + * j = j + 1 * # single randint() call avoids a big thread-sync slowdown */ - (__pyx_v_codelens[__pyx_v_i]) = 1; - } - __pyx_L8:; + __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":509 - * else: - * codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":488 + * points[j] = np.PyArray_DATA(word.point) + * result += 1 + * j = j + 1 # <<<<<<<<<<<<<< * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_v_j = (__pyx_v_j + 1); } - __pyx_L7:; + __pyx_L11_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":511 - * result += 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":490 + * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if 
(unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_14); + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); __pyx_t_15 = NULL; - __pyx_t_10 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_7))) { - __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_7); + __pyx_t_12 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_8))) { + __pyx_t_15 = 
PyMethod_GET_SELF(__pyx_t_8); if (likely(__pyx_t_15)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_7); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); __Pyx_INCREF(__pyx_t_15); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_7, function); - __pyx_t_10 = 1; + __Pyx_DECREF_SET(__pyx_t_8, function); + __pyx_t_12 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; } __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_10, __pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_12, __pyx_int_0); __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_10, __pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_12, __pyx_t_1); __Pyx_GIVEREF(__pyx_t_1); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_10, __pyx_t_14); - __Pyx_GIVEREF(__pyx_t_14); + PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_12, __pyx_t_10); + __Pyx_GIVEREF(__pyx_t_10); __pyx_t_1 = 0; - __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = 0; + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { - __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); 
__pyx_t_10 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + if (likely(PyList_CheckExact(__pyx_t_7)) || PyTuple_CheckExact(__pyx_t_7)) { + __pyx_t_8 = __pyx_t_7; __Pyx_INCREF(__pyx_t_8); __pyx_t_12 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; for (;;) { if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_7))) { - if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_7); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = 
PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { - if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; + if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_7); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_6 = __pyx_t_17(__pyx_t_7); - if (unlikely(!__pyx_t_6)) { + __pyx_t_7 = __pyx_t_17(__pyx_t_8); + if (unlikely(!__pyx_t_7)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 511; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } - __Pyx_GOTREF(__pyx_t_6); + __Pyx_GOTREF(__pyx_t_7); } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_7); + __pyx_t_7 = 0; __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":512 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":491 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 512; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 491; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":511 - * result += 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":490 + * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":514 + /* "trunk/gensim/models/doc2vec_inner.pyx":493 * reduced_windows[i] = item * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< - * for i in range(lbl_length): - * word = lbls[i] + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< + * j = 0 + * for i in range(doclbl_len): */ - __pyx_t_10 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 514; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_t_9 = 10000; - if (((__pyx_t_10 < __pyx_t_9) != 0)) { - __pyx_t_5 = __pyx_t_10; + __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_12 == -1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = 10000; + if (((__pyx_t_12 < __pyx_t_11) != 0)) { + __pyx_t_6 = __pyx_t_12; } else { - __pyx_t_5 = __pyx_t_9; + __pyx_t_6 = __pyx_t_11; } - __pyx_v_lbl_length = ((int)__pyx_t_5); + __pyx_v_doclbl_len = ((int)__pyx_t_6); - /* "trunk/gensim/models/doc2vec_inner.pyx":515 + /* "trunk/gensim/models/doc2vec_inner.pyx":494 * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * for i in range(lbl_length): # <<<<<<<<<<<<<< - * word = lbls[i] - * if word is None: + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * j = 0 # <<<<<<<<<<<<<< + * for i in range(doclbl_len): + * token = doclbl_vocabs[i] */ - __pyx_t_2 = __pyx_v_lbl_length; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + __pyx_v_j = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":516 - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * for i in range(lbl_length): - * word = lbls[i] # <<<<<<<<<<<<<< - * if word is None: - * lbl_codelens[i] = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":495 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * j = 0 + * for i in range(doclbl_len): # <<<<<<<<<<<<<< + * token = doclbl_vocabs[i] + * if token is None: */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 516; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; + __pyx_t_2 = __pyx_v_doclbl_len; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":517 - * for i in range(lbl_length): - * word = lbls[i] - * if word is None: # <<<<<<<<<<<<<< - * lbl_codelens[i] = 0 - * else: + /* "trunk/gensim/models/doc2vec_inner.pyx":496 + * j = 0 + * for i in range(doclbl_len): 
+ * token = doclbl_vocabs[i] # <<<<<<<<<<<<<< + * if token is None: + * doclbl_len = doclbl_len - 1 + */ + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 496; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_8); + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_8); + __pyx_t_8 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":497 + * for i in range(doclbl_len): + * token = doclbl_vocabs[i] + * if token is None: # <<<<<<<<<<<<<< + * doclbl_len = doclbl_len - 1 + * continue # leaving j unchanged */ - __pyx_t_12 = (__pyx_v_word == Py_None); - __pyx_t_4 = (__pyx_t_12 != 0); + __pyx_t_5 = (__pyx_v_token == Py_None); + __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":518 - * word = lbls[i] - * if word is None: - * lbl_codelens[i] = 0 # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":498 + * token = doclbl_vocabs[i] + * if token is None: + * doclbl_len = doclbl_len - 1 # <<<<<<<<<<<<<< + * continue # leaving j unchanged * else: - * lbl_indexes[i] = word.index */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 0; - goto __pyx_L13; - } - /*else*/ { + __pyx_v_doclbl_len = (__pyx_v_doclbl_len - 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":520 - * lbl_codelens[i] = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":499 + * if token is None: + * doclbl_len = doclbl_len - 1 + * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: - * lbl_indexes[i] = word.index # <<<<<<<<<<<<<< - * if hs: - * lbl_codelens[i] = len(word.code) + * doclbl_indexes[j] = token.index */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 520; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_indexes[__pyx_v_i]) = __pyx_t_13; + goto __pyx_L17_continue; + } + /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":521 + /* "trunk/gensim/models/doc2vec_inner.pyx":501 + * continue # leaving j unchanged * else: - * lbl_indexes[i] = word.index - * if hs: # <<<<<<<<<<<<<< - * lbl_codelens[i] = len(word.code) - * else: - */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":522 - * lbl_indexes[i] = word.index - * if hs: - * lbl_codelens[i] = len(word.code) # <<<<<<<<<<<<<< - * else: - * lbl_codelens[i] = 1 + * doclbl_indexes[j] = token.index # <<<<<<<<<<<<<< + * result += 1 + * j = j + 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_5 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 522; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - (__pyx_v_lbl_codelens[__pyx_v_i]) = ((int)__pyx_t_5); - goto __pyx_L14; - } - /*else*/ { + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_token, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 501; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 501; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + (__pyx_v_doclbl_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":524 - * lbl_codelens[i] = len(word.code) - * else: - * 
lbl_codelens[i] = 1 # <<<<<<<<<<<<<< - * result += 1 + /* "trunk/gensim/models/doc2vec_inner.pyx":502 + * else: + * doclbl_indexes[j] = token.index + * result += 1 # <<<<<<<<<<<<<< + * j = j + 1 * */ - (__pyx_v_lbl_codelens[__pyx_v_i]) = 1; - } - __pyx_L14:; + __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":525 - * else: - * lbl_codelens[i] = 1 - * result += 1 # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":503 + * doclbl_indexes[j] = token.index + * result += 1 + * j = j + 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_v_result = (__pyx_v_result + 1); + __pyx_v_j = (__pyx_v_j + 1); } - __pyx_L13:; + __pyx_L17_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":528 + /* "trunk/gensim/models/doc2vec_inner.pyx":506 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window + reduced_windows[i] */ { #ifdef WITH_THREAD PyThreadState *_save; - Py_UNBLOCK_THREADS - #endif - /*try:*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":529 - * # release GIL & train on the sentence - * with nogil: - * for i in range(sentence_len): # <<<<<<<<<<<<<< - * if codelens[i] == 0: - * continue - */ - __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; - - /* "trunk/gensim/models/doc2vec_inner.pyx":530 - * with nogil: - * for i in range(sentence_len): - * if codelens[i] == 0: # <<<<<<<<<<<<<< - * continue - * j = i - window + reduced_windows[i] - */ - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); - if (__pyx_t_4) { + Py_UNBLOCK_THREADS + #endif + /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":531 - * for i in range(sentence_len): - * if codelens[i] == 0: - * continue # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":507 + * # release GIL & train on the sentence + * with nogil: + * for i in range(sentence_len): # 
<<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < 0: */ - goto __pyx_L18_continue; - } + __pyx_t_2 = __pyx_v_sentence_len; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":532 - * if codelens[i] == 0: - * continue + /* "trunk/gensim/models/doc2vec_inner.pyx":508 + * with nogil: + * for i in range(sentence_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< * if j < 0: * j = 0 */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":533 - * continue + /* "trunk/gensim/models/doc2vec_inner.pyx":509 + * for i in range(sentence_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 @@ -5324,7 +5419,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = ((__pyx_v_j < 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":534 + /* "trunk/gensim/models/doc2vec_inner.pyx":510 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5332,11 +5427,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L21; + goto __pyx_L25; } - __pyx_L21:; + __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":535 + /* "trunk/gensim/models/doc2vec_inner.pyx":511 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -5345,7 +5440,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":536 + /* "trunk/gensim/models/doc2vec_inner.pyx":512 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -5355,282 +5450,350 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence 
__pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":537 + /* "trunk/gensim/models/doc2vec_inner.pyx":513 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< * - * # compose l1 (in neu1) + * # compose l1 (in _neu1) & clear _work */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L22; + goto __pyx_L26; } - __pyx_L22:; + __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":540 + /* "trunk/gensim/models/doc2vec_inner.pyx":516 * - * # compose l1 (in neu1) - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * # compose l1 (in _neu1) & clear _work + * memset(_neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memset(__pyx_v__neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":541 - * # compose l1 (in neu1) - * memset(neu1, 0, size * cython.sizeof(REAL_t)) + /* "trunk/gensim/models/doc2vec_inner.pyx":517 + * # compose l1 (in _neu1) & clear _work + * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< * for m in range(j, k): - * if m == i or codelens[m] == 0: + * if m == i: */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":542 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) + /* "trunk/gensim/models/doc2vec_inner.pyx":518 + * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: + * if m == i: * continue */ __pyx_t_18 = __pyx_v_k; for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":543 + /* "trunk/gensim/models/doc2vec_inner.pyx":519 * 
count = 0.0 * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< + * if m == i: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_12 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_12) { - } else { - __pyx_t_4 = __pyx_t_12; - goto __pyx_L26_bool_binop_done; - } - __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_12; - __pyx_L26_bool_binop_done:; + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":544 + /* "trunk/gensim/models/doc2vec_inner.pyx":520 * for m in range(j, k): - * if m == i or codelens[m] == 0: + * if m == i: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L23_continue; + goto __pyx_L27_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":546 + /* "trunk/gensim/models/doc2vec_inner.pyx":522 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): + * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) + * for m in range(doclbl_len): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":547 + /* "trunk/gensim/models/doc2vec_inner.pyx":523 * else: * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: + * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< + * for m in range(doclbl_len): + * count += ONEF */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + 
__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L23_continue:; + __pyx_L27_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":548 + /* "trunk/gensim/models/doc2vec_inner.pyx":524 * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): # <<<<<<<<<<<<<< - * if lbl_codelens[m] == 0: - * continue + * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) + * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * count += ONEF + * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) */ - __pyx_t_18 = __pyx_v_lbl_length; + __pyx_t_18 = __pyx_v_doclbl_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":549 - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = (((__pyx_v_lbl_codelens[__pyx_v_m]) == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":550 - * for m in range(lbl_length): - * if lbl_codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L28_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":552 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":525 + * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) + * for m in range(doclbl_len): + * count += ONEF # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, 
&_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) * if cbow_mean and count > (0.5): */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); + __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":553 - * else: - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":526 + * for m in range(doclbl_len): + * count += ONEF + * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< * if cbow_mean and count > (0.5): * inv_count = ONEF/count */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_lbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - } - __pyx_L28_continue:; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - /* "trunk/gensim/models/doc2vec_inner.pyx":554 - * count += ONEF - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":527 + * count += ONEF + * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
*/ - __pyx_t_12 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_12) { + __pyx_t_5 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_5) { } else { - __pyx_t_4 = __pyx_t_12; - goto __pyx_L32_bool_binop_done; + __pyx_t_4 = __pyx_t_5; + goto __pyx_L33_bool_binop_done; } - __pyx_t_12 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_12; - __pyx_L32_bool_binop_done:; + __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); + __pyx_t_4 = __pyx_t_5; + __pyx_L33_bool_binop_done:; if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":555 - * our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":528 + * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) * if cbow_mean and count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error */ __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/doc2vec_inner.pyx":556 + /* "trunk/gensim/models/doc2vec_inner.pyx":529 * if cbow_mean and count > (0.5): * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * - * if hs: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - goto __pyx_L31; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L32; } - __pyx_L31:; + __pyx_L32:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":530 + * inv_count = ONEF/count + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< + * + * if hs: + */ + memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":558 - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
+ /* "trunk/gensim/models/doc2vec_inner.pyx":532 + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, - * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, + * fast_sentence_dm_hs(points[i], codes[i], codelens[i], + * _neu1, syn1, _alpha, _work, */ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":559 + /* "trunk/gensim/models/doc2vec_inner.pyx":533 * * if hs: - * fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, # <<<<<<<<<<<<<< - * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, - * learn_hidden, learn_lbls, learn_words, syn0locks) + * fast_sentence_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< + * _neu1, syn1, _alpha, _work, + * size, _learn_hidden) */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_lbl_length, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); - goto __pyx_L34; + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); + goto __pyx_L35; } - __pyx_L34:; + __pyx_L35:; - /* "trunk/gensim/models/doc2vec_inner.pyx":562 - * size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, - * learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":536 + * _neu1, syn1, _alpha, _work, + * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< - * next_random = 
fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, - * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, + * next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, + * _neu1, syn1neg, indexes[i], _alpha, _work, */ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":563 - * learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":537 + * size, _learn_hidden) * if negative: - * next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, # <<<<<<<<<<<<<< - * syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, - * next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) + * next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< + * _neu1, syn1neg, indexes[i], _alpha, _work, + * size, _learn_hidden) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_lbl_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v_lbl_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_next_random, __pyx_v_lbl_length, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); - goto __pyx_L35; + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); + goto __pyx_L36; } - __pyx_L35:; - __pyx_L18_continue:; + __pyx_L36:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":542 + * + * # apply accumulated error in work + * if _learn_doclbls: # <<<<<<<<<<<<<< + * for m in range(doclbl_len): + * our_saxpy(&size, 
&_doclbl_locks[doclbl_indexes[m]], _work, + */ + __pyx_t_4 = (__pyx_v__learn_doclbls != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":543 + * # apply accumulated error in work + * if _learn_doclbls: + * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + */ + __pyx_t_18 = __pyx_v_doclbl_len; + for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; + + /* "trunk/gensim/models/doc2vec_inner.pyx":544 + * if _learn_doclbls: + * for m in range(doclbl_len): + * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, # <<<<<<<<<<<<<< + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + * if _learn_words: + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doclbl_locks[(__pyx_v_doclbl_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L37; + } + __pyx_L37:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":546 + * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + * if _learn_words: # <<<<<<<<<<<<<< + * for m in range(j, k): + * if m == i: + */ + __pyx_t_4 = (__pyx_v__learn_words != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":547 + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + * if _learn_words: + * for m in range(j, k): # <<<<<<<<<<<<<< + * if m == i: + * continue + */ + __pyx_t_18 = __pyx_v_k; + for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { + __pyx_v_m = __pyx_t_19; + + /* "trunk/gensim/models/doc2vec_inner.pyx":548 + * if _learn_words: + * for m in range(j, k): + * if m == i: # <<<<<<<<<<<<<< + * continue + * else: + */ 
+ __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":549 + * for m in range(j, k): + * if m == i: + * continue # <<<<<<<<<<<<<< + * else: + * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, + */ + goto __pyx_L41_continue; + } + /*else*/ { + + /* "trunk/gensim/models/doc2vec_inner.pyx":551 + * continue + * else: + * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, # <<<<<<<<<<<<<< + * &_word_vectors[indexes[m] * size], &ONE) + * + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + __pyx_L41_continue:; + } + goto __pyx_L40; + } + __pyx_L40:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":528 + /* "trunk/gensim/models/doc2vec_inner.pyx":506 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< * for i in range(sentence_len): - * if codelens[i] == 0: + * j = i - window + reduced_windows[i] */ /*finally:*/ { /*normal exit:*/{ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L17; + goto __pyx_L22; } - __pyx_L17:; + __pyx_L22:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":567 - * next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":554 + * &_word_vectors[indexes[m] * size], &ONE) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_r = __pyx_t_7; - __pyx_t_7 = 0; + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_r = __pyx_t_8; + __pyx_t_8 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":441 + /* "trunk/gensim/models/doc2vec_inner.pyx":400 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_7); - __Pyx_XDECREF(__pyx_t_14); + __Pyx_XDECREF(__pyx_t_8); + __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_16); __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); @@ -5638,17 +5801,24 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); __Pyx_XDECREF(__pyx_v_item); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_work); + __Pyx_XDECREF(__pyx_v_neu1); + __Pyx_XDECREF(__pyx_v_word_vectors); + __Pyx_XDECREF(__pyx_v_word_locks); + __Pyx_XDECREF(__pyx_v_doclbl_vectors); + __Pyx_XDECREF(__pyx_v_doclbl_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":570 +/* "trunk/gensim/models/doc2vec_inner.pyx":557 * * - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, 
learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* Python wrapper */ @@ -5656,13 +5826,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat = {"train_sentence_dm_concat", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, METH_VARARGS|METH_KEYWORDS, 0}; static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; - PyObject *__pyx_v_sentence = 0; - PyObject *__pyx_v_lbls = 0; + PyObject *__pyx_v_word_vocabs = 0; + PyObject *__pyx_v_doclbl_vocabs = 0; PyObject *__pyx_v_alpha = 0; - PyObject *__pyx_v__work = 0; - PyObject *__pyx_v__neu1 = 0; - PyObject *__pyx_v__learn_words = 0; - PyObject *__pyx_v__learn_lbls = 0; + PyObject *__pyx_v_work = 0; + PyObject *__pyx_v_neu1 = 0; + PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_words = 0; + PyObject *__pyx_v_learn_hidden = 0; + PyObject *__pyx_v_word_vectors = 0; + PyObject *__pyx_v_word_locks = 0; + PyObject *__pyx_v_doclbl_vectors = 0; + PyObject *__pyx_v_doclbl_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -5670,12 +5845,42 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dm_concat (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_sentence,&__pyx_n_s_lbls,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_words,&__pyx_n_s_learn_lbls,0}; - PyObject* values[8] = {0,0,0,0,0,0,0,0}; + static PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; + values[4] = ((PyObject *)Py_None); + values[5] = ((PyObject *)Py_None); + + /* "trunk/gensim/models/doc2vec_inner.pyx":558 + * + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * cdef int hs = model.hs + */ + values[6] = ((PyObject *)Py_True); + values[7] = ((PyObject *)Py_True); + values[8] = ((PyObject *)Py_True); + + /* "trunk/gensim/models/doc2vec_inner.pyx":559 + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * cdef int hs = model.hs + * cdef int negative = model.negative + */ + values[9] = ((PyObject *)Py_None); + values[10] = ((PyObject *)Py_None); + values[11] = ((PyObject *)Py_None); + values[12] = ((PyObject *)Py_None); if (unlikely(__pyx_kwds)) { Py_ssize_t kw_args; const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); switch (pos_args) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); @@ -5693,99 +5898,148 @@ static 
PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_model)) != 0)) kw_args--; else goto __pyx_L5_argtuple_error; case 1: - if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_lbls)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: - if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if 
(kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work); + if (value) { values[4] = value; kw_args--; } } case 5: - if (likely((values[5] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 5); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1); + if (value) { values[5] = value; kw_args--; } } case 6: - if (likely((values[6] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_words)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 6); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + if (value) { values[6] = value; kw_args--; } } case 7: - if (likely((values[7] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_lbls)) != 0)) kw_args--; - else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, 7); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_words); + if (value) { values[7] = value; kw_args--; } + } + case 8: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_hidden); + if (value) { values[8] = value; kw_args--; } + } + case 9: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vectors); + if (value) { values[9] = value; kw_args--; } + } + case 10: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_locks); + if (value) { values[10] = value; kw_args--; } + } + case 11: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + if (value) { values[11] = value; kw_args--; } + 
} + case 12: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + if (value) { values[12] = value; kw_args--; } } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } - } else if (PyTuple_GET_SIZE(__pyx_args) != 8) { - goto __pyx_L5_argtuple_error; } else { - values[0] = PyTuple_GET_ITEM(__pyx_args, 0); - values[1] = PyTuple_GET_ITEM(__pyx_args, 1); - values[2] = PyTuple_GET_ITEM(__pyx_args, 2); - values[3] = PyTuple_GET_ITEM(__pyx_args, 3); - values[4] = PyTuple_GET_ITEM(__pyx_args, 4); - values[5] = PyTuple_GET_ITEM(__pyx_args, 5); - values[6] = PyTuple_GET_ITEM(__pyx_args, 6); - values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 13: values[12] = PyTuple_GET_ITEM(__pyx_args, 12); + case 12: values[11] = PyTuple_GET_ITEM(__pyx_args, 11); + case 11: values[10] = PyTuple_GET_ITEM(__pyx_args, 10); + case 10: values[9] = PyTuple_GET_ITEM(__pyx_args, 9); + case 9: values[8] = PyTuple_GET_ITEM(__pyx_args, 8); + case 8: values[7] = PyTuple_GET_ITEM(__pyx_args, 7); + case 7: values[6] = PyTuple_GET_ITEM(__pyx_args, 6); + case 6: values[5] = PyTuple_GET_ITEM(__pyx_args, 5); + case 5: values[4] = PyTuple_GET_ITEM(__pyx_args, 4); + case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3); + values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } } __pyx_v_model = values[0]; - __pyx_v_sentence = values[1]; - __pyx_v_lbls = values[2]; + 
__pyx_v_word_vocabs = values[1]; + __pyx_v_doclbl_vocabs = values[2]; __pyx_v_alpha = values[3]; - __pyx_v__work = values[4]; - __pyx_v__neu1 = values[5]; - __pyx_v__learn_words = values[6]; - __pyx_v__learn_lbls = values[7]; + __pyx_v_work = values[4]; + __pyx_v_neu1 = values[5]; + __pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_words = values[7]; + __pyx_v_learn_hidden = values[8]; + __pyx_v_word_vectors = values[9]; + __pyx_v_word_locks = values[10]; + __pyx_v_doclbl_vectors = values[11]; + __pyx_v_doclbl_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 1, 8, 8, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_sentence, __pyx_v_lbls, __pyx_v_alpha, __pyx_v__work, __pyx_v__neu1, __pyx_v__learn_words, __pyx_v__learn_lbls); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + + /* "trunk/gensim/models/doc2vec_inner.pyx":557 + * + * + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * 
learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + */ /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_sentence, PyObject *__pyx_v_lbls, PyObject *__pyx_v_alpha, PyObject *__pyx_v__work, PyObject *__pyx_v__neu1, PyObject *__pyx_v__learn_words, PyObject *__pyx_v__learn_lbls) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v_learn_hidden; - int __pyx_v_learn_lbls; - int __pyx_v_learn_words; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1; + int __pyx_v__learn_doclbls; + int __pyx_v__learn_words; + int __pyx_v__learn_hidden; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__neu1; 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_layer1_size; int __pyx_v_vector_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_window_indexes[10000]; int __pyx_v_sentence_len; - int __pyx_v_lbl_length; + int __pyx_v_doclbl_len; int __pyx_v_window; - int __pyx_v_expected_lbl_length; + int __pyx_v_expected_doclbl_len; int __pyx_v_i; int __pyx_v_j; int __pyx_v_k; @@ -5800,8 +6054,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_5numpy_uint32_t *__pyx_v_table; unsigned PY_LONG_LONG __pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn0locks; PyObject *__pyx_v_word = NULL; + PyObject *__pyx_v_token = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -5812,149 +6066,144 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence long __pyx_t_6; Py_ssize_t __pyx_t_7; int __pyx_t_8; - PyObject *__pyx_t_9 = NULL; - unsigned PY_LONG_LONG __pyx_t_10; - int __pyx_t_11; - int __pyx_t_12; - __pyx_t_5numpy_uint32_t __pyx_t_13; - int __pyx_t_14; + int __pyx_t_9; + PyObject *__pyx_t_10 = NULL; + unsigned PY_LONG_LONG __pyx_t_11; + PyObject *__pyx_t_12 = NULL; + int __pyx_t_13; + __pyx_t_5numpy_uint32_t __pyx_t_14; int __pyx_t_15; + int __pyx_t_16; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_dm_concat", 0); - - /* "trunk/gensim/models/doc2vec_inner.pyx":571 - * - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): + __Pyx_INCREF(__pyx_v_work); + __Pyx_INCREF(__pyx_v_neu1); + __Pyx_INCREF(__pyx_v_word_vectors); + __Pyx_INCREF(__pyx_v_word_locks); + __Pyx_INCREF(__pyx_v_doclbl_vectors); + 
__Pyx_INCREF(__pyx_v_doclbl_locks); + + /* "trunk/gensim/models/doc2vec_inner.pyx":560 + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int learn_hidden = True + * cdef int _learn_doclbls = learn_doclbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 571; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 571; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":572 - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): + /* "trunk/gensim/models/doc2vec_inner.pyx":561 + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int learn_hidden = True - * cdef int learn_lbls = _learn_lbls + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 
= __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 561; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 561; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":573 + /* "trunk/gensim/models/doc2vec_inner.pyx":562 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int learn_hidden = True # <<<<<<<<<<<<<< - * cdef int learn_lbls = _learn_lbls - * cdef int learn_words = _learn_words + * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden */ - __pyx_v_learn_hidden = 1; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doclbls = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":574 + /* "trunk/gensim/models/doc2vec_inner.pyx":563 * cdef int negative = model.negative - * cdef int learn_hidden = True - * cdef int learn_lbls = _learn_lbls # <<<<<<<<<<<<<< - * cdef int learn_words = _learn_words - * - */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__learn_lbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_learn_lbls = __pyx_t_2; - - /* "trunk/gensim/models/doc2vec_inner.pyx":575 - * cdef int learn_hidden = True 
- * cdef int learn_lbls = _learn_lbls - * cdef int learn_words = _learn_words # <<<<<<<<<<<<<< + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< + * cdef int _learn_hidden = learn_hidden * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v__learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_learn_words = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 563; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":577 - * cdef int learn_words = _learn_words + /* "trunk/gensim/models/doc2vec_inner.pyx":564 + * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_words = learn_words + * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * - * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< - * cdef REAL_t *work - * cdef REAL_t *neu1 + * cdef REAL_t *_word_vectors */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 564; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":580 - * cdef REAL_t *work - * cdef REAL_t *neu1 + /* "trunk/gensim/models/doc2vec_inner.pyx":572 + * cdef REAL_t *_work + * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":581 - * cdef REAL_t *neu1 + /* "trunk/gensim/models/doc2vec_inner.pyx":573 + * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size # <<<<<<<<<<<<<< * cdef int vector_size = model.vector_size * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 
= 0; __pyx_v_layer1_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":582 + /* "trunk/gensim/models/doc2vec_inner.pyx":574 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_vector_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":590 + /* "trunk/gensim/models/doc2vec_inner.pyx":582 * cdef int sentence_len - * cdef int lbl_length + * cdef int doclbl_len * cdef int window = model.window # <<<<<<<<<<<<<< - * cdef int expected_lbl_length = model.dm_lbl_count + * cdef int expected_doclbl_len = model.dm_lbl_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
__Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 590; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":591 - * cdef int lbl_length + /* "trunk/gensim/models/doc2vec_inner.pyx":583 + * cdef int doclbl_len * cdef int window = model.window - * cdef int expected_lbl_length = model.dm_lbl_count # <<<<<<<<<<<<<< + * cdef int expected_doclbl_len = model.dm_lbl_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 591; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_expected_lbl_length = __pyx_t_2; + __pyx_v_expected_doclbl_len = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":594 + /* "trunk/gensim/models/doc2vec_inner.pyx":586 * * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< @@ -5963,57 +6212,57 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":595 + /* "trunk/gensim/models/doc2vec_inner.pyx":587 * cdef int i, j, k, m, n * cdef long result = 0 * cdef int null_word_index = model.vocab['\0'].index # <<<<<<<<<<<<<< * * # For hierarchical softmax */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 595; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_null_word_index = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":610 + /* "trunk/gensim/models/doc2vec_inner.pyx":600 * cdef unsigned long long next_random * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) # <<<<<<<<<<<<<< - * if lbl_length != expected_lbl_length: + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< + * if doclbl_len != expected_doclbl_len: * return 0 # skip doc without expected nmber of lbls */ - __pyx_t_5 = PyObject_Length(__pyx_v_lbls); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; } else { __pyx_t_7 = __pyx_t_6; } - __pyx_v_lbl_length = ((int)__pyx_t_7); + __pyx_v_doclbl_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":611 + /* "trunk/gensim/models/doc2vec_inner.pyx":601 * - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * if lbl_length != expected_lbl_length: # <<<<<<<<<<<<<< + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * if doclbl_len != expected_doclbl_len: # <<<<<<<<<<<<<< * return 0 # skip doc without expected nmber of lbls * */ - __pyx_t_8 = ((__pyx_v_lbl_length != __pyx_v_expected_lbl_length) != 0); + __pyx_t_8 = ((__pyx_v_doclbl_len != __pyx_v_expected_doclbl_len) != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":612 - * lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - * if lbl_length != expected_lbl_length: + /* "trunk/gensim/models/doc2vec_inner.pyx":602 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * if doclbl_len != expected_doclbl_len: * return 0 # skip doc without expected nmber of lbls 
# <<<<<<<<<<<<<< * - * if hs: + * # default vectors, locks from syn0 */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -6021,8 +6270,152 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence goto __pyx_L0; } + /* "trunk/gensim/models/doc2vec_inner.pyx":605 + * + * # default vectors, locks from syn0 + * if word_vectors is None: # <<<<<<<<<<<<<< + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + */ + __pyx_t_8 = (__pyx_v_word_vectors == Py_None); + __pyx_t_9 = (__pyx_t_8 != 0); + if (__pyx_t_9) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":606 + * # default vectors, locks from syn0 + * if word_vectors is None: + * word_vectors = model.syn0 # <<<<<<<<<<<<<< + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L4; + } + __pyx_L4:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":607 + * if word_vectors is None: + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 + */ + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 607; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":608 + * word_vectors = model.syn0 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: # <<<<<<<<<<<<<< + * doclbl_vectors = model.syn0 + * _doclbl_vectors 
= (np.PyArray_DATA(doclbl_vectors)) + */ + __pyx_t_9 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_8 = (__pyx_t_9 != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":609 + * _word_vectors = (np.PyArray_DATA(word_vectors)) + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is None: + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L5; + } + __pyx_L5:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":610 + * if doclbl_vectors is None: + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * if word_locks is None: + * word_locks = model.syn0locks + */ + if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":611 + * doclbl_vectors = model.syn0 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is None: # <<<<<<<<<<<<<< + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) + */ + __pyx_t_8 = (__pyx_v_word_locks == Py_None); + __pyx_t_9 = (__pyx_t_8 != 0); + if (__pyx_t_9) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":612 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if word_locks is None: + * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: 
+ */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L6; + } + __pyx_L6:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":613 + * if word_locks is None: + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks + */ + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); + /* "trunk/gensim/models/doc2vec_inner.pyx":614 - * return 0 # skip doc without expected nmber of lbls + * word_locks = model.syn0locks + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: # <<<<<<<<<<<<<< + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + */ + __pyx_t_9 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_8 = (__pyx_t_9 != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":615 + * _word_locks = (np.PyArray_DATA(word_locks)) + * if doclbl_locks is None: + * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __pyx_t_1 = 0; + goto __pyx_L7; + } + __pyx_L7:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":616 + * if 
doclbl_locks is None: + * doclbl_locks = model.syn0locks + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * + * if hs: + */ + if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + + /* "trunk/gensim/models/doc2vec_inner.pyx":618 + * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -6031,23 +6424,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":615 + /* "trunk/gensim/models/doc2vec_inner.pyx":619 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); 
__pyx_t_1 = 0; - goto __pyx_L4; + goto __pyx_L8; } - __pyx_L4:; + __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":617 + /* "trunk/gensim/models/doc2vec_inner.pyx":621 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6057,129 +6450,204 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":618 + /* "trunk/gensim/models/doc2vec_inner.pyx":622 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":619 + /* "trunk/gensim/models/doc2vec_inner.pyx":623 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 
= __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":620 + /* "trunk/gensim/models/doc2vec_inner.pyx":624 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 620; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 620; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":621 + /* "trunk/gensim/models/doc2vec_inner.pyx":625 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, 
NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_9); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_9, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = 
__Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_9); + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_9); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_10 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_10 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_v_next_random = __pyx_t_10; - goto __pyx_L5; + __pyx_v_next_random = __pyx_t_11; + goto __pyx_L9; } - __pyx_L5:; + __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":624 + /* "trunk/gensim/models/doc2vec_inner.pyx":628 * * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< - * neu1 = np.PyArray_DATA(_neu1) - * + * if work is None: # <<<<<<<<<<<<<< + * work = 
zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); + __pyx_t_8 = (__pyx_v_work == Py_None); + __pyx_t_9 = (__pyx_t_8 != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":625 + /* "trunk/gensim/models/doc2vec_inner.pyx":629 * # convert Python structures to primitive types, so we can release the GIL - * work = np.PyArray_DATA(_work) - * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< - * - * # optional locking of some vactors against backprop-learnind + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< + * _work = np.PyArray_DATA(work) + * if neu1 is None: + */ + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); + __Pyx_GIVEREF(__pyx_t_10); + __pyx_t_10 = 0; + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; + __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_12); + __pyx_t_12 = 0; + goto __pyx_L10; + } + __pyx_L10:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":630 + * if work is None: + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":628 + /* "trunk/gensim/models/doc2vec_inner.pyx":631 + * work = zeros(model.layer1_size, dtype=REAL) + * _work = np.PyArray_DATA(work) + * if neu1 is None: # <<<<<<<<<<<<<< + * neu1 = zeros(model.layer1_size, dtype=REAL) + * _neu1 = np.PyArray_DATA(neu1) + */ + __pyx_t_9 = 
(__pyx_v_neu1 == Py_None); + __pyx_t_8 = (__pyx_t_9 != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":632 + * _work = np.PyArray_DATA(work) + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< + * _neu1 = np.PyArray_DATA(neu1) * - * # optional locking of some vactors against backprop-learnind - * syn0locks = np.PyArray_DATA(model.syn0locks) # <<<<<<<<<<<<<< + */ + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_12); + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); + __Pyx_GIVEREF(__pyx_t_10); + __pyx_t_10 = 0; + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 
0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_4); + __pyx_t_4 = 0; + goto __pyx_L11; + } + __pyx_L11:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":633 + * if neu1 is None: + * neu1 = zeros(model.layer1_size, dtype=REAL) + * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v_syn0locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 633; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":630 - * syn0locks = np.PyArray_DATA(model.syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":635 + * _neu1 = np.PyArray_DATA(neu1) * - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 * for i in range(sentence_len): */ - __pyx_t_7 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = 
PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 635; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_7 < __pyx_t_6) != 0)) { __pyx_t_5 = __pyx_t_7; @@ -6188,50 +6656,50 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":631 + /* "trunk/gensim/models/doc2vec_inner.pyx":636 * - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< * for i in range(sentence_len): - * word = sentence[i] + * word = word_vocabs[i] */ __pyx_v_j = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":632 - * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + /* "trunk/gensim/models/doc2vec_inner.pyx":637 + * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< - * word = sentence[i] + * word = word_vocabs[i] * if word is None: */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":633 + /* "trunk/gensim/models/doc2vec_inner.pyx":638 * j = 0 * for i in range(sentence_len): - * word = sentence[i] # <<<<<<<<<<<<<< + * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: * # shrink sentence to leave out word */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 633; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 638; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":634 + /* "trunk/gensim/models/doc2vec_inner.pyx":639 * for i in range(sentence_len): - * word = sentence[i] + * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< * # shrink sentence to leave out word * sentence_len = sentence_len - 1 */ __pyx_t_8 = (__pyx_v_word == Py_None); - __pyx_t_12 = (__pyx_t_8 != 0); - if (__pyx_t_12) { + __pyx_t_9 = (__pyx_t_8 != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":636 + /* "trunk/gensim/models/doc2vec_inner.pyx":641 * if word is None: * # shrink sentence to leave out word * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< @@ -6240,83 +6708,83 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":637 + /* "trunk/gensim/models/doc2vec_inner.pyx":642 * # shrink sentence to leave out word * sentence_len = sentence_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: * indexes[j] = word.index */ - goto __pyx_L6_continue; + goto __pyx_L12_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":639 + /* "trunk/gensim/models/doc2vec_inner.pyx":644 * continue # leaving j unchanged * else: * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[j] = len(word.code) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_13; + (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":640 + /* "trunk/gensim/models/doc2vec_inner.pyx":645 * else: * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) */ - __pyx_t_12 = (__pyx_v_hs != 0); - if (__pyx_t_12) { + __pyx_t_9 = (__pyx_v_hs != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":641 + /* "trunk/gensim/models/doc2vec_inner.pyx":646 * indexes[j] = word.index * if hs: * codelens[j] = len(word.code) # <<<<<<<<<<<<<< * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 646; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 646; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":642 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":647 * if hs: * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[j] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":643 + /* "trunk/gensim/models/doc2vec_inner.pyx":648 * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[j] = 1 */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 643; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 643; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - goto __pyx_L9; + goto __pyx_L15; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":645 + /* "trunk/gensim/models/doc2vec_inner.pyx":650 * points[j] = np.PyArray_DATA(word.point) * else: * codelens[j] = 1 # <<<<<<<<<<<<<< @@ -6325,9 +6793,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ (__pyx_v_codelens[__pyx_v_j]) = 1; } - __pyx_L9:; + __pyx_L15:; - /* "trunk/gensim/models/doc2vec_inner.pyx":646 + /* "trunk/gensim/models/doc2vec_inner.pyx":651 * else: * codelens[j] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6336,58 +6804,58 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":647 + /* "trunk/gensim/models/doc2vec_inner.pyx":652 * codelens[j] = 1 * result += 1 * j = j + 1 # <<<<<<<<<<<<<< * - * for i in range(lbl_length): + * for i in range(doclbl_len): */ __pyx_v_j = (__pyx_v_j + 1); } - __pyx_L6_continue:; + __pyx_L12_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":649 + /* "trunk/gensim/models/doc2vec_inner.pyx":654 * j = j + 1 * - * for i in range(lbl_length): # <<<<<<<<<<<<<< - * word = lbls[i] - * if word is None: + * for i in range(doclbl_len): # <<<<<<<<<<<<<< + * token = doclbl_vocabs[i] + * if token is None: */ - __pyx_t_2 = __pyx_v_lbl_length; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + __pyx_t_2 = __pyx_v_doclbl_len; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":650 + /* "trunk/gensim/models/doc2vec_inner.pyx":655 * - * for i in range(lbl_length): - * word = lbls[i] # <<<<<<<<<<<<<< - * if word is None: - * # no support for missing lbls where expected; skip sentence + * for i in range(doclbl_len): + * token = doclbl_vocabs[i] # <<<<<<<<<<<<<< + * if token is None: + * # no current support for missing doclbls where expected; skip sentence */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_lbls, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 650; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); + __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":651 - * for i in range(lbl_length): - * word = lbls[i] - * if word is None: # <<<<<<<<<<<<<< - * # no support for missing lbls where expected; skip sentence + /* "trunk/gensim/models/doc2vec_inner.pyx":656 + * for i in range(doclbl_len): + * token = doclbl_vocabs[i] + * if token is None: # <<<<<<<<<<<<<< + * # no current support for missing doclbls where expected; skip sentence * return 0 */ - __pyx_t_12 = (__pyx_v_word == Py_None); - __pyx_t_8 = (__pyx_t_12 != 0); + __pyx_t_9 = (__pyx_v_token == Py_None); + __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":653 - * if word is None: - * # no support for missing lbls where expected; skip sentence + /* "trunk/gensim/models/doc2vec_inner.pyx":658 + * if token is None: + * # no current support for missing doclbls where expected; skip sentence * return 0 # <<<<<<<<<<<<<< * else: - * window_indexes[i] = word.index + * 
doclbl_indexes[i] = token.index */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -6396,22 +6864,22 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":655 + /* "trunk/gensim/models/doc2vec_inner.pyx":660 * return 0 * else: - * window_indexes[i] = word.index # <<<<<<<<<<<<<< + * doclbl_indexes[i] = token.index # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_token, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 660; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 660; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - (__pyx_v_window_indexes[__pyx_v_i]) = __pyx_t_13; + (__pyx_v_doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":656 + /* "trunk/gensim/models/doc2vec_inner.pyx":661 * else: - * window_indexes[i] = word.index + * doclbl_indexes[i] = token.index * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence @@ -6420,7 +6888,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":659 + /* "trunk/gensim/models/doc2vec_inner.pyx":664 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6434,7 +6902,7 @@ static 
PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":660 + /* "trunk/gensim/models/doc2vec_inner.pyx":665 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6442,10 +6910,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * k = i + window + 1 # past sentence end OK: will pad with null word */ __pyx_t_2 = __pyx_v_sentence_len; - for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { - __pyx_v_i = __pyx_t_11; + for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { + __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":661 + /* "trunk/gensim/models/doc2vec_inner.pyx":666 * with nogil: * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< @@ -6454,7 +6922,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "trunk/gensim/models/doc2vec_inner.pyx":662 + /* "trunk/gensim/models/doc2vec_inner.pyx":667 * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word * k = i + window + 1 # past sentence end OK: will pad with null word # <<<<<<<<<<<<<< @@ -6463,29 +6931,50 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":665 + /* "trunk/gensim/models/doc2vec_inner.pyx":670 * * # compose l1 & clear work - * n = lbl_length # <<<<<<<<<<<<<< + * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * # doc vector(s) + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + */ + __pyx_t_15 = __pyx_v_doclbl_len; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { + __pyx_v_m = __pyx_t_16; + + /* "trunk/gensim/models/doc2vec_inner.pyx":672 + * for m in 
range(doclbl_len): + * # doc vector(s) + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], # <<<<<<<<<<<<<< + * vector_size * cython.sizeof(REAL_t)) + * n = 0 + */ + memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + } + + /* "trunk/gensim/models/doc2vec_inner.pyx":674 + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + * vector_size * cython.sizeof(REAL_t)) + * n = 0 # <<<<<<<<<<<<<< * for m in range(j, k): - * if m == i: + * # word vectors in window */ - __pyx_v_n = __pyx_v_lbl_length; + __pyx_v_n = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":666 - * # compose l1 & clear work - * n = lbl_length + /* "trunk/gensim/models/doc2vec_inner.pyx":675 + * vector_size * cython.sizeof(REAL_t)) + * n = 0 * for m in range(j, k): # <<<<<<<<<<<<<< + * # word vectors in window * if m == i: - * continue */ - __pyx_t_14 = __pyx_v_k; - for (__pyx_t_15 = __pyx_v_j; __pyx_t_15 < __pyx_t_14; __pyx_t_15+=1) { - __pyx_v_m = __pyx_t_15; + __pyx_t_15 = __pyx_v_k; + for (__pyx_t_16 = __pyx_v_j; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { + __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":667 - * n = lbl_length + /* "trunk/gensim/models/doc2vec_inner.pyx":677 * for m in range(j, k): + * # word vectors in window * if m == i: # <<<<<<<<<<<<<< * continue * if m < 0 or m >= sentence_len: @@ -6493,35 +6982,35 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":668 - * for m in range(j, k): + /* "trunk/gensim/models/doc2vec_inner.pyx":678 + * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< * if m < 0 or m >= sentence_len: * window_indexes[n] = null_word_index 
*/ - goto __pyx_L18_continue; + goto __pyx_L26_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":669 + /* "trunk/gensim/models/doc2vec_inner.pyx":679 * if m == i: * continue * if m < 0 or m >= sentence_len: # <<<<<<<<<<<<<< * window_indexes[n] = null_word_index * else: */ - __pyx_t_12 = ((__pyx_v_m < 0) != 0); - if (!__pyx_t_12) { + __pyx_t_9 = ((__pyx_v_m < 0) != 0); + if (!__pyx_t_9) { } else { - __pyx_t_8 = __pyx_t_12; - goto __pyx_L22_bool_binop_done; + __pyx_t_8 = __pyx_t_9; + goto __pyx_L30_bool_binop_done; } - __pyx_t_12 = ((__pyx_v_m >= __pyx_v_sentence_len) != 0); - __pyx_t_8 = __pyx_t_12; - __pyx_L22_bool_binop_done:; + __pyx_t_9 = ((__pyx_v_m >= __pyx_v_sentence_len) != 0); + __pyx_t_8 = __pyx_t_9; + __pyx_L30_bool_binop_done:; if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":670 + /* "trunk/gensim/models/doc2vec_inner.pyx":680 * continue * if m < 0 or m >= sentence_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< @@ -6529,109 +7018,177 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * window_indexes[n] = indexes[m] */ (__pyx_v_window_indexes[__pyx_v_n]) = __pyx_v_null_word_index; - goto __pyx_L21; + goto __pyx_L29; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":672 + /* "trunk/gensim/models/doc2vec_inner.pyx":682 * window_indexes[n] = null_word_index * else: * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< * n = n + 1 - * for m in range(lbl_length + (2 * window)): + * for m in range(2 * window): */ (__pyx_v_window_indexes[__pyx_v_n]) = (__pyx_v_indexes[__pyx_v_m]); } - __pyx_L21:; + __pyx_L29:; - /* "trunk/gensim/models/doc2vec_inner.pyx":673 + /* "trunk/gensim/models/doc2vec_inner.pyx":683 * else: * window_indexes[n] = indexes[m] * n = n + 1 # <<<<<<<<<<<<<< - * for m in range(lbl_length + (2 * window)): - * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) + * for m in range(2 * window): + * memcpy(&_neu1[(doclbl_len 
+ m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], */ __pyx_v_n = (__pyx_v_n + 1); - __pyx_L18_continue:; + __pyx_L26_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":674 + /* "trunk/gensim/models/doc2vec_inner.pyx":684 * window_indexes[n] = indexes[m] * n = n + 1 - * for m in range(lbl_length + (2 * window)): # <<<<<<<<<<<<<< - * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + * for m in range(2 * window): # <<<<<<<<<<<<<< + * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + * vector_size * cython.sizeof(REAL_t)) */ - __pyx_t_6 = (__pyx_v_lbl_length + (2 * __pyx_v_window)); - for (__pyx_t_14 = 0; __pyx_t_14 < __pyx_t_6; __pyx_t_14+=1) { - __pyx_v_m = __pyx_t_14; + __pyx_t_6 = (2 * __pyx_v_window); + for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { + __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":675 + /* "trunk/gensim/models/doc2vec_inner.pyx":685 * n = n + 1 - * for m in range(lbl_length + (2 * window)): - * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) - * + * for m in range(2 * window): + * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< + * vector_size * cython.sizeof(REAL_t)) + * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error */ - memcpy((&(__pyx_v_neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v_syn0[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memcpy((&(__pyx_v__neu1[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), 
(&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":676 - * for m in range(lbl_length + (2 * window)): - * memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/doc2vec_inner.pyx":687 + * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + * vector_size * cython.sizeof(REAL_t)) + * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< * * if hs: */ - memset(__pyx_v_work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memset(__pyx_v__work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":678 - * memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + /* "trunk/gensim/models/doc2vec_inner.pyx":689 + * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, - * layer1_size, vector_size, window_indexes, _alpha, + * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], + * _neu1, syn1, _alpha, _work, */ __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":679 + /* "trunk/gensim/models/doc2vec_inner.pyx":690 * * if hs: - * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, # <<<<<<<<<<<<<< - * layer1_size, vector_size, window_indexes, _alpha, - * work, lbl_length, window, + * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< + * _neu1, syn1, _alpha, _work, + * layer1_size, vector_size, _learn_hidden) */ - 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v_window_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_lbl_length, __pyx_v_window, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); - goto __pyx_L26; + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); + goto __pyx_L34; } - __pyx_L26:; + __pyx_L34:; - /* "trunk/gensim/models/doc2vec_inner.pyx":683 - * work, lbl_length, window, - * learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":693 + * _neu1, syn1, _alpha, _work, + * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, - * layer1_size, vector_size, window_indexes, _alpha, + * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, + * _neu1, syn1neg, indexes[i], _alpha, _work, */ __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":684 - * learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":694 + * layer1_size, vector_size, _learn_hidden) * if negative: - * next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, # <<<<<<<<<<<<<< - * layer1_size, vector_size, window_indexes, _alpha, - * work, indexes[i], lbl_length, window, + * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< + * _neu1, syn1neg, indexes[i], _alpha, _work, + * layer1_size, vector_size, _learn_hidden) + */ + 
__pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); + goto __pyx_L35; + } + __pyx_L35:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":698 + * layer1_size, vector_size, _learn_hidden) + * + * if _learn_doclbls: # <<<<<<<<<<<<<< + * for m in range(doclbl_len): + * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], + */ + __pyx_t_8 = (__pyx_v__learn_doclbls != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":699 + * + * if _learn_doclbls: + * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + */ + __pyx_t_15 = __pyx_v_doclbl_len; + for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { + __pyx_v_m = __pyx_t_16; + + /* "trunk/gensim/models/doc2vec_inner.pyx":700 + * if _learn_doclbls: + * for m in range(doclbl_len): + * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + * if _learn_words: + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doclbl_locks[(__pyx_v_doclbl_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L36; + } + __pyx_L36:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":702 + * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], + * &ONE, 
&_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + * if _learn_words: # <<<<<<<<<<<<<< + * for m in range(2 * window): + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + */ + __pyx_t_8 = (__pyx_v__learn_words != 0); + if (__pyx_t_8) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":703 + * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + * if _learn_words: + * for m in range(2 * window): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) + */ + __pyx_t_6 = (2 * __pyx_v_window); + for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { + __pyx_v_m = __pyx_t_15; + + /* "trunk/gensim/models/doc2vec_inner.pyx":704 + * if _learn_words: + * for m in range(2 * window): + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], # <<<<<<<<<<<<<< + * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) + * */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v_window_indexes, __pyx_v__alpha, __pyx_v_work, (__pyx_v_indexes[__pyx_v_i]), __pyx_v_lbl_length, __pyx_v_window, __pyx_v_next_random, __pyx_v_learn_hidden, __pyx_v_learn_lbls, __pyx_v_learn_words, __pyx_v_syn0locks); - goto __pyx_L27; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + } + goto __pyx_L39; } - 
__pyx_L27:; + __pyx_L39:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":659 + /* "trunk/gensim/models/doc2vec_inner.pyx":664 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6643,49 +7200,57 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L15; + goto __pyx_L21; } - __pyx_L15:; + __pyx_L21:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":689 - * next_random, learn_hidden, learn_lbls, learn_words, syn0locks) + /* "trunk/gensim/models/doc2vec_inner.pyx":707 + * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 689; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 707; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":570 + /* "trunk/gensim/models/doc2vec_inner.pyx":557 * * - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); __Pyx_XDECREF(__pyx_t_4); - __Pyx_XDECREF(__pyx_t_9); + __Pyx_XDECREF(__pyx_t_10); + __Pyx_XDECREF(__pyx_t_12); __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, 
__pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); + __Pyx_XDECREF(__pyx_v_token); + __Pyx_XDECREF(__pyx_v_work); + __Pyx_XDECREF(__pyx_v_neu1); + __Pyx_XDECREF(__pyx_v_word_vectors); + __Pyx_XDECREF(__pyx_v_word_locks); + __Pyx_XDECREF(__pyx_v_doclbl_vectors); + __Pyx_XDECREF(__pyx_v_doclbl_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":692 +/* "trunk/gensim/models/doc2vec_inner.pyx":710 * * * def init(): # <<<<<<<<<<<<<< @@ -6724,7 +7289,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":702 + /* "trunk/gensim/models/doc2vec_inner.pyx":720 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6734,7 +7299,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":703 + /* "trunk/gensim/models/doc2vec_inner.pyx":721 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6744,7 +7309,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":704 + /* "trunk/gensim/models/doc2vec_inner.pyx":722 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6753,7 +7318,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/doc2vec_inner.pyx":705 + /* "trunk/gensim/models/doc2vec_inner.pyx":723 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6762,7 +7327,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_size = 1; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":728 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6772,7 +7337,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":711 + /* "trunk/gensim/models/doc2vec_inner.pyx":729 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6781,7 +7346,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/doc2vec_inner.pyx":712 + /* "trunk/gensim/models/doc2vec_inner.pyx":730 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6791,7 +7356,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":715 + /* "trunk/gensim/models/doc2vec_inner.pyx":733 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6800,7 +7365,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, 
(&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":716 + /* "trunk/gensim/models/doc2vec_inner.pyx":734 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -6809,7 +7374,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/doc2vec_inner.pyx":717 + /* "trunk/gensim/models/doc2vec_inner.pyx":735 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6819,7 +7384,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":718 + /* "trunk/gensim/models/doc2vec_inner.pyx":736 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -6828,7 +7393,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double; - /* "trunk/gensim/models/doc2vec_inner.pyx":719 + /* "trunk/gensim/models/doc2vec_inner.pyx":737 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6837,7 +7402,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":720 + /* "trunk/gensim/models/doc2vec_inner.pyx":738 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -6850,7 +7415,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":721 + /* "trunk/gensim/models/doc2vec_inner.pyx":739 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -6860,7 +7425,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":722 + /* "trunk/gensim/models/doc2vec_inner.pyx":740 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -6869,7 +7434,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float; - /* "trunk/gensim/models/doc2vec_inner.pyx":723 + /* "trunk/gensim/models/doc2vec_inner.pyx":741 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -6878,7 +7443,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":724 + /* "trunk/gensim/models/doc2vec_inner.pyx":742 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -6892,7 +7457,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":728 + /* "trunk/gensim/models/doc2vec_inner.pyx":746 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -6901,7 +7466,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":729 + /* "trunk/gensim/models/doc2vec_inner.pyx":747 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -6910,7 +7475,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":730 + /* "trunk/gensim/models/doc2vec_inner.pyx":748 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -6923,7 +7488,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":692 + /* "trunk/gensim/models/doc2vec_inner.pyx":710 * * * def init(): # <<<<<<<<<<<<<< @@ -8978,13 +9543,22 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, + {&__pyx_n_s_context_token, __pyx_k_context_token, sizeof(__pyx_k_context_token), 0, 0, 1, 1}, {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, {&__pyx_n_s_cpointer, __pyx_k_cpointer, sizeof(__pyx_k_cpointer), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, {&__pyx_n_s_dm_lbl_count, __pyx_k_dm_lbl_count, sizeof(__pyx_k_dm_lbl_count), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_indexes, __pyx_k_doclbl_indexes, sizeof(__pyx_k_doclbl_indexes), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_len, __pyx_k_doclbl_len, sizeof(__pyx_k_doclbl_len), 
0, 0, 1, 1}, + {&__pyx_n_s_doclbl_locks, __pyx_k_doclbl_locks, sizeof(__pyx_k_doclbl_locks), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_locks_2, __pyx_k_doclbl_locks_2, sizeof(__pyx_k_doclbl_locks_2), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_vectors, __pyx_k_doclbl_vectors, sizeof(__pyx_k_doclbl_vectors), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_vectors_2, __pyx_k_doclbl_vectors_2, sizeof(__pyx_k_doclbl_vectors_2), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_vocabs, __pyx_k_doclbl_vocabs, sizeof(__pyx_k_doclbl_vocabs), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, - {&__pyx_n_s_expected_lbl_length, __pyx_k_expected_lbl_length, sizeof(__pyx_k_expected_lbl_length), 0, 0, 1, 1}, + {&__pyx_n_s_expected_doclbl_len, __pyx_k_expected_doclbl_len, sizeof(__pyx_k_expected_doclbl_len), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_hs, __pyx_k_hs, sizeof(__pyx_k_hs), 0, 0, 1, 1}, @@ -8998,15 +9572,10 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_j, __pyx_k_j, sizeof(__pyx_k_j), 0, 0, 1, 1}, {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, {&__pyx_n_s_layer1_size, __pyx_k_layer1_size, sizeof(__pyx_k_layer1_size), 0, 0, 1, 1}, - {&__pyx_n_s_lbl_codelens, __pyx_k_lbl_codelens, sizeof(__pyx_k_lbl_codelens), 0, 0, 1, 1}, - {&__pyx_n_s_lbl_codes, __pyx_k_lbl_codes, sizeof(__pyx_k_lbl_codes), 0, 0, 1, 1}, - {&__pyx_n_s_lbl_indexes, __pyx_k_lbl_indexes, sizeof(__pyx_k_lbl_indexes), 0, 0, 1, 1}, - {&__pyx_n_s_lbl_length, __pyx_k_lbl_length, sizeof(__pyx_k_lbl_length), 0, 0, 1, 1}, - {&__pyx_n_s_lbl_points, __pyx_k_lbl_points, sizeof(__pyx_k_lbl_points), 0, 0, 1, 1}, - {&__pyx_n_s_lbls, __pyx_k_lbls, sizeof(__pyx_k_lbls), 0, 0, 1, 1}, + {&__pyx_n_s_learn_doclbls, 
__pyx_k_learn_doclbls, sizeof(__pyx_k_learn_doclbls), 0, 0, 1, 1}, + {&__pyx_n_s_learn_doclbls_2, __pyx_k_learn_doclbls_2, sizeof(__pyx_k_learn_doclbls_2), 0, 0, 1, 1}, {&__pyx_n_s_learn_hidden, __pyx_k_learn_hidden, sizeof(__pyx_k_learn_hidden), 0, 0, 1, 1}, - {&__pyx_n_s_learn_lbls, __pyx_k_learn_lbls, sizeof(__pyx_k_learn_lbls), 0, 0, 1, 1}, - {&__pyx_n_s_learn_lbls_2, __pyx_k_learn_lbls_2, sizeof(__pyx_k_learn_lbls_2), 0, 0, 1, 1}, + {&__pyx_n_s_learn_hidden_2, __pyx_k_learn_hidden_2, sizeof(__pyx_k_learn_hidden_2), 0, 0, 1, 1}, {&__pyx_n_s_learn_words, __pyx_k_learn_words, sizeof(__pyx_k_learn_words), 0, 0, 1, 1}, {&__pyx_n_s_learn_words_2, __pyx_k_learn_words_2, sizeof(__pyx_k_learn_words_2), 0, 0, 1, 1}, {&__pyx_n_s_m, __pyx_k_m, sizeof(__pyx_k_m), 0, 0, 1, 1}, @@ -9025,6 +9594,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_p_res, __pyx_k_p_res, sizeof(__pyx_k_p_res), 0, 0, 1, 1}, {&__pyx_n_s_point, __pyx_k_point, sizeof(__pyx_k_point), 0, 0, 1, 1}, {&__pyx_n_s_points, __pyx_k_points, sizeof(__pyx_k_points), 0, 0, 1, 1}, + {&__pyx_n_s_predict_word, __pyx_k_predict_word, sizeof(__pyx_k_predict_word), 0, 0, 1, 1}, {&__pyx_n_s_randint, __pyx_k_randint, sizeof(__pyx_k_randint), 0, 0, 1, 1}, {&__pyx_n_s_random, __pyx_k_random, sizeof(__pyx_k_random), 0, 0, 1, 1}, {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, @@ -9034,7 +9604,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_scipy_linalg_blas, __pyx_k_scipy_linalg_blas, sizeof(__pyx_k_scipy_linalg_blas), 0, 0, 1, 1}, {&__pyx_n_s_scopy, __pyx_k_scopy, sizeof(__pyx_k_scopy), 0, 0, 1, 1}, {&__pyx_n_s_sdot, __pyx_k_sdot, sizeof(__pyx_k_sdot), 0, 0, 1, 1}, - {&__pyx_n_s_sentence, __pyx_k_sentence, sizeof(__pyx_k_sentence), 0, 0, 1, 1}, {&__pyx_n_s_sentence_len, __pyx_k_sentence_len, sizeof(__pyx_k_sentence_len), 0, 0, 1, 1}, {&__pyx_n_s_size, __pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, {&__pyx_n_s_snrm2, __pyx_k_snrm2, sizeof(__pyx_k_snrm2), 0, 0, 1, 
1}, @@ -9046,31 +9615,34 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_table, __pyx_k_table, sizeof(__pyx_k_table), 0, 0, 1, 1}, {&__pyx_n_s_table_len, __pyx_k_table_len, sizeof(__pyx_k_table_len), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_tl, __pyx_k_tl, sizeof(__pyx_k_tl), 0, 0, 1, 1}, - {&__pyx_n_s_train_lbls, __pyx_k_train_lbls, sizeof(__pyx_k_train_lbls), 0, 0, 1, 1}, - {&__pyx_n_s_train_lbls_2, __pyx_k_train_lbls_2, sizeof(__pyx_k_train_lbls_2), 0, 0, 1, 1}, + {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dbow, __pyx_k_train_sentence_dbow, sizeof(__pyx_k_train_sentence_dbow), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dm, __pyx_k_train_sentence_dm, sizeof(__pyx_k_train_sentence_dm), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dm_concat, __pyx_k_train_sentence_dm_concat, sizeof(__pyx_k_train_sentence_dm_concat), 0, 0, 1, 1}, {&__pyx_n_s_train_words, __pyx_k_train_words, sizeof(__pyx_k_train_words), 0, 0, 1, 1}, {&__pyx_n_s_train_words_2, __pyx_k_train_words_2, sizeof(__pyx_k_train_words_2), 0, 0, 1, 1}, {&__pyx_n_s_trunk_gensim_models_doc2vec_inne, __pyx_k_trunk_gensim_models_doc2vec_inne, sizeof(__pyx_k_trunk_gensim_models_doc2vec_inne), 0, 0, 1, 1}, - {&__pyx_n_s_tw, __pyx_k_tw, sizeof(__pyx_k_tw), 0, 0, 1, 1}, {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, {&__pyx_n_s_vector_size, __pyx_k_vector_size, sizeof(__pyx_k_vector_size), 0, 0, 1, 1}, {&__pyx_n_s_vocab, __pyx_k_vocab, sizeof(__pyx_k_vocab), 0, 0, 1, 1}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_window_indexes, __pyx_k_window_indexes, sizeof(__pyx_k_window_indexes), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, + {&__pyx_n_s_word_locks, __pyx_k_word_locks, sizeof(__pyx_k_word_locks), 0, 0, 1, 1}, + 
{&__pyx_n_s_word_locks_2, __pyx_k_word_locks_2, sizeof(__pyx_k_word_locks_2), 0, 0, 1, 1}, + {&__pyx_n_s_word_vectors, __pyx_k_word_vectors, sizeof(__pyx_k_word_vectors), 0, 0, 1, 1}, + {&__pyx_n_s_word_vectors_2, __pyx_k_word_vectors_2, sizeof(__pyx_k_word_vectors_2), 0, 0, 1, 1}, + {&__pyx_n_s_word_vocabs, __pyx_k_word_vocabs, sizeof(__pyx_k_word_vocabs), 0, 0, 1, 1}, {&__pyx_n_s_work, __pyx_k_work, sizeof(__pyx_k_work), 0, 0, 1, 1}, {&__pyx_n_s_work_2, __pyx_k_work_2, sizeof(__pyx_k_work_2), 0, 0, 1, 1}, {&__pyx_n_s_x, __pyx_k_x, sizeof(__pyx_k_x), 0, 0, 1, 1}, {&__pyx_n_s_y, __pyx_k_y, sizeof(__pyx_k_y), 0, 0, 1, 1}, + {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 92; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -9082,45 +9654,45 @@ static int 
__Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":331 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/doc2vec_inner.pyx":488 + /* "trunk/gensim/models/doc2vec_inner.pyx":463 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if 
(unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "trunk/gensim/models/doc2vec_inner.pyx":621 + /* "trunk/gensim/models/doc2vec_inner.pyx":625 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 621; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); @@ -9190,53 +9762,53 @@ static int __Pyx_InitCachedConstants(void) { 
__Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - /* "trunk/gensim/models/doc2vec_inner.pyx":320 + /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__14 = PyTuple_Pack(37, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_train_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_tw, __pyx_n_s_tl, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doclbls_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, 
__pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_context_token, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(7, 0, 37, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 320, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":441 + /* "trunk/gensim/models/doc2vec_inner.pyx":400 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, 
doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_train_words_2, __pyx_n_s_train_lbls_2, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_words_2, __pyx_n_s_learn_lbls_2, __pyx_n_s_learn_hidden, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_lbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(51, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, 
__pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_token); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(8, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 441, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 51, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 400, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":570 + /* "trunk/gensim/models/doc2vec_inner.pyx":557 * * - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(45, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_lbls, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_words, __pyx_n_s_learn_lbls, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_hidden, __pyx_n_s_learn_lbls_2, 
__pyx_n_s_learn_words_2, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_lbl_codelens, __pyx_n_s_indexes, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_lbl_length, __pyx_n_s_window, __pyx_n_s_expected_lbl_length, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_lbl_points, __pyx_n_s_lbl_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_syn0locks, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(51, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_expected_doclbl_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_token); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(8, 0, 45, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 570, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 51, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 557, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":692 + /* "trunk/gensim/models/doc2vec_inner.pyx":710 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); - __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 692, 
__pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 710, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9358,159 +9930,187 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) * * import cython * import numpy as np # <<<<<<<<<<<<<< + * from numpy import zeros, float32 as REAL * cimport numpy as np - * */ __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 11; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":20 + /* "trunk/gensim/models/doc2vec_inner.pyx":12 + * import cython + * import numpy as np + * from numpy import zeros, float32 as REAL # <<<<<<<<<<<<<< + * cimport numpy as np + * + */ + __pyx_t_1 = PyList_New(2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(__pyx_n_s_zeros); + PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_zeros); + __Pyx_GIVEREF(__pyx_n_s_zeros); + __Pyx_INCREF(__pyx_n_s_float32); + PyList_SET_ITEM(__pyx_t_1, 1, __pyx_n_s_float32); + __Pyx_GIVEREF(__pyx_n_s_float32); + __pyx_t_2 = __Pyx_Import(__pyx_n_s_numpy, __pyx_t_1, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_zeros); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_zeros, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":21 * void* PyCObject_AsVoidPtr(object obj) * * from scipy.linalg.blas import fblas # <<<<<<<<<<<<<< * * REAL = np.float32 */ - __pyx_t_1 = PyList_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = PyList_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_n_s_fblas); - PyList_SET_ITEM(__pyx_t_1, 0, __pyx_n_s_fblas); + PyList_SET_ITEM(__pyx_t_2, 0, __pyx_n_s_fblas); __Pyx_GIVEREF(__pyx_n_s_fblas); - __pyx_t_2 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_1, -1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_ImportFrom(__pyx_t_2, __pyx_n_s_fblas); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_Import(__pyx_n_s_scipy_linalg_blas, __pyx_t_2, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 20; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __pyx_t_2 = __Pyx_ImportFrom(__pyx_t_1, __pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_fblas, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 21; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":22 + /* "trunk/gensim/models/doc2vec_inner.pyx":23 * from scipy.linalg.blas import fblas * * REAL = np.float32 # <<<<<<<<<<<<<< * ctypedef np.float32_t REAL_t * */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_float32); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_float32); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 23; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":34 + /* "trunk/gensim/models/doc2vec_inner.pyx":35 * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x # <<<<<<<<<<<<<< * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_scopy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_scopy); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_scopy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_scopy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_scopy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_scopy_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":35 + /* "trunk/gensim/models/doc2vec_inner.pyx":36 * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x # <<<<<<<<<<<<<< * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_saxpy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_saxpy_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":36 + /* "trunk/gensim/models/doc2vec_inner.pyx":37 * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) # <<<<<<<<<<<<<< * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); 
__pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_sdot); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sdot_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":37 + /* "trunk/gensim/models/doc2vec_inner.pyx":38 * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) # <<<<<<<<<<<<<< * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_sdot); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_dsdot_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":38 + /* "trunk/gensim/models/doc2vec_inner.pyx":39 * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) # <<<<<<<<<<<<<< * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x * */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_snrm2 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_snrm2 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_snrm2_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":39 + /* "trunk/gensim/models/doc2vec_inner.pyx":40 * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x # <<<<<<<<<<<<<< * * DEF EXP_TABLE_SIZE = 1000 */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sscal); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_sscal); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 40; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_sscal_ptr)PyCObject_AsVoidPtr(__pyx_t_2)); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":46 + /* "trunk/gensim/models/doc2vec_inner.pyx":47 * cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -9519,7 +10119,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":47 + /* "trunk/gensim/models/doc2vec_inner.pyx":48 * * cdef int ONE = 1 * cdef 
REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< @@ -9528,91 +10128,91 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":320 + /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":441 + /* "trunk/gensim/models/doc2vec_inner.pyx":400 * * - * def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, 
_train_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":570 + /* "trunk/gensim/models/doc2vec_inner.pyx":557 * * - * def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): # <<<<<<<<<<<<<< - * cdef int hs = model.hs - * cdef int negative = model.negative + * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doclbls=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_t_1 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 570; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":692 + /* "trunk/gensim/models/doc2vec_inner.pyx":710 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 692; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":732 + /* "trunk/gensim/models/doc2vec_inner.pyx":750 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_2); + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = NULL; - if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { - __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_2); + if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); if (likely(__pyx_t_3)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_2); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); __Pyx_INCREF(__pyx_t_3); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_2, function); + __Pyx_DECREF_SET(__pyx_t_1, function); } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 732; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":1 * #!/usr/bin/env cython # <<<<<<<<<<<<<< * # cython: boundscheck=False * # cython: wraparound=False */ - __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_2 = PyDict_New(); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_2); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "../../../../../../miniconda3/envs/gensim_cenv/lib/python3.4/site-packages/Cython/Includes/numpy/__init__.pxd":979 * arr.base = baseptr @@ -9941,6 +10541,55 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); } +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* 
__Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject* args = PyTuple_Pack(1, arg); + return (likely(args)) ? 
__Pyx_PyObject_Call(func, args, NULL) : NULL; +} +#endif + static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) { #if CYTHON_COMPILING_IN_CPYTHON PyObject *tmp_type, *tmp_value, *tmp_tb; @@ -10157,55 +10806,6 @@ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { return value; } -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = PyCFunction_GET_FUNCTION(func); - self = PyCFunction_GET_SELF(func); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -#if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_New(1); - if (unlikely(!args)) return NULL; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { -#ifdef __Pyx_CyFunction_USED - if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { -#else - if (likely(PyCFunction_Check(func))) { -#endif - if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { - return __Pyx_PyObject_CallMethO(func, arg); - } - } - return __Pyx__PyObject_CallOneArg(func, arg); -} -#else -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject* args = PyTuple_Pack(1, arg); - return (likely(args)) ? 
__Pyx_PyObject_Call(func, args, NULL) : NULL; -} -#endif - #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { #ifdef __Pyx_CyFunction_USED diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 88caea2259..bc8941add1 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -9,6 +9,7 @@ import cython import numpy as np +from numpy import zeros, float32 as REAL cimport numpy as np from libc.math cimport exp @@ -80,37 +81,37 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con cdef void fast_sentence_dbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, int train_hidden, int train_inputs, - REAL_t *syn0locks) nogil: + REAL_t *context_vectors, REAL_t *syn1, const int size, + const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden, + REAL_t *context_locks) nogil: cdef long long a, b - cdef long long row1 = word2_index * size, row2 + cdef long long row1 = context_index * size, row2 cdef REAL_t f, g memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelen): row2 = word_point[b] * size - f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + f = our_dot(&size, &context_vectors[row1], &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - if train_hidden: - our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - if train_inputs: - our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + if learn_hidden: + our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1[row2], &ONE) + if learn_context: + our_saxpy(&size, 
&context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) cdef unsigned long long fast_sentence_dbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random, int train_hidden, int train_inputs, REAL_t *syn0locks) nogil: + REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, + const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, + unsigned long long next_random, int learn_context, int learn_hidden, REAL_t *context_locks) nogil: cdef long long a - cdef long long row1 = word2_index * size, row2 + cdef long long row1 = context_index * size, row2 cdef unsigned long long modulo = 281474976710655ULL cdef REAL_t f, g, label cdef np.uint32_t target_index @@ -128,37 +129,33 @@ cdef unsigned long long fast_sentence_dbow_neg( if target_index == word_index: continue label = 0.0 - row2 = target_index * size - f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + f = our_dot(&size, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - if train_hidden: - our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - if train_inputs: - our_saxpy(&size, &syn0locks[word2_index], work, &ONE, &syn0[row1], &ONE) + if learn_hidden: + our_saxpy(&size, &g, &context_vectors[row1], &ONE, &syn1neg[row2], &ONE) + if learn_context: + our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) return next_random cdef void fast_sentence_dm_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, 
const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, int i, int j, int k, int lbl_length, int learn_hidden, - int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: + const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, + REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, + const int size, int learn_hidden) nogil: - cdef long long a, b + cdef long long b cdef long long row2 - cdef REAL_t f, g, count, inv_count - cdef int m + cdef REAL_t f, g # l1 already composed by caller, passed in as neu1 - memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - for b in range(codelens[i]): + # work (also passed in) will accumulate l1 error + for b in range(word_code_len): row2 = word_point[b] * size f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: @@ -168,45 +165,29 @@ cdef void fast_sentence_dm_hs( our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) if learn_hidden: our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - if learn_words: - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) - if learn_lbls: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) cdef unsigned long long fast_sentence_dm_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - int lbl_codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - np.uint32_t indexes[MAX_SENTENCE_LEN], np.uint32_t lbl_indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, unsigned long long next_random, int lbl_length, int learn_hidden, int learn_lbls, - int learn_words, REAL_t *syn0locks) nogil: + 
const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, + const int size, int learn_hidden) nogil: - cdef long long a cdef long long row2 cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label - cdef np.uint32_t target_index, word_index - cdef int d, m + cdef REAL_t f, g, label + cdef np.uint32_t target_index + cdef int d # l1 already composed by caller, passed in as neu1 - memset(work, 0, size * cython.sizeof(REAL_t)) # work accumulates net l1 error - word_index = indexes[i] + # work (also passsed in) will accumulate l1 error for outside application for d in range(negative+1): if d == 0: - target_index = word_index + target_index = predict_word_index label = ONEF else: target_index = table[(next_random >> 16) % table_len] next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: + if target_index == predict_word_index: continue label = 0.0 @@ -219,27 +200,13 @@ cdef unsigned long long fast_sentence_dm_neg( our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) if learn_hidden: our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - if learn_words: - for m in range(j,k): - if m == i or codelens[m] == 0: - continue - else: - our_saxpy(&size, &syn0locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) - if learn_lbls: - for m in range(lbl_length): - if lbl_codelens[m] == 0: - continue - else: - our_saxpy(&size, &syn0locks[lbl_indexes[m]], work, &ONE, &syn0[lbl_indexes[m]*size], &ONE) return next_random cdef void fast_sentence_dmc_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int layer1_size, const int vector_size, - const np.uint32_t window_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, const int lbl_length, const int window, - int learn_hidden, 
int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: + REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, + const int layer1_size, const int vector_size, int learn_hidden) nogil: cdef long long a, b cdef long long row2 @@ -247,7 +214,7 @@ cdef void fast_sentence_dmc_hs( cdef int m # l1 already composed by caller, passed in as neu1 - memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + # work accumulates net l1 error; eventually applied by caller for b in range(word_code_len): row2 = word_point[b] * layer1_size f = our_dot(&layer1_size, neu1, &ONE, &syn1[row2], &ONE) @@ -258,23 +225,12 @@ cdef void fast_sentence_dmc_hs( our_saxpy(&layer1_size, &g, &syn1[row2], &ONE, work, &ONE) if learn_hidden: our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) - if learn_lbls: - for m in range(lbl_length): - our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - &syn0[window_indexes[m] * vector_size], &ONE) - if learn_words: - for m in range(lbl_length, lbl_length + (2 * window)): - our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - &syn0[window_indexes[m] * vector_size], &ONE) cdef unsigned long long fast_sentence_dmc_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int layer1_size, const int vector_size, - np.uint32_t window_indexes[MAX_SENTENCE_LEN], - const REAL_t alpha, REAL_t *work, const int predict_word_index, - const int lbl_length, const int window, unsigned long long next_random, - int learn_hidden, int learn_lbls, int learn_words, REAL_t *syn0locks) nogil: + const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, + REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, + const int layer1_size, const int vector_size, int learn_hidden) nogil: cdef long long a cdef long long row2 @@ 
-284,7 +240,7 @@ cdef unsigned long long fast_sentence_dmc_neg( cdef int d, m # l1 already composed by caller, passed in as neu1 - memset(work, 0, layer1_size * cython.sizeof(REAL_t)) # work accumulates net l1 error + # work accumulates net l1 error; eventually applied by caller for d in range(negative+1): if d == 0: target_index = predict_word_index @@ -305,36 +261,34 @@ cdef unsigned long long fast_sentence_dmc_neg( our_saxpy(&layer1_size, &g, &syn1neg[row2], &ONE, work, &ONE) if learn_hidden: our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - if learn_lbls: - for m in range(lbl_length): - our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m * vector_size], &ONE, - &syn0[window_indexes[m] * vector_size], &ONE) - if learn_words: - for m in range(lbl_length, lbl_length + (2 * window)): - our_saxpy(&vector_size, &syn0locks[window_indexes[m]], &work[m*vector_size], &ONE, - &syn0[window_indexes[m] * vector_size], &ONE) return next_random -def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_lbls): +def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs cdef int negative = model.negative - cdef int tw = train_words - cdef int tl = train_lbls - - cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) - cdef REAL_t *work + cdef int _train_words = train_words + cdef int _learn_words = learn_words + cdef int _learn_hidden = learn_hidden + cdef int _learn_doclbls = learn_doclbls + + cdef REAL_t *_word_vectors + cdef REAL_t *_doclbl_vectors + cdef REAL_t *_word_locks + cdef REAL_t *_doclbl_locks + cdef REAL_t *_work cdef REAL_t _alpha = alpha cdef int size = model.layer1_size cdef int codelens[MAX_SENTENCE_LEN] - cdef int lbl_codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t 
lbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int sentence_len - cdef int lbl_length + cdef int doclbl_len cdef int window = model.window cdef int i, j @@ -351,8 +305,21 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ cdef unsigned long long table_len cdef unsigned long long next_random - # lock some of syn0 against training - cdef REAL_t *syn0locks + if word_vectors is None: + word_vectors = model.syn0 + _word_vectors = (np.PyArray_DATA(word_vectors)) + + if doclbl_vectors is None: + doclbl_vectors = model.syn0 + _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + + if word_locks is None: + word_locks = model.syn0locks + _word_locks = (np.PyArray_DATA(word_locks)) + + if doclbl_locks is None: + doclbl_locks = model.syn0locks + _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -364,46 +331,40 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL - work = np.PyArray_DATA(_work) - sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - - syn0locks = np.PyArray_DATA(model.syn0locks) + if work is None: + work = zeros(model.layer1_size, dtype=REAL) + _work = np.PyArray_DATA(work) + sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) for i in range(sentence_len): - word = sentence[i] - if word is None: + predict_word = word_vocabs[i] + if predict_word is None: codelens[i] = 0 else: - indexes[i] = word.index + indexes[i] = predict_word.index + reduced_windows[i] = np.random.randint(window) if hs: - codelens[i] = len(word.code) - codes[i] = np.PyArray_DATA(word.code) - points[i] = np.PyArray_DATA(word.point) + 
codelens[i] = len(predict_word.code) + codes[i] = np.PyArray_DATA(predict_word.code) + points[i] = np.PyArray_DATA(predict_word.point) else: codelens[i] = 1 result += 1 # single randint() call avoids a big thread-sync slowdown for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item - for i in range(lbl_length): - word = lbls[i] - if word is None: - lbl_codelens[i] = 0 - else: - lbl_indexes[i] = word.index - if hs: - lbl_codelens[i] = len(word.code) - else: - lbl_codelens[i] = 1 - result += 1 + for i in range(doclbl_len): + context_token = doclbl_vocabs[i] + doclbl_indexes[i] = context_token.index + result += 1 # release GIL & train on the sentence with nogil: for i in range(sentence_len): if codelens[i] == 0: continue - if tw: # simultaneous skip-gram wordvec-training + if _train_words: # simultaneous skip-gram wordvec-training j = i - window + reduced_windows[i] if j < 0: j = 0 @@ -415,51 +376,53 @@ def train_sentence_dbow(model, sentence, lbls, alpha, _work, train_words, train_ continue if hs: # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], - _alpha, work, 1, 1, syn0locks) + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], + _alpha, _work, _learn_words, _learn_hidden, _word_locks) if negative: # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, - indexes[i], indexes[j], _alpha, work, next_random, - 1, 1, syn0locks) - - if tl: # docvec-training - for j in range(lbl_length): - if lbl_codelens[j] == 0: - continue - if hs: - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], syn0, syn1, size, lbl_indexes[j], - _alpha, work, 1, 1, syn0locks) - if negative: - next_random = fast_sentence_dbow_neg(negative, table, table_len, syn0, syn1neg, size, - 
indexes[i], lbl_indexes[j], _alpha, work, next_random, - 1, 1, syn0locks) + next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, + indexes[i], indexes[j], _alpha, _work, next_random, + _learn_words, _learn_hidden, _word_locks) + + # docvec-training + for j in range(doclbl_len): + if hs: + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + if negative: + next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, + indexes[i], doclbl_indexes[j], _alpha, _work, next_random, + _learn_doclbls, _learn_hidden, _doclbl_locks) return result -def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, _train_lbls): +def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs cdef int negative = model.negative - cdef int learn_words = _train_words - cdef int learn_lbls = _train_lbls - cdef int learn_hidden = True + cdef int _learn_doclbls = learn_doclbls + cdef int _learn_words = learn_words + cdef int _learn_hidden = learn_hidden cdef int cbow_mean = model.cbow_mean cdef REAL_t count, inv_count - cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) - cdef REAL_t *work - cdef REAL_t *neu1 + cdef REAL_t *_word_vectors + cdef REAL_t *_doclbl_vectors + cdef REAL_t *_word_locks + cdef REAL_t *_doclbl_locks + cdef REAL_t *_work + cdef REAL_t *_neu1 cdef REAL_t _alpha = alpha cdef int size = model.layer1_size cdef int codelens[MAX_SENTENCE_LEN] - cdef int lbl_codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t lbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int 
sentence_len - cdef int lbl_length + cdef int doclbl_len cdef int window = model.window cdef int i, j, k, m @@ -469,8 +432,6 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, cdef REAL_t *syn1 cdef np.uint32_t *points[MAX_SENTENCE_LEN] cdef np.uint8_t *codes[MAX_SENTENCE_LEN] - cdef np.uint32_t *lbl_points[MAX_SENTENCE_LEN] - cdef np.uint8_t *lbl_codes[MAX_SENTENCE_LEN] # For negative sampling cdef REAL_t *syn1neg @@ -478,6 +439,20 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, cdef unsigned long long table_len cdef unsigned long long next_random + # default vectors, locks from syn0 + if word_vectors is None: + word_vectors = model.syn0 + _word_vectors = (np.PyArray_DATA(word_vectors)) + if doclbl_vectors is None: + doclbl_vectors = model.syn0 + _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + if word_locks is None: + word_locks = model.syn0locks + _word_locks = (np.PyArray_DATA(word_locks)) + if doclbl_locks is None: + doclbl_locks = model.syn0locks + _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -488,47 +463,48 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # convert Python structures to primitive types, so we can release the GIL - work = np.PyArray_DATA(_work) - neu1 = np.PyArray_DATA(_neu1) - sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) - - syn0locks = np.PyArray_DATA(model.syn0locks) - + if work is None: + work = zeros(model.layer1_size, dtype=REAL) + _work = np.PyArray_DATA(work) + if neu1 is None: + neu1 = zeros(model.layer1_size, dtype=REAL) + _neu1 = np.PyArray_DATA(neu1) + + sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + j = 0 for i in range(sentence_len): - word = sentence[i] + word = word_vocabs[i] if word is None: - codelens[i] = 0 + # shrink sentence to leave out word + sentence_len = sentence_len - 
1 + continue # leaving j unchanged else: - indexes[i] = word.index + indexes[j] = word.index if hs: - codelens[i] = len(word.code) - codes[i] = np.PyArray_DATA(word.code) - points[i] = np.PyArray_DATA(word.point) - else: - codelens[i] = 1 + codelens[j] = len(word.code) + codes[j] = np.PyArray_DATA(word.code) + points[j] = np.PyArray_DATA(word.point) result += 1 + j = j + 1 # single randint() call avoids a big thread-sync slowdown for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item - lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - for i in range(lbl_length): - word = lbls[i] - if word is None: - lbl_codelens[i] = 0 + doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + j = 0 + for i in range(doclbl_len): + token = doclbl_vocabs[i] + if token is None: + doclbl_len = doclbl_len - 1 + continue # leaving j unchanged else: - lbl_indexes[i] = word.index - if hs: - lbl_codelens[i] = len(word.code) - else: - lbl_codelens[i] = 1 + doclbl_indexes[j] = token.index result += 1 + j = j + 1 # release GIL & train on the sentence with nogil: for i in range(sentence_len): - if codelens[i] == 0: - continue j = i - window + reduced_windows[i] if j < 0: j = 0 @@ -536,59 +512,75 @@ def train_sentence_dm(model, sentence, lbls, alpha, _work, _neu1, _train_words, if k > sentence_len: k = sentence_len - # compose l1 (in neu1) - memset(neu1, 0, size * cython.sizeof(REAL_t)) + # compose l1 (in _neu1) & clear _work + memset(_neu1, 0, size * cython.sizeof(REAL_t)) count = 0.0 for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - for m in range(lbl_length): - if lbl_codelens[m] == 0: + if m == i: continue else: count += ONEF - our_saxpy(&size, &ONEF, &syn0[lbl_indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) + for m in range(doclbl_len): + count += ONEF + our_saxpy(&size, 
&ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) if cbow_mean and count > (0.5): inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - + sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) + memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error + if hs: - fast_sentence_dm_hs(points[i], codes[i], codelens, lbl_codelens, neu1, syn0, syn1, - size, indexes, lbl_indexes, _alpha, work, i, j, k, lbl_length, - learn_hidden, learn_lbls, learn_words, syn0locks) + fast_sentence_dm_hs(points[i], codes[i], codelens[i], + _neu1, syn1, _alpha, _work, + size, _learn_hidden) if negative: - next_random = fast_sentence_dm_neg(negative, table, table_len, codelens, lbl_codelens, neu1, syn0, - syn1neg, size, indexes, lbl_indexes, _alpha, work, i, j, k, - next_random, lbl_length, learn_hidden, learn_lbls, learn_words, syn0locks) + next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, + _neu1, syn1neg, indexes[i], _alpha, _work, + size, _learn_hidden) + + # apply accumulated error in work + if _learn_doclbls: + for m in range(doclbl_len): + our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, + &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + if _learn_words: + for m in range(j, k): + if m == i: + continue + else: + our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, + &_word_vectors[indexes[m] * size], &ONE) return result -def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_words, _learn_lbls): +def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + learn_doclbls=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs cdef int negative = model.negative - cdef int learn_hidden = True - cdef int learn_lbls = _learn_lbls - cdef int learn_words = _learn_words - - cdef 
REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) - cdef REAL_t *work - cdef REAL_t *neu1 + cdef int _learn_doclbls = learn_doclbls + cdef int _learn_words = learn_words + cdef int _learn_hidden = learn_hidden + + cdef REAL_t *_word_vectors + cdef REAL_t *_doclbl_vectors + cdef REAL_t *_word_locks + cdef REAL_t *_doclbl_locks + cdef REAL_t *_work + cdef REAL_t *_neu1 cdef REAL_t _alpha = alpha cdef int layer1_size = model.layer1_size cdef int vector_size = model.vector_size cdef int codelens[MAX_SENTENCE_LEN] - cdef int lbl_codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] cdef int sentence_len - cdef int lbl_length + cdef int doclbl_len cdef int window = model.window - cdef int expected_lbl_length = model.dm_lbl_count + cdef int expected_doclbl_len = model.dm_lbl_count cdef int i, j, k, m, n cdef long result = 0 @@ -598,8 +590,6 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ cdef REAL_t *syn1 cdef np.uint32_t *points[MAX_SENTENCE_LEN] cdef np.uint8_t *codes[MAX_SENTENCE_LEN] - cdef np.uint32_t *lbl_points[MAX_SENTENCE_LEN] - cdef np.uint8_t *lbl_codes[MAX_SENTENCE_LEN] # For negative sampling cdef REAL_t *syn1neg @@ -607,10 +597,24 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ cdef unsigned long long table_len cdef unsigned long long next_random - lbl_length = min(MAX_SENTENCE_LEN, len(lbls)) - if lbl_length != expected_lbl_length: + doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + if doclbl_len != expected_doclbl_len: return 0 # skip doc without expected nmber of lbls + # default vectors, locks from syn0 + if word_vectors is None: + word_vectors = model.syn0 + _word_vectors = (np.PyArray_DATA(word_vectors)) + if doclbl_vectors is None: + doclbl_vectors = model.syn0 + _doclbl_vectors = 
(np.PyArray_DATA(doclbl_vectors)) + if word_locks is None: + word_locks = model.syn0locks + _word_locks = (np.PyArray_DATA(word_locks)) + if doclbl_locks is None: + doclbl_locks = model.syn0locks + _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -621,16 +625,17 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # convert Python structures to primitive types, so we can release the GIL - work = np.PyArray_DATA(_work) - neu1 = np.PyArray_DATA(_neu1) - - # optional locking of some vactors against backprop-learnind - syn0locks = np.PyArray_DATA(model.syn0locks) - - sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) + if work is None: + work = zeros(model.layer1_size, dtype=REAL) + _work = np.PyArray_DATA(work) + if neu1 is None: + neu1 = zeros(model.layer1_size, dtype=REAL) + _neu1 = np.PyArray_DATA(neu1) + + sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) j = 0 for i in range(sentence_len): - word = sentence[i] + word = word_vocabs[i] if word is None: # shrink sentence to leave out word sentence_len = sentence_len - 1 @@ -646,13 +651,13 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ result += 1 j = j + 1 - for i in range(lbl_length): - word = lbls[i] - if word is None: - # no support for missing lbls where expected; skip sentence + for i in range(doclbl_len): + token = doclbl_vocabs[i] + if token is None: + # no current support for missing doclbls where expected; skip sentence return 0 else: - window_indexes[i] = word.index + doclbl_indexes[i] = token.index result += 1 # release GIL & train on the sentence @@ -662,8 +667,13 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ k = i + window + 1 # past sentence end OK: will pad with null word # compose l1 & clear work - n = lbl_length + for m in range(doclbl_len): + # doc vector(s) + 
memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + vector_size * cython.sizeof(REAL_t)) + n = 0 for m in range(j, k): + # word vectors in window if m == i: continue if m < 0 or m >= sentence_len: @@ -671,20 +681,28 @@ def train_sentence_dm_concat(model, sentence, lbls, alpha, _work, _neu1, _learn_ else: window_indexes[n] = indexes[m] n = n + 1 - for m in range(lbl_length + (2 * window)): - memcpy(&neu1[m * vector_size], &syn0[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) - memset(work, 0, layer1_size * cython.sizeof(REAL_t)) + for m in range(2 * window): + memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + vector_size * cython.sizeof(REAL_t)) + memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error if hs: - fast_sentence_dmc_hs(points[i], codes[i], codelens[i], neu1, syn0, syn1, - layer1_size, vector_size, window_indexes, _alpha, - work, lbl_length, window, - learn_hidden, learn_lbls, learn_words, syn0locks) + fast_sentence_dmc_hs(points[i], codes[i], codelens[i], + _neu1, syn1, _alpha, _work, + layer1_size, vector_size, _learn_hidden) if negative: - next_random = fast_sentence_dmc_neg(negative, table, table_len, neu1, syn0, syn1neg, - layer1_size, vector_size, window_indexes, _alpha, - work, indexes[i], lbl_length, window, - next_random, learn_hidden, learn_lbls, learn_words, syn0locks) + next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, + _neu1, syn1neg, indexes[i], _alpha, _work, + layer1_size, vector_size, _learn_hidden) + + if _learn_doclbls: + for m in range(doclbl_len): + our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], + &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + if _learn_words: + for m in range(2 * window): + our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + &ONE, 
&_word_vectors[window_indexes[m] * vector_size], &ONE) return result From 6e85df5ebd30b2786367f6eb3eb2457c8fecf222 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Fri, 15 May 2015 20:06:07 -0700 Subject: [PATCH 13/49] compact_name --- gensim/models/doc2vec.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index b01bf163a7..19ab273c6d 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -380,6 +380,36 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): def __str__(self): return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) + @property + def compact_name(self): + segments = [] + if self.sg: + segments.append('dbow') # PV-DBOW (skip-gram-style) + if self.dbow_words: + segments.append('w') # also training words + else: + segments.append('dm') # PV-DM... + if self.dm_concat: + segments.append('c') # ...with concatenative context layer + else: + if self.cbow_mean: + segments.append('m') + else: + segments.append('s') + segments.append('_') + segments.append('d%d' % self.vector_size) # dimensions + if self.negative: + segments.append('n%d' % self.negative) # negative samples + if self.hs: + segments.append('hs') + if not self.sg or (self.sg and self.dbow_words): + segments.append('w%d' % self.window) # window size, when relevant + if self.min_count > 1: + segments.append('mc%d' % self.min_count) + if self.sample > 0: + segments.append('s%d' % self.sample) + return ''.join(segments) + def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors super(Doc2Vec, self).save(*args, **kwargs) From f33bb277d0c8368f7b6bc0d802e1b254509ec3a3 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Fri, 15 May 2015 20:07:19 -0700 Subject: [PATCH 14/49] rename merge_ to intersect_ --- 
gensim/models/word2vec.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index a180456e2b..c9717ea8bd 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -649,10 +649,12 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, norm_only=True): result.init_sims(norm_only) return result - def merge_word2vec_format(self, fname, binary=False): + def intersect_word2vec_format(self, fname, binary=False): """ - Merge the input-hidden weight matrix from the original C word2vec-tool format, - where it overlaps with the current vocabulary. + Merge the input-hidden weight matrix from the original C word2vec-tool format + given, where it intersects with the current vocabulary. (No words are added to the + existing vocabulary, but intersecting words adopt the file's weights, and + non-intersecting words are left alone.) `binary` is a boolean indicating whether the data is in binary word2vec format. 
""" From 0e587c33ef57713d9cfe212e981d78fd8ef288dd Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 16 May 2015 00:57:13 -0700 Subject: [PATCH 15/49] for dm-sum, divide error over all conributing vectors --- gensim/models/doc2vec.py | 2 ++ gensim/models/doc2vec_inner.pyx | 9 ++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 19ab273c6d..d6fafe74e4 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -130,6 +130,8 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= if word2_indices and model.cbow_mean: l1 /= (len(word2_indices) + doclbl_len) neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, learn_vectors=False, learn_hidden=True) + if word2_indices and not model.cbow_mean: + neu1e /= (len(word2_indices) + doclbl_len) if learn_doclbls: doclbl_vectors[doclbl_indices] += \ neu1e * np_repeat(doclbl_locks[doclbl_indices],model.vector_size).reshape(-1,model.vector_size) diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index bc8941add1..6cbf103136 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -406,7 +406,7 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden cdef int cbow_mean = model.cbow_mean - cdef REAL_t count, inv_count + cdef REAL_t count, inv_count = 1.0 cdef REAL_t *_word_vectors cdef REAL_t *_doclbl_vectors @@ -524,8 +524,9 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= for m in range(doclbl_len): count += ONEF our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) - if cbow_mean and count > (0.5): + if count > (0.5): inv_count = ONEF/count + if cbow_mean: sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error @@ -537,7 +538,9 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, _neu1, syn1neg, indexes[i], _alpha, _work, size, _learn_hidden) - + + if not cbow_mean: + sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) # apply accumulated error in work if _learn_doclbls: for m in range(doclbl_len): From d28b0b1c60131db8ebd8332233c80dfa5de33745 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 16 May 2015 00:59:47 -0700 Subject: [PATCH 16/49] rm unnecessary pretrain() --- gensim/models/word2vec.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index c9717ea8bd..05da8d9486 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -455,14 +455,6 @@ def _get_job_words(self, alpha, work, job, neu1): else: return sum(train_sentence_cbow(self, sentence, alpha, work, neu1) for sentence in job) - def pretrain(self): -# if FAST_VERSION < 0: - self.neg_labels = [] - if self.negative > 0: - # precompute negative labels optimization for pure-python training - self.neg_labels = zeros(self.negative + 1) - self.neg_labels[0] = 1. - def train(self, sentences, total_words=None, word_count=0, chunksize=100): """ Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). @@ -472,8 +464,12 @@ def train(self, sentences, total_words=None, word_count=0, chunksize=100): if FAST_VERSION < 0: import warnings warnings.warn("C extension compilation failed, training will be slow. Install a C compiler and reinstall gensim for fast training.") + self.neg_labels = [] + if self.negative > 0: + # precompute negative labels optimization for pure-python training + self.neg_labels = zeros(self.negative + 1) + self.neg_labels[0] = 1. 
- self.pretrain() logger.info("training model with %i workers on %i vocabulary and %i features, " "using 'skipgram'=%s 'hierarchical softmax'=%s 'subsample'=%s and 'negative sampling'=%s" % (self.workers, len(self.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative)) From 8d3d0f34b6f718c5bf6bc9fce9923ce4d628d30f Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 16 May 2015 01:04:18 -0700 Subject: [PATCH 17/49] doclbls/docvecs separate from vocab/syn0; rename syn0locks syn0_lockf ('...lock factor') --- gensim/models/doc2vec.py | 65 +++++++++++++++++++++++++-------- gensim/models/doc2vec_inner.pyx | 26 ++++++------- gensim/models/word2vec.py | 11 +++--- 3 files changed, 67 insertions(+), 35 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index d6fafe74e4..68fdafade8 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -42,6 +42,8 @@ except ImportError: from Queue import Queue +from collections import namedtuple + from numpy import zeros, random, sum as np_sum, add as np_add, concatenate,\ repeat as np_repeat, array, float32 as REAL, empty, ones from six import string_types @@ -50,6 +52,7 @@ from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_sentence_sg +from six.moves import xrange try: from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat,\ @@ -79,8 +82,13 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, will use the optimized version from doc2vec_inner instead. 
""" + if doclbl_vectors is None: + doclbl_vectors = model.doclbl_syn0 + if doclbl_locks is None: + doclbl_locks = model.doclbl_syn0_lockf + if train_words and learn_words: - train_sentence_sg(model, word_vocabs, alpha, work) + train_sentence_sg(model, word_vocabs, alpha, work) # TODO: adapt for word_vectors/word_locks for doclbl in doclbl_vocabs: if doclbl is None: continue # OOV token => skip @@ -109,11 +117,11 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= if word_vectors is None: word_vectors = model.syn0 if word_locks is None: - word_locks = model.syn0locks + word_locks = model.syn0_lockf if doclbl_vectors is None: - doclbl_vectors = model.syn0 + doclbl_vectors = model.doclbl_syn0 if doclbl_locks is None: - doclbl_locks = model.syn0locks + doclbl_locks = model.doclbl_syn0_lockf doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] doclbl_sum = np_sum(doclbl_vectors[doclbl_indices], axis=0) @@ -159,11 +167,11 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None if word_vectors is None: word_vectors = model.syn0 if word_locks is None: - word_locks = model.syn0locks + word_locks = model.syn0_lockf if doclbl_vectors is None: - doclbl_vectors = model.syn0 + doclbl_vectors = model.doclbl_syn0 if doclbl_locks is None: - doclbl_locks = model.syn0locks + doclbl_locks = model.doclbl_syn0_lockf doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] doclbl_len = len(doclbl_indices) @@ -224,6 +232,13 @@ def __init__(self, words, labels): def __str__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.labels) +class Doclbl(namedtuple('Doclbl', 'index, word_count, doc_count')): + """A document label discovered during the initial vocabulary + scan. 
(The document-vector equivalent of a Vocab object.)""" + __slots__ = () + def repeat(self, word_count): + return self._replace(word_count=self.word_count + word_count, doc_count=self.doc_count + 1) + class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" @@ -283,6 +298,8 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, null_word=dm_concat, **kwargs) + self.doclbls = {} # mapping from doclbl (string) to Doclbl object + self.index2doclbl = [] # map from doclbl's int index to string self.dbow_words = dbow_words self.dm_concat = dm_concat self.dm_lbl_count = dm_lbl_count @@ -296,9 +313,13 @@ def reset_weights(self): self.layer1_size = (self.dm_lbl_count + (2 * self.window)) * self.vector_size logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) Word2Vec.reset_weights(self) + self.doclbl_syn0 = empty((len(self.doclbls), self.vector_size), dtype=REAL) + for i in xrange(len(self.doclbls)): + # construct deterministic seed from word AND seed argument + self.doclbl_syn0[i] = self.seeded_vector(self.index2doclbl[i] + str(self.seed)) + self.doclbl_syn0_lockf = ones(len(self.doclbls), dtype=REAL) # zeros suppress learning - @staticmethod - def _vocab_from(sentences): + def _vocab_from(self, sentences): sentence_no, vocab = -1, {} total_words = 0 for sentence_no, sentence in enumerate(sentences): @@ -308,10 +329,11 @@ def _vocab_from(sentences): sentence_length = len(sentence.words) for label in sentence.labels: total_words += 1 - if label in vocab: - vocab[label].count += sentence_length + if label in self.doclbls: + self.doclbls[label] = Doclbl[label].repeat(sentence_length) else: - vocab[label] = Vocab(count=sentence_length) # FIXME: doc-labels for short docs can be culled by min_count + self.doclbls[label] = 
Doclbl(sentence_no, sentence_length, 1) + self.index2doclbl.append(label) for word in sentence.words: total_words += 1 if word in vocab: @@ -325,7 +347,8 @@ def _vocab_from(sentences): def _prepare_sentences(self, sentences): for sentence in sentences: # avoid calling random_sample() where prob >= 1, to speed things up a little: - yield (self._tokens_to_vocabs(sentence.words), self._tokens_to_vocabs(sentence.labels, sample=False)) + yield (self._tokens_to_vocabs(sentence.words), + self._tokens_to_vocabs(sentence.labels, sample=False, source_dict=self.doclbls)) def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): if source_dict is None: @@ -355,7 +378,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): doclbl_vectors = empty((1, self.vector_size), dtype=REAL) doclbl_vectors[0] = self.seeded_vector(' '.join(document)) doclbl_locks = ones(1, dtype=REAL) - doclbl_vocabs = [Vocab(index=0) for doclbl in doclbl_vectors] + doclbl_vocabs = [Doclbl(0,0,0) for doclbl in doclbl_vectors] word_vocabs = self._tokens_to_vocabs(document) work = zeros(self.layer1_size, dtype=REAL) @@ -379,6 +402,18 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): return doclbl_vectors[0] + def get_docvec(self, doclbl): + return self.doclbl_syn0[self.doclbls[doclbl].index] + + @property + def docs(self): + class DocsView(object): + def __init__(self, doc2vec): + self.model = doc2vec + def __getitem__(self, doclbl): + return self.model.get_docvec(doclbl) + return DocsView(self) + def __str__(self): return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) @@ -414,7 +449,7 @@ def compact_name(self): def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors - super(Doc2Vec, self).save(*args, **kwargs) + super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save doclbl fields class 
LabeledBrownCorpus(object): diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 6cbf103136..ec49bb3fe9 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -305,20 +305,18 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, cdef unsigned long long table_len cdef unsigned long long next_random + # default vectors, locks from syn0/doclbl_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) - if doclbl_vectors is None: - doclbl_vectors = model.syn0 + doclbl_vectors = model.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) - if word_locks is None: - word_locks = model.syn0locks + word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) - if doclbl_locks is None: - doclbl_locks = model.syn0locks + doclbl_locks = model.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: @@ -439,18 +437,18 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= cdef unsigned long long table_len cdef unsigned long long next_random - # default vectors, locks from syn0 + # default vectors, locks from syn0/doclbl_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) if doclbl_vectors is None: - doclbl_vectors = model.syn0 + doclbl_vectors = model.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) if word_locks is None: - word_locks = model.syn0locks + word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) if doclbl_locks is None: - doclbl_locks = model.syn0locks + doclbl_locks = model.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: @@ -604,18 +602,18 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None if doclbl_len != expected_doclbl_len: return 0 # skip doc without expected nmber of lbls - # default vectors, locks from syn0 + # 
default vectors, locks from syn0/doclbl_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) if doclbl_vectors is None: - doclbl_vectors = model.syn0 + doclbl_vectors = model.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) if word_locks is None: - word_locks = model.syn0locks + word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) if doclbl_locks is None: - doclbl_locks = model.syn0locks + doclbl_locks = model.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 05da8d9486..4e767d2f3a 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -154,7 +154,7 @@ def train_sg_pair(model, predict_word, context_token, alpha, learn_vectors=True, if context_vectors is None: context_vectors = model.syn0 if context_locks is None: - context_locks = model.syn0locks + context_locks = model.syn0_lockf l1 = context_vectors[context_token.index] lock_factor = context_locks[context_token.index] @@ -215,7 +215,7 @@ def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=Tr if learn_vectors: # learn input -> hidden, here for all words in the window separately l = len(input_word_indices) - model.syn0[input_word_indices] += np_repeat(neu1e,l).reshape(l,model.vector_size) * model.syn0locks[input_word_indices][:,None] + model.syn0[input_word_indices] += np_repeat(neu1e,l).reshape(l,model.vector_size) * model.syn0_lockf[input_word_indices][:,None] return neu1e @@ -423,8 +423,7 @@ def build_vocab(self, sentences): self.reset_weights() sys.stderr.flush() - @staticmethod - def _vocab_from(sentences): + def _vocab_from(self, sentences): sentence_no, vocab = -1, {} total_words = 0 for sentence_no, sentence in enumerate(sentences): @@ -544,7 +543,7 @@ def reset_weights(self): self.syn1neg = zeros((len(self.vocab), self.layer1_size), dtype=REAL) self.syn0norm = None - self.syn0locks 
= ones(len(self.vocab), dtype=REAL) # zeros suppress training vectors + self.syn0_lockf = ones(len(self.vocab), dtype=REAL) # zeros suppress learning def seeded_vector(self, seed_string): """Create one 'random' vector (but deterministic by seed_string)""" @@ -680,7 +679,7 @@ def intersect_word2vec_format(self, fname, binary=False): if word in self.vocab: overlap_count += 1 self.syn0[self.vocab[word].index] = weights - self.syn0locks[self.vocab[word].index] = 0.0 # lock it + self.syn0_lockf[self.vocab[word].index] = 0.0 # lock it else: for line_no, line in enumerate(fin): parts = utils.to_unicode(line).split() From 13b7ee2befad83a7706e39eb0d5c281d24f4982a Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Mon, 18 May 2015 13:07:08 -0700 Subject: [PATCH 18/49] delegate docvecs to (memmappable, replaceable) DocvecsInArray --- gensim/models/doc2vec.py | 152 +++++++++++++++++++++----------- gensim/models/doc2vec_inner.pyx | 69 ++++++--------- gensim/models/word2vec.py | 8 +- 3 files changed, 133 insertions(+), 96 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 68fdafade8..7aeb242803 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -45,7 +45,7 @@ from collections import namedtuple from numpy import zeros, random, sum as np_sum, add as np_add, concatenate,\ - repeat as np_repeat, array, float32 as REAL, empty, ones + repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap from six import string_types logger = logging.getLogger(__name__) @@ -61,7 +61,7 @@ # failed... 
fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + def train_sentence_dbow(model, word_vocabs, doclbl_indices, alpha, work=None, train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ @@ -83,25 +83,23 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, """ if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 if doclbl_locks is None: - doclbl_locks = model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf if train_words and learn_words: train_sentence_sg(model, word_vocabs, alpha, work) # TODO: adapt for word_vectors/word_locks - for doclbl in doclbl_vocabs: - if doclbl is None: - continue # OOV token => skip + for doclbl_index in doclbl_indices: for word in word_vocabs: if word is None: continue # OOV word in the input sentence => skip - train_sg_pair(model, word, doclbl, alpha, learn_vectors=learn_doclbls, + train_sg_pair(model, word, doclbl_index, alpha, learn_vectors=learn_doclbls, learn_hidden=learn_hidden, context_vectors=doclbl_vectors, context_locks=doclbl_locks) return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + def train_sentence_dm(model, word_vocabs, doclbl_indices, alpha, work=None, neu1=None, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ @@ -119,11 +117,10 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= if word_locks is None: word_locks = model.syn0_lockf if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 if doclbl_locks is None: - doclbl_locks = 
model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf - doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] doclbl_sum = np_sum(doclbl_vectors[doclbl_indices], axis=0) doclbl_len = len(doclbl_indices) @@ -150,7 +147,7 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + def train_sentence_dm_concat(model, word_vocabs, doclbl_indices, alpha, work=None, neu1=None, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): """ @@ -169,11 +166,10 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None if word_locks is None: word_locks = model.syn0_lockf if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 if doclbl_locks is None: - doclbl_locks = model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf - doclbl_indices = [doclbl.index for doclbl in doclbl_vocabs if doclbl is not None] doclbl_len = len(doclbl_indices) if doclbl_len != model.dm_lbl_count: return 0 # skip doc without expected doclbl(s) @@ -232,6 +228,72 @@ def __init__(self, words, labels): def __str__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.labels) + +class DocvecsInArray(object): + """ + Default storage of docvecs during training, in a numpy array. + + Maintains dict mapping string doclbl -> int mapping if necessary. + (If all LabeledSentences use only int doclbls, this overhead is + avoided.) Supplying a mapfile_path at construction will use a + pair of memory-mapped files as the array backing for syn0/syn0_lockf + values. + + (A future alternative implementation, based on another persistence + mechanism like LMDB, LevelDB, or SQLite, should also be possible.) 
+ """ + + def __init__(self, mapfile_path=None): + self.doclbls = {} # string -> Doclbl (if necessary) + self.index2doclbl = [] # int index -> String (if necessary) + self.max_index = -1 + self.mapfile_path = mapfile_path + + def note_doclbl(self, key, sentence_no, sentence_length): + if isinstance(key, int): + self.max_index = max(self.max_index, key) + else: + if key in self.doclbls: + self.doclbls[key] = self.doclbls[key].repeat(sentence_length) + else: + self.doclbls[key] = Doclbl(sentence_no, sentence_length, 1) + self.index2doclbl.append(key) + + def indexed_doclbls(self, doclbl_tokens): + return ([i for i in [self._int_index(index,-1) for index in doclbl_tokens] if i > -1], + self.doclbl_syn0, doclbl_tokens) + + def trained_items(self, indexed_tuples): + """Persist any changes to the given indices; a no-op for this implementation""" + pass + + def _int_index(self, index, missing=None): + if isinstance(index, int): + return index + else: + return self.doclbls[index].index if index in self.doclbls else missing + + def __getitem__(self, index): + return self.doclbl_syn0[self._int_index(index)] + + def reset_weights(self, model): + length = max(len(self.doclbls),self.max_index) + if self.mapfile_path: + print(length) + self.doclbl_syn0 = np_memmap(self.mapfile_path+'.doclbl_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) + self.doclbl_syn0_lockf = np_memmap(self.mapfile_path+'.doclbl_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) + self.doclbl_syn0_lockf.fill(1.0) + else: + print(length) + self.doclbl_syn0 = empty((length, model.vector_size), dtype=REAL) + self.doclbl_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning + + for i in xrange(length): + # construct deterministic seed from index AND model seed + seed = "%d %s" % (model.seed, self.index2doclbl[i] if len(self.index2doclbl)>0 else str(i)) + self.doclbl_syn0[i] = model.seeded_vector(seed) + + class Doclbl(namedtuple('Doclbl', 'index, word_count, doc_count')): """A document 
label discovered during the initial vocabulary scan. (The document-vector equivalent of a Vocab object.)""" @@ -244,7 +306,8 @@ class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, - dbow_words=0, dm_mean=0, dm_concat=0, dm_lbl_count=1, **kwargs): + dbow_words=0, dm_mean=0, dm_concat=0, dm_lbl_count=1, + docvecs=None, docvecs_mapfile=None, **kwargs): """ Initialize the model from an iterable of `sentences`. Each sentence is a LabeledSentence object that will be used for training. @@ -298,11 +361,12 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, sg=(1+dm) % 2, hs=hs, negative=negative, cbow_mean=dm_mean, null_word=dm_concat, **kwargs) - self.doclbls = {} # mapping from doclbl (string) to Doclbl object - self.index2doclbl = [] # map from doclbl's int index to string self.dbow_words = dbow_words self.dm_concat = dm_concat self.dm_lbl_count = dm_lbl_count + self.docvecs = docvecs + if not self.docvecs: + self.docvecs = DocvecsInArray(docvecs_mapfile) if sentences is not None: self.build_vocab(sentences) self.train(sentences) @@ -313,11 +377,7 @@ def reset_weights(self): self.layer1_size = (self.dm_lbl_count + (2 * self.window)) * self.vector_size logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) Word2Vec.reset_weights(self) - self.doclbl_syn0 = empty((len(self.doclbls), self.vector_size), dtype=REAL) - for i in xrange(len(self.doclbls)): - # construct deterministic seed from word AND seed argument - self.doclbl_syn0[i] = self.seeded_vector(self.index2doclbl[i] + str(self.seed)) - self.doclbl_syn0_lockf = ones(len(self.doclbls), dtype=REAL) # zeros suppress learning + self.docvecs.reset_weights(self) def 
_vocab_from(self, sentences): sentence_no, vocab = -1, {} @@ -328,12 +388,7 @@ def _vocab_from(self, sentences): (sentence_no, total_words, len(vocab))) sentence_length = len(sentence.words) for label in sentence.labels: - total_words += 1 - if label in self.doclbls: - self.doclbls[label] = Doclbl[label].repeat(sentence_length) - else: - self.doclbls[label] = Doclbl(sentence_no, sentence_length, 1) - self.index2doclbl.append(label) + self.docvecs.note_doclbl(label, sentence_no, sentence_length) for word in sentence.words: total_words += 1 if word in vocab: @@ -348,7 +403,7 @@ def _prepare_sentences(self, sentences): for sentence in sentences: # avoid calling random_sample() where prob >= 1, to speed things up a little: yield (self._tokens_to_vocabs(sentence.words), - self._tokens_to_vocabs(sentence.labels, sample=False, source_dict=self.doclbls)) + self.docvecs.indexed_doclbls(sentence.labels)) def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): if source_dict is None: @@ -362,12 +417,19 @@ def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): def _get_job_words(self, alpha, work, job, neu1): if self.sg: - return sum(train_sentence_dbow(self, sentence, lbls, alpha, work, train_words=self.dbow_words) - for sentence, lbls in job) + tally = sum(train_sentence_dbow(self, sentence, doclbl_indices, alpha, work, train_words=self.dbow_words, + doclbl_vectors=doclbl_vectors) + for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) elif self.dm_concat: - return sum(train_sentence_dm_concat(self, sentence, lbls, alpha, work, neu1) for sentence, lbls in job) + tally = sum(train_sentence_dm_concat(self, sentence, doclbl_indices, alpha, work, neu1, + doclbl_vectors=doclbl_vectors) + for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) else: - return sum(train_sentence_dm(self, sentence, lbls, alpha, work, neu1) for sentence, lbls in job) + tally = sum(train_sentence_dm(self, sentence, doclbl_indices, alpha, work, neu1, + 
doclbl_vectors=doclbl_vectors) + for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) + self.docvecs.trained_items(item for s, item in job) + return tally def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): """ @@ -378,7 +440,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): doclbl_vectors = empty((1, self.vector_size), dtype=REAL) doclbl_vectors[0] = self.seeded_vector(' '.join(document)) doclbl_locks = ones(1, dtype=REAL) - doclbl_vocabs = [Doclbl(0,0,0) for doclbl in doclbl_vectors] + doclbl_indices = [0] word_vocabs = self._tokens_to_vocabs(document) work = zeros(self.layer1_size, dtype=REAL) @@ -387,33 +449,21 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): for i in range(steps): if self.sg: - train_sentence_dbow(self, word_vocabs, doclbl_vocabs, alpha, work, + train_sentence_dbow(self, word_vocabs, doclbl_indices, alpha, work, learn_words=False, learn_hidden=False, doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) elif self.dm_concat: - train_sentence_dm_concat(self, word_vocabs, doclbl_vocabs, alpha, work, neu1, + train_sentence_dm_concat(self, word_vocabs, doclbl_indices, alpha, work, neu1, learn_words=False, learn_hidden=False, doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) else: - train_sentence_dm(self, word_vocabs, doclbl_vocabs, alpha, work, neu1, + train_sentence_dm(self, word_vocabs, doclbl_indices, alpha, work, neu1, learn_words=False, learn_hidden=False, doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha return doclbl_vectors[0] - def get_docvec(self, doclbl): - return self.doclbl_syn0[self.doclbls[doclbl].index] - - @property - def docs(self): - class DocsView(object): - def __init__(self, doc2vec): - self.model = doc2vec - def __getitem__(self, doclbl): - return self.model.get_docvec(doclbl) - return DocsView(self) - def __str__(self): return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, 
dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index ec49bb3fe9..184e59b68f 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -265,7 +265,7 @@ cdef unsigned long long fast_sentence_dmc_neg( return next_random -def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, +def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs @@ -285,7 +285,7 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doclbl_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int sentence_len cdef int doclbl_len @@ -310,13 +310,13 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) if doclbl_locks is None: - doclbl_locks = model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: @@ -333,7 +333,7 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, work = zeros(model.layer1_size, dtype=REAL) _work = np.PyArray_DATA(work) sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + doclbl_len = min(MAX_SENTENCE_LEN, 
len(doclbl_indexes)) for i in range(sentence_len): predict_word = word_vocabs[i] @@ -353,8 +353,7 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item for i in range(doclbl_len): - context_token = doclbl_vocabs[i] - doclbl_indexes[i] = context_token.index + _doclbl_indexes[i] = doclbl_indexes[i] result += 1 # release GIL & train on the sentence @@ -385,17 +384,17 @@ def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # docvec-training for j in range(doclbl_len): if hs: - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) if negative: next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, - indexes[i], doclbl_indexes[j], _alpha, _work, next_random, + indexes[i], _doclbl_indexes[j], _alpha, _work, next_random, _learn_doclbls, _learn_hidden, _doclbl_locks) return result -def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, +def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs @@ -417,7 +416,7 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doclbl_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int sentence_len cdef int doclbl_len @@ -442,13 +441,13 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, 
neu1= word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) if doclbl_locks is None: - doclbl_locks = model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: @@ -488,17 +487,10 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) - j = 0 + doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) for i in range(doclbl_len): - token = doclbl_vocabs[i] - if token is None: - doclbl_len = doclbl_len - 1 - continue # leaving j unchanged - else: - doclbl_indexes[j] = token.index - result += 1 - j = j + 1 + _doclbl_indexes[i] = doclbl_indexes[i] + result += 1 # release GIL & train on the sentence with nogil: @@ -521,7 +513,7 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) for m in range(doclbl_len): count += ONEF - our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) if count > (0.5): inv_count = ONEF/count if cbow_mean: @@ -542,8 +534,8 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= # apply accumulated error in work if _learn_doclbls: for m in range(doclbl_len): - our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, - &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, + &ONE, 
&_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) if _learn_words: for m in range(j, k): if m == i: @@ -555,7 +547,7 @@ def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1= return result -def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, +def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, learn_doclbls=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): cdef int hs = model.hs @@ -576,7 +568,7 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doclbl_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] cdef int sentence_len cdef int doclbl_len @@ -598,7 +590,7 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None cdef unsigned long long table_len cdef unsigned long long next_random - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) if doclbl_len != expected_doclbl_len: return 0 # skip doc without expected nmber of lbls @@ -607,13 +599,13 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) if doclbl_vectors is None: - doclbl_vectors = model.doclbl_syn0 + doclbl_vectors = model.docvecs.doclbl_syn0 _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) if doclbl_locks is None: - doclbl_locks = model.doclbl_syn0_lockf + doclbl_locks = model.docvecs.doclbl_syn0_lockf _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) if hs: @@ -653,13 +645,8 @@ def train_sentence_dm_concat(model, word_vocabs, 
doclbl_vocabs, alpha, work=None j = j + 1 for i in range(doclbl_len): - token = doclbl_vocabs[i] - if token is None: - # no current support for missing doclbls where expected; skip sentence - return 0 - else: - doclbl_indexes[i] = token.index - result += 1 + _doclbl_indexes[i] = doclbl_indexes[i] + result += 1 # release GIL & train on the sentence with nogil: @@ -670,7 +657,7 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None # compose l1 & clear work for m in range(doclbl_len): # doc vector(s) - memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) n = 0 for m in range(j, k): @@ -698,8 +685,8 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None if _learn_doclbls: for m in range(doclbl_len): - our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], - &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], + &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) if _learn_words: for m in range(2 * window): our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 4e767d2f3a..0db5b49662 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -119,7 +119,7 @@ def train_sentence_sg(model, sentence, alpha, work=None): for pos2, word2 in enumerate(sentence[start : pos + model.window + 1 - reduced_window], start): # don't train on OOV words and on the `word` itself if word2 and not (pos2 == pos): - train_sg_pair(model, word, word2, alpha) + train_sg_pair(model, word, word2.index, alpha) return len([word for word in sentence if word is not None]) @@ -149,15 +149,15 @@ def train_sentence_cbow(model, 
sentence, alpha, work=None, neu1=None): return len([word for word in sentence if word is not None]) -def train_sg_pair(model, predict_word, context_token, alpha, learn_vectors=True, learn_hidden=True, +def train_sg_pair(model, predict_word, context_index, alpha, learn_vectors=True, learn_hidden=True, context_vectors=None, context_locks=None): if context_vectors is None: context_vectors = model.syn0 if context_locks is None: context_locks = model.syn0_lockf - l1 = context_vectors[context_token.index] - lock_factor = context_locks[context_token.index] + l1 = context_vectors[context_index] + lock_factor = context_locks[context_index] neu1e = zeros(l1.shape) From a75553d570b719c8602f65ccf2a4e75902044cf2 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Fri, 5 Jun 2015 23:55:13 -0700 Subject: [PATCH 19/49] fix thread perf crash from randint()-per-word --- gensim/models/doc2vec_inner.c | 2370 +++++++++++++++---------------- gensim/models/doc2vec_inner.pyx | 8 +- 2 files changed, 1141 insertions(+), 1237 deletions(-) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 7eff9ea096..cef91671bd 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -907,12 +907,6 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, int is_list, int wraparound, int boundscheck); -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); -#endif - -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); - static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); @@ -946,6 +940,12 @@ static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name); 
+#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg); +#endif + +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); #else @@ -1165,9 +1165,9 @@ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject 
*__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void 
__pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ @@ -1227,7 +1227,6 @@ static char __pyx_k_scopy[] = "scopy"; static char __pyx_k_snrm2[] = "snrm2"; static char __pyx_k_sscal[] = "sscal"; static char __pyx_k_table[] = "table"; -static char __pyx_k_token[] = "token"; static char __pyx_k_vocab[] = "vocab"; static char __pyx_k_zeros[] = "zeros"; static char __pyx_k_import[] = "__import__"; @@ -1238,6 +1237,7 @@ static char __pyx_k_result[] = "result"; static char __pyx_k_window[] = "window"; static char __pyx_k_work_2[] = "_work"; static char __pyx_k_alpha_2[] = "_alpha"; +static char __pyx_k_docvecs[] = "docvecs"; static char __pyx_k_float32[] = "float32"; static char __pyx_k_indexes[] = "indexes"; static char __pyx_k_randint[] = "randint"; @@ -1249,11 +1249,12 @@ static char __pyx_k_negative[] = "negative"; static char __pyx_k_cbow_mean[] = "cbow_mean"; static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_inv_count[] = "inv_count"; -static char __pyx_k_syn0locks[] = "syn0locks"; static char __pyx_k_table_len[] = "table_len"; static char __pyx_k_ValueError[] = "ValueError"; static char __pyx_k_doclbl_len[] = "doclbl_len"; +static char __pyx_k_syn0_lockf[] = "syn0_lockf"; static char __pyx_k_word_locks[] = "word_locks"; +static char __pyx_k_doclbl_syn0[] = "doclbl_syn0"; static char __pyx_k_layer1_size[] = "layer1_size"; static char __pyx_k_learn_words[] = "learn_words"; static char __pyx_k_next_random[] = "next_random"; @@ -1269,8 +1270,6 @@ static char __pyx_k_predict_word[] = "predict_word"; static char __pyx_k_sentence_len[] = "sentence_len"; static char __pyx_k_word_locks_2[] = "_word_locks"; static char __pyx_k_word_vectors[] = "word_vectors"; -static char __pyx_k_context_token[] = "context_token"; -static char __pyx_k_doclbl_vocabs[] = "doclbl_vocabs"; static char __pyx_k_learn_doclbls[] = "learn_doclbls"; static char __pyx_k_learn_words_2[] = "_learn_words"; static char 
__pyx_k_train_words_2[] = "_train_words"; @@ -1283,7 +1282,9 @@ static char __pyx_k_word_vectors_2[] = "_word_vectors"; static char __pyx_k_learn_doclbls_2[] = "_learn_doclbls"; static char __pyx_k_null_word_index[] = "null_word_index"; static char __pyx_k_reduced_windows[] = "reduced_windows"; +static char __pyx_k_doclbl_indexes_2[] = "_doclbl_indexes"; static char __pyx_k_doclbl_vectors_2[] = "_doclbl_vectors"; +static char __pyx_k_doclbl_syn0_lockf[] = "doclbl_syn0_lockf"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static char __pyx_k_train_sentence_dm[] = "train_sentence_dm"; static char __pyx_k_expected_doclbl_len[] = "expected_doclbl_len"; @@ -1312,18 +1313,20 @@ static PyObject *__pyx_n_s_cbow_mean; static PyObject *__pyx_n_s_code; static PyObject *__pyx_n_s_codelens; static PyObject *__pyx_n_s_codes; -static PyObject *__pyx_n_s_context_token; static PyObject *__pyx_n_s_count; static PyObject *__pyx_n_s_cpointer; static PyObject *__pyx_n_s_d_res; static PyObject *__pyx_n_s_dm_lbl_count; static PyObject *__pyx_n_s_doclbl_indexes; +static PyObject *__pyx_n_s_doclbl_indexes_2; static PyObject *__pyx_n_s_doclbl_len; static PyObject *__pyx_n_s_doclbl_locks; static PyObject *__pyx_n_s_doclbl_locks_2; +static PyObject *__pyx_n_s_doclbl_syn0; +static PyObject *__pyx_n_s_doclbl_syn0_lockf; static PyObject *__pyx_n_s_doclbl_vectors; static PyObject *__pyx_n_s_doclbl_vectors_2; -static PyObject *__pyx_n_s_doclbl_vocabs; +static PyObject *__pyx_n_s_docvecs; static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; @@ -1378,13 +1381,12 @@ static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_snrm2; static PyObject *__pyx_n_s_sscal; static PyObject *__pyx_n_s_syn0; -static PyObject *__pyx_n_s_syn0locks; +static PyObject *__pyx_n_s_syn0_lockf; static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_table; static PyObject *__pyx_n_s_table_len; static PyObject 
*__pyx_n_s_test; -static PyObject *__pyx_n_s_token; static PyObject *__pyx_n_s_train_sentence_dbow; static PyObject *__pyx_n_s_train_sentence_dm; static PyObject *__pyx_n_s_train_sentence_dm_concat; @@ -2831,7 +2833,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -2842,7 +2844,7 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_vocabs = 0; + PyObject *__pyx_v_doclbl_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_train_words = 0; @@ -2860,13 +2862,13 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); /* "trunk/gensim/models/doc2vec_inner.pyx":269 * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs @@ -2877,7 +2879,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":270 - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -2918,7 +2920,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } @@ -2997,7 +2999,7 
@@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_vocabs = values[2]; + __pyx_v_doclbl_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_train_words = values[5]; @@ -3017,12 +3019,12 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -3032,7 +3034,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, 
PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__train_words; @@ -3048,7 +3050,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_v_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; int __pyx_v_doclbl_len; @@ -3065,7 +3067,6 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ unsigned PY_LONG_LONG __pyx_v_next_random; PyObject *__pyx_v_predict_word = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_context_token = NULL; long __pyx_v_k; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations @@ -3074,8 +3075,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; int __pyx_t_4; int __pyx_t_5; - Py_ssize_t __pyx_t_6; - PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; PyObject *__pyx_t_8 = NULL; unsigned PY_LONG_LONG __pyx_t_9; PyObject *__pyx_t_10 = NULL; @@ -3209,9 +3210,9 
@@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":308 - * cdef unsigned long long next_random + /* "trunk/gensim/models/doc2vec_inner.pyx":309 * + * # default vectors, locks from syn0/doclbl_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -3220,14 +3221,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":309 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":310 + * # default vectors, locks from syn0/doclbl_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) - * + * if doclbl_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 309; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; @@ -3235,21 +3236,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":310 + /* "trunk/gensim/models/doc2vec_inner.pyx":311 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< - * * if doclbl_vectors is None: + * doclbl_vectors = model.docvecs.doclbl_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); /* "trunk/gensim/models/doc2vec_inner.pyx":312 + * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) */ __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); @@ -3257,103 +3258,109 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":313 - * + * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) - * + * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + 
__Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_6); + __pyx_t_6 = 0; goto __pyx_L4; } __pyx_L4:; /* "trunk/gensim/models/doc2vec_inner.pyx":314 * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< - * * if word_locks is None: + * word_locks = model.syn0_lockf */ if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":316 + /* "trunk/gensim/models/doc2vec_inner.pyx":315 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) - * * if word_locks is None: # <<<<<<<<<<<<<< - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) */ __pyx_t_4 = (__pyx_v_word_locks == Py_None); __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":317 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":316 + * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: - * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) - * + * if doclbl_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_6); + __pyx_t_6 = 0; goto __pyx_L5; } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":318 + /* "trunk/gensim/models/doc2vec_inner.pyx":317 * if word_locks is None: - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< - * * if doclbl_locks is None: + * doclbl_locks = model.docvecs.doclbl_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 318; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":320 + /* "trunk/gensim/models/doc2vec_inner.pyx":318 + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) - * * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) */ __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":321 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":319 + * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: - * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 321; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; - /* "trunk/gensim/models/doc2vec_inner.pyx":322 + /* "trunk/gensim/models/doc2vec_inner.pyx":320 * if doclbl_locks is None: - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 322; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":324 + /* "trunk/gensim/models/doc2vec_inner.pyx":322 * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -3363,23 +3370,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":325 + /* "trunk/gensim/models/doc2vec_inner.pyx":323 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7; } __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":327 + /* "trunk/gensim/models/doc2vec_inner.pyx":325 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3389,89 +3396,89 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":328 + /* "trunk/gensim/models/doc2vec_inner.pyx":326 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":329 + /* "trunk/gensim/models/doc2vec_inner.pyx":327 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 327; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":330 + /* "trunk/gensim/models/doc2vec_inner.pyx":328 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 330; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_table_len = __pyx_t_6; + __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":331 + /* "trunk/gensim/models/doc2vec_inner.pyx":329 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_7); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if 
(unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_9; goto __pyx_L8; } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":334 + /* "trunk/gensim/models/doc2vec_inner.pyx":332 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -3482,31 +3489,31 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":335 + /* "trunk/gensim/models/doc2vec_inner.pyx":333 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if 
(unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); @@ -3515,50 +3522,50 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":336 + /* "trunk/gensim/models/doc2vec_inner.pyx":334 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":337 + /* "trunk/gensim/models/doc2vec_inner.pyx":335 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # 
<<<<<<<<<<<<<< - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * */ - __pyx_t_6 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; - if (((__pyx_t_6 < __pyx_t_11) != 0)) { - __pyx_t_12 = __pyx_t_6; + if (((__pyx_t_7 < __pyx_t_11) != 0)) { + __pyx_t_12 = __pyx_t_7; } else { __pyx_t_12 = __pyx_t_11; } __pyx_v_sentence_len = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":338 + /* "trunk/gensim/models/doc2vec_inner.pyx":336 * _work = np.PyArray_DATA(work) * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_12 < __pyx_t_11) != 0)) { - __pyx_t_6 = __pyx_t_12; + __pyx_t_7 = __pyx_t_12; } else { - __pyx_t_6 = __pyx_t_11; + __pyx_t_7 = __pyx_t_11; } - __pyx_v_doclbl_len = ((int)__pyx_t_6); + __pyx_v_doclbl_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":340 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + /* "trunk/gensim/models/doc2vec_inner.pyx":338 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * * for i in range(sentence_len): # 
<<<<<<<<<<<<<< * predict_word = word_vocabs[i] @@ -3568,19 +3575,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":341 + /* "trunk/gensim/models/doc2vec_inner.pyx":339 * * for i in range(sentence_len): * predict_word = word_vocabs[i] # <<<<<<<<<<<<<< * if predict_word is None: * codelens[i] = 0 */ - __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_10); __Pyx_XDECREF_SET(__pyx_v_predict_word, __pyx_t_10); __pyx_t_10 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":342 + /* "trunk/gensim/models/doc2vec_inner.pyx":340 * for i in range(sentence_len): * predict_word = word_vocabs[i] * if predict_word is None: # <<<<<<<<<<<<<< @@ -3591,7 +3598,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":343 + /* "trunk/gensim/models/doc2vec_inner.pyx":341 * predict_word = word_vocabs[i] * if predict_word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3603,69 +3610,22 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":345 + /* "trunk/gensim/models/doc2vec_inner.pyx":343 * codelens[i] = 0 * else: * indexes[i] = predict_word.index # <<<<<<<<<<<<<< - * reduced_windows[i] = np.random.randint(window) * if hs: + * codelens[i] = len(predict_word.code) */ - __pyx_t_10 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":346 + /* "trunk/gensim/models/doc2vec_inner.pyx":344 * else: * indexes[i] = predict_word.index - * reduced_windows[i] = np.random.randint(window) # <<<<<<<<<<<<<< - * if hs: - * codelens[i] = len(predict_word.code) - */ - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = NULL; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_8))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_8); - if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); - __Pyx_INCREF(__pyx_t_7); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_8, function); - } - } - if (!__pyx_t_7) { - __pyx_t_10 = __Pyx_PyObject_CallOneArg(__pyx_t_8, __pyx_t_1); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_GOTREF(__pyx_t_10); - } else { - __pyx_t_15 = PyTuple_New(1+1); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_15); - PyTuple_SET_ITEM(__pyx_t_15, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = NULL; - PyTuple_SET_ITEM(__pyx_t_15, 0+1, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_1 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_15, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - } - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - - /* "trunk/gensim/models/doc2vec_inner.pyx":347 - * indexes[i] = predict_word.index - * reduced_windows[i] = np.random.randint(window) * if hs: # <<<<<<<<<<<<<< * codelens[i] = len(predict_word.code) * codes[i] = 
np.PyArray_DATA(predict_word.code) @@ -3673,223 +3633,224 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":348 - * reduced_windows[i] = np.random.randint(window) + /* "trunk/gensim/models/doc2vec_inner.pyx":345 + * indexes[i] = predict_word.index * if hs: * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_6 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_6); + (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":349 + /* "trunk/gensim/models/doc2vec_inner.pyx":346 * if hs: * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(predict_word.point) * else: */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":350 + /* "trunk/gensim/models/doc2vec_inner.pyx":347 * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 350; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject 
*)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L13; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":352 + /* "trunk/gensim/models/doc2vec_inner.pyx":349 * points[i] = np.PyArray_DATA(predict_word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< * result += 1 - * # single randint() call avoids a big thread-sync slowdown + * if _train_words: */ (__pyx_v_codelens[__pyx_v_i]) = 1; } __pyx_L13:; - /* "trunk/gensim/models/doc2vec_inner.pyx":353 + /* "trunk/gensim/models/doc2vec_inner.pyx":350 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * if _train_words: + * # single randint() call avoids a big thread-synchronization slowdown */ __pyx_v_result = (__pyx_v_result + 1); } __pyx_L12:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":355 + /* "trunk/gensim/models/doc2vec_inner.pyx":351 + * codelens[i] = 1 * result += 1 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< - * reduced_windows[i] = item + * if _train_words: # <<<<<<<<<<<<<< + * # single randint() call avoids a big thread-synchronization slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + */ + __pyx_t_4 = (__pyx_v__train_words != 0); + if (__pyx_t_4) { + + /* "trunk/gensim/models/doc2vec_inner.pyx":353 + * if _train_words: + * # single randint() call avoids a big thread-synchronization slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item * for i in range(doclbl_len): */ - __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_15 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_15); - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_15, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_15); __pyx_t_15 = 0; - __pyx_t_15 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_15); - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = NULL; - __pyx_t_6 = 0; - if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_8))) { - __pyx_t_7 = PyMethod_GET_SELF(__pyx_t_8); - if (likely(__pyx_t_7)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); - __Pyx_INCREF(__pyx_t_7); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_8, function); - __pyx_t_6 = 1; - } - } - __pyx_t_16 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_16); - if (__pyx_t_7) { - PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_7); __Pyx_GIVEREF(__pyx_t_7); __pyx_t_7 = NULL; - } - __Pyx_INCREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_6, __pyx_int_0); - __Pyx_GIVEREF(__pyx_int_0); - PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_6, __pyx_t_15); - __Pyx_GIVEREF(__pyx_t_15); - PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_6, __pyx_t_1); - __Pyx_GIVEREF(__pyx_t_1); - __pyx_t_15 = 0; - __pyx_t_1 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if 
(unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - if (likely(PyList_CheckExact(__pyx_t_10)) || PyTuple_CheckExact(__pyx_t_10)) { - __pyx_t_8 = __pyx_t_10; __Pyx_INCREF(__pyx_t_8); __pyx_t_6 = 0; - __pyx_t_17 = NULL; - } else { - __pyx_t_6 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = 0; + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - } - __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - for (;;) { - if (likely(!__pyx_t_17)) { - if (likely(PyList_CheckExact(__pyx_t_8))) { - if (__pyx_t_6 >= PyList_GET_SIZE(__pyx_t_8)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_10); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif - } else { - if (__pyx_t_6 >= PyTuple_GET_SIZE(__pyx_t_8)) break; - #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_6); __Pyx_INCREF(__pyx_t_10); __pyx_t_6++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #else - __pyx_t_10 = 
PySequence_ITEM(__pyx_t_8, __pyx_t_6); __pyx_t_6++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - #endif + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_15 = NULL; + __pyx_t_7 = 0; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_8))) { + __pyx_t_15 = PyMethod_GET_SELF(__pyx_t_8); + if (likely(__pyx_t_15)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_8); + __Pyx_INCREF(__pyx_t_15); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_8, function); + __pyx_t_7 = 1; } + } + __pyx_t_16 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_16); + if (__pyx_t_15) { + PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; + } + __Pyx_INCREF(__pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 0+__pyx_t_7, __pyx_int_0); + __Pyx_GIVEREF(__pyx_int_0); + PyTuple_SET_ITEM(__pyx_t_16, 1+__pyx_t_7, __pyx_t_1); + __Pyx_GIVEREF(__pyx_t_1); + 
PyTuple_SET_ITEM(__pyx_t_16, 2+__pyx_t_7, __pyx_t_6); + __Pyx_GIVEREF(__pyx_t_6); + __pyx_t_1 = 0; + __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_10); + __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + if (likely(PyList_CheckExact(__pyx_t_10)) || PyTuple_CheckExact(__pyx_t_10)) { + __pyx_t_8 = __pyx_t_10; __Pyx_INCREF(__pyx_t_8); __pyx_t_7 = 0; + __pyx_t_17 = NULL; } else { - __pyx_t_10 = __pyx_t_17(__pyx_t_8); - if (unlikely(!__pyx_t_10)) { - PyObject* exc_type = PyErr_Occurred(); - if (exc_type) { - if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_8); + __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; + for (;;) { + if (likely(!__pyx_t_17)) { + if (likely(PyList_CheckExact(__pyx_t_8))) { + if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif + } else { + if 
(__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_8)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #else + __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + #endif } - break; + } else { + __pyx_t_10 = __pyx_t_17(__pyx_t_8); + if (unlikely(!__pyx_t_10)) { + PyObject* exc_type = PyErr_Occurred(); + if (exc_type) { + if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + } + break; + } + __Pyx_GOTREF(__pyx_t_10); } - __Pyx_GOTREF(__pyx_t_10); - } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); - __pyx_t_10 = 0; - __pyx_v_i = __pyx_t_2; - __pyx_t_2 = (__pyx_t_2 + 1); - - /* "trunk/gensim/models/doc2vec_inner.pyx":356 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): - * reduced_windows[i] = item # <<<<<<<<<<<<<< + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_10); + __pyx_t_10 = 0; + __pyx_v_i = __pyx_t_2; + __pyx_t_2 = (__pyx_t_2 + 1); + + /* "trunk/gensim/models/doc2vec_inner.pyx":354 + * # single randint() call avoids a big thread-synchronization slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * reduced_windows[i] = item # <<<<<<<<<<<<<< * for i in range(doclbl_len): - * context_token = doclbl_vocabs[i] + * _doclbl_indexes[i] = doclbl_indexes[i] */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} - (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":355 - * result += 1 - * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< - * reduced_windows[i] = item + /* "trunk/gensim/models/doc2vec_inner.pyx":353 + * if _train_words: + * # single randint() call avoids a big thread-synchronization slowdown + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * reduced_windows[i] = item * for i in range(doclbl_len): */ + } + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + goto __pyx_L14; } - __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":357 - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): - * reduced_windows[i] = item + /* "trunk/gensim/models/doc2vec_inner.pyx":355 + * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * reduced_windows[i] = item * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * context_token = doclbl_vocabs[i] - * doclbl_indexes[i] = context_token.index + * _doclbl_indexes[i] = doclbl_indexes[i] + * result += 1 */ __pyx_t_2 = __pyx_v_doclbl_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":358 - * reduced_windows[i] = item - * for i in range(doclbl_len): - * context_token = doclbl_vocabs[i] # <<<<<<<<<<<<<< - * doclbl_indexes[i] = context_token.index - * result += 1 - */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_8); - __Pyx_XDECREF_SET(__pyx_v_context_token, __pyx_t_8); - __pyx_t_8 = 0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":359 + /* "trunk/gensim/models/doc2vec_inner.pyx":356 + * reduced_windows[i] = item * for i in range(doclbl_len): - * context_token = doclbl_vocabs[i] - * doclbl_indexes[i] = context_token.index # <<<<<<<<<<<<<< + * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_context_token, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - (__pyx_v_doclbl_indexes[__pyx_v_i]) = __pyx_t_14; + (__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":360 - * context_token = doclbl_vocabs[i] - * doclbl_indexes[i] = context_token.index + /* "trunk/gensim/models/doc2vec_inner.pyx":357 + * for i in range(doclbl_len): + * _doclbl_indexes[i] = doclbl_indexes[i] * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence @@ -3897,7 +3858,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":363 + /* "trunk/gensim/models/doc2vec_inner.pyx":360 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3911,7 +3872,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":361 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3922,7 +3883,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":365 + /* "trunk/gensim/models/doc2vec_inner.pyx":362 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -3932,17 +3893,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":366 + /* "trunk/gensim/models/doc2vec_inner.pyx":363 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] */ - goto __pyx_L21_continue; + goto __pyx_L22_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":367 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * if codelens[i] == 0: * continue * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< @@ -3952,7 +3913,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v__train_words != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":368 + /* "trunk/gensim/models/doc2vec_inner.pyx":365 * continue * if _train_words: # 
simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3961,7 +3922,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":369 + /* "trunk/gensim/models/doc2vec_inner.pyx":366 * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -3971,7 +3932,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_j < 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":370 + /* "trunk/gensim/models/doc2vec_inner.pyx":367 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3979,11 +3940,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L25; + goto __pyx_L26; } - __pyx_L25:; + __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":371 + /* "trunk/gensim/models/doc2vec_inner.pyx":368 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3992,7 +3953,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":372 + /* "trunk/gensim/models/doc2vec_inner.pyx":369 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -4002,7 +3963,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":373 + /* "trunk/gensim/models/doc2vec_inner.pyx":370 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -4010,11 
+3971,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if j == i or codelens[j] == 0: */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L26; + goto __pyx_L27; } - __pyx_L26:; + __pyx_L27:; - /* "trunk/gensim/models/doc2vec_inner.pyx":374 + /* "trunk/gensim/models/doc2vec_inner.pyx":371 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -4025,7 +3986,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_11; __pyx_t_18+=1) { __pyx_v_j = __pyx_t_18; - /* "trunk/gensim/models/doc2vec_inner.pyx":375 + /* "trunk/gensim/models/doc2vec_inner.pyx":372 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -4036,24 +3997,24 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (!__pyx_t_5) { } else { __pyx_t_4 = __pyx_t_5; - goto __pyx_L30_bool_binop_done; + goto __pyx_L31_bool_binop_done; } __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_j]) == 0) != 0); __pyx_t_4 = __pyx_t_5; - __pyx_L30_bool_binop_done:; + __pyx_L31_bool_binop_done:; if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":376 + /* "trunk/gensim/models/doc2vec_inner.pyx":373 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose */ - goto __pyx_L27_continue; + goto __pyx_L28_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":377 + /* "trunk/gensim/models/doc2vec_inner.pyx":374 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< @@ -4063,7 +4024,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":379 + /* "trunk/gensim/models/doc2vec_inner.pyx":376 * if hs: * # we reuse the DBOW function, as it is equivalent to 
skip-gram for this purpose * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< @@ -4071,11 +4032,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if negative: */ __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - goto __pyx_L32; + goto __pyx_L33; } - __pyx_L32:; + __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":381 + /* "trunk/gensim/models/doc2vec_inner.pyx":378 * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< @@ -4085,7 +4046,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":383 + /* "trunk/gensim/models/doc2vec_inner.pyx":380 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -4093,75 +4054,75 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * _learn_words, _learn_hidden, _word_locks) */ __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); - goto __pyx_L33; + goto __pyx_L34; } - __pyx_L33:; 
- __pyx_L27_continue:; + __pyx_L34:; + __pyx_L28_continue:; } - goto __pyx_L24; + goto __pyx_L25; } - __pyx_L24:; + __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":388 + /* "trunk/gensim/models/doc2vec_inner.pyx":385 * * # docvec-training * for j in range(doclbl_len): # <<<<<<<<<<<<<< * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], */ __pyx_t_18 = __pyx_v_doclbl_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":389 + /* "trunk/gensim/models/doc2vec_inner.pyx":386 * # docvec-training * for j in range(doclbl_len): * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) */ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":390 + /* "trunk/gensim/models/doc2vec_inner.pyx":387 * for j in range(doclbl_len): * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], # <<<<<<<<<<<<<< + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], # <<<<<<<<<<<<<< * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doclbl_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); - goto __pyx_L36; + 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doclbl_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); + goto __pyx_L37; } - __pyx_L36:; + __pyx_L37:; - /* "trunk/gensim/models/doc2vec_inner.pyx":392 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, doclbl_indexes[j], + /* "trunk/gensim/models/doc2vec_inner.pyx":389 + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, - * indexes[i], doclbl_indexes[j], _alpha, _work, next_random, + * indexes[i], _doclbl_indexes[j], _alpha, _work, next_random, */ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":393 + /* "trunk/gensim/models/doc2vec_inner.pyx":390 * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) * if negative: * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, # <<<<<<<<<<<<<< - * indexes[i], doclbl_indexes[j], _alpha, _work, next_random, + * indexes[i], _doclbl_indexes[j], _alpha, _work, next_random, * _learn_doclbls, _learn_hidden, _doclbl_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doclbl_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); - goto __pyx_L37; + __pyx_v_next_random = 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doclbl_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); + goto __pyx_L38; } - __pyx_L37:; + __pyx_L38:; } - __pyx_L21_continue:; + __pyx_L22_continue:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":363 + /* "trunk/gensim/models/doc2vec_inner.pyx":360 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4173,13 +4134,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L20; + goto __pyx_L21; } - __pyx_L20:; + __pyx_L21:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":397 + /* "trunk/gensim/models/doc2vec_inner.pyx":394 * _learn_doclbls, _learn_hidden, _doclbl_locks) * * return result # <<<<<<<<<<<<<< @@ -4187,7 +4148,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_r = __pyx_t_8; __pyx_t_8 = 0; @@ -4196,7 +4157,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doclbls=True, learn_words=True, 
learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -4204,7 +4165,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); @@ -4214,7 +4175,6 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L0:; __Pyx_XDECREF(__pyx_v_predict_word); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_context_token); __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_word_vectors); __Pyx_XDECREF(__pyx_v_word_locks); @@ -4225,10 +4185,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":400 +/* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -4239,7 +4199,7 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_vocabs = 0; + PyObject *__pyx_v_doclbl_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_neu1 = 0; @@ -4257,14 +4217,14 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); { - static 
PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "trunk/gensim/models/doc2vec_inner.pyx":401 + /* "trunk/gensim/models/doc2vec_inner.pyx":398 * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs @@ -4273,8 +4233,8 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "trunk/gensim/models/doc2vec_inner.pyx":402 - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + /* "trunk/gensim/models/doc2vec_inner.pyx":399 + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -4312,17 +4272,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence case 1: if (likely((values[1] = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -4371,7 +4331,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch 
(PyTuple_GET_SIZE(__pyx_args)) { @@ -4394,7 +4354,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_vocabs = values[2]; + __pyx_v_doclbl_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_neu1 = values[5]; @@ -4408,18 +4368,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":400 + /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, 
neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -4429,7 +4389,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__learn_doclbls; @@ -4448,7 +4408,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence int __pyx_v_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; int __pyx_v_doclbl_len; @@ -4467,7 +4427,6 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence unsigned PY_LONG_LONG __pyx_v_next_random; PyObject *__pyx_v_word = NULL; PyObject *__pyx_v_item = NULL; - PyObject *__pyx_v_token = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -4475,8 +4434,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_t_3; int __pyx_t_4; int __pyx_t_5; - Py_ssize_t __pyx_t_6; - PyObject *__pyx_t_7 = NULL; + PyObject *__pyx_t_6 = NULL; + Py_ssize_t __pyx_t_7; PyObject *__pyx_t_8 = NULL; unsigned PY_LONG_LONG __pyx_t_9; PyObject *__pyx_t_10 = NULL; @@ -4500,112 +4459,121 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_INCREF(__pyx_v_doclbl_vectors); __Pyx_INCREF(__pyx_v_doclbl_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":403 + /* "trunk/gensim/models/doc2vec_inner.pyx":400 * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":404 + /* "trunk/gensim/models/doc2vec_inner.pyx":401 * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":405 + /* "trunk/gensim/models/doc2vec_inner.pyx":402 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doclbls = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":406 + /* "trunk/gensim/models/doc2vec_inner.pyx":403 * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":407 + /* "trunk/gensim/models/doc2vec_inner.pyx":404 * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean - * cdef REAL_t count, inv_count + * cdef REAL_t count, inv_count = 1.0 */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":408 + /* "trunk/gensim/models/doc2vec_inner.pyx":405 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< - * cdef REAL_t count, inv_count + * cdef REAL_t count, inv_count = 1.0 * 
*/ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 408; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":417 + /* "trunk/gensim/models/doc2vec_inner.pyx":406 + * cdef int _learn_hidden = learn_hidden + * cdef int cbow_mean = model.cbow_mean + * cdef REAL_t count, inv_count = 1.0 # <<<<<<<<<<<<<< + * + * cdef REAL_t *_word_vectors + */ + __pyx_v_inv_count = 1.0; + + /* "trunk/gensim/models/doc2vec_inner.pyx":414 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":418 + /* "trunk/gensim/models/doc2vec_inner.pyx":415 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * 
cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 418; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":426 + /* "trunk/gensim/models/doc2vec_inner.pyx":423 * cdef int sentence_len * cdef int doclbl_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k, m */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 426; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":429 + /* "trunk/gensim/models/doc2vec_inner.pyx":426 * * cdef int i, j, k, m * cdef long result = 0 # <<<<<<<<<<<<<< @@ -4614,9 +4582,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":443 + /* "trunk/gensim/models/doc2vec_inner.pyx":440 * - * # default vectors, locks from syn0 + * # default vectors, locks from syn0/doclbl_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -4625,14 +4593,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":444 - * # default vectors, locks from syn0 + /* "trunk/gensim/models/doc2vec_inner.pyx":441 + * # default vectors, locks from syn0/doclbl_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; @@ -4640,125 +4608,131 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":445 + /* "trunk/gensim/models/doc2vec_inner.pyx":442 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = 
(np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":446 + /* "trunk/gensim/models/doc2vec_inner.pyx":443 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) */ __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":447 + /* "trunk/gensim/models/doc2vec_inner.pyx":444 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); 
- __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_6); + __pyx_t_6 = 0; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":448 + /* "trunk/gensim/models/doc2vec_inner.pyx":445 * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":449 - * doclbl_vectors = model.syn0 + /* "trunk/gensim/models/doc2vec_inner.pyx":446 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) */ __pyx_t_4 = (__pyx_v_word_locks == Py_None); __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":450 + /* "trunk/gensim/models/doc2vec_inner.pyx":447 * 
_doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: - * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_6); + __pyx_t_6 = 0; goto __pyx_L5; } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":451 + /* "trunk/gensim/models/doc2vec_inner.pyx":448 * if word_locks is None: - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doclbl_locks is None: - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":452 - * word_locks = model.syn0locks + /* "trunk/gensim/models/doc2vec_inner.pyx":449 + * word_locks = model.syn0_lockf * 
_word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) */ __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":453 + /* "trunk/gensim/models/doc2vec_inner.pyx":450 * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: - * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; - /* "trunk/gensim/models/doc2vec_inner.pyx":454 + /* "trunk/gensim/models/doc2vec_inner.pyx":451 * if doclbl_locks is None: - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":456 + /* "trunk/gensim/models/doc2vec_inner.pyx":453 * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -4768,23 +4742,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":457 + /* "trunk/gensim/models/doc2vec_inner.pyx":454 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7; } __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":459 + /* "trunk/gensim/models/doc2vec_inner.pyx":456 * syn1 = 
(np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -4794,89 +4768,89 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":460 + /* "trunk/gensim/models/doc2vec_inner.pyx":457 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":461 + /* "trunk/gensim/models/doc2vec_inner.pyx":458 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":462 + /* "trunk/gensim/models/doc2vec_inner.pyx":459 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_table_len = __pyx_t_6; + __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":463 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":460 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_7); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); 
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_7); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_9; goto __pyx_L8; } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":466 + /* "trunk/gensim/models/doc2vec_inner.pyx":463 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -4887,31 +4861,31 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - 
/* "trunk/gensim/models/doc2vec_inner.pyx":467 + /* "trunk/gensim/models/doc2vec_inner.pyx":464 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; __Pyx_DECREF_SET(__pyx_v_work, __pyx_t_10); @@ -4920,17 +4894,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":468 + /* "trunk/gensim/models/doc2vec_inner.pyx":465 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, 
__pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":469 + /* "trunk/gensim/models/doc2vec_inner.pyx":466 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -4941,66 +4915,66 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":470 + /* "trunk/gensim/models/doc2vec_inner.pyx":467 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_7) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_7); - __pyx_t_7 = 0; + __Pyx_DECREF_SET(__pyx_v_neu1, __pyx_t_6); + __pyx_t_6 = 0; goto __pyx_L10; } __pyx_L10:; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":471 + /* "trunk/gensim/models/doc2vec_inner.pyx":468 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 471; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":473 + /* "trunk/gensim/models/doc2vec_inner.pyx":470 * _neu1 = np.PyArray_DATA(neu1) * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 * for i in range(sentence_len): */ - __pyx_t_6 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_6 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; - if (((__pyx_t_6 < __pyx_t_11) != 0)) { - __pyx_t_12 = __pyx_t_6; + if (((__pyx_t_7 < __pyx_t_11) != 0)) { + __pyx_t_12 = __pyx_t_7; } else { __pyx_t_12 = __pyx_t_11; } __pyx_v_sentence_len = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":474 + /* "trunk/gensim/models/doc2vec_inner.pyx":471 * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< @@ -5009,7 +4983,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_j = 0; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":475 + /* "trunk/gensim/models/doc2vec_inner.pyx":472 * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5020,19 +4994,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":476 + /* "trunk/gensim/models/doc2vec_inner.pyx":473 * j = 0 * for i in range(sentence_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: * # shrink sentence to leave out word */ - __pyx_t_7 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_7 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 476; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; - __Pyx_GOTREF(__pyx_t_7); - __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_7); - __pyx_t_7 = 0; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __Pyx_GOTREF(__pyx_t_6); + __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); + __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":477 + /* "trunk/gensim/models/doc2vec_inner.pyx":474 * for i in range(sentence_len): * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< @@ -5043,7 +5017,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":479 + /* "trunk/gensim/models/doc2vec_inner.pyx":476 * if word is None: * # shrink sentence to leave out word * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< @@ -5052,7 +5026,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":480 + /* "trunk/gensim/models/doc2vec_inner.pyx":477 * # shrink sentence to leave out word * sentence_len = sentence_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< @@ -5063,20 +5037,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":482 + /* "trunk/gensim/models/doc2vec_inner.pyx":479 * continue # leaving j unchanged * else: * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[j] = len(word.code) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_7); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 479; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 479; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":483 + /* "trunk/gensim/models/doc2vec_inner.pyx":480 * else: * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< @@ -5086,49 +5060,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":484 + /* "trunk/gensim/models/doc2vec_inner.pyx":481 * indexes[j] = word.index 
* if hs: * codelens[j] = len(word.code) # <<<<<<<<<<<<<< * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - __pyx_t_12 = PyObject_Length(__pyx_t_7); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_12 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":485 + /* "trunk/gensim/models/doc2vec_inner.pyx":482 * if hs: * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[j] = np.PyArray_DATA(word.point) * result += 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":486 + /* "trunk/gensim/models/doc2vec_inner.pyx":483 * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * result += 1 * j = j + 1 */ - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); - if (!(likely(((__pyx_t_7) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_7, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 486; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_7))); - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L14; } __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":487 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":484 * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) * result += 1 # <<<<<<<<<<<<<< @@ -5137,7 +5111,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":488 + /* "trunk/gensim/models/doc2vec_inner.pyx":485 * points[j] = np.PyArray_DATA(word.point) * result += 1 * j = j + 1 # <<<<<<<<<<<<<< @@ -5149,7 +5123,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L11_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":490 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5157,17 +5131,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_t_15 = NULL; __pyx_t_12 = 0; @@ -5181,7 +5155,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_12 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -5195,64 +5169,64 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_GIVEREF(__pyx_t_10); __pyx_t_1 = 0; __pyx_t_10 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_7); + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - if (likely(PyList_CheckExact(__pyx_t_7)) || PyTuple_CheckExact(__pyx_t_7)) { - __pyx_t_8 = __pyx_t_7; __Pyx_INCREF(__pyx_t_8); __pyx_t_12 = 0; + if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { + __pyx_t_8 = __pyx_t_6; __Pyx_INCREF(__pyx_t_8); __pyx_t_12 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_12 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_7); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } - __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { if (likely(!__pyx_t_17)) { if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_7); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if 
(unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_7 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_7); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_7 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { - __pyx_t_7 = __pyx_t_17(__pyx_t_8); - if (unlikely(!__pyx_t_7)) { + __pyx_t_6 = __pyx_t_17(__pyx_t_8); + if (unlikely(!__pyx_t_6)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } - 
__Pyx_GOTREF(__pyx_t_7); + __Pyx_GOTREF(__pyx_t_6); } - __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_7); - __pyx_t_7 = 0; + __Pyx_XDECREF_SET(__pyx_v_item, __pyx_t_6); + __pyx_t_6 = 0; __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":491 + /* "trunk/gensim/models/doc2vec_inner.pyx":488 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 491; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":490 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5262,120 +5236,57 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":493 + /* "trunk/gensim/models/doc2vec_inner.pyx":490 * reduced_windows[i] = item * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< - * j = 0 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< * for i in range(doclbl_len): + * _doclbl_indexes[i] = doclbl_indexes[i] */ - __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename 
= __pyx_f[0]; __pyx_lineno = 493; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_12 < __pyx_t_11) != 0)) { - __pyx_t_6 = __pyx_t_12; + __pyx_t_7 = __pyx_t_12; } else { - __pyx_t_6 = __pyx_t_11; + __pyx_t_7 = __pyx_t_11; } - __pyx_v_doclbl_len = ((int)__pyx_t_6); + __pyx_v_doclbl_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":494 + /* "trunk/gensim/models/doc2vec_inner.pyx":491 * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) - * j = 0 # <<<<<<<<<<<<<< - * for i in range(doclbl_len): - * token = doclbl_vocabs[i] - */ - __pyx_v_j = 0; - - /* "trunk/gensim/models/doc2vec_inner.pyx":495 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) - * j = 0 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * token = doclbl_vocabs[i] - * if token is None: + * _doclbl_indexes[i] = doclbl_indexes[i] + * result += 1 */ __pyx_t_2 = __pyx_v_doclbl_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":496 - * j = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":492 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * for i in range(doclbl_len): - * token = doclbl_vocabs[i] # <<<<<<<<<<<<<< - * if token is None: - * doclbl_len = doclbl_len - 1 + * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< + * result += 1 + * */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 496; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 
== NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); - __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_8); - __pyx_t_8 = 0; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; + (__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":497 + /* "trunk/gensim/models/doc2vec_inner.pyx":493 * for i in range(doclbl_len): - * token = doclbl_vocabs[i] - * if token is None: # <<<<<<<<<<<<<< - * doclbl_len = doclbl_len - 1 - * continue # leaving j unchanged - */ - __pyx_t_5 = (__pyx_v_token == Py_None); - __pyx_t_4 = (__pyx_t_5 != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":498 - * token = doclbl_vocabs[i] - * if token is None: - * doclbl_len = doclbl_len - 1 # <<<<<<<<<<<<<< - * continue # leaving j unchanged - * else: - */ - __pyx_v_doclbl_len = (__pyx_v_doclbl_len - 1); - - /* "trunk/gensim/models/doc2vec_inner.pyx":499 - * if token is None: - * doclbl_len = doclbl_len - 1 - * continue # leaving j unchanged # <<<<<<<<<<<<<< - * else: - * doclbl_indexes[j] = token.index - */ - goto __pyx_L17_continue; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":501 - * continue # leaving j unchanged - * else: - * doclbl_indexes[j] = token.index # <<<<<<<<<<<<<< - * result += 1 - * j = j + 1 - */ - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_token, __pyx_n_s_index); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 501; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_8); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 501; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - 
__Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - (__pyx_v_doclbl_indexes[__pyx_v_j]) = __pyx_t_14; - - /* "trunk/gensim/models/doc2vec_inner.pyx":502 - * else: - * doclbl_indexes[j] = token.index - * result += 1 # <<<<<<<<<<<<<< - * j = j + 1 - * - */ - __pyx_v_result = (__pyx_v_result + 1); - - /* "trunk/gensim/models/doc2vec_inner.pyx":503 - * doclbl_indexes[j] = token.index - * result += 1 - * j = j + 1 # <<<<<<<<<<<<<< + * _doclbl_indexes[i] = doclbl_indexes[i] + * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_v_j = (__pyx_v_j + 1); - } - __pyx_L17_continue:; + __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":506 + /* "trunk/gensim/models/doc2vec_inner.pyx":496 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5389,7 +5300,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":507 + /* "trunk/gensim/models/doc2vec_inner.pyx":497 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5400,7 +5311,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":508 + /* "trunk/gensim/models/doc2vec_inner.pyx":498 * with nogil: * for i in range(sentence_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -5409,17 +5320,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":509 + /* "trunk/gensim/models/doc2vec_inner.pyx":499 * for i in range(sentence_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 * k = i + window + 1 - reduced_windows[i] */ - __pyx_t_4 = ((__pyx_v_j < 0) != 
0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_j < 0) != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":510 + /* "trunk/gensim/models/doc2vec_inner.pyx":500 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5427,11 +5338,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if k > sentence_len: */ __pyx_v_j = 0; - goto __pyx_L25; + goto __pyx_L24; } - __pyx_L25:; + __pyx_L24:; - /* "trunk/gensim/models/doc2vec_inner.pyx":511 + /* "trunk/gensim/models/doc2vec_inner.pyx":501 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -5440,17 +5351,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":512 + /* "trunk/gensim/models/doc2vec_inner.pyx":502 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< * k = sentence_len * */ - __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":513 + /* "trunk/gensim/models/doc2vec_inner.pyx":503 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -5458,11 +5369,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * # compose l1 (in _neu1) & clear _work */ __pyx_v_k = __pyx_v_sentence_len; - goto __pyx_L26; + goto __pyx_L25; } - __pyx_L26:; + __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":516 + /* "trunk/gensim/models/doc2vec_inner.pyx":506 * * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -5471,7 +5382,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ memset(__pyx_v__neu1, 0, 
(__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":517 + /* "trunk/gensim/models/doc2vec_inner.pyx":507 * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -5480,7 +5391,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":518 + /* "trunk/gensim/models/doc2vec_inner.pyx":508 * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5491,28 +5402,28 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":519 + /* "trunk/gensim/models/doc2vec_inner.pyx":509 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":520 + /* "trunk/gensim/models/doc2vec_inner.pyx":510 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L27_continue; + goto __pyx_L26_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":522 + /* "trunk/gensim/models/doc2vec_inner.pyx":512 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -5521,7 +5432,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":523 + /* "trunk/gensim/models/doc2vec_inner.pyx":513 * else: * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, 
_neu1, &ONE) # <<<<<<<<<<<<<< @@ -5530,69 +5441,74 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L27_continue:; + __pyx_L26_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":524 + /* "trunk/gensim/models/doc2vec_inner.pyx":514 * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doclbl_len): # <<<<<<<<<<<<<< * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) */ __pyx_t_18 = __pyx_v_doclbl_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":525 + /* "trunk/gensim/models/doc2vec_inner.pyx":515 * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doclbl_len): * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) - * if cbow_mean and count > (0.5): + * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * if count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":526 + /* "trunk/gensim/models/doc2vec_inner.pyx":516 * for m in range(doclbl_len): * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): + * our_saxpy(&size, &ONEF, 
&_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< + * if count > (0.5): * inv_count = ONEF/count */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - /* "trunk/gensim/models/doc2vec_inner.pyx":527 + /* "trunk/gensim/models/doc2vec_inner.pyx":517 * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * if count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count - * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
+ * if cbow_mean: */ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L33_bool_binop_done; - } __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L33_bool_binop_done:; - if (__pyx_t_4) { + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":528 - * our_saxpy(&size, &ONEF, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE, _neu1, &ONE) - * if cbow_mean and count > (0.5): + /* "trunk/gensim/models/doc2vec_inner.pyx":518 + * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< + * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error */ __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF / __pyx_v_count); + goto __pyx_L31; + } + __pyx_L31:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":519 + * if count > (0.5): + * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error + */ + __pyx_t_5 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":529 - * if cbow_mean and count > (0.5): + /* "trunk/gensim/models/doc2vec_inner.pyx":520 * inv_count = ONEF/count + * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * @@ -5602,8 +5518,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L32:; - /* "trunk/gensim/models/doc2vec_inner.pyx":530 - * inv_count = ONEF/count + /* "trunk/gensim/models/doc2vec_inner.pyx":521 + * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< * @@ -5611,17 +5527,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":532 + /* "trunk/gensim/models/doc2vec_inner.pyx":523 * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< * fast_sentence_dm_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, */ - __pyx_t_4 = (__pyx_v_hs != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_hs != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":533 + /* "trunk/gensim/models/doc2vec_inner.pyx":524 * * if hs: * fast_sentence_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -5629,21 +5545,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * size, _learn_hidden) */ __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - goto __pyx_L35; + goto __pyx_L33; } - __pyx_L35:; + __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":536 + /* "trunk/gensim/models/doc2vec_inner.pyx":527 * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< * next_random = 
fast_sentence_dm_neg(negative, table, table_len, next_random, * _neu1, syn1neg, indexes[i], _alpha, _work, */ - __pyx_t_4 = (__pyx_v_negative != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v_negative != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":537 + /* "trunk/gensim/models/doc2vec_inner.pyx":528 * size, _learn_hidden) * if negative: * next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< @@ -5651,56 +5567,78 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * size, _learn_hidden) */ __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); - goto __pyx_L36; + goto __pyx_L34; } - __pyx_L36:; + __pyx_L34:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":532 + * size, _learn_hidden) + * + * if not cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) + * # apply accumulated error in work + */ + __pyx_t_5 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":542 + /* "trunk/gensim/models/doc2vec_inner.pyx":533 * + * if not cbow_mean: + * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * # apply accumulated error in work + * if _learn_doclbls: + */ + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + goto __pyx_L35; + } + __pyx_L35:; + + /* "trunk/gensim/models/doc2vec_inner.pyx":535 + * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) 
* # apply accumulated error in work * if _learn_doclbls: # <<<<<<<<<<<<<< * for m in range(doclbl_len): - * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, + * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, */ - __pyx_t_4 = (__pyx_v__learn_doclbls != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v__learn_doclbls != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":543 + /* "trunk/gensim/models/doc2vec_inner.pyx":536 * # apply accumulated error in work * if _learn_doclbls: * for m in range(doclbl_len): # <<<<<<<<<<<<<< - * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) */ __pyx_t_18 = __pyx_v_doclbl_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":544 + /* "trunk/gensim/models/doc2vec_inner.pyx":537 * if _learn_doclbls: * for m in range(doclbl_len): - * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, # <<<<<<<<<<<<<< - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, # <<<<<<<<<<<<<< + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) * if _learn_words: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doclbl_locks[(__pyx_v_doclbl_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doclbl_locks[(__pyx_v__doclbl_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), 
(&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L37; + goto __pyx_L36; } - __pyx_L37:; + __pyx_L36:; - /* "trunk/gensim/models/doc2vec_inner.pyx":546 - * our_saxpy(&size, &_doclbl_locks[doclbl_indexes[m]], _work, - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":539 + * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i: */ - __pyx_t_4 = (__pyx_v__learn_words != 0); - if (__pyx_t_4) { + __pyx_t_5 = (__pyx_v__learn_words != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":547 - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":540 + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) * if _learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i: @@ -5710,28 +5648,28 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":548 + /* "trunk/gensim/models/doc2vec_inner.pyx":541 * if _learn_words: * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (__pyx_t_4) { + __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":549 + /* "trunk/gensim/models/doc2vec_inner.pyx":542 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, */ - goto __pyx_L41_continue; + goto __pyx_L40_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":551 + /* "trunk/gensim/models/doc2vec_inner.pyx":544 * continue * else: * 
our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, # <<<<<<<<<<<<<< @@ -5740,15 +5678,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - __pyx_L41_continue:; + __pyx_L40_continue:; } - goto __pyx_L40; + goto __pyx_L39; } - __pyx_L40:; + __pyx_L39:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":506 + /* "trunk/gensim/models/doc2vec_inner.pyx":496 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5760,13 +5698,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L22; + goto __pyx_L21; } - __pyx_L22:; + __pyx_L21:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":554 + /* "trunk/gensim/models/doc2vec_inner.pyx":547 * &_word_vectors[indexes[m] * size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -5774,16 +5712,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 547; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_r = __pyx_t_8; __pyx_t_8 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":400 + /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def 
train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -5791,7 +5729,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_1); - __Pyx_XDECREF(__pyx_t_7); + __Pyx_XDECREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_8); __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); @@ -5801,7 +5739,6 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); __Pyx_XDECREF(__pyx_v_item); - __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_neu1); __Pyx_XDECREF(__pyx_v_word_vectors); @@ -5813,10 +5750,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":557 +/* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -5827,7 +5764,7 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_vocabs = 0; + PyObject *__pyx_v_doclbl_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_neu1 = 0; @@ -5845,14 +5782,14 @@ static PyObject 
*__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dm_concat (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_vocabs,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "trunk/gensim/models/doc2vec_inner.pyx":558 + /* "trunk/gensim/models/doc2vec_inner.pyx":551 * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs @@ -5861,8 +5798,8 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "trunk/gensim/models/doc2vec_inner.pyx":559 - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, + /* "trunk/gensim/models/doc2vec_inner.pyx":552 + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, 
doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -5900,17 +5837,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vocabs)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -5959,7 +5896,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto 
__pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -5982,7 +5919,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_vocabs = values[2]; + __pyx_v_doclbl_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_neu1 = values[5]; @@ -5996,18 +5933,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_vocabs, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, 
__pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -6017,7 +5954,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_vocabs, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__learn_doclbls; @@ -6034,7 +5971,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence int __pyx_v_vector_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t 
__pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v_doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_window_indexes[10000]; int __pyx_v_sentence_len; int __pyx_v_doclbl_len; @@ -6055,7 +5992,6 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence unsigned PY_LONG_LONG __pyx_v_table_len; unsigned PY_LONG_LONG __pyx_v_next_random; PyObject *__pyx_v_word = NULL; - PyObject *__pyx_v_token = NULL; PyObject *__pyx_r = NULL; __Pyx_RefNannyDeclarations PyObject *__pyx_t_1 = NULL; @@ -6085,125 +6021,125 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __Pyx_INCREF(__pyx_v_doclbl_vectors); __Pyx_INCREF(__pyx_v_doclbl_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":560 + /* "trunk/gensim/models/doc2vec_inner.pyx":553 * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 553; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 560; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 553; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - 
/* "trunk/gensim/models/doc2vec_inner.pyx":561 + /* "trunk/gensim/models/doc2vec_inner.pyx":554 * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 561; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 561; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":562 + /* "trunk/gensim/models/doc2vec_inner.pyx":555 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 562; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doclbls = __pyx_t_2; - 
/* "trunk/gensim/models/doc2vec_inner.pyx":563 + /* "trunk/gensim/models/doc2vec_inner.pyx":556 * cdef int negative = model.negative * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 563; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 556; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":564 + /* "trunk/gensim/models/doc2vec_inner.pyx":557 * cdef int _learn_doclbls = learn_doclbls * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 564; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":572 + /* "trunk/gensim/models/doc2vec_inner.pyx":565 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 572; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = 
__pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":573 + /* "trunk/gensim/models/doc2vec_inner.pyx":566 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size # <<<<<<<<<<<<<< * cdef int vector_size = model.vector_size * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 573; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_layer1_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":574 + /* "trunk/gensim/models/doc2vec_inner.pyx":567 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 574; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_vector_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":582 + /* "trunk/gensim/models/doc2vec_inner.pyx":575 * cdef int sentence_len * cdef int doclbl_len * cdef int window = model.window # <<<<<<<<<<<<<< * cdef int expected_doclbl_len = model.dm_lbl_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":583 + /* "trunk/gensim/models/doc2vec_inner.pyx":576 * cdef int doclbl_len * cdef int window = model.window * cdef int expected_doclbl_len = model.dm_lbl_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 583; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_expected_doclbl_len = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":586 + /* "trunk/gensim/models/doc2vec_inner.pyx":579 * * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< @@ -6212,33 +6148,33 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":587 + /* "trunk/gensim/models/doc2vec_inner.pyx":580 * cdef int i, j, k, m, n * cdef long result = 0 * cdef int null_word_index = model.vocab['\0'].index # <<<<<<<<<<<<<< * * # For hierarchical softmax */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
587; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_null_word_index = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":600 + /* "trunk/gensim/models/doc2vec_inner.pyx":593 * cdef unsigned long long next_random * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) # <<<<<<<<<<<<<< + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< * if doclbl_len != expected_doclbl_len: * return 0 # skip doc without expected nmber of lbls */ - __pyx_t_5 = PyObject_Length(__pyx_v_doclbl_vocabs); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__pyx_t_6 = 10000; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; @@ -6247,9 +6183,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_v_doclbl_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":601 + /* "trunk/gensim/models/doc2vec_inner.pyx":594 * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * if doclbl_len != expected_doclbl_len: # <<<<<<<<<<<<<< * return 0 # skip doc without expected nmber of lbls * @@ -6257,12 +6193,12 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = ((__pyx_v_doclbl_len != __pyx_v_expected_doclbl_len) != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":602 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_vocabs)) + /* "trunk/gensim/models/doc2vec_inner.pyx":595 + * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) * if doclbl_len != expected_doclbl_len: * return 0 # skip doc without expected nmber of lbls # <<<<<<<<<<<<<< * - * # default vectors, locks from syn0 + * # default vectors, locks from syn0/doclbl_syn0 */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -6270,9 +6206,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":605 + /* "trunk/gensim/models/doc2vec_inner.pyx":598 * - * # default vectors, locks from syn0 + * # default vectors, locks from syn0/doclbl_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -6281,14 +6217,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":606 - * # default vectors, locks from syn0 + /* "trunk/gensim/models/doc2vec_inner.pyx":599 + * # default vectors, locks from syn0/doclbl_syn0 * if 
word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; @@ -6296,125 +6232,131 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":607 + /* "trunk/gensim/models/doc2vec_inner.pyx":600 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 607; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":608 + /* "trunk/gensim/models/doc2vec_inner.pyx":601 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = 
(np.PyArray_DATA(doclbl_vectors)) */ __pyx_t_9 = (__pyx_v_doclbl_vectors == Py_None); __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":609 + /* "trunk/gensim/models/doc2vec_inner.pyx":602 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 # <<<<<<<<<<<<<< + * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_4); + __pyx_t_4 = 0; goto __pyx_L5; } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":610 + /* "trunk/gensim/models/doc2vec_inner.pyx":603 * if doclbl_vectors is None: - * doclbl_vectors = model.syn0 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":611 - * doclbl_vectors = model.syn0 + /* "trunk/gensim/models/doc2vec_inner.pyx":604 + * doclbl_vectors = model.docvecs.doclbl_syn0 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< - * word_locks = model.syn0locks + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) */ __pyx_t_8 = (__pyx_v_word_locks == Py_None); __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":612 + /* "trunk/gensim/models/doc2vec_inner.pyx":605 * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) * if word_locks is None: - * word_locks = model.syn0locks # <<<<<<<<<<<<<< + * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_1); - __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_1); - __pyx_t_1 = 0; + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_4); + __pyx_t_4 = 0; goto __pyx_L6; } __pyx_L6:; - /* "trunk/gensim/models/doc2vec_inner.pyx":613 + /* "trunk/gensim/models/doc2vec_inner.pyx":606 * if word_locks is None: - * word_locks = model.syn0locks + * word_locks = 
model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doclbl_locks is None: - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":614 - * word_locks = model.syn0locks + /* "trunk/gensim/models/doc2vec_inner.pyx":607 + * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) */ __pyx_t_9 = (__pyx_v_doclbl_locks == Py_None); __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":615 + /* "trunk/gensim/models/doc2vec_inner.pyx":608 * _word_locks = (np.PyArray_DATA(word_locks)) * if doclbl_locks is None: - * doclbl_locks = model.syn0locks # <<<<<<<<<<<<<< + * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0locks); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7; } __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":616 + /* "trunk/gensim/models/doc2vec_inner.pyx":609 * if doclbl_locks is None: - * doclbl_locks = model.syn0locks + * doclbl_locks = model.docvecs.doclbl_syn0_lockf * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":618 + /* "trunk/gensim/models/doc2vec_inner.pyx":611 * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -6424,23 +6366,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":619 + /* "trunk/gensim/models/doc2vec_inner.pyx":612 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L8; } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":621 + /* "trunk/gensim/models/doc2vec_inner.pyx":614 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6450,89 +6392,89 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":622 + /* "trunk/gensim/models/doc2vec_inner.pyx":615 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":623 + /* "trunk/gensim/models/doc2vec_inner.pyx":616 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":624 + /* "trunk/gensim/models/doc2vec_inner.pyx":617 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + 
np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":625 + /* "trunk/gensim/models/doc2vec_inner.pyx":618 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && 
PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_next_random = __pyx_t_11; goto __pyx_L9; } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":628 + /* "trunk/gensim/models/doc2vec_inner.pyx":621 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -6543,29 +6485,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":629 + /* "trunk/gensim/models/doc2vec_inner.pyx":622 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -6576,17 +6518,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L10:; - /* "trunk/gensim/models/doc2vec_inner.pyx":630 + /* "trunk/gensim/models/doc2vec_inner.pyx":623 * if work is None: * work = 
zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":631 + /* "trunk/gensim/models/doc2vec_inner.pyx":624 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -6597,29 +6539,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":632 + /* "trunk/gensim/models/doc2vec_inner.pyx":625 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if 
(unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -6630,24 +6572,24 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":633 + /* "trunk/gensim/models/doc2vec_inner.pyx":626 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 633; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":635 + /* "trunk/gensim/models/doc2vec_inner.pyx":628 * _neu1 = np.PyArray_DATA(neu1) * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 * for i in range(sentence_len): */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 635; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_7 < __pyx_t_6) != 0)) { __pyx_t_5 = __pyx_t_7; @@ -6656,7 +6598,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":636 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":629 * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< @@ -6665,7 +6607,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_j = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":637 + /* "trunk/gensim/models/doc2vec_inner.pyx":630 * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6676,19 +6618,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":638 + /* "trunk/gensim/models/doc2vec_inner.pyx":631 * j = 0 * for i in range(sentence_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: * # shrink sentence to leave out word */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 638; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 631; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":639 + /* "trunk/gensim/models/doc2vec_inner.pyx":632 * for i in range(sentence_len): * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< @@ -6699,7 +6641,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":641 + /* "trunk/gensim/models/doc2vec_inner.pyx":634 * if word is None: * # shrink sentence to leave out word * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< @@ 
-6708,7 +6650,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":642 + /* "trunk/gensim/models/doc2vec_inner.pyx":635 * # shrink sentence to leave out word * sentence_len = sentence_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< @@ -6719,20 +6661,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":644 + /* "trunk/gensim/models/doc2vec_inner.pyx":637 * continue # leaving j unchanged * else: * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[j] = len(word.code) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 644; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":645 + /* "trunk/gensim/models/doc2vec_inner.pyx":638 * else: * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< @@ -6742,49 +6684,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":646 + /* "trunk/gensim/models/doc2vec_inner.pyx":639 * indexes[j] = word.index * if hs: * codelens[j] = len(word.code) # <<<<<<<<<<<<<< * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 646; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 646; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":647 + /* "trunk/gensim/models/doc2vec_inner.pyx":640 * if hs: * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[j] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 647; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":648 + /* "trunk/gensim/models/doc2vec_inner.pyx":641 * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[j] = 1 */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L15; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":650 + /* "trunk/gensim/models/doc2vec_inner.pyx":643 * points[j] = np.PyArray_DATA(word.point) * else: * codelens[j] = 1 # <<<<<<<<<<<<<< @@ -6795,7 +6737,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L15:; - /* "trunk/gensim/models/doc2vec_inner.pyx":651 + /* "trunk/gensim/models/doc2vec_inner.pyx":644 * 
else: * codelens[j] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6804,7 +6746,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":652 + /* "trunk/gensim/models/doc2vec_inner.pyx":645 * codelens[j] = 1 * result += 1 * j = j + 1 # <<<<<<<<<<<<<< @@ -6816,79 +6758,41 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_L12_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":654 + /* "trunk/gensim/models/doc2vec_inner.pyx":647 * j = j + 1 * * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * token = doclbl_vocabs[i] - * if token is None: + * _doclbl_indexes[i] = doclbl_indexes[i] + * result += 1 */ __pyx_t_2 = __pyx_v_doclbl_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":655 + /* "trunk/gensim/models/doc2vec_inner.pyx":648 * * for i in range(doclbl_len): - * token = doclbl_vocabs[i] # <<<<<<<<<<<<<< - * if token is None: - * # no current support for missing doclbls where expected; skip sentence + * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< + * result += 1 + * */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doclbl_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 655; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); - __Pyx_XDECREF_SET(__pyx_v_token, __pyx_t_4); - __pyx_t_4 = 0; + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + (__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":656 + /* "trunk/gensim/models/doc2vec_inner.pyx":649 * for i in range(doclbl_len): - * token = doclbl_vocabs[i] - * if token is None: # <<<<<<<<<<<<<< - * # no current support for missing doclbls where expected; skip sentence - * return 0 - */ - __pyx_t_9 = (__pyx_v_token == Py_None); - __pyx_t_8 = (__pyx_t_9 != 0); - if (__pyx_t_8) { - - /* "trunk/gensim/models/doc2vec_inner.pyx":658 - * if token is None: - * # no current support for missing doclbls where expected; skip sentence - * return 0 # <<<<<<<<<<<<<< - * else: - * doclbl_indexes[i] = token.index - */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(__pyx_int_0); - __pyx_r = __pyx_int_0; - goto __pyx_L0; - } - /*else*/ { - - /* "trunk/gensim/models/doc2vec_inner.pyx":660 - * return 0 - * else: - * doclbl_indexes[i] = token.index # <<<<<<<<<<<<<< - * result += 1 - * - */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_token, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 660; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_GOTREF(__pyx_t_4); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 660; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - (__pyx_v_doclbl_indexes[__pyx_v_i]) = __pyx_t_14; - - /* "trunk/gensim/models/doc2vec_inner.pyx":661 - * else: - * doclbl_indexes[i] = token.index - * result += 1 # <<<<<<<<<<<<<< + * _doclbl_indexes[i] = doclbl_indexes[i] + * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_v_result = (__pyx_v_result + 1); - } + __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":664 + /* "trunk/gensim/models/doc2vec_inner.pyx":652 * * # release GIL & train on the 
sentence * with nogil: # <<<<<<<<<<<<<< @@ -6902,7 +6806,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":665 + /* "trunk/gensim/models/doc2vec_inner.pyx":653 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6913,7 +6817,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":666 + /* "trunk/gensim/models/doc2vec_inner.pyx":654 * with nogil: * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< @@ -6922,7 +6826,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "trunk/gensim/models/doc2vec_inner.pyx":667 + /* "trunk/gensim/models/doc2vec_inner.pyx":655 * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word * k = i + window + 1 # past sentence end OK: will pad with null word # <<<<<<<<<<<<<< @@ -6931,29 +6835,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":670 + /* "trunk/gensim/models/doc2vec_inner.pyx":658 * * # compose l1 & clear work * for m in range(doclbl_len): # <<<<<<<<<<<<<< * # doc vector(s) - * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], */ __pyx_t_15 = __pyx_v_doclbl_len; for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":672 + /* "trunk/gensim/models/doc2vec_inner.pyx":660 * for m in range(doclbl_len): * # doc vector(s) - * 
memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], # <<<<<<<<<<<<<< + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], # <<<<<<<<<<<<<< * vector_size * cython.sizeof(REAL_t)) * n = 0 */ - memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":674 - * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[doclbl_indexes[m] * vector_size], + /* "trunk/gensim/models/doc2vec_inner.pyx":662 + * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * n = 0 # <<<<<<<<<<<<<< * for m in range(j, k): @@ -6961,7 +6865,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_n = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":675 + /* "trunk/gensim/models/doc2vec_inner.pyx":663 * vector_size * cython.sizeof(REAL_t)) * n = 0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -6972,45 +6876,45 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_16 = __pyx_v_j; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":677 + /* "trunk/gensim/models/doc2vec_inner.pyx":665 * for m in range(j, k): * # word vectors in window * if m == i: # <<<<<<<<<<<<<< * continue * if m < 0 or m >= sentence_len: */ - __pyx_t_8 = ((__pyx_v_m == __pyx_v_i) != 0); - if (__pyx_t_8) { + __pyx_t_9 = ((__pyx_v_m == __pyx_v_i) != 0); + if (__pyx_t_9) { - /* 
"trunk/gensim/models/doc2vec_inner.pyx":678 + /* "trunk/gensim/models/doc2vec_inner.pyx":666 * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< * if m < 0 or m >= sentence_len: * window_indexes[n] = null_word_index */ - goto __pyx_L26_continue; + goto __pyx_L25_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":679 + /* "trunk/gensim/models/doc2vec_inner.pyx":667 * if m == i: * continue * if m < 0 or m >= sentence_len: # <<<<<<<<<<<<<< * window_indexes[n] = null_word_index * else: */ - __pyx_t_9 = ((__pyx_v_m < 0) != 0); - if (!__pyx_t_9) { + __pyx_t_8 = ((__pyx_v_m < 0) != 0); + if (!__pyx_t_8) { } else { - __pyx_t_8 = __pyx_t_9; - goto __pyx_L30_bool_binop_done; + __pyx_t_9 = __pyx_t_8; + goto __pyx_L29_bool_binop_done; } - __pyx_t_9 = ((__pyx_v_m >= __pyx_v_sentence_len) != 0); - __pyx_t_8 = __pyx_t_9; - __pyx_L30_bool_binop_done:; - if (__pyx_t_8) { + __pyx_t_8 = ((__pyx_v_m >= __pyx_v_sentence_len) != 0); + __pyx_t_9 = __pyx_t_8; + __pyx_L29_bool_binop_done:; + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":680 + /* "trunk/gensim/models/doc2vec_inner.pyx":668 * continue * if m < 0 or m >= sentence_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< @@ -7018,11 +6922,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * window_indexes[n] = indexes[m] */ (__pyx_v_window_indexes[__pyx_v_n]) = __pyx_v_null_word_index; - goto __pyx_L29; + goto __pyx_L28; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":682 + /* "trunk/gensim/models/doc2vec_inner.pyx":670 * window_indexes[n] = null_word_index * else: * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< @@ -7031,9 +6935,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ (__pyx_v_window_indexes[__pyx_v_n]) = (__pyx_v_indexes[__pyx_v_m]); } - __pyx_L29:; + __pyx_L28:; - /* "trunk/gensim/models/doc2vec_inner.pyx":683 + /* "trunk/gensim/models/doc2vec_inner.pyx":671 * else: * window_indexes[n] = 
indexes[m] * n = n + 1 # <<<<<<<<<<<<<< @@ -7041,10 +6945,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], */ __pyx_v_n = (__pyx_v_n + 1); - __pyx_L26_continue:; + __pyx_L25_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":684 + /* "trunk/gensim/models/doc2vec_inner.pyx":672 * window_indexes[n] = indexes[m] * n = n + 1 * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -7055,7 +6959,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":685 + /* "trunk/gensim/models/doc2vec_inner.pyx":673 * n = n + 1 * for m in range(2 * window): * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -7065,7 +6969,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence memcpy((&(__pyx_v__neu1[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":687 + /* "trunk/gensim/models/doc2vec_inner.pyx":675 * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -7074,17 +6978,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ memset(__pyx_v__work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":689 + /* "trunk/gensim/models/doc2vec_inner.pyx":677 * 
memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, */ - __pyx_t_8 = (__pyx_v_hs != 0); - if (__pyx_t_8) { + __pyx_t_9 = (__pyx_v_hs != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":690 + /* "trunk/gensim/models/doc2vec_inner.pyx":678 * * if hs: * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -7092,21 +6996,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * layer1_size, vector_size, _learn_hidden) */ __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - goto __pyx_L34; + goto __pyx_L33; } - __pyx_L34:; + __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":693 + /* "trunk/gensim/models/doc2vec_inner.pyx":681 * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, * _neu1, syn1neg, indexes[i], _alpha, _work, */ - __pyx_t_8 = (__pyx_v_negative != 0); - if (__pyx_t_8) { + __pyx_t_9 = (__pyx_v_negative != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":694 + /* "trunk/gensim/models/doc2vec_inner.pyx":682 * layer1_size, vector_size, _learn_hidden) * if negative: * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< @@ -7114,56 +7018,56 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * layer1_size, vector_size, _learn_hidden) */ __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, 
__pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); - goto __pyx_L35; + goto __pyx_L34; } - __pyx_L35:; + __pyx_L34:; - /* "trunk/gensim/models/doc2vec_inner.pyx":698 + /* "trunk/gensim/models/doc2vec_inner.pyx":686 * layer1_size, vector_size, _learn_hidden) * * if _learn_doclbls: # <<<<<<<<<<<<<< * for m in range(doclbl_len): - * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], + * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], */ - __pyx_t_8 = (__pyx_v__learn_doclbls != 0); - if (__pyx_t_8) { + __pyx_t_9 = (__pyx_v__learn_doclbls != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":699 + /* "trunk/gensim/models/doc2vec_inner.pyx":687 * * if _learn_doclbls: * for m in range(doclbl_len): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) */ __pyx_t_15 = __pyx_v_doclbl_len; for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":700 + /* "trunk/gensim/models/doc2vec_inner.pyx":688 * if _learn_doclbls: * for m in range(doclbl_len): - * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) * if _learn_words: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), 
(&(__pyx_v__doclbl_locks[(__pyx_v_doclbl_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v_doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doclbl_locks[(__pyx_v__doclbl_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L36; + goto __pyx_L35; } - __pyx_L36:; + __pyx_L35:; - /* "trunk/gensim/models/doc2vec_inner.pyx":702 - * our_saxpy(&vector_size, &_doclbl_locks[doclbl_indexes[m]], &_work[m * vector_size], - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":690 + * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< * for m in range(2 * window): * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], */ - __pyx_t_8 = (__pyx_v__learn_words != 0); - if (__pyx_t_8) { + __pyx_t_9 = (__pyx_v__learn_words != 0); + if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":703 - * &ONE, &_doclbl_vectors[doclbl_indexes[m] * vector_size], &ONE) + /* "trunk/gensim/models/doc2vec_inner.pyx":691 + * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) * if _learn_words: * for m in range(2 * window): # <<<<<<<<<<<<<< * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], @@ -7173,7 +7077,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence 
for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":704 + /* "trunk/gensim/models/doc2vec_inner.pyx":692 * if _learn_words: * for m in range(2 * window): * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], # <<<<<<<<<<<<<< @@ -7182,13 +7086,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - goto __pyx_L39; + goto __pyx_L38; } - __pyx_L39:; + __pyx_L38:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":664 + /* "trunk/gensim/models/doc2vec_inner.pyx":652 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -7200,13 +7104,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence #ifdef WITH_THREAD Py_BLOCK_THREADS #endif - goto __pyx_L21; + goto __pyx_L20; } - __pyx_L21:; + __pyx_L20:; } } - /* "trunk/gensim/models/doc2vec_inner.pyx":707 + /* "trunk/gensim/models/doc2vec_inner.pyx":695 * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -7214,16 +7118,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 707; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 695; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -7238,7 +7142,6 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); - __Pyx_XDECREF(__pyx_v_token); __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_neu1); __Pyx_XDECREF(__pyx_v_word_vectors); @@ -7250,7 +7153,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":710 +/* "trunk/gensim/models/doc2vec_inner.pyx":698 * * * def init(): # <<<<<<<<<<<<<< @@ -7289,7 +7192,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":720 + /* "trunk/gensim/models/doc2vec_inner.pyx":708 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -7299,7 +7202,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":721 + /* "trunk/gensim/models/doc2vec_inner.pyx":709 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -7309,7 +7212,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":722 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":710 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -7318,7 +7221,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/doc2vec_inner.pyx":723 + /* "trunk/gensim/models/doc2vec_inner.pyx":711 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -7327,7 +7230,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_size = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":728 + /* "trunk/gensim/models/doc2vec_inner.pyx":716 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -7337,7 +7240,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":729 + /* "trunk/gensim/models/doc2vec_inner.pyx":717 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -7346,7 +7249,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/doc2vec_inner.pyx":730 + /* "trunk/gensim/models/doc2vec_inner.pyx":718 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -7356,7 +7259,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = 
((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":733 + /* "trunk/gensim/models/doc2vec_inner.pyx":721 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -7365,7 +7268,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":734 + /* "trunk/gensim/models/doc2vec_inner.pyx":722 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -7374,7 +7277,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/doc2vec_inner.pyx":735 + /* "trunk/gensim/models/doc2vec_inner.pyx":723 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -7384,7 +7287,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":736 + /* "trunk/gensim/models/doc2vec_inner.pyx":724 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -7393,7 +7296,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double; - /* "trunk/gensim/models/doc2vec_inner.pyx":737 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":725 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -7402,7 +7305,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":738 + /* "trunk/gensim/models/doc2vec_inner.pyx":726 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -7415,7 +7318,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":739 + /* "trunk/gensim/models/doc2vec_inner.pyx":727 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -7425,7 +7328,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":740 + /* "trunk/gensim/models/doc2vec_inner.pyx":728 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -7434,7 +7337,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float; - /* "trunk/gensim/models/doc2vec_inner.pyx":741 + /* "trunk/gensim/models/doc2vec_inner.pyx":729 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -7443,7 +7346,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":742 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":730 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -7457,7 +7360,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":746 + /* "trunk/gensim/models/doc2vec_inner.pyx":734 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -7466,7 +7369,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":747 + /* "trunk/gensim/models/doc2vec_inner.pyx":735 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -7475,7 +7378,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":748 + /* "trunk/gensim/models/doc2vec_inner.pyx":736 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -7488,7 +7391,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":698 * * * def init(): # <<<<<<<<<<<<<< @@ -9543,18 +9446,20 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_code, __pyx_k_code, sizeof(__pyx_k_code), 0, 0, 1, 1}, {&__pyx_n_s_codelens, __pyx_k_codelens, sizeof(__pyx_k_codelens), 0, 0, 1, 1}, {&__pyx_n_s_codes, __pyx_k_codes, sizeof(__pyx_k_codes), 0, 0, 1, 1}, - 
{&__pyx_n_s_context_token, __pyx_k_context_token, sizeof(__pyx_k_context_token), 0, 0, 1, 1}, {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, {&__pyx_n_s_cpointer, __pyx_k_cpointer, sizeof(__pyx_k_cpointer), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, {&__pyx_n_s_dm_lbl_count, __pyx_k_dm_lbl_count, sizeof(__pyx_k_dm_lbl_count), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_indexes, __pyx_k_doclbl_indexes, sizeof(__pyx_k_doclbl_indexes), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_indexes_2, __pyx_k_doclbl_indexes_2, sizeof(__pyx_k_doclbl_indexes_2), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_len, __pyx_k_doclbl_len, sizeof(__pyx_k_doclbl_len), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_locks, __pyx_k_doclbl_locks, sizeof(__pyx_k_doclbl_locks), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_locks_2, __pyx_k_doclbl_locks_2, sizeof(__pyx_k_doclbl_locks_2), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_syn0, __pyx_k_doclbl_syn0, sizeof(__pyx_k_doclbl_syn0), 0, 0, 1, 1}, + {&__pyx_n_s_doclbl_syn0_lockf, __pyx_k_doclbl_syn0_lockf, sizeof(__pyx_k_doclbl_syn0_lockf), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_vectors, __pyx_k_doclbl_vectors, sizeof(__pyx_k_doclbl_vectors), 0, 0, 1, 1}, {&__pyx_n_s_doclbl_vectors_2, __pyx_k_doclbl_vectors_2, sizeof(__pyx_k_doclbl_vectors_2), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_vocabs, __pyx_k_doclbl_vocabs, sizeof(__pyx_k_doclbl_vocabs), 0, 0, 1, 1}, + {&__pyx_n_s_docvecs, __pyx_k_docvecs, sizeof(__pyx_k_docvecs), 0, 0, 1, 1}, {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, @@ -9609,13 +9514,12 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_snrm2, __pyx_k_snrm2, sizeof(__pyx_k_snrm2), 0, 0, 1, 1}, {&__pyx_n_s_sscal, __pyx_k_sscal, sizeof(__pyx_k_sscal), 0, 0, 1, 1}, {&__pyx_n_s_syn0, __pyx_k_syn0, sizeof(__pyx_k_syn0), 0, 0, 1, 1}, - {&__pyx_n_s_syn0locks, 
__pyx_k_syn0locks, sizeof(__pyx_k_syn0locks), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_lockf, __pyx_k_syn0_lockf, sizeof(__pyx_k_syn0_lockf), 0, 0, 1, 1}, {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_table, __pyx_k_table, sizeof(__pyx_k_table), 0, 0, 1, 1}, {&__pyx_n_s_table_len, __pyx_k_table_len, sizeof(__pyx_k_table_len), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_token, __pyx_k_token, sizeof(__pyx_k_token), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dbow, __pyx_k_train_sentence_dbow, sizeof(__pyx_k_train_sentence_dbow), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dm, __pyx_k_train_sentence_dm, sizeof(__pyx_k_train_sentence_dm), 0, 0, 1, 1}, {&__pyx_n_s_train_sentence_dm_concat, __pyx_k_train_sentence_dm_concat, sizeof(__pyx_k_train_sentence_dm_concat), 0, 0, 1, 1}, @@ -9642,7 +9546,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno 
= __LINE__; goto __pyx_L1_error;} return 0; @@ -9654,45 +9558,45 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":331 + /* "trunk/gensim/models/doc2vec_inner.pyx":329 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/doc2vec_inner.pyx":463 + /* "trunk/gensim/models/doc2vec_inner.pyx":460 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
+ __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 463; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "trunk/gensim/models/doc2vec_inner.pyx":625 + /* "trunk/gensim/models/doc2vec_inner.pyx":618 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__6); __Pyx_GIVEREF(__pyx_tuple__6); - __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); @@ -9765,50 +9669,50 
@@ static int __Pyx_InitCachedConstants(void) { /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__14 = PyTuple_Pack(47, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doclbls_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_context_token, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, 
__pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doclbls_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 47, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":400 + /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * 
learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(51, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item, __pyx_n_s_token); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, 
__pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 51, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 400, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 397, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(51, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_vocabs, 
__pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_expected_doclbl_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_token); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_expected_doclbl_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, 
__pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 51, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 557, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 550, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":698 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); - __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 710, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 698, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -10131,7 +10035,7 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_vocabs, alpha, work=None, # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ @@ -10140,48 +10044,48 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":400 + /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # 
<<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_vocabs, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doclbls=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, 
__pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":698 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 710; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":750 + /* "trunk/gensim/models/doc2vec_inner.pyx":738 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_1))) { @@ -10194,14 +10098,14 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_2 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 750; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":1 @@ -10541,55 +10445,6 @@ static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i, return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i)); } -#if CYTHON_COMPILING_IN_CPYTHON -static CYTHON_INLINE PyObject* 
__Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { - PyObject *self, *result; - PyCFunction cfunc; - cfunc = PyCFunction_GET_FUNCTION(func); - self = PyCFunction_GET_SELF(func); - if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) - return NULL; - result = cfunc(self, arg); - Py_LeaveRecursiveCall(); - if (unlikely(!result) && unlikely(!PyErr_Occurred())) { - PyErr_SetString( - PyExc_SystemError, - "NULL result without error in PyObject_Call"); - } - return result; -} -#endif - -#if CYTHON_COMPILING_IN_CPYTHON -static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject *result; - PyObject *args = PyTuple_New(1); - if (unlikely(!args)) return NULL; - Py_INCREF(arg); - PyTuple_SET_ITEM(args, 0, arg); - result = __Pyx_PyObject_Call(func, args, NULL); - Py_DECREF(args); - return result; -} -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { -#ifdef __Pyx_CyFunction_USED - if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { -#else - if (likely(PyCFunction_Check(func))) { -#endif - if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { - return __Pyx_PyObject_CallMethO(func, arg); - } - } - return __Pyx__PyObject_CallOneArg(func, arg); -} -#else -static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { - PyObject* args = PyTuple_Pack(1, arg); - return (likely(args)) ? 
__Pyx_PyObject_Call(func, args, NULL) : NULL; -} -#endif - static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) { #if CYTHON_COMPILING_IN_CPYTHON PyObject *tmp_type, *tmp_value, *tmp_tb; @@ -10806,6 +10661,55 @@ static PyObject* __Pyx_ImportFrom(PyObject* module, PyObject* name) { return value; } +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +#if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject* args = PyTuple_Pack(1, arg); + return (likely(args)) ? 
__Pyx_PyObject_Call(func, args, NULL) : NULL; +} +#endif + #if CYTHON_COMPILING_IN_CPYTHON static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { #ifdef __Pyx_CyFunction_USED diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 184e59b68f..dd2982c258 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -341,7 +341,6 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, codelens[i] = 0 else: indexes[i] = predict_word.index - reduced_windows[i] = np.random.randint(window) if hs: codelens[i] = len(predict_word.code) codes[i] = np.PyArray_DATA(predict_word.code) @@ -349,9 +348,10 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, else: codelens[i] = 1 result += 1 - # single randint() call avoids a big thread-sync slowdown - for i, item in enumerate(np.random.randint(0, window, sentence_len)): - reduced_windows[i] = item + if _train_words: + # single randint() call avoids a big thread-synchronization slowdown + for i, item in enumerate(np.random.randint(0, window, sentence_len)): + reduced_windows[i] = item for i in range(doclbl_len): _doclbl_indexes[i] = doclbl_indexes[i] result += 1 From 89ad0ff4312c6966b03e1a98cd571bba86fe99b5 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sat, 6 Jun 2015 01:11:40 -0700 Subject: [PATCH 20/49] thread count in compact_name --- gensim/models/doc2vec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 7aeb242803..7d9d625878 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -279,12 +279,10 @@ def __getitem__(self, index): def reset_weights(self, model): length = max(len(self.doclbls),self.max_index) if self.mapfile_path: - print(length) self.doclbl_syn0 = np_memmap(self.mapfile_path+'.doclbl_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) self.doclbl_syn0_lockf = 
np_memmap(self.mapfile_path+'.doclbl_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) self.doclbl_syn0_lockf.fill(1.0) else: - print(length) self.doclbl_syn0 = empty((length, model.vector_size), dtype=REAL) self.doclbl_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning @@ -495,6 +493,8 @@ def compact_name(self): segments.append('mc%d' % self.min_count) if self.sample > 0: segments.append('s%d' % self.sample) + if self.workers > 1: + segments.append('t%d' % self.workers) return ''.join(segments) def save(self, *args, **kwargs): From 156ea06263752e62a539c143a057f5505edc0bda Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Sun, 7 Jun 2015 21:13:47 -0700 Subject: [PATCH 21/49] rename [lbl,label,LabeledSentence] -> [tag,tag,TaggedDocument] --- gensim/models/doc2vec.py | 242 ++++---- gensim/models/doc2vec_inner.c | 960 ++++++++++++++++---------------- gensim/models/doc2vec_inner.pyx | 154 ++--- 3 files changed, 674 insertions(+), 682 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 7d9d625878..25ca3ef40e 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -61,9 +61,9 @@ # failed... fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_sentence_dbow(model, word_vocabs, doclbl_indices, alpha, work=None, - train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + def train_sentence_dbow(model, word_vocabs, doctag_indices, alpha, work=None, + train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ Update distributed bag of words model by training on a single sentence. @@ -82,26 +82,26 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indices, alpha, work=None, will use the optimized version from doc2vec_inner instead. 
""" - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf if train_words and learn_words: train_sentence_sg(model, word_vocabs, alpha, work) # TODO: adapt for word_vectors/word_locks - for doclbl_index in doclbl_indices: + for doctag_index in doctag_indices: for word in word_vocabs: if word is None: continue # OOV word in the input sentence => skip - train_sg_pair(model, word, doclbl_index, alpha, learn_vectors=learn_doclbls, - learn_hidden=learn_hidden, context_vectors=doclbl_vectors, - context_locks=doclbl_locks) + train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags, + learn_hidden=learn_hidden, context_vectors=doctag_vectors, + context_locks=doctag_locks) return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm(model, word_vocabs, doclbl_indices, alpha, work=None, neu1=None, - learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + def train_sentence_dm(model, word_vocabs, doctag_indices, alpha, work=None, neu1=None, + learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ Update distributed memory model by training on a single sentence. 
@@ -116,13 +116,13 @@ def train_sentence_dm(model, word_vocabs, doclbl_indices, alpha, work=None, neu1 word_vectors = model.syn0 if word_locks is None: word_locks = model.syn0_lockf - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf - doclbl_sum = np_sum(doclbl_vectors[doclbl_indices], axis=0) - doclbl_len = len(doclbl_indices) + doctag_sum = np_sum(doctag_vectors[doctag_indices], axis=0) + doctag_len = len(doctag_indices) for pos, word in enumerate(word_vocabs): if word is None: @@ -131,15 +131,15 @@ def train_sentence_dm(model, word_vocabs, doclbl_indices, alpha, work=None, neu1 start = max(0, pos - model.window + reduced_window) window_pos = enumerate(word_vocabs[start : pos + model.window + 1 - reduced_window], start) word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(word_vectors[word2_indices], axis=0) + doclbl_sum # 1 x layer1_size + l1 = np_sum(word_vectors[word2_indices], axis=0) + doctag_sum # 1 x layer1_size if word2_indices and model.cbow_mean: - l1 /= (len(word2_indices) + doclbl_len) + l1 /= (len(word2_indices) + doctag_len) neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, learn_vectors=False, learn_hidden=True) if word2_indices and not model.cbow_mean: - neu1e /= (len(word2_indices) + doclbl_len) - if learn_doclbls: - doclbl_vectors[doclbl_indices] += \ - neu1e * np_repeat(doclbl_locks[doclbl_indices],model.vector_size).reshape(-1,model.vector_size) + neu1e /= (len(word2_indices) + doctag_len) + if learn_doctags: + doctag_vectors[doctag_indices] += \ + neu1e * np_repeat(doctag_locks[doctag_indices],model.vector_size).reshape(-1,model.vector_size) if learn_words: word_vectors[word2_indices] += \ neu1e * 
np_repeat(word_locks[word2_indices],model.vector_size).reshape(-1,model.vector_size) @@ -147,9 +147,9 @@ def train_sentence_dm(model, word_vocabs, doclbl_indices, alpha, work=None, neu1 return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm_concat(model, word_vocabs, doclbl_indices, alpha, work=None, neu1=None, - learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + def train_sentence_dm_concat(model, word_vocabs, doctag_indices, alpha, work=None, neu1=None, + learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ Update distributed memory model by training on a single sentence, using a concatenation of the context window word vectors (rather than a sum or average). @@ -165,14 +165,14 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indices, alpha, work=Non word_vectors = model.syn0 if word_locks is None: word_locks = model.syn0_lockf - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf - doclbl_len = len(doclbl_indices) - if doclbl_len != model.dm_lbl_count: - return 0 # skip doc without expected doclbl(s) + doctag_len = len(doctag_indices) + if doctag_len != model.dm_tag_count: + return 0 # skip doc without expected doctag(s) null_word = model.vocab['\0'] pre_pad_count = model.window @@ -191,50 +191,42 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indices, alpha, work=Non word_context_len = len(word_context_indices) predict_word = model.vocab[model.index2word[padded_sentence_indices[pos]]] # numpy advanced-indexing copies; concatenate, flatten to 1d - l1 = concatenate((doclbl_vectors[doclbl_indices], 
word_vectors[word_context_indices])).ravel() + l1 = concatenate((doctag_vectors[doctag_indices], word_vectors[word_context_indices])).ravel() neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, learn_hidden=learn_hidden, learn_vectors=False) # filter by locks and shape for addition to source vectors - e_locks = concatenate((doclbl_locks[doclbl_indices], word_locks[word_context_indices])) + e_locks = concatenate((doctag_locks[doctag_indices], word_locks[word_context_indices])) neu1e_r = (neu1e.reshape(-1,model.vector_size) * np_repeat(e_locks,model.vector_size).reshape(-1,model.vector_size)) - if learn_doclbls: - np_add.at(doclbl_vectors, doclbl_indices, neu1e_r[:doclbl_len]) + if learn_doctags: + np_add.at(doctag_vectors, doctag_indices, neu1e_r[:doctag_len]) if learn_words: - np_add.at(word_vectors, word_context_indices, neu1e_r[doclbl_len:]) + np_add.at(word_vectors, word_context_indices, neu1e_r[doctag_len:]) return len(padded_sentence_indices) - pre_pad_count - post_pad_count -class LabeledSentence(object): +class TaggedDocument(namedtuple('TaggedDocument','words tags')): """ - A single labeled sentence = text item. + A single document, made up of `words` (a list of unicode string tokens) + and `tags` (a list of tokens). Tags may also be one or more unicode string + tokens, but typical practice (which will also be most memory-efficient) is + for the tags list to include a unique integer id as the only tag. + Replaces "sentence as a list of words" from Word2Vec. """ - def __init__(self, words, labels): - """ - `words` is a list of tokens (unicode strings), - `labels` a list of text labels associated with this text - or a single string label. 
- - """ - if isinstance(labels, string_types): - labels = (labels,) - self.words = words - self.labels = labels - def __str__(self): - return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.labels) + return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) -class DocvecsInArray(object): +class DocvecsArray(object): """ Default storage of docvecs during training, in a numpy array. - Maintains dict mapping string doclbl -> int mapping if necessary. - (If all LabeledSentences use only int doclbls, this overhead is + Maintains dict mapping string doctag -> int mapping if necessary. + (If all TaggedSentences use only int doctags, this overhead is avoided.) Supplying a mapfile_path at construction will use a pair of memory-mapped files as the array backing for syn0/syn0_lockf values. @@ -244,24 +236,24 @@ class DocvecsInArray(object): """ def __init__(self, mapfile_path=None): - self.doclbls = {} # string -> Doclbl (if necessary) - self.index2doclbl = [] # int index -> String (if necessary) + self.doctags = {} # string -> Doctag (if necessary) + self.index2doctag = [] # int index -> String (if necessary) self.max_index = -1 self.mapfile_path = mapfile_path - def note_doclbl(self, key, sentence_no, sentence_length): + def note_doctag(self, key, sentence_no, sentence_length): if isinstance(key, int): self.max_index = max(self.max_index, key) else: - if key in self.doclbls: - self.doclbls[key] = self.doclbls[key].repeat(sentence_length) + if key in self.doctags: + self.doctags[key] = self.doctags[key].repeat(sentence_length) else: - self.doclbls[key] = Doclbl(sentence_no, sentence_length, 1) - self.index2doclbl.append(key) + self.doctags[key] = Doctag(sentence_no, sentence_length, 1) + self.index2doctag.append(key) - def indexed_doclbls(self, doclbl_tokens): - return ([i for i in [self._int_index(index,-1) for index in doclbl_tokens] if i > -1], - self.doclbl_syn0, doclbl_tokens) + def indexed_doctags(self, doctag_tokens): + return ([i for i in 
[self._int_index(index,-1) for index in doctag_tokens] if i > -1], + self.doctag_syn0, doctag_tokens) def trained_items(self, indexed_tuples): """Persist any changes to the given indices; a no-op for this implementation""" @@ -271,29 +263,29 @@ def _int_index(self, index, missing=None): if isinstance(index, int): return index else: - return self.doclbls[index].index if index in self.doclbls else missing + return self.doctags[index].index if index in self.doctags else missing def __getitem__(self, index): - return self.doclbl_syn0[self._int_index(index)] + return self.doctag_syn0[self._int_index(index)] def reset_weights(self, model): - length = max(len(self.doclbls),self.max_index) + length = max(len(self.doctags),self.max_index) if self.mapfile_path: - self.doclbl_syn0 = np_memmap(self.mapfile_path+'.doclbl_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) - self.doclbl_syn0_lockf = np_memmap(self.mapfile_path+'.doclbl_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) - self.doclbl_syn0_lockf.fill(1.0) + self.doctag_syn0 = np_memmap(self.mapfile_path+'.doctag_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) + self.doctag_syn0_lockf = np_memmap(self.mapfile_path+'.doctag_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) + self.doctag_syn0_lockf.fill(1.0) else: - self.doclbl_syn0 = empty((length, model.vector_size), dtype=REAL) - self.doclbl_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning + self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL) + self.doctag_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning for i in xrange(length): # construct deterministic seed from index AND model seed - seed = "%d %s" % (model.seed, self.index2doclbl[i] if len(self.index2doclbl)>0 else str(i)) - self.doclbl_syn0[i] = model.seeded_vector(seed) + seed = "%d %s" % (model.seed, self.index2doctag[i] if len(self.index2doctag)>0 else str(i)) + self.doctag_syn0[i] = model.seeded_vector(seed) -class 
Doclbl(namedtuple('Doclbl', 'index, word_count, doc_count')): - """A document label discovered during the initial vocabulary +class Doctag(namedtuple('Doctag', 'index, word_count, doc_count')): + """A string document tag discovered during the initial vocabulary scan. (The document-vector equivalent of a Vocab object.)""" __slots__ = () def repeat(self, word_count): @@ -304,13 +296,13 @@ class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, - dbow_words=0, dm_mean=0, dm_concat=0, dm_lbl_count=1, + dbow_words=0, dm_mean=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, **kwargs): """ Initialize the model from an iterable of `sentences`. Each sentence is a - LabeledSentence object that will be used for training. + TaggedSentence object that will be used for training. - The `sentences` iterable can be simply a list of LabeledSentence elements, but for larger corpora, + The `sentences` iterable can be simply a list of TaggedSentence elements, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. If you don't supply `sentences`, the model is left uninitialized -- use if @@ -346,9 +338,9 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, `dm_concat` = if 1, use concatenation of context vectors rather than sum/average; default is 0 (off). Note concatenation results in a much-larger model, as the input is no longer the size of one (sampled or arithmatically combined) word vector, but the - size of the label(s) and all words in the context strung together. + size of the tag(s) and all words in the context strung together. 
- `dm_lbl_count` = expected constant number of sentence lbls per sentence, when using + `dm_tag_count` = expected constant number of sentence tags per sentence, when using dm_concat mode; default is 1. `dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW @@ -361,18 +353,18 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, null_word=dm_concat, **kwargs) self.dbow_words = dbow_words self.dm_concat = dm_concat - self.dm_lbl_count = dm_lbl_count + self.dm_tag_count = dm_tag_count self.docvecs = docvecs if not self.docvecs: - self.docvecs = DocvecsInArray(docvecs_mapfile) + self.docvecs = DocvecsArray(docvecs_mapfile) if sentences is not None: self.build_vocab(sentences) self.train(sentences) def reset_weights(self): if self.dm_concat: - # expand l1 size to match concatenated lbls+words length - self.layer1_size = (self.dm_lbl_count + (2 * self.window)) * self.vector_size + # expand l1 size to match concatenated tags+words length + self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size logger.info("using concatenative %d-dimensional layer1"% (self.layer1_size)) Word2Vec.reset_weights(self) self.docvecs.reset_weights(self) @@ -385,8 +377,8 @@ def _vocab_from(self, sentences): logger.info("PROGRESS: at item #%i, processed %i words and %i word types" % (sentence_no, total_words, len(vocab))) sentence_length = len(sentence.words) - for label in sentence.labels: - self.docvecs.note_doclbl(label, sentence_no, sentence_length) + for tag in sentence.tags: + self.docvecs.note_doctag(tag, sentence_no, sentence_length) for word in sentence.words: total_words += 1 if word in vocab: @@ -401,7 +393,7 @@ def _prepare_sentences(self, sentences): for sentence in sentences: # avoid calling random_sample() where prob >= 1, to speed things up a little: yield (self._tokens_to_vocabs(sentence.words), - self.docvecs.indexed_doclbls(sentence.labels)) + self.docvecs.indexed_doctags(sentence.tags)) def 
_tokens_to_vocabs(self, tokens, sample=True, source_dict=None): if source_dict is None: @@ -415,17 +407,17 @@ def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): def _get_job_words(self, alpha, work, job, neu1): if self.sg: - tally = sum(train_sentence_dbow(self, sentence, doclbl_indices, alpha, work, train_words=self.dbow_words, - doclbl_vectors=doclbl_vectors) - for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) + tally = sum(train_sentence_dbow(self, sentence, doctag_indices, alpha, work, train_words=self.dbow_words, + doctag_vectors=doctag_vectors) + for sentence, (doctag_indices, doctag_vectors, ignored) in job) elif self.dm_concat: - tally = sum(train_sentence_dm_concat(self, sentence, doclbl_indices, alpha, work, neu1, - doclbl_vectors=doclbl_vectors) - for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) + tally = sum(train_sentence_dm_concat(self, sentence, doctag_indices, alpha, work, neu1, + doctag_vectors=doctag_vectors) + for sentence, (doctag_indices, doctag_vectors, ignored) in job) else: - tally = sum(train_sentence_dm(self, sentence, doclbl_indices, alpha, work, neu1, - doclbl_vectors=doclbl_vectors) - for sentence, (doclbl_indices, doclbl_vectors, ignored) in job) + tally = sum(train_sentence_dm(self, sentence, doctag_indices, alpha, work, neu1, + doctag_vectors=doctag_vectors) + for sentence, (doctag_indices, doctag_vectors, ignored) in job) self.docvecs.trained_items(item for s, item in job) return tally @@ -435,10 +427,10 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): Document should be a list of (word) tokens. 
""" - doclbl_vectors = empty((1, self.vector_size), dtype=REAL) - doclbl_vectors[0] = self.seeded_vector(' '.join(document)) - doclbl_locks = ones(1, dtype=REAL) - doclbl_indices = [0] + doctag_vectors = empty((1, self.vector_size), dtype=REAL) + doctag_vectors[0] = self.seeded_vector(' '.join(document)) + doctag_locks = ones(1, dtype=REAL) + doctag_indices = [0] word_vocabs = self._tokens_to_vocabs(document) work = zeros(self.layer1_size, dtype=REAL) @@ -447,20 +439,20 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): for i in range(steps): if self.sg: - train_sentence_dbow(self, word_vocabs, doclbl_indices, alpha, work, + train_sentence_dbow(self, word_vocabs, doctag_indices, alpha, work, learn_words=False, learn_hidden=False, - doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) elif self.dm_concat: - train_sentence_dm_concat(self, word_vocabs, doclbl_indices, alpha, work, neu1, + train_sentence_dm_concat(self, word_vocabs, doctag_indices, alpha, work, neu1, learn_words=False, learn_hidden=False, - doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) else: - train_sentence_dm(self, word_vocabs, doclbl_indices, alpha, work, neu1, + train_sentence_dm(self, word_vocabs, doctag_indices, alpha, work, neu1, learn_words=False, learn_hidden=False, - doclbl_vectors=doclbl_vectors, doclbl_locks=doclbl_locks) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - return doclbl_vectors[0] + return doctag_vectors[0] def __str__(self): return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) @@ -499,12 +491,12 @@ def compact_name(self): def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors - 
super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save doclbl fields + super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save doctag fields -class LabeledBrownCorpus(object): +class TaggedBrownCorpus(object): """Iterate over sentences from the Brown corpus (part of NLTK data), yielding - each sentence out as a LabeledSentence object.""" + each sentence out as a TaggedSentence object.""" def __init__(self, dirname): self.dirname = dirname @@ -522,26 +514,26 @@ def __iter__(self): words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] if not words: # don't bother sending out empty sentences continue - yield LabeledSentence(words, ['%s_SENT_%s' % (fname, item_no)]) + yield TaggedSentence(words, ['%s_SENT_%s' % (fname, item_no)]) -class LabeledLineSentence(object): - """Simple format: one sentence = one line = one LabeledSentence object. +class TaggedLineSentence(object): + """Simple format: one sentence = one line = one TaggedDocument object. Words are expected to be already preprocessed and separated by whitespace, - labels are constructed automatically from the sentence line number.""" + tags are constructed automatically from the sentence line number.""" def __init__(self, source): """ `source` can be either a string (filename) or a file object. 
Example:: - sentences = LineSentence('myfile.txt') + sentences = TaggedLineSentence('myfile.txt') Or for compressed files:: - sentences = LineSentence('compressed_text.txt.bz2') - sentences = LineSentence('compressed_text.txt.gz') + sentences = TaggedLineSentence('compressed_text.txt.bz2') + sentences = TaggedLineSentence('compressed_text.txt.gz') """ self.source = source @@ -553,9 +545,9 @@ def __iter__(self): # Things that don't have seek will trigger an exception self.source.seek(0) for item_no, line in enumerate(self.source): - yield LabeledSentence(utils.to_unicode(line).split(), ['SENT_%s' % item_no]) + yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) except AttributeError: # If it didn't work like a file, use it as a string filename with utils.smart_open(self.source) as fin: for item_no, line in enumerate(fin): - yield LabeledSentence(utils.to_unicode(line).split(), ['SENT_%s' % item_no]) + yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index cef91671bd..c699f2212b 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -1165,9 +1165,9 @@ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject 
*__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static 
PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ @@ -1251,10 +1251,10 @@ static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_inv_count[] = "inv_count"; static char __pyx_k_table_len[] = "table_len"; static char __pyx_k_ValueError[] = "ValueError"; -static char __pyx_k_doclbl_len[] = "doclbl_len"; +static char __pyx_k_doctag_len[] = "doctag_len"; static char __pyx_k_syn0_lockf[] = "syn0_lockf"; static char __pyx_k_word_locks[] = "word_locks"; -static char __pyx_k_doclbl_syn0[] = "doclbl_syn0"; +static char __pyx_k_doctag_syn0[] = "doctag_syn0"; static char __pyx_k_layer1_size[] = "layer1_size"; static char __pyx_k_learn_words[] = "learn_words"; static char __pyx_k_next_random[] = "next_random"; @@ -1263,31 +1263,31 @@ static char __pyx_k_vector_size[] = "vector_size"; static char __pyx_k_word_vocabs[] = "word_vocabs"; static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static char __pyx_k_RuntimeError[] = "RuntimeError"; -static char __pyx_k_dm_lbl_count[] = "dm_lbl_count"; -static char __pyx_k_doclbl_locks[] = "doclbl_locks"; +static char __pyx_k_dm_tag_count[] = "dm_tag_count"; +static char __pyx_k_doctag_locks[] = 
"doctag_locks"; static char __pyx_k_learn_hidden[] = "learn_hidden"; static char __pyx_k_predict_word[] = "predict_word"; static char __pyx_k_sentence_len[] = "sentence_len"; static char __pyx_k_word_locks_2[] = "_word_locks"; static char __pyx_k_word_vectors[] = "word_vectors"; -static char __pyx_k_learn_doclbls[] = "learn_doclbls"; +static char __pyx_k_learn_doctags[] = "learn_doctags"; static char __pyx_k_learn_words_2[] = "_learn_words"; static char __pyx_k_train_words_2[] = "_train_words"; -static char __pyx_k_doclbl_indexes[] = "doclbl_indexes"; -static char __pyx_k_doclbl_locks_2[] = "_doclbl_locks"; -static char __pyx_k_doclbl_vectors[] = "doclbl_vectors"; +static char __pyx_k_doctag_indexes[] = "doctag_indexes"; +static char __pyx_k_doctag_locks_2[] = "_doctag_locks"; +static char __pyx_k_doctag_vectors[] = "doctag_vectors"; static char __pyx_k_learn_hidden_2[] = "_learn_hidden"; static char __pyx_k_window_indexes[] = "window_indexes"; static char __pyx_k_word_vectors_2[] = "_word_vectors"; -static char __pyx_k_learn_doclbls_2[] = "_learn_doclbls"; +static char __pyx_k_learn_doctags_2[] = "_learn_doctags"; static char __pyx_k_null_word_index[] = "null_word_index"; static char __pyx_k_reduced_windows[] = "reduced_windows"; -static char __pyx_k_doclbl_indexes_2[] = "_doclbl_indexes"; -static char __pyx_k_doclbl_vectors_2[] = "_doclbl_vectors"; -static char __pyx_k_doclbl_syn0_lockf[] = "doclbl_syn0_lockf"; +static char __pyx_k_doctag_indexes_2[] = "_doctag_indexes"; +static char __pyx_k_doctag_vectors_2[] = "_doctag_vectors"; +static char __pyx_k_doctag_syn0_lockf[] = "doctag_syn0_lockf"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; static char __pyx_k_train_sentence_dm[] = "train_sentence_dm"; -static char __pyx_k_expected_doclbl_len[] = "expected_doclbl_len"; +static char __pyx_k_expected_doctag_len[] = "expected_doctag_len"; static char __pyx_k_train_sentence_dbow[] = "train_sentence_dbow"; static char __pyx_k_train_sentence_dm_concat[] 
= "train_sentence_dm_concat"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; @@ -1316,21 +1316,21 @@ static PyObject *__pyx_n_s_codes; static PyObject *__pyx_n_s_count; static PyObject *__pyx_n_s_cpointer; static PyObject *__pyx_n_s_d_res; -static PyObject *__pyx_n_s_dm_lbl_count; -static PyObject *__pyx_n_s_doclbl_indexes; -static PyObject *__pyx_n_s_doclbl_indexes_2; -static PyObject *__pyx_n_s_doclbl_len; -static PyObject *__pyx_n_s_doclbl_locks; -static PyObject *__pyx_n_s_doclbl_locks_2; -static PyObject *__pyx_n_s_doclbl_syn0; -static PyObject *__pyx_n_s_doclbl_syn0_lockf; -static PyObject *__pyx_n_s_doclbl_vectors; -static PyObject *__pyx_n_s_doclbl_vectors_2; +static PyObject *__pyx_n_s_dm_tag_count; +static PyObject *__pyx_n_s_doctag_indexes; +static PyObject *__pyx_n_s_doctag_indexes_2; +static PyObject *__pyx_n_s_doctag_len; +static PyObject *__pyx_n_s_doctag_locks; +static PyObject *__pyx_n_s_doctag_locks_2; +static PyObject *__pyx_n_s_doctag_syn0; +static PyObject *__pyx_n_s_doctag_syn0_lockf; +static PyObject *__pyx_n_s_doctag_vectors; +static PyObject *__pyx_n_s_doctag_vectors_2; static PyObject *__pyx_n_s_docvecs; static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_enumerate; static PyObject *__pyx_n_s_expected; -static PyObject *__pyx_n_s_expected_doclbl_len; +static PyObject *__pyx_n_s_expected_doctag_len; static PyObject *__pyx_n_s_fblas; static PyObject *__pyx_n_s_float32; static PyObject *__pyx_n_s_hs; @@ -1344,8 +1344,8 @@ static PyObject *__pyx_n_s_item; static PyObject *__pyx_n_s_j; static PyObject *__pyx_n_s_k; static PyObject *__pyx_n_s_layer1_size; -static PyObject *__pyx_n_s_learn_doclbls; -static PyObject *__pyx_n_s_learn_doclbls_2; +static PyObject *__pyx_n_s_learn_doctags; +static PyObject *__pyx_n_s_learn_doctags_2; static PyObject *__pyx_n_s_learn_hidden; static PyObject *__pyx_n_s_learn_hidden_2; static PyObject *__pyx_n_s_learn_words; @@ -2833,9 +2833,9 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ @@ -2844,17 +2844,17 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_indexes = 0; + PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_train_words = 0; - PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_doctags = 0; PyObject *__pyx_v_learn_words = 0; PyObject *__pyx_v_learn_hidden = 0; PyObject *__pyx_v_word_vectors = 0; PyObject *__pyx_v_word_locks = 0; - PyObject *__pyx_v_doclbl_vectors = 0; - PyObject *__pyx_v_doclbl_locks = 0; + PyObject *__pyx_v_doctag_vectors = 0; + PyObject *__pyx_v_doctag_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -2862,15 +2862,15 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); /* "trunk/gensim/models/doc2vec_inner.pyx":269 * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs */ values[5] = ((PyObject *)Py_False); @@ -2879,9 +2879,9 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":270 - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, 
doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ @@ -2920,7 +2920,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } @@ -2941,7 +2941,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } case 6: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doctags); if (value) { values[6] = value; kw_args--; } } case 7: @@ -2966,12 +2966,12 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } case 11: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_vectors); if (value) { values[11] = value; kw_args--; } } case 12: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_locks); if (value) { values[12] = value; kw_args--; } } } @@ -2999,17 +2999,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_indexes = values[2]; + __pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_train_words = values[5]; - 
__pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_doctags = values[6]; __pyx_v_learn_words = values[7]; __pyx_v_learn_hidden = values[8]; __pyx_v_word_vectors = values[9]; __pyx_v_word_locks = values[10]; - __pyx_v_doclbl_vectors = values[11]; - __pyx_v_doclbl_locks = values[12]; + __pyx_v_doctag_vectors = values[11]; + __pyx_v_doctag_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; @@ -3019,14 +3019,14 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* function exit code */ @@ -3034,26 +3034,26 @@ static PyObject 
*__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__train_words; int __pyx_v__learn_words; int __pyx_v__learn_hidden; - int __pyx_v__learn_doclbls; + int __pyx_v__learn_doctags; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doctag_vectors; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doctag_locks; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t 
__pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; - int __pyx_v_doclbl_len; + int __pyx_v_doctag_len; int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; @@ -3096,12 +3096,12 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_INCREF(__pyx_v_work); __Pyx_INCREF(__pyx_v_word_vectors); __Pyx_INCREF(__pyx_v_word_locks); - __Pyx_INCREF(__pyx_v_doclbl_vectors); - __Pyx_INCREF(__pyx_v_doclbl_locks); + __Pyx_INCREF(__pyx_v_doctag_vectors); + __Pyx_INCREF(__pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":271 - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int _train_words = train_words @@ -3113,7 +3113,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_hs = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":272 - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int _train_words = train_words @@ -3140,7 +3140,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * cdef int _train_words = train_words * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags */ __pyx_t_2 = 
__Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 274; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; @@ -3149,7 +3149,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * cdef int _train_words = train_words * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * */ __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3158,15 +3158,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":276 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden - * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< + * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__learn_doclbls = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doctags = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":283 - * cdef REAL_t *_doclbl_locks + * cdef REAL_t *_doctag_locks * cdef REAL_t *_work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size @@ -3190,7 +3190,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* 
"trunk/gensim/models/doc2vec_inner.pyx":292 * cdef int sentence_len - * cdef int doclbl_len + * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j @@ -3212,7 +3212,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":309 * - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -3222,11 +3222,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":310 - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: + * if doctag_vectors is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 310; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -3240,8 +3240,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 */ if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 311; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); @@ -3249,45 
+3249,45 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":312 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: # <<<<<<<<<<<<<< + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) */ - __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_5 = (__pyx_v_doctag_vectors == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":313 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 313; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_6); + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_6); __pyx_t_6 = 0; goto __pyx_L4; } __pyx_L4:; /* "trunk/gensim/models/doc2vec_inner.pyx":314 - * if 
doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 314; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); /* "trunk/gensim/models/doc2vec_inner.pyx":315 - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) @@ -3297,11 +3297,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":316 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: + * if doctag_locks is None: */ __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 316; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); @@ -3315,8 +3315,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf */ if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 317; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); @@ -3324,44 +3324,44 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":318 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: # <<<<<<<<<<<<<< + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) */ - __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_5 = (__pyx_v_doctag_locks == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":319 * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 319; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; /* "trunk/gensim/models/doc2vec_inner.pyx":320 - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); /* "trunk/gensim/models/doc2vec_inner.pyx":322 - * _doclbl_locks = 
(np.PyArray_DATA(doclbl_locks)) + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -3527,7 +3527,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) */ if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); @@ -3536,7 +3536,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * */ __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3551,21 +3551,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":336 * _work = np.PyArray_DATA(work) * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_12 < __pyx_t_11) != 0)) { __pyx_t_7 = __pyx_t_12; } else { __pyx_t_7 = __pyx_t_11; } - __pyx_v_doclbl_len = ((int)__pyx_t_7); + __pyx_v_doctag_len = ((int)__pyx_t_7); /* "trunk/gensim/models/doc2vec_inner.pyx":338 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< * predict_word = word_vocabs[i] @@ -3713,7 +3713,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(doclbl_len): + * for i in range(doctag_len): */ __pyx_t_2 = 0; __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3805,8 +3805,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] */ __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; @@ -3816,7 +3816,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item - * for i in range(doclbl_len): + * for i in range(doctag_len): */ } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -3827,30 +3827,30 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":355 * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item - * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): # <<<<<<<<<<<<<< + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 */ - __pyx_t_2 = __pyx_v_doclbl_len; + __pyx_t_2 = __pyx_v_doctag_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":356 * reduced_windows[i] = item - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 
0; - (__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; + (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; /* "trunk/gensim/models/doc2vec_inner.pyx":357 - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence @@ -4066,54 +4066,54 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":385 * * # docvec-training - * for j in range(doclbl_len): # <<<<<<<<<<<<<< + * for j in range(doctag_len): # <<<<<<<<<<<<<< * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], */ - __pyx_t_18 = __pyx_v_doclbl_len; + __pyx_t_18 = __pyx_v_doctag_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; /* "trunk/gensim/models/doc2vec_inner.pyx":386 * # docvec-training - * for j in range(doclbl_len): + * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], - * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) */ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":387 - * for j in range(doclbl_len): + * for j in range(doctag_len): * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], # <<<<<<<<<<<<<< - * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, 
_doctag_indexes[j], # <<<<<<<<<<<<<< + * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doclbl_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doctag_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); goto __pyx_L37; } __pyx_L37:; /* "trunk/gensim/models/doc2vec_inner.pyx":389 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], - * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, - * indexes[i], _doclbl_indexes[j], _alpha, _work, next_random, + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, + * indexes[i], _doctag_indexes[j], _alpha, _work, next_random, */ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":390 - * _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, # <<<<<<<<<<<<<< - * indexes[i], 
_doclbl_indexes[j], _alpha, _work, next_random, - * _learn_doclbls, _learn_hidden, _doclbl_locks) + * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< + * indexes[i], _doctag_indexes[j], _alpha, _work, next_random, + * _learn_doctags, _learn_hidden, _doctag_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doclbl_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doclbl_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doclbls, __pyx_v__learn_hidden, __pyx_v__doclbl_locks); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doctag_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); goto __pyx_L38; } __pyx_L38:; @@ -4141,7 +4141,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } /* "trunk/gensim/models/doc2vec_inner.pyx":394 - * _learn_doclbls, _learn_hidden, _doclbl_locks) + * _learn_doctags, _learn_hidden, _doctag_locks) * * return result # <<<<<<<<<<<<<< * @@ -4157,9 +4157,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doctags=True, 
learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* function exit code */ @@ -4178,8 +4178,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_XDECREF(__pyx_v_work); __Pyx_XDECREF(__pyx_v_word_vectors); __Pyx_XDECREF(__pyx_v_word_locks); - __Pyx_XDECREF(__pyx_v_doclbl_vectors); - __Pyx_XDECREF(__pyx_v_doclbl_locks); + __Pyx_XDECREF(__pyx_v_doctag_vectors); + __Pyx_XDECREF(__pyx_v_doctag_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; @@ -4188,9 +4188,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ @@ -4199,17 +4199,17 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_indexes = 0; + PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_neu1 = 0; - PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_doctags = 0; PyObject *__pyx_v_learn_words = 0; PyObject *__pyx_v_learn_hidden = 0; PyObject *__pyx_v_word_vectors = 0; PyObject *__pyx_v_word_locks = 0; - PyObject *__pyx_v_doclbl_vectors 
= 0; - PyObject *__pyx_v_doclbl_locks = 0; + PyObject *__pyx_v_doctag_vectors = 0; + PyObject *__pyx_v_doctag_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -4217,16 +4217,16 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); /* "trunk/gensim/models/doc2vec_inner.pyx":398 * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs */ values[6] = ((PyObject *)Py_True); @@ -4234,9 +4234,9 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":399 - * def 
train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ @@ -4275,7 +4275,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } @@ -4296,7 +4296,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } case 6: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doctags); if (value) { values[6] = value; kw_args--; } } case 7: @@ -4321,12 +4321,12 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } case 11: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_vectors); if (value) { values[11] = value; kw_args--; } } case 12: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + PyObject* value = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_locks); if (value) { values[12] = value; kw_args--; } } } @@ -4354,17 +4354,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_indexes = values[2]; + __pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_neu1 = values[5]; - __pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_doctags = values[6]; __pyx_v_learn_words = values[7]; __pyx_v_learn_hidden = values[8]; __pyx_v_word_vectors = values[9]; __pyx_v_word_locks = values[10]; - __pyx_v_doclbl_vectors = values[11]; - __pyx_v_doclbl_locks = values[12]; + __pyx_v_doctag_vectors = values[11]; + __pyx_v_doctag_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; @@ -4374,14 +4374,14 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, 
doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* function exit code */ @@ -4389,29 +4389,29 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v__learn_doclbls; + int __pyx_v__learn_doctags; int __pyx_v__learn_words; int __pyx_v__learn_hidden; int __pyx_v_cbow_mean; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_inv_count; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*__pyx_v__doctag_vectors; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doctag_locks; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__neu1; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; int __pyx_v_sentence_len; - int __pyx_v_doclbl_len; + int __pyx_v_doctag_len; int __pyx_v_window; int __pyx_v_i; int __pyx_v_j; @@ -4456,15 +4456,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_INCREF(__pyx_v_neu1); __Pyx_INCREF(__pyx_v_word_vectors); __Pyx_INCREF(__pyx_v_word_locks); - __Pyx_INCREF(__pyx_v_doclbl_vectors); - __Pyx_INCREF(__pyx_v_doclbl_locks); + __Pyx_INCREF(__pyx_v_doctag_vectors); + __Pyx_INCREF(__pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":400 - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -4473,10 +4473,10 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_hs = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":401 - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -4488,16 +4488,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":402 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< + * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__learn_doclbls = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doctags = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":403 * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean @@ -4506,7 +4506,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v__learn_words = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":404 - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean @@ -4562,7 +4562,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":423 * cdef int sentence_len - * cdef int doclbl_len + * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k, m @@ -4584,7 +4584,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":440 * - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -4594,11 +4594,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":441 - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: + * if doctag_vectors is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -4612,8 +4612,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< - * if doclbl_vectors is None: - * doclbl_vectors = 
model.docvecs.doclbl_syn0 + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 */ if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); @@ -4621,45 +4621,45 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":443 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: # <<<<<<<<<<<<<< + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) */ - __pyx_t_5 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_5 = (__pyx_v_doctag_vectors == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":444 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_6); + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_6); __pyx_t_6 = 0; goto __pyx_L4; } __pyx_L4:; /* "trunk/gensim/models/doc2vec_inner.pyx":445 - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); /* "trunk/gensim/models/doc2vec_inner.pyx":446 - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) @@ -4669,11 +4669,11 @@ static 
PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":447 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: + * if doctag_locks is None: */ __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); @@ -4687,8 +4687,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf */ if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); @@ -4696,44 +4696,44 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":449 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: # <<<<<<<<<<<<<< + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) */ - __pyx_t_5 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_5 = 
(__pyx_v_doctag_locks == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":450 * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L6; } __pyx_L6:; /* "trunk/gensim/models/doc2vec_inner.pyx":451 - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); /* "trunk/gensim/models/doc2vec_inner.pyx":453 - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -5221,7 +5221,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) */ __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; @@ -5239,46 +5239,46 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":490 * reduced_windows[i] = item * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_12 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if 
(unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_12 < __pyx_t_11) != 0)) { __pyx_t_7 = __pyx_t_12; } else { __pyx_t_7 = __pyx_t_11; } - __pyx_v_doclbl_len = ((int)__pyx_t_7); + __pyx_v_doctag_len = ((int)__pyx_t_7); /* "trunk/gensim/models/doc2vec_inner.pyx":491 * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * _doclbl_indexes[i] = doclbl_indexes[i] + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * for i in range(doctag_len): # <<<<<<<<<<<<<< + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 */ - __pyx_t_2 = __pyx_v_doclbl_len; + __pyx_t_2 = __pyx_v_doctag_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":492 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - 
(__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; + (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; /* "trunk/gensim/models/doc2vec_inner.pyx":493 - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence @@ -5428,7 +5428,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) - * for m in range(doclbl_len): + * for m in range(doctag_len): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); @@ -5436,7 +5436,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * else: * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< - * for m in range(doclbl_len): + * for m in range(doctag_len): * count += ONEF */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__word_vectors[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); @@ -5447,36 +5447,36 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":514 * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) - * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * for m in range(doctag_len): # <<<<<<<<<<<<<< * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) */ - __pyx_t_18 = __pyx_v_doclbl_len; + __pyx_t_18 = 
__pyx_v_doctag_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; /* "trunk/gensim/models/doc2vec_inner.pyx":515 * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) - * for m in range(doclbl_len): + * for m in range(doctag_len): * count += ONEF # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); /* "trunk/gensim/models/doc2vec_inner.pyx":516 - * for m in range(doclbl_len): + * for m in range(doctag_len): * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< * if count > (0.5): * inv_count = ONEF/count */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } /* "trunk/gensim/models/doc2vec_inner.pyx":517 * count += ONEF - * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< * 
inv_count = ONEF/count * if cbow_mean: @@ -5485,7 +5485,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":518 - * our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< * if cbow_mean: @@ -5586,7 +5586,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if not cbow_mean: * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< * # apply accumulated error in work - * if _learn_doclbls: + * if _learn_doctags: */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L35; @@ -5596,40 +5596,40 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":535 * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) 
* # apply accumulated error in work - * if _learn_doclbls: # <<<<<<<<<<<<<< - * for m in range(doclbl_len): - * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, + * if _learn_doctags: # <<<<<<<<<<<<<< + * for m in range(doctag_len): + * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, */ - __pyx_t_5 = (__pyx_v__learn_doclbls != 0); + __pyx_t_5 = (__pyx_v__learn_doctags != 0); if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":536 * # apply accumulated error in work - * if _learn_doclbls: - * for m in range(doclbl_len): # <<<<<<<<<<<<<< - * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) + * if _learn_doctags: + * for m in range(doctag_len): # <<<<<<<<<<<<<< + * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, + * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) */ - __pyx_t_18 = __pyx_v_doclbl_len; + __pyx_t_18 = __pyx_v_doctag_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; /* "trunk/gensim/models/doc2vec_inner.pyx":537 - * if _learn_doclbls: - * for m in range(doclbl_len): - * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, # <<<<<<<<<<<<<< - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) + * if _learn_doctags: + * for m in range(doctag_len): + * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, # <<<<<<<<<<<<<< + * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v__doclbl_locks[(__pyx_v__doclbl_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), 
(&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), __pyx_v__work, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } goto __pyx_L36; } __pyx_L36:; /* "trunk/gensim/models/doc2vec_inner.pyx":539 - * our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) + * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, + * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i: @@ -5638,7 +5638,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":540 - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) + * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i: @@ -5721,9 +5721,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* function exit code */ @@ -5743,8 +5743,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_XDECREF(__pyx_v_neu1); __Pyx_XDECREF(__pyx_v_word_vectors); __Pyx_XDECREF(__pyx_v_word_locks); - __Pyx_XDECREF(__pyx_v_doclbl_vectors); - 
__Pyx_XDECREF(__pyx_v_doclbl_locks); + __Pyx_XDECREF(__pyx_v_doctag_vectors); + __Pyx_XDECREF(__pyx_v_doctag_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; @@ -5753,9 +5753,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ @@ -5764,17 +5764,17 @@ static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sent static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; - PyObject *__pyx_v_doclbl_indexes = 0; + PyObject *__pyx_v_doctag_indexes = 0; PyObject *__pyx_v_alpha = 0; PyObject *__pyx_v_work = 0; PyObject *__pyx_v_neu1 = 0; - PyObject *__pyx_v_learn_doclbls = 0; + PyObject *__pyx_v_learn_doctags = 0; PyObject *__pyx_v_learn_words = 0; PyObject *__pyx_v_learn_hidden = 0; PyObject *__pyx_v_word_vectors = 0; PyObject *__pyx_v_word_locks = 0; - PyObject *__pyx_v_doclbl_vectors = 0; - PyObject *__pyx_v_doclbl_locks = 0; + PyObject *__pyx_v_doctag_vectors = 0; + PyObject *__pyx_v_doctag_locks = 0; int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; @@ -5782,16 +5782,16 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __Pyx_RefNannyDeclarations 
__Pyx_RefNannySetupContext("train_sentence_dm_concat (wrapper)", 0); { - static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doclbl_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doclbls,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doclbl_vectors,&__pyx_n_s_doclbl_locks,0}; + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); /* "trunk/gensim/models/doc2vec_inner.pyx":551 * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - * learn_doclbls=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs */ values[6] = ((PyObject *)Py_True); @@ -5799,9 +5799,9 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":552 - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): # <<<<<<<<<<<<<< + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, 
work=None, neu1=None, + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ @@ -5840,7 +5840,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: - if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_indexes)) != 0)) kw_args--; + if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } @@ -5861,7 +5861,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } case 6: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doclbls); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_learn_doctags); if (value) { values[6] = value; kw_args--; } } case 7: @@ -5886,12 +5886,12 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } case 11: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_vectors); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_vectors); if (value) { values[11] = value; kw_args--; } } case 12: if (kw_args > 0) { - PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doclbl_locks); + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_locks); if (value) { values[12] = value; kw_args--; } } } @@ -5919,17 +5919,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } __pyx_v_model = values[0]; __pyx_v_word_vocabs = values[1]; - __pyx_v_doclbl_indexes = values[2]; + 
__pyx_v_doctag_indexes = values[2]; __pyx_v_alpha = values[3]; __pyx_v_work = values[4]; __pyx_v_neu1 = values[5]; - __pyx_v_learn_doclbls = values[6]; + __pyx_v_learn_doctags = values[6]; __pyx_v_learn_words = values[7]; __pyx_v_learn_hidden = values[8]; __pyx_v_word_vectors = values[9]; __pyx_v_word_locks = values[10]; - __pyx_v_doclbl_vectors = values[11]; - __pyx_v_doclbl_locks = values[12]; + __pyx_v_doctag_vectors = values[11]; + __pyx_v_doctag_locks = values[12]; } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; @@ -5939,14 +5939,14 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doclbl_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doclbls, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doclbl_vectors, __pyx_v_doclbl_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, 
doctag_locks=None): */ /* function exit code */ @@ -5954,16 +5954,16 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doclbl_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doclbls, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doclbl_vectors, PyObject *__pyx_v_doclbl_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; - int __pyx_v__learn_doclbls; + int __pyx_v__learn_doctags; int __pyx_v__learn_words; int __pyx_v__learn_hidden; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_vectors; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_vectors; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doctag_vectors; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__word_locks; - __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doclbl_locks; + __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__doctag_locks; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__work; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v__neu1; 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v__alpha; @@ -5971,12 +5971,12 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence int __pyx_v_vector_size; int __pyx_v_codelens[10000]; __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; - __pyx_t_5numpy_uint32_t __pyx_v__doclbl_indexes[10000]; + __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_window_indexes[10000]; int __pyx_v_sentence_len; - int __pyx_v_doclbl_len; + int __pyx_v_doctag_len; int __pyx_v_window; - int __pyx_v_expected_doclbl_len; + int __pyx_v_expected_doctag_len; int __pyx_v_i; int __pyx_v_j; int __pyx_v_k; @@ -6018,15 +6018,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __Pyx_INCREF(__pyx_v_neu1); __Pyx_INCREF(__pyx_v_word_vectors); __Pyx_INCREF(__pyx_v_word_locks); - __Pyx_INCREF(__pyx_v_doclbl_vectors); - __Pyx_INCREF(__pyx_v_doclbl_locks); + __Pyx_INCREF(__pyx_v_doctag_vectors); + __Pyx_INCREF(__pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":553 - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 553; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -6035,10 +6035,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_v_hs = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":554 - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * 
word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6050,16 +6050,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":555 * cdef int hs = model.hs * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls # <<<<<<<<<<<<<< + * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doclbls); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__learn_doclbls = __pyx_t_2; + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__learn_doctags = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":556 * cdef int negative = model.negative - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * @@ -6068,7 +6068,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_v__learn_words = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":557 - * cdef int _learn_doclbls = learn_doclbls + * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int 
_learn_hidden = learn_hidden # <<<<<<<<<<<<<< * @@ -6115,9 +6115,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":575 * cdef int sentence_len - * cdef int doclbl_len + * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< - * cdef int expected_doclbl_len = model.dm_lbl_count + * cdef int expected_doctag_len = model.dm_tag_count * */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -6127,17 +6127,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_v_window = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":576 - * cdef int doclbl_len + * cdef int doctag_len * cdef int window = model.window - * cdef int expected_doclbl_len = model.dm_lbl_count # <<<<<<<<<<<<<< + * cdef int expected_doctag_len = model.dm_tag_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_lbl_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_v_expected_doclbl_len = __pyx_t_2; + __pyx_v_expected_doctag_len = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":579 * @@ -6170,35 +6170,35 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* 
"trunk/gensim/models/doc2vec_inner.pyx":593 * cdef unsigned long long next_random * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) # <<<<<<<<<<<<<< - * if doclbl_len != expected_doclbl_len: - * return 0 # skip doc without expected nmber of lbls + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * if doctag_len != expected_doctag_len: + * return 0 # skip doc without expected nmber of tags */ - __pyx_t_5 = PyObject_Length(__pyx_v_doclbl_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; } else { __pyx_t_7 = __pyx_t_6; } - __pyx_v_doclbl_len = ((int)__pyx_t_7); + __pyx_v_doctag_len = ((int)__pyx_t_7); /* "trunk/gensim/models/doc2vec_inner.pyx":594 * - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - * if doclbl_len != expected_doclbl_len: # <<<<<<<<<<<<<< - * return 0 # skip doc without expected nmber of lbls + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< + * return 0 # skip doc without expected nmber of tags * */ - __pyx_t_8 = ((__pyx_v_doclbl_len != __pyx_v_expected_doclbl_len) != 0); + __pyx_t_8 = ((__pyx_v_doctag_len != __pyx_v_expected_doctag_len) != 0); if (__pyx_t_8) { /* "trunk/gensim/models/doc2vec_inner.pyx":595 - * doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - * if doclbl_len != expected_doclbl_len: - * return 0 # skip doc without expected nmber of lbls # <<<<<<<<<<<<<< + * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * if doctag_len != expected_doctag_len: + * return 0 # skip doc without expected nmber of tags # <<<<<<<<<<<<<< * - * # default vectors, locks from 
syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -6208,7 +6208,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":598 * - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) @@ -6218,11 +6218,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence if (__pyx_t_9) { /* "trunk/gensim/models/doc2vec_inner.pyx":599 - * # default vectors, locks from syn0/doclbl_syn0 + * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: + * if doctag_vectors is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -6236,8 +6236,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 */ if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); @@ -6245,45 +6245,45 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":601 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: # <<<<<<<<<<<<<< - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: # <<<<<<<<<<<<<< + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) */ - __pyx_t_9 = (__pyx_v_doclbl_vectors == Py_None); + __pyx_t_9 = (__pyx_v_doctag_vectors == Py_None); __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { /* "trunk/gensim/models/doc2vec_inner.pyx":602 * _word_vectors = (np.PyArray_DATA(word_vectors)) - * if doclbl_vectors is None: - * doclbl_vectors = model.docvecs.doclbl_syn0 # <<<<<<<<<<<<<< - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doclbl_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_vectors, __pyx_t_4); + __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_4); __pyx_t_4 = 0; goto __pyx_L5; } __pyx_L5:; /* "trunk/gensim/models/doc2vec_inner.pyx":603 - * if doclbl_vectors is None: - * 
doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) # <<<<<<<<<<<<<< + * if doctag_vectors is None: + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doclbl_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_vectors))); + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); /* "trunk/gensim/models/doc2vec_inner.pyx":604 - * doclbl_vectors = model.docvecs.doclbl_syn0 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * doctag_vectors = model.docvecs.doctag_syn0 + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) @@ -6293,11 +6293,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence if (__pyx_t_9) { /* "trunk/gensim/models/doc2vec_inner.pyx":605 - * _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: + * if doctag_locks is None: */ __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if 
(unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); @@ -6311,8 +6311,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf */ if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); @@ -6320,44 +6320,44 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":607 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: # <<<<<<<<<<<<<< - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: # <<<<<<<<<<<<<< + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) */ - __pyx_t_9 = (__pyx_v_doclbl_locks == Py_None); + __pyx_t_9 = (__pyx_v_doctag_locks == Py_None); __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { /* "trunk/gensim/models/doc2vec_inner.pyx":608 * _word_locks = (np.PyArray_DATA(word_locks)) - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf # <<<<<<<<<<<<<< - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ __pyx_t_4 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doclbl_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __Pyx_DECREF_SET(__pyx_v_doclbl_locks, __pyx_t_1); + __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7; } __pyx_L7:; /* "trunk/gensim/models/doc2vec_inner.pyx":609 - * if doclbl_locks is None: - * doclbl_locks = model.docvecs.doclbl_syn0_lockf - * _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) # <<<<<<<<<<<<<< + * if doctag_locks is None: + * doctag_locks = model.docvecs.doctag_syn0_lockf + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doclbl_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doclbl_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_v__doclbl_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doclbl_locks))); + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v__doctag_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); /* "trunk/gensim/models/doc2vec_inner.pyx":611 - * _doclbl_locks = 
(np.PyArray_DATA(doclbl_locks)) + * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< * syn1 = (np.PyArray_DATA(model.syn1)) @@ -6751,7 +6751,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * result += 1 * j = j + 1 # <<<<<<<<<<<<<< * - * for i in range(doclbl_len): + * for i in range(doctag_len): */ __pyx_v_j = (__pyx_v_j + 1); } @@ -6761,30 +6761,30 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":647 * j = j + 1 * - * for i in range(doclbl_len): # <<<<<<<<<<<<<< - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): # <<<<<<<<<<<<<< + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 */ - __pyx_t_2 = __pyx_v_doclbl_len; + __pyx_t_2 = __pyx_v_doctag_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":648 * - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] # <<<<<<<<<<<<<< + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doclbl_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - (__pyx_v__doclbl_indexes[__pyx_v_i]) = __pyx_t_14; + 
(__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; /* "trunk/gensim/models/doc2vec_inner.pyx":649 - * for i in range(doclbl_len): - * _doclbl_indexes[i] = doclbl_indexes[i] + * for i in range(doctag_len): + * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * * # release GIL & train on the sentence @@ -6838,26 +6838,26 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":658 * * # compose l1 & clear work - * for m in range(doclbl_len): # <<<<<<<<<<<<<< + * for m in range(doctag_len): # <<<<<<<<<<<<<< * # doc vector(s) - * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], + * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], */ - __pyx_t_15 = __pyx_v_doclbl_len; + __pyx_t_15 = __pyx_v_doctag_len; for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; /* "trunk/gensim/models/doc2vec_inner.pyx":660 - * for m in range(doclbl_len): + * for m in range(doctag_len): * # doc vector(s) - * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], # <<<<<<<<<<<<<< + * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], # <<<<<<<<<<<<<< * vector_size * cython.sizeof(REAL_t)) * n = 0 */ - memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } /* "trunk/gensim/models/doc2vec_inner.pyx":662 - * memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], + * memcpy(&_neu1[m * 
vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * n = 0 # <<<<<<<<<<<<<< * for m in range(j, k): @@ -6942,7 +6942,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * window_indexes[n] = indexes[m] * n = n + 1 # <<<<<<<<<<<<<< * for m in range(2 * window): - * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], */ __pyx_v_n = (__pyx_v_n + 1); __pyx_L25_continue:; @@ -6952,7 +6952,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * window_indexes[n] = indexes[m] * n = n + 1 * for m in range(2 * window): # <<<<<<<<<<<<<< - * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) */ __pyx_t_6 = (2 * __pyx_v_window); @@ -6962,15 +6962,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":673 * n = n + 1 * for m in range(2 * window): - * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< + * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error */ - memcpy((&(__pyx_v__neu1[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); + memcpy((&(__pyx_v__neu1[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), 
(&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } /* "trunk/gensim/models/doc2vec_inner.pyx":675 - * memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< * @@ -7025,52 +7025,52 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":686 * layer1_size, vector_size, _learn_hidden) * - * if _learn_doclbls: # <<<<<<<<<<<<<< - * for m in range(doclbl_len): - * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], + * if _learn_doctags: # <<<<<<<<<<<<<< + * for m in range(doctag_len): + * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], */ - __pyx_t_9 = (__pyx_v__learn_doclbls != 0); + __pyx_t_9 = (__pyx_v__learn_doctags != 0); if (__pyx_t_9) { /* "trunk/gensim/models/doc2vec_inner.pyx":687 * - * if _learn_doclbls: - * for m in range(doclbl_len): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) + * if _learn_doctags: + * for m in range(doctag_len): # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], + * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) */ - __pyx_t_15 = __pyx_v_doclbl_len; + __pyx_t_15 = __pyx_v_doctag_len; for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; /* "trunk/gensim/models/doc2vec_inner.pyx":688 - * if _learn_doclbls: - * for m in range(doclbl_len): - * our_saxpy(&vector_size, 
&_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) + * if _learn_doctags: + * for m in range(doctag_len): + * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< + * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doclbl_locks[(__pyx_v__doclbl_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doclbl_vectors[((__pyx_v__doclbl_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__doctag_locks[(__pyx_v__doctag_indexes[__pyx_v_m])])), (&(__pyx_v__work[(__pyx_v_m * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } goto __pyx_L35; } __pyx_L35:; /* "trunk/gensim/models/doc2vec_inner.pyx":690 - * our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], - * &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) + * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], + * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< * for m in range(2 * window): - * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], */ __pyx_t_9 = (__pyx_v__learn_words != 0); if (__pyx_t_9) { /* "trunk/gensim/models/doc2vec_inner.pyx":691 - * &ONE, 
&_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) + * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: * for m in range(2 * window): # <<<<<<<<<<<<<< - * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) */ __pyx_t_6 = (2 * __pyx_v_window); @@ -7080,11 +7080,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":692 * if _learn_words: * for m in range(2 * window): - * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], # <<<<<<<<<<<<<< + * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], # <<<<<<<<<<<<<< * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * */ - __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doclbl_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_vector_size), (&(__pyx_v__word_locks[(__pyx_v_window_indexes[__pyx_v_m])])), (&(__pyx_v__work[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } goto __pyx_L38; } @@ -7127,9 +7127,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* 
"trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* function exit code */ @@ -7146,8 +7146,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __Pyx_XDECREF(__pyx_v_neu1); __Pyx_XDECREF(__pyx_v_word_vectors); __Pyx_XDECREF(__pyx_v_word_locks); - __Pyx_XDECREF(__pyx_v_doclbl_vectors); - __Pyx_XDECREF(__pyx_v_doclbl_locks); + __Pyx_XDECREF(__pyx_v_doctag_vectors); + __Pyx_XDECREF(__pyx_v_doctag_locks); __Pyx_XGIVEREF(__pyx_r); __Pyx_RefNannyFinishContext(); return __pyx_r; @@ -9449,21 +9449,21 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_count, __pyx_k_count, sizeof(__pyx_k_count), 0, 0, 1, 1}, {&__pyx_n_s_cpointer, __pyx_k_cpointer, sizeof(__pyx_k_cpointer), 0, 0, 1, 1}, {&__pyx_n_s_d_res, __pyx_k_d_res, sizeof(__pyx_k_d_res), 0, 0, 1, 1}, - {&__pyx_n_s_dm_lbl_count, __pyx_k_dm_lbl_count, sizeof(__pyx_k_dm_lbl_count), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_indexes, __pyx_k_doclbl_indexes, sizeof(__pyx_k_doclbl_indexes), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_indexes_2, __pyx_k_doclbl_indexes_2, sizeof(__pyx_k_doclbl_indexes_2), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_len, __pyx_k_doclbl_len, sizeof(__pyx_k_doclbl_len), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_locks, __pyx_k_doclbl_locks, sizeof(__pyx_k_doclbl_locks), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_locks_2, __pyx_k_doclbl_locks_2, sizeof(__pyx_k_doclbl_locks_2), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_syn0, __pyx_k_doclbl_syn0, sizeof(__pyx_k_doclbl_syn0), 0, 0, 1, 1}, - 
{&__pyx_n_s_doclbl_syn0_lockf, __pyx_k_doclbl_syn0_lockf, sizeof(__pyx_k_doclbl_syn0_lockf), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_vectors, __pyx_k_doclbl_vectors, sizeof(__pyx_k_doclbl_vectors), 0, 0, 1, 1}, - {&__pyx_n_s_doclbl_vectors_2, __pyx_k_doclbl_vectors_2, sizeof(__pyx_k_doclbl_vectors_2), 0, 0, 1, 1}, + {&__pyx_n_s_dm_tag_count, __pyx_k_dm_tag_count, sizeof(__pyx_k_dm_tag_count), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_indexes, __pyx_k_doctag_indexes, sizeof(__pyx_k_doctag_indexes), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_indexes_2, __pyx_k_doctag_indexes_2, sizeof(__pyx_k_doctag_indexes_2), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_len, __pyx_k_doctag_len, sizeof(__pyx_k_doctag_len), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_locks, __pyx_k_doctag_locks, sizeof(__pyx_k_doctag_locks), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_locks_2, __pyx_k_doctag_locks_2, sizeof(__pyx_k_doctag_locks_2), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_syn0, __pyx_k_doctag_syn0, sizeof(__pyx_k_doctag_syn0), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_syn0_lockf, __pyx_k_doctag_syn0_lockf, sizeof(__pyx_k_doctag_syn0_lockf), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_vectors, __pyx_k_doctag_vectors, sizeof(__pyx_k_doctag_vectors), 0, 0, 1, 1}, + {&__pyx_n_s_doctag_vectors_2, __pyx_k_doctag_vectors_2, sizeof(__pyx_k_doctag_vectors_2), 0, 0, 1, 1}, {&__pyx_n_s_docvecs, __pyx_k_docvecs, sizeof(__pyx_k_docvecs), 0, 0, 1, 1}, {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, {&__pyx_n_s_expected, __pyx_k_expected, sizeof(__pyx_k_expected), 0, 0, 1, 1}, - {&__pyx_n_s_expected_doclbl_len, __pyx_k_expected_doclbl_len, sizeof(__pyx_k_expected_doclbl_len), 0, 0, 1, 1}, + {&__pyx_n_s_expected_doctag_len, __pyx_k_expected_doctag_len, sizeof(__pyx_k_expected_doctag_len), 0, 0, 1, 1}, {&__pyx_n_s_fblas, __pyx_k_fblas, sizeof(__pyx_k_fblas), 0, 0, 1, 1}, {&__pyx_n_s_float32, __pyx_k_float32, sizeof(__pyx_k_float32), 0, 0, 1, 1}, {&__pyx_n_s_hs, __pyx_k_hs, 
sizeof(__pyx_k_hs), 0, 0, 1, 1}, @@ -9477,8 +9477,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_j, __pyx_k_j, sizeof(__pyx_k_j), 0, 0, 1, 1}, {&__pyx_n_s_k, __pyx_k_k, sizeof(__pyx_k_k), 0, 0, 1, 1}, {&__pyx_n_s_layer1_size, __pyx_k_layer1_size, sizeof(__pyx_k_layer1_size), 0, 0, 1, 1}, - {&__pyx_n_s_learn_doclbls, __pyx_k_learn_doclbls, sizeof(__pyx_k_learn_doclbls), 0, 0, 1, 1}, - {&__pyx_n_s_learn_doclbls_2, __pyx_k_learn_doclbls_2, sizeof(__pyx_k_learn_doclbls_2), 0, 0, 1, 1}, + {&__pyx_n_s_learn_doctags, __pyx_k_learn_doctags, sizeof(__pyx_k_learn_doctags), 0, 0, 1, 1}, + {&__pyx_n_s_learn_doctags_2, __pyx_k_learn_doctags_2, sizeof(__pyx_k_learn_doctags_2), 0, 0, 1, 1}, {&__pyx_n_s_learn_hidden, __pyx_k_learn_hidden, sizeof(__pyx_k_learn_hidden), 0, 0, 1, 1}, {&__pyx_n_s_learn_hidden_2, __pyx_k_learn_hidden_2, sizeof(__pyx_k_learn_hidden_2), 0, 0, 1, 1}, {&__pyx_n_s_learn_words, __pyx_k_learn_words, sizeof(__pyx_k_learn_words), 0, 0, 1, 1}, @@ -9669,11 +9669,11 @@ static int __Pyx_InitCachedConstants(void) { /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, 
__pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doclbls_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); __pyx_codeobj__15 = 
(PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -9681,11 +9681,11 @@ static int __Pyx_InitCachedConstants(void) { /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, 
__pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 397, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -9693,11 +9693,11 @@ static int __Pyx_InitCachedConstants(void) { /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def 
train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doclbl_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doclbls, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doclbl_vectors, __pyx_n_s_doclbl_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doclbls_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doclbl_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doclbl_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doclbl_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doclbl_len, __pyx_n_s_window, __pyx_n_s_expected_doclbl_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, 
__pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 550, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -10035,9 +10035,9 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # <<<<<<<<<<<<<< - * train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, 
word_locks=None, doctag_vectors=None, doctag_locks=None): */ __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -10047,9 +10047,9 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "trunk/gensim/models/doc2vec_inner.pyx":397 * * - * def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); @@ -10059,9 +10059,9 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "trunk/gensim/models/doc2vec_inner.pyx":550 * * - * def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< - * learn_doclbls=True, learn_words=True, learn_hidden=True, - * word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): + * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * learn_doctags=True, learn_words=True, learn_hidden=True, + * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ __pyx_t_2 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index dd2982c258..2ea8e195b6 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -265,30 +265,30 @@ cdef unsigned long long fast_sentence_dmc_neg( return next_random -def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, - train_words=False, learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): +def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative cdef int _train_words = train_words cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden - cdef int _learn_doclbls = learn_doclbls + cdef int _learn_doctags = learn_doctags cdef REAL_t *_word_vectors - cdef REAL_t *_doclbl_vectors + cdef REAL_t *_doctag_vectors cdef REAL_t *_word_locks - cdef REAL_t *_doclbl_locks + cdef REAL_t *_doctag_locks cdef REAL_t *_work cdef REAL_t _alpha = alpha cdef int size = model.layer1_size cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t _doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int sentence_len - cdef int doclbl_len + cdef int doctag_len cdef int window = model.window cdef int i, j @@ -305,19 +305,19 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, 
cdef unsigned long long table_len cdef unsigned long long next_random - # default vectors, locks from syn0/doclbl_syn0 + # default vectors, locks from syn0/doctag_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf - _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf + _doctag_locks = (np.PyArray_DATA(doctag_locks)) if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -333,7 +333,7 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, work = zeros(model.layer1_size, dtype=REAL) _work = np.PyArray_DATA(work) sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) + doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) for i in range(sentence_len): predict_word = word_vocabs[i] @@ -352,8 +352,8 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, # single randint() call avoids a big thread-synchronization slowdown for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item - for i in range(doclbl_len): - _doclbl_indexes[i] = doclbl_indexes[i] + for i in range(doctag_len): + _doctag_indexes[i] = doctag_indexes[i] result += 1 # release GIL & train on the sentence @@ -382,33 +382,33 @@ def train_sentence_dbow(model, word_vocabs, doclbl_indexes, alpha, work=None, _learn_words, _learn_hidden, _word_locks) # docvec-training - for j in range(doclbl_len): + for j in range(doctag_len): if 
hs: - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doclbl_vectors, syn1, size, _doclbl_indexes[j], - _alpha, _work, _learn_doclbls, _learn_hidden, _doclbl_locks) + fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) if negative: - next_random = fast_sentence_dbow_neg(negative, table, table_len, _doclbl_vectors, syn1neg, size, - indexes[i], _doclbl_indexes[j], _alpha, _work, next_random, - _learn_doclbls, _learn_hidden, _doclbl_locks) + next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, + indexes[i], _doctag_indexes[j], _alpha, _work, next_random, + _learn_doctags, _learn_hidden, _doctag_locks) return result -def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): +def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative - cdef int _learn_doclbls = learn_doclbls + cdef int _learn_doctags = learn_doctags cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden cdef int cbow_mean = model.cbow_mean cdef REAL_t count, inv_count = 1.0 cdef REAL_t *_word_vectors - cdef REAL_t *_doclbl_vectors + cdef REAL_t *_doctag_vectors cdef REAL_t *_word_locks - cdef REAL_t *_doclbl_locks + cdef REAL_t *_doctag_locks cdef REAL_t *_work cdef REAL_t *_neu1 cdef REAL_t _alpha = alpha @@ -416,10 +416,10 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t 
_doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] cdef int sentence_len - cdef int doclbl_len + cdef int doctag_len cdef int window = model.window cdef int i, j, k, m @@ -436,19 +436,19 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 cdef unsigned long long table_len cdef unsigned long long next_random - # default vectors, locks from syn0/doclbl_syn0 + # default vectors, locks from syn0/doctag_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf - _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf + _doctag_locks = (np.PyArray_DATA(doctag_locks)) if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -487,9 +487,9 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 for i, item in enumerate(np.random.randint(0, window, sentence_len)): reduced_windows[i] = item - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - for i in range(doclbl_len): - _doclbl_indexes[i] = doclbl_indexes[i] + doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + for i in range(doctag_len): + _doctag_indexes[i] = doctag_indexes[i] result += 1 # release GIL & train on the sentence @@ -511,9 +511,9 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 else: count += ONEF our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) - for m in 
range(doclbl_len): + for m in range(doctag_len): count += ONEF - our_saxpy(&size, &ONEF, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE, _neu1, &ONE) + our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) if count > (0.5): inv_count = ONEF/count if cbow_mean: @@ -532,10 +532,10 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 if not cbow_mean: sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) # apply accumulated error in work - if _learn_doclbls: - for m in range(doclbl_len): - our_saxpy(&size, &_doclbl_locks[_doclbl_indexes[m]], _work, - &ONE, &_doclbl_vectors[_doclbl_indexes[m] * size], &ONE) + if _learn_doctags: + for m in range(doctag_len): + our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, + &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) if _learn_words: for m in range(j, k): if m == i: @@ -547,19 +547,19 @@ def train_sentence_dm(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1 return result -def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=None, neu1=None, - learn_doclbls=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doclbl_vectors=None, doclbl_locks=None): +def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + learn_doctags=True, learn_words=True, learn_hidden=True, + word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs cdef int negative = model.negative - cdef int _learn_doclbls = learn_doclbls + cdef int _learn_doctags = learn_doctags cdef int _learn_words = learn_words cdef int _learn_hidden = learn_hidden cdef REAL_t *_word_vectors - cdef REAL_t *_doclbl_vectors + cdef REAL_t *_doctag_vectors cdef REAL_t *_word_locks - cdef REAL_t *_doclbl_locks + cdef REAL_t *_doctag_locks cdef REAL_t *_work cdef REAL_t *_neu1 cdef REAL_t _alpha = alpha @@ -568,12 +568,12 @@ def 
train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non cdef int codelens[MAX_SENTENCE_LEN] cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t _doclbl_indexes[MAX_SENTENCE_LEN] + cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] cdef int sentence_len - cdef int doclbl_len + cdef int doctag_len cdef int window = model.window - cdef int expected_doclbl_len = model.dm_lbl_count + cdef int expected_doctag_len = model.dm_tag_count cdef int i, j, k, m, n cdef long result = 0 @@ -590,23 +590,23 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non cdef unsigned long long table_len cdef unsigned long long next_random - doclbl_len = min(MAX_SENTENCE_LEN, len(doclbl_indexes)) - if doclbl_len != expected_doclbl_len: - return 0 # skip doc without expected nmber of lbls + doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + if doctag_len != expected_doctag_len: + return 0 # skip doc without expected nmber of tags - # default vectors, locks from syn0/doclbl_syn0 + # default vectors, locks from syn0/doctag_syn0 if word_vectors is None: word_vectors = model.syn0 _word_vectors = (np.PyArray_DATA(word_vectors)) - if doclbl_vectors is None: - doclbl_vectors = model.docvecs.doclbl_syn0 - _doclbl_vectors = (np.PyArray_DATA(doclbl_vectors)) + if doctag_vectors is None: + doctag_vectors = model.docvecs.doctag_syn0 + _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) if word_locks is None: word_locks = model.syn0_lockf _word_locks = (np.PyArray_DATA(word_locks)) - if doclbl_locks is None: - doclbl_locks = model.docvecs.doclbl_syn0_lockf - _doclbl_locks = (np.PyArray_DATA(doclbl_locks)) + if doctag_locks is None: + doctag_locks = model.docvecs.doctag_syn0_lockf + _doctag_locks = (np.PyArray_DATA(doctag_locks)) if hs: syn1 = (np.PyArray_DATA(model.syn1)) @@ -644,8 +644,8 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non result += 1 j = j + 
1 - for i in range(doclbl_len): - _doclbl_indexes[i] = doclbl_indexes[i] + for i in range(doctag_len): + _doctag_indexes[i] = doctag_indexes[i] result += 1 # release GIL & train on the sentence @@ -655,9 +655,9 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non k = i + window + 1 # past sentence end OK: will pad with null word # compose l1 & clear work - for m in range(doclbl_len): + for m in range(doctag_len): # doc vector(s) - memcpy(&_neu1[m * vector_size], &_doclbl_vectors[_doclbl_indexes[m] * vector_size], + memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) n = 0 for m in range(j, k): @@ -670,7 +670,7 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non window_indexes[n] = indexes[m] n = n + 1 for m in range(2 * window): - memcpy(&_neu1[(doclbl_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], + memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], vector_size * cython.sizeof(REAL_t)) memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error @@ -683,13 +683,13 @@ def train_sentence_dm_concat(model, word_vocabs, doclbl_indexes, alpha, work=Non _neu1, syn1neg, indexes[i], _alpha, _work, layer1_size, vector_size, _learn_hidden) - if _learn_doclbls: - for m in range(doclbl_len): - our_saxpy(&vector_size, &_doclbl_locks[_doclbl_indexes[m]], &_work[m * vector_size], - &ONE, &_doclbl_vectors[_doclbl_indexes[m] * vector_size], &ONE) + if _learn_doctags: + for m in range(doctag_len): + our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], + &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) if _learn_words: for m in range(2 * window): - our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doclbl_len + m) * vector_size], + our_saxpy(&vector_size, &_word_locks[window_indexes[m]], 
&_work[(doctag_len + m) * vector_size], &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) return result From 522913902a00d734ff18bd86783d729754837287 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Mon, 8 Jun 2015 16:14:25 -0700 Subject: [PATCH 22/49] reset_from, borrow_from to share vocab/etc between models in testing --- gensim/models/doc2vec.py | 10 ++++++++++ gensim/models/word2vec.py | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 25ca3ef40e..3373fb7d81 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -268,6 +268,11 @@ def _int_index(self, index, missing=None): def __getitem__(self, index): return self.doctag_syn0[self._int_index(index)] + def borrow_from(self, other_docvecs): + self.max_index = other_docvecs.max_index + self.doctags = other_docvecs.doctags + self.index2doctag = other_docvecs.index2doctag + def reset_weights(self, model): length = max(len(self.doctags),self.max_index) if self.mapfile_path: @@ -369,6 +374,11 @@ def reset_weights(self): Word2Vec.reset_weights(self) self.docvecs.reset_weights(self) + def reset_from(self, other_model): + """Reuse shareable structures from other_model.""" + self.docvecs.borrow_from(other_model.docvecs) + Word2Vec.reset_from(self, other_model) + def _vocab_from(self, sentences): sentence_no, vocab = -1, {} total_words = 0 diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 0db5b49662..7cbe8be818 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -440,6 +440,16 @@ def _vocab_from(self, sentences): (len(vocab), total_words, sentence_no + 1)) return vocab + def reset_from(self, other_model): + """ + Borrow shareable pre-built structures (like vocab) from the other_model. Useful + if testing multiple models in parallel on the same corpus. 
+ """ + self.vocab = other_model.vocab + self.index2word = other_model.index2word + self.table = other_model.table + self.reset_weights() + def _prepare_sentences(self, sentences): for sentence in sentences: # avoid calling random_sample() where prob >= 1, to speed things up a little: From 012823fd84af7de362d421bc07d7865d2816e9c6 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 06:49:00 -0700 Subject: [PATCH 23/49] most_similar, etc on docvecs --- gensim/models/doc2vec.py | 127 +++++++++++++++++++++++++++++++++++++- gensim/models/word2vec.py | 5 +- 2 files changed, 128 insertions(+), 4 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 3373fb7d81..001739971f 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -44,15 +44,16 @@ from collections import namedtuple -from numpy import zeros, random, sum as np_sum, add as np_add, concatenate,\ - repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap -from six import string_types +from numpy import zeros, random, sum as np_sum, add as np_add, concatenate, \ + repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap, \ + sqrt, newaxis, ndarray, dot, argsort, vstack logger = logging.getLogger(__name__) from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from gensim.models.word2vec import Word2Vec, Vocab, train_cbow_pair, train_sg_pair, train_sentence_sg from six.moves import xrange +from six import string_types, integer_types try: from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat,\ @@ -250,6 +251,7 @@ def note_doctag(self, key, sentence_no, sentence_length): else: self.doctags[key] = Doctag(sentence_no, sentence_length, 1) self.index2doctag.append(key) + self.max_index = max(self.max_index, len(self.index2doctag)) def indexed_doctags(self, doctag_tokens): return ([i for i in [self._int_index(index,-1) for index in 
doctag_tokens] if i > -1], @@ -265,14 +267,29 @@ def _int_index(self, index, missing=None): else: return self.doctags[index].index if index in self.doctags else missing + def _key_index(self, i_index, missing=None): + if i_index < len(self.index2doctag): + return self.index2doctag[i_index] + else: + return i_index + def __getitem__(self, index): return self.doctag_syn0[self._int_index(index)] + def __contains__(self, index): + if isinstance(index, int): + return index < self.max_index + else: + return index in self.doctags + def borrow_from(self, other_docvecs): self.max_index = other_docvecs.max_index self.doctags = other_docvecs.doctags self.index2doctag = other_docvecs.index2doctag + def clear_sims(self): + self.doctag_syn0norm = None + def reset_weights(self, model): length = max(len(self.doctags),self.max_index) if self.mapfile_path: @@ -288,6 +305,106 @@ def reset_weights(self, model): seed = "%d %s" % (model.seed, self.index2doctag[i] if len(self.index2doctag)>0 else str(i)) self.doctag_syn0[i] = model.seeded_vector(seed) + def init_sims(self, replace=False): + """ + Precompute L2-normalized vectors. + + If `replace` is set, forget the original vectors and only keep the normalized + ones = saves lots of memory! + + Note that you **cannot continue training** after doing a replace. The model becomes + effectively read-only = you can call `most_similar`, `similarity` etc., but not `train`. + + """ + if getattr(self, 'doctag_syn0norm', None) is None or replace: + logger.info("precomputing L2-norms of doc weight vectors") + if replace: + for i in xrange(self.doctag_syn0.shape[0]): + self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1)) + self.doctag_syn0norm = self.doctag_syn0 + else: + self.doctag_syn0norm = (self.doctag_syn0 / sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL) + + def most_similar(self, positive=[], negative=[], topn=10): + """ + Find the top-N most similar docvecs known from training. 
Positive docs contribute + positively towards the similarity, negative docs negatively. + + This method computes cosine similarity between a simple mean of the projection + weight vectors of the given docs. Docs may be specified as vectors, integer indexes + of trained docvecs, or if the documents were originally presented with string tags, + by the corresponding tags. + """ + self.init_sims() + + if isinstance(positive, string_types + integer_types) and not negative: + # allow calls like most_similar('dog'), as a shorthand for most_similar(['dog']) + positive = [positive] + + # add weights for each doc, if not already present; default to 1.0 for positive and -1.0 for negative docs + positive = [(doc, 1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) + else doc for doc in positive] + negative = [(doc, -1.0) if isinstance(doc, string_types + (ndarray,) + integer_types) + else doc for doc in negative] + + # compute the weighted average of all docs + all_docs, mean = set(), [] + for doc, weight in positive + negative: + if isinstance(doc, ndarray): + mean.append(weight * doc) + elif doc in self.doctags or doc < self.max_index: + mean.append(weight * self.doctag_syn0norm[self._int_index(doc)]) + all_docs.add(self._int_index(doc)) + else: + raise KeyError("doc '%s' not in trained set" % doc) + if not mean: + raise ValueError("cannot compute similarity with no input") + mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) + + dists = dot(self.doctag_syn0norm, mean) + if not topn: + return dists + best = argsort(dists)[::-1][:topn + len(all_docs)] + # ignore (don't return) docs from the input + result = [(self._key_index(sim), float(dists[sim])) for sim in best if sim not in all_docs] + return result[:topn] + + def doesnt_match(self, docs): + """ + Which doc from the given list doesn't go with the others? + + (TODO: Accept vectors of out-of-training-set docs, as if from inference.) 
+ + """ + self.init_sims() + + docs = [doc for doc in docs if doc in self.doctags or 0 <= doc < self.max_index] # filter out unknowns + logger.debug("using docs %s" % docs) + if not docs: + raise ValueError("cannot select a doc from an empty list") + vectors = vstack(self.doctag_syn0norm[self._int_index(doc)] for doc in docs).astype(REAL) + mean = matutils.unitvec(vectors.mean(axis=0)).astype(REAL) + dists = dot(vectors, mean) + return sorted(zip(dists, docs))[0][1] + + def similarity(self, d1, d2): + """ + Compute cosine similarity between two docvecs in the trained set, specified by int index or + string tag. (TODO: Accept vectors of out-of-training-set docs, as if from inference.) + + """ + return dot(matutils.unitvec(self[d1]), matutils.unitvec(self[d2])) + + def n_similarity(self, ds1, ds2): + """ + Compute cosine similarity between two sets of docvecs from the trained set, specified by int + index or string tag. (TODO: Accept vectors of out-of-training-set docs, as if from inference.) 
+ + """ + v1 = [self[doc] for doc in ds1] + v2 = [self[doc] for doc in ds2] + return dot(matutils.unitvec(array(v1).mean(axis=0)), matutils.unitvec(array(v2).mean(axis=0))) + class Doctag(namedtuple('Doctag', 'index, word_count, doc_count')): """A string document tag discovered during the initial vocabulary @@ -366,6 +483,10 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, self.build_vocab(sentences) self.train(sentences) + def clear_sims(self): + Word2Vec.reset_weights(self) + self.docvecs.clear_sims() + def reset_weights(self): if self.dm_concat: # expand l1 size to match concatenated tags+words length diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 7cbe8be818..42fc77983e 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -536,9 +536,12 @@ def worker_train(): elapsed = time.time() - start logger.info("training on %i words took %.1fs, %.0f words/s" % (word_count[0], elapsed, word_count[0] / elapsed if elapsed else 0.0)) - self.syn0norm = None + self.clear_sims() return word_count[0] + def clear_sims(self): + self.syn0norm = None + def reset_weights(self): """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary.""" logger.info("resetting layer weights") From 93a62724850cdecb31e17ca4b7571f26e8abd7b4 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 06:53:30 -0700 Subject: [PATCH 24/49] initial doc2vec unit tests --- gensim/test/test_doc2vec.py | 348 ++++++++++++++++++++++++++++++++++++ 1 file changed, 348 insertions(+) create mode 100644 gensim/test/test_doc2vec.py diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py new file mode 100644 index 0000000000..f27db4e7a4 --- /dev/null +++ b/gensim/test/test_doc2vec.py @@ -0,0 +1,348 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2010 Radim Rehurek +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html + +""" +Automated 
tests for checking transformation algorithms (the models package). +""" + +from __future__ import with_statement + +import logging +import unittest +import os +import tempfile +import itertools +import bz2 +from six import iteritems, iterkeys +from six.moves import xrange, zip as izip +from collections import namedtuple, Counter + +import numpy as np + +from gensim import utils, matutils +from gensim.models import doc2vec +from gensim.models import word2vec +from gensim.models.doc2vec import TaggedDocument + +module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder +datapath = lambda fname: os.path.join(module_path, 'test_data', fname) + +logger = logging.getLogger('gensim.test.test_doc2vec') + + +class LeeCorpus(object): + def __iter__(self): + with open(datapath('lee_background.cor')) as f: + for line in f: + yield utils.simple_preprocess(line) + +class DocsLeeCorpus(object): + def __init__(self, string_tags=False): + self.string_tags = string_tags + + def _tag(self, i): + return i if not self.string_tags else '_*%d' % i + + def __iter__(self): + with open(datapath('lee_background.cor')) as f: + for i, line in enumerate(f): + yield TaggedDocument(utils.simple_preprocess(line),[self._tag(i)]) + + +sentences = [ + ['human', 'interface', 'computer'], + ['survey', 'user', 'computer', 'system', 'response', 'time'], + ['eps', 'user', 'interface', 'system'], + ['system', 'human', 'system', 'eps'], + ['user', 'response', 'time'], + ['trees'], + ['graph', 'trees'], + ['graph', 'minors', 'trees'], + ['graph', 'minors', 'survey'] + ] + +sentences = [TaggedDocument(words,[i]) for i, words in enumerate(sentences)] + + +def testfile(): + # temporary data will be stored to this file + return os.path.join(tempfile.gettempdir(), 'gensim_doc2vec.tst') + + +class TestDoc2VecModel(unittest.TestCase): + def test_persistence(self): + """Test storing/loading the entire model.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), min_count=1) + 
model.save(testfile()) + self.models_equal(model, doc2vec.Doc2Vec.load(testfile())) + + def test_load_mmap(self): + """Test storing/loading the entire model.""" + model = doc2vec.Doc2Vec(sentences, min_count=1) + + # test storing the internal arrays into separate files + model.save(testfile(), sep_limit=0) + self.models_equal(model, doc2vec.Doc2Vec.load(testfile())) + + # make sure mmaping the arrays back works, too + self.models_equal(model, doc2vec.Doc2Vec.load(testfile(), mmap='r')) + + def test_int_doctags(self): + """Test doc2vec doctag alternatives""" + corpus = DocsLeeCorpus() + + model = doc2vec.Doc2Vec(min_count=1) + model.build_vocab(corpus) + self.assertEqual(len(model.docvecs.doctag_syn0),299) + self.assertEqual(model.docvecs[0].shape,(300,)) + self.assertRaises(KeyError,model.__getitem__,'_*0') + + def test_string_doctags(self): + """Test doc2vec doctag alternatives""" + corpus = DocsLeeCorpus(True) + + model = doc2vec.Doc2Vec(min_count=1) + model.build_vocab(corpus) + self.assertEqual(len(model.docvecs.doctag_syn0),299) + self.assertEqual(model.docvecs[0].shape,(300,)) + self.assertEqual(model.docvecs['_*0'].shape,(300,)) + self.assertEqual(model.docvecs['_*0'],model.docvecs[0]) + + def test_empty_errors(self): + corpus = DocsLeeCorpus() + + # no input => "RuntimeError: you must first build vocabulary before training the model" + self.assertRaises(RuntimeError, doc2vec.Doc2Vec, []) + + # input not empty, but rather completely filtered out + self.assertRaises(RuntimeError, doc2vec.Doc2Vec, corpus, min_count=10000) + + def model_sanity(self, model): + """Any non-trivial model on DocsLeeCorpus can pass these sanity checks""" + fire1 = 0 # doc 0 sydney fires + fire2 = 8 # doc 8 sydney fires + tennis1 = 6 # doc 6 tennis + + sims = model.docvecs.most_similar(fire1) + sims = [(idx, round(dist,5)) for idx, dist in sims] + self.assertTrue(fire2 in [match[0] for match in sims]) + + doc0_vec = model.docvecs[fire1] + sims2 = 
model.docvecs.most_similar(positive=[doc0_vec], topn=11) + sims2 = [(idx, round(dist,5)) for idx, dist in sims2] + self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is doc itself + + self.assertEqual(model.docvecs.doesnt_match([fire1, tennis1, fire2]), tennis1) + + self.assertTrue(model.docvecs.similarity(fire1,fire2) > model.docvecs.similarity(fire1,tennis1)) + + + def test_training(self): + """Test doc2vec training.""" + + corpus = DocsLeeCorpus() + model = doc2vec.Doc2Vec(size=100, min_count=2, iter=20) + model.build_vocab(corpus) + self.assertEquals(model.docvecs.doctag_syn0.shape, (299, 100)) + model.train(corpus) + + self.model_sanity(model) + + # build vocab and train in one step; must be the same as above + model2 = doc2vec.Doc2Vec(corpus, size=100, min_count=2, iter=20) + self.models_equal(model, model2) + + + def test_dbow_hs(self): + """Test DBOW doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=0, hs=1, negative=0, min_count=2, iter=20) + self.model_sanity(model) + + def test_dmm_hs(self): + """Test DM/mean doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, hs=1, negative=0, min_count=2, iter=20) + self.model_sanity(model) + + def test_dms_hs(self): + """Test DM/sum doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, hs=1, negative=0, min_count=2, iter=20) + self.model_sanity(model) + + def test_dmc_hs(self): + """Test DM/concatenate doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=48, hs=1, negative=0, min_count=2, iter=20) + self.model_sanity(model) + + def test_dbow_neg(self): + """Test DBOW doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=0, hs=0, negative=10, min_count=2, iter=20) + self.model_sanity(model) + + def test_dmm_neg(self): + """Test DM/mean doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, hs=0, negative=10, min_count=2, iter=20) + 
self.model_sanity(model) + + def test_dms_neg(self): + """Test DM/sum doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, hs=0, negative=10, min_count=2, iter=20) + self.model_sanity(model) + + def test_dmc_neg(self): + """Test DM/concatenate doc2vec training.""" + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=48, hs=0, negative=10, min_count=2, iter=20) + self.model_sanity(model) + + def test_parallel(self): + """Test doc2vec parallel training.""" + if doc2vec.FAST_VERSION < 0: # don't test the plain NumPy version for parallelism (too slow) + return + + corpus = utils.RepeatCorpus(DocsLeeCorpus(), 10000) + + for workers in [2, 4]: + model = doc2vec.Doc2Vec(corpus, workers=workers) + self.model_sanity(model) + + def testRNG(self): + """Test doc2vec results identical with identical RNG seed.""" + model = doc2vec.Doc2Vec(sentences, hs=0, negative=15, min_count=2, seed=42, workers=1) + model2 = doc2vec.Doc2Vec(sentences, hs=0, negative=15, min_count=2, seed=42, workers=1) + self.models_equal(model, model2) + + def models_equal(self, model, model2): + # check words/hidden-weights + self.assertEqual(len(model.vocab), len(model2.vocab)) + self.assertTrue(np.allclose(model.syn0, model2.syn0)) + if model.hs: + self.assertTrue(np.allclose(model.syn1, model2.syn1)) + if model.negative: + self.assertTrue(np.allclose(model.syn1neg, model2.syn1neg)) + # check docvecs + self.assertEqual(len(model.docvecs.doctags), len(model2.docvecs.doctags)) + self.assertEqual(len(model.docvecs.index2doctag), len(model2.docvecs.index2doctag)) + self.assertTrue(np.allclose(model.docvecs.doctag_syn0, model2.docvecs.doctag_syn0)) + +#endclass TestWord2VecModel + +# following code is useful for reproducing paragraph-vectors paper sentiment experiments + +class ConcatenatedDoc2Vec(object): + """ + Concatenation of multiple models for reproducing the Paragraph Vectors paper. + Models must have exactly-matching vocabulary and document IDs. 
(Models should + be trained separately; this wrapper just returns concatenated results.) + """ + def __init__(self, models): + self.models = models + if hasattr(models[0],'docvecs'): + self.docvecs = ConcatenatedDocvecs([model.docvecs for model in models]) + + def __getitem__(self, token): + return np.concatenate([model[token] for model in self.models]) + + def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): + return np.concatenate([model.infer_vector(document,alpha,min_alpha,steps) for model in self.models]) + + def train(self, ignored): + pass + +class ConcatenatedDocvecs(object): + def __init__(self, models): + self.models = models + + def __getitem__(self, token): + return np.concatenate([model[token] for model in self.models]) + + +SentimentDocument = namedtuple('SentimentDocument','words tags split sentiment') + + +def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): + """ + Read and return documents from the Stanford Sentiment Treebank + corpus (Rotten Tomatoes reviews), from http://nlp.Stanford.edu/sentiment/ + + Initialize the corpus from a given directory, where + http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip + has been expanded. It's not too big, so compose entirely into memory. 
+ """ + logger.info("loading corpus from %s" % dirname) + + # many mangled chars in sentences (datasetSentences.txt) + chars_sst_mangled = ['à', 'á', 'â', 'ã', 'æ', 'ç', 'è', 'é', 'í', + 'í', 'ï', 'ñ', 'ó', 'ô', 'ö', 'û', 'ü'] + sentence_fixups = [(char.encode('utf-8').decode('latin1'), char) for char in chars_sst_mangled] + # more junk, and the replace necessary for sentence-phrase consistency + sentence_fixups.extend([ + ('Â', ''), + ('\xa0', ' '), + ('-LRB-', '('), + ('-RRB-', ')'), + ]) + # only this junk in phrases (dictionary.txt) + phrase_fixups = [('\xa0', ' ')] + + # sentence_id and split are only positive for the full sentences + + # read sentences to temp {sentence -> (id,split) dict, to correlate with dictionary.txt + info_by_sentence = {} + with open(os.path.join(dirname, 'datasetSentences.txt'), 'r') as sentences, \ + open(os.path.join(dirname, 'datasetSplit.txt'), 'r') as splits: + next(sentences) # legend + next(splits) # legend + for sentence_line, split_line in izip(sentences, splits): + (id, text) = sentence_line.split('\t') + id = int(id) + text = text.rstrip() + for junk, fix in sentence_fixups: + text = text.replace(junk, fix) + (id2, split_i) = split_line.split(',') + assert id == int(id2) + if text not in info_by_sentence: # discard duplicates + info_by_sentence[text] = (id, int(split_i)) + + # read all phrase text + phrases = [None] * 239232 # known size of phrases + with open(os.path.join(dirname, 'dictionary.txt'), 'r') as phrase_lines: + for line in phrase_lines: + (text, id) = line.split('|') + for junk, fix in phrase_fixups: + text = text.replace(junk, fix) + phrases[int(id)] = text.rstrip() # for 1st pass just string + + SentimentPhrase = namedtuple('SentimentPhrase', SentimentDocument._fields + ('sentence_id',)) + # add sentiment labels, correlate with sentences + with open(os.path.join(dirname, 'sentiment_labels.txt'), 'r') as sentiments: + next(sentiments) # legend + for line in sentiments: + (id, sentiment) = line.split('|') + id 
= int(id) + sentiment = float(sentiment) + text = phrases[id] + words = text.split() + if lowercase: + words = [word.lower() for word in words] + (sentence_id, split_i) = info_by_sentence.get(text, (None, 0)) + split = [None,'train','test','dev'][split_i] + phrases[id] = SentimentPhrase(words, [id], split, sentiment, sentence_id) + + assert len([phrase for phrase in phrases if phrase.sentence_id is not None]) == len(info_by_sentence) # all + # counts don't match 8544, 2210, 1101 because 13 TRAIN and 1 DEV sentences are duplicates + assert len([phrase for phrase in phrases if phrase.split == 'train']) == 8531 # 'train' + assert len([phrase for phrase in phrases if phrase.split == 'test']) == 2210 # 'test' + assert len([phrase for phrase in phrases if phrase.split == 'dev']) == 1100 # 'dev' + + logger.info("loaded corpus with %i sentences and %i phrases from %s" + % (len(info_by_sentence), len(phrases), dirname)) + + return phrases + + +if __name__ == '__main__': + logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) + logging.info("using optimization %s" % doc2vec.FAST_VERSION) + unittest.main() From f171b52c8165b3989a8467bfecab7b563f9901c1 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 15:47:35 -0700 Subject: [PATCH 25/49] fix off-by-1 risking segfaults --- gensim/models/doc2vec.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 001739971f..4b24b5a378 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -239,19 +239,19 @@ class DocvecsArray(object): def __init__(self, mapfile_path=None): self.doctags = {} # string -> Doctag (if necessary) self.index2doctag = [] # int index -> String (if necessary) - self.max_index = -1 + self.count = -1 self.mapfile_path = mapfile_path def note_doctag(self, key, sentence_no, sentence_length): if isinstance(key, int): - self.max_index = max(self.max_index, key) + 
self.count = max(self.count, key+1) else: if key in self.doctags: self.doctags[key] = self.doctags[key].repeat(sentence_length) else: self.doctags[key] = Doctag(sentence_no, sentence_length, 1) self.index2doctag.append(key) - self.max_index = max(self.max_index, len(self.index2doctag)) + self.count = max(self.count, len(self.index2doctag)) def indexed_doctags(self, doctag_tokens): return ([i for i in [self._int_index(index,-1) for index in doctag_tokens] if i > -1], @@ -278,12 +278,12 @@ def __getitem__(self, index): def __contains__(self, index): if isinstance(index, int): - return index < self.max_index + return index < self.count else: return index in self.doctags def borrow_from(self, other_docvecs): - self.max_index = other_docvecs.max_index + self.count = other_docvecs.count self.doctags = other_docvecs.doctags self.index2doctag = other_docvecs.index2doctag @@ -291,7 +291,7 @@ def clear_sims(self): self.doctag_syn0norm = None def reset_weights(self, model): - length = max(len(self.doctags),self.max_index) + length = max(len(self.doctags),self.count) if self.mapfile_path: self.doctag_syn0 = np_memmap(self.mapfile_path+'.doctag_syn0',dtype=REAL,mode='w+',shape=(length,model.vector_size)) self.doctag_syn0_lockf = np_memmap(self.mapfile_path+'.doctag_syn0_lockf',dtype=REAL,mode='w+',shape=(length,)) @@ -352,7 +352,7 @@ def most_similar(self, positive=[], negative=[], topn=10): for doc, weight in positive + negative: if isinstance(doc, ndarray): mean.append(weight * doc) - elif doc in self.doctags or doc < self.max_index: + elif doc in self.doctags or doc < self.count: mean.append(weight * self.doctag_syn0norm[self._int_index(doc)]) all_docs.add(self._int_index(doc)) else: @@ -378,7 +378,7 @@ def doesnt_match(self, docs): """ self.init_sims() - docs = [doc for doc in docs if doc in self.doctags or 0 <= doc < self.max_index] # filter out unknowns + docs = [doc for doc in docs if doc in self.doctags or 0 <= doc < self.count] # filter out unknowns logger.debug("using 
docs %s" % docs) if not docs: raise ValueError("cannot select a doc from an empty list") From 93059802246251ff5131041c299132a49de3105f Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 15:58:27 -0700 Subject: [PATCH 26/49] corrections, tolerance tuning --- gensim/test/test_doc2vec.py | 42 ++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index f27db4e7a4..a158838e3c 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -96,7 +96,7 @@ def test_int_doctags(self): model = doc2vec.Doc2Vec(min_count=1) model.build_vocab(corpus) - self.assertEqual(len(model.docvecs.doctag_syn0),299) + self.assertEqual(len(model.docvecs.doctag_syn0),300) self.assertEqual(model.docvecs[0].shape,(300,)) self.assertRaises(KeyError,model.__getitem__,'_*0') @@ -106,10 +106,10 @@ def test_string_doctags(self): model = doc2vec.Doc2Vec(min_count=1) model.build_vocab(corpus) - self.assertEqual(len(model.docvecs.doctag_syn0),299) + self.assertEqual(len(model.docvecs.doctag_syn0),300) self.assertEqual(model.docvecs[0].shape,(300,)) self.assertEqual(model.docvecs['_*0'].shape,(300,)) - self.assertEqual(model.docvecs['_*0'],model.docvecs[0]) + self.assertTrue(all(model.docvecs['_*0']==model.docvecs[0])) def test_empty_errors(self): corpus = DocsLeeCorpus() @@ -126,12 +126,14 @@ def model_sanity(self, model): fire2 = 8 # doc 8 sydney fires tennis1 = 6 # doc 6 tennis - sims = model.docvecs.most_similar(fire1) - sims = [(idx, round(dist,5)) for idx, dist in sims] + sims = model.docvecs.most_similar(fire1,topn=20) + sims = [(idx, round(dist,5)) for idx, dist in sims] + if fire2 not in [match[0] for match in sims]: + print(sims) self.assertTrue(fire2 in [match[0] for match in sims]) doc0_vec = model.docvecs[fire1] - sims2 = model.docvecs.most_similar(positive=[doc0_vec], topn=11) + sims2 = model.docvecs.most_similar(positive=[doc0_vec], topn=21) sims2 = [(idx, 
round(dist,5)) for idx, dist in sims2] self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is doc itself @@ -139,14 +141,12 @@ def model_sanity(self, model): self.assertTrue(model.docvecs.similarity(fire1,fire2) > model.docvecs.similarity(fire1,tennis1)) - def test_training(self): """Test doc2vec training.""" - corpus = DocsLeeCorpus() model = doc2vec.Doc2Vec(size=100, min_count=2, iter=20) model.build_vocab(corpus) - self.assertEquals(model.docvecs.doctag_syn0.shape, (299, 100)) + self.assertEqual(model.docvecs.doctag_syn0.shape, (300, 100)) model.train(corpus) self.model_sanity(model) @@ -155,7 +155,6 @@ def test_training(self): model2 = doc2vec.Doc2Vec(corpus, size=100, min_count=2, iter=20) self.models_equal(model, model2) - def test_dbow_hs(self): """Test DBOW doc2vec training.""" model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=0, hs=1, negative=0, min_count=2, iter=20) @@ -163,17 +162,17 @@ def test_dbow_hs(self): def test_dmm_hs(self): """Test DM/mean doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) self.model_sanity(model) def test_dms_hs(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) self.model_sanity(model) def test_dmc_hs(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=48, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) self.model_sanity(model) def test_dbow_neg(self): @@ -183,17 +182,17 @@ def test_dbow_neg(self): def test_dmm_neg(self): """Test DM/mean 
doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) self.model_sanity(model) def test_dms_neg(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) self.model_sanity(model) def test_dmc_neg(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=48, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) self.model_sanity(model) def test_parallel(self): @@ -207,10 +206,15 @@ def test_parallel(self): model = doc2vec.Doc2Vec(corpus, workers=workers) self.model_sanity(model) - def testRNG(self): + def test_deterministic_seeding(self): """Test doc2vec results identical with identical RNG seed.""" - model = doc2vec.Doc2Vec(sentences, hs=0, negative=15, min_count=2, seed=42, workers=1) - model2 = doc2vec.Doc2Vec(sentences, hs=0, negative=15, min_count=2, seed=42, workers=1) + # hs + model = doc2vec.Doc2Vec(DocsLeeCorpus(), seed=42, workers=1) + model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), seed=42, workers=1) + self.models_equal(model, model2) + # neg + model = doc2vec.Doc2Vec(DocsLeeCorpus(), hs=0, negative=3, seed=42, workers=1) + model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), hs=0, negative=3, seed=42, workers=1) self.models_equal(model, model2) def models_equal(self, model, model2): From 15241a8c0f8146a0d3336da1d10b6dffff327117 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 16:14:00 -0700 Subject: [PATCH 27/49] looser float matching --- gensim/test/test_doc2vec.py | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index a158838e3c..d89325358e 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -127,14 +127,14 @@ def model_sanity(self, model): tennis1 = 6 # doc 6 tennis sims = model.docvecs.most_similar(fire1,topn=20) - sims = [(idx, round(dist,5)) for idx, dist in sims] + sims = [(idx, round(dist,4)) for idx, dist in sims] if fire2 not in [match[0] for match in sims]: print(sims) self.assertTrue(fire2 in [match[0] for match in sims]) doc0_vec = model.docvecs[fire1] sims2 = model.docvecs.most_similar(positive=[doc0_vec], topn=21) - sims2 = [(idx, round(dist,5)) for idx, dist in sims2] + sims2 = [(idx, round(dist,4)) for idx, dist in sims2] self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is doc itself self.assertEqual(model.docvecs.doesnt_match([fire1, tennis1, fire2]), tennis1) From c44cf58a396f53f1e7b941e1fb362e0109fd794c Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 16:17:37 -0700 Subject: [PATCH 28/49] clarify shrunken sentence_len --- gensim/models/doc2vec_inner.c | 1010 ++++++++++++++++--------------- gensim/models/doc2vec_inner.pyx | 5 +- 2 files changed, 512 insertions(+), 503 deletions(-) diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index c699f2212b..1032adb4fe 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -3580,7 +3580,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * for i in range(sentence_len): * predict_word = word_vocabs[i] # <<<<<<<<<<<<<< * if predict_word is None: - * codelens[i] = 0 + * # shrink sentence to leave out word */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_10); @@ 
-3591,39 +3591,47 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * for i in range(sentence_len): * predict_word = word_vocabs[i] * if predict_word is None: # <<<<<<<<<<<<<< - * codelens[i] = 0 - * else: + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 */ __pyx_t_5 = (__pyx_v_predict_word == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":341 - * predict_word = word_vocabs[i] + /* "trunk/gensim/models/doc2vec_inner.pyx":342 * if predict_word is None: - * codelens[i] = 0 # <<<<<<<<<<<<<< + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + * continue # leaving j unchanged + * else: + */ + __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + + /* "trunk/gensim/models/doc2vec_inner.pyx":343 + * # shrink sentence to leave out word + * sentence_len = sentence_len - 1 + * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: * indexes[i] = predict_word.index */ - (__pyx_v_codelens[__pyx_v_i]) = 0; - goto __pyx_L12; + goto __pyx_L10_continue; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":343 - * codelens[i] = 0 + /* "trunk/gensim/models/doc2vec_inner.pyx":345 + * continue # leaving j unchanged * else: * indexes[i] = predict_word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(predict_word.code) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_index); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 343; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_10); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":344 + /* "trunk/gensim/models/doc2vec_inner.pyx":346 * else: * indexes[i] = predict_word.index * if hs: # <<<<<<<<<<<<<< @@ -3633,49 +3641,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":345 + /* "trunk/gensim/models/doc2vec_inner.pyx":347 * indexes[i] = predict_word.index * if hs: * codelens[i] = len(predict_word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_7 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 345; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_10); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":346 + /* "trunk/gensim/models/doc2vec_inner.pyx":348 * if hs: * codelens[i] = len(predict_word.code) * codes[i] = 
np.PyArray_DATA(predict_word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(predict_word.point) * else: */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_code); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":347 + /* "trunk/gensim/models/doc2vec_inner.pyx":349 * codelens[i] = len(predict_word.code) * codes[i] = np.PyArray_DATA(predict_word.code) * points[i] = np.PyArray_DATA(predict_word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_predict_word, __pyx_n_s_point); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 347; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_10) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_10, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 349; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_10))); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L13; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":349 + /* "trunk/gensim/models/doc2vec_inner.pyx":351 * points[i] = np.PyArray_DATA(predict_word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -3686,7 +3694,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L13:; - /* "trunk/gensim/models/doc2vec_inner.pyx":350 + /* "trunk/gensim/models/doc2vec_inner.pyx":352 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3695,10 +3703,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_result = (__pyx_v_result + 1); } - __pyx_L12:; + __pyx_L10_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":351 + /* "trunk/gensim/models/doc2vec_inner.pyx":353 * codelens[i] = 1 * result += 1 * if _train_words: # <<<<<<<<<<<<<< @@ -3708,7 +3716,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v__train_words != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":353 + /* "trunk/gensim/models/doc2vec_inner.pyx":355 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3716,17 +3724,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * for i in range(doctag_len): */ __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_t_15 = NULL; __pyx_t_7 = 0; @@ -3740,7 +3748,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_7 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_7); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -3754,7 +3762,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_GIVEREF(__pyx_t_6); __pyx_t_1 = 0; __pyx_t_6 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -3762,9 +3770,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_8 = __pyx_t_10; __Pyx_INCREF(__pyx_t_8); __pyx_t_7 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_10); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = 
Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; for (;;) { @@ -3772,16 +3780,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_7); __Pyx_INCREF(__pyx_t_10); __pyx_t_7++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_10 = PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = 
PySequence_ITEM(__pyx_t_8, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -3790,7 +3798,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -3801,17 +3809,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":354 + /* "trunk/gensim/models/doc2vec_inner.pyx":356 * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 354; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":353 + /* "trunk/gensim/models/doc2vec_inner.pyx":355 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3824,7 +3832,7 
@@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L14:; - /* "trunk/gensim/models/doc2vec_inner.pyx":355 + /* "trunk/gensim/models/doc2vec_inner.pyx":357 * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -3835,20 +3843,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":356 + /* "trunk/gensim/models/doc2vec_inner.pyx":358 * reduced_windows[i] = item * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 356; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":357 + /* "trunk/gensim/models/doc2vec_inner.pyx":359 * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ 
-3858,7 +3866,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":360 + /* "trunk/gensim/models/doc2vec_inner.pyx":362 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3872,7 +3880,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":361 + /* "trunk/gensim/models/doc2vec_inner.pyx":363 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3883,7 +3891,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":362 + /* "trunk/gensim/models/doc2vec_inner.pyx":364 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -3893,7 +3901,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":363 + /* "trunk/gensim/models/doc2vec_inner.pyx":365 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -3903,7 +3911,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ goto __pyx_L22_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":364 + /* "trunk/gensim/models/doc2vec_inner.pyx":366 * if codelens[i] == 0: * continue * if _train_words: # simultaneous skip-gram wordvec-training # <<<<<<<<<<<<<< @@ -3913,7 +3921,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v__train_words != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":365 + /* "trunk/gensim/models/doc2vec_inner.pyx":367 * continue * if _train_words: # 
simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3922,7 +3930,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":366 + /* "trunk/gensim/models/doc2vec_inner.pyx":368 * if _train_words: # simultaneous skip-gram wordvec-training * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -3932,7 +3940,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_j < 0) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":367 + /* "trunk/gensim/models/doc2vec_inner.pyx":369 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3944,7 +3952,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L26:; - /* "trunk/gensim/models/doc2vec_inner.pyx":368 + /* "trunk/gensim/models/doc2vec_inner.pyx":370 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3953,7 +3961,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":369 + /* "trunk/gensim/models/doc2vec_inner.pyx":371 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -3963,7 +3971,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":370 + /* "trunk/gensim/models/doc2vec_inner.pyx":372 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -3975,7 +3983,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } 
__pyx_L27:; - /* "trunk/gensim/models/doc2vec_inner.pyx":371 + /* "trunk/gensim/models/doc2vec_inner.pyx":373 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -3986,7 +3994,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_18 = __pyx_v_j; __pyx_t_18 < __pyx_t_11; __pyx_t_18+=1) { __pyx_v_j = __pyx_t_18; - /* "trunk/gensim/models/doc2vec_inner.pyx":372 + /* "trunk/gensim/models/doc2vec_inner.pyx":374 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -4004,7 +4012,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L31_bool_binop_done:; if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":373 + /* "trunk/gensim/models/doc2vec_inner.pyx":375 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< @@ -4014,7 +4022,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ goto __pyx_L28_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":374 + /* "trunk/gensim/models/doc2vec_inner.pyx":376 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< @@ -4024,7 +4032,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":376 + /* "trunk/gensim/models/doc2vec_inner.pyx":378 * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< @@ -4036,7 +4044,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":378 + /* "trunk/gensim/models/doc2vec_inner.pyx":380 * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, 
_learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< @@ -4046,7 +4054,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":380 + /* "trunk/gensim/models/doc2vec_inner.pyx":382 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -4063,7 +4071,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":385 + /* "trunk/gensim/models/doc2vec_inner.pyx":387 * * # docvec-training * for j in range(doctag_len): # <<<<<<<<<<<<<< @@ -4074,7 +4082,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":386 + /* "trunk/gensim/models/doc2vec_inner.pyx":388 * # docvec-training * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< @@ -4084,7 +4092,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":387 + /* "trunk/gensim/models/doc2vec_inner.pyx":389 * for j in range(doctag_len): * if hs: * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], # <<<<<<<<<<<<<< @@ -4096,7 +4104,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } __pyx_L37:; - /* "trunk/gensim/models/doc2vec_inner.pyx":389 + /* "trunk/gensim/models/doc2vec_inner.pyx":391 * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: 
# <<<<<<<<<<<<<< @@ -4106,7 +4114,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":390 + /* "trunk/gensim/models/doc2vec_inner.pyx":392 * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< @@ -4122,7 +4130,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } } - /* "trunk/gensim/models/doc2vec_inner.pyx":360 + /* "trunk/gensim/models/doc2vec_inner.pyx":362 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4140,7 +4148,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } } - /* "trunk/gensim/models/doc2vec_inner.pyx":394 + /* "trunk/gensim/models/doc2vec_inner.pyx":396 * _learn_doctags, _learn_hidden, _doctag_locks) * * return result # <<<<<<<<<<<<<< @@ -4148,7 +4156,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 394; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 396; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_r = __pyx_t_8; __pyx_t_8 = 0; @@ -4185,7 +4193,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":397 +/* "trunk/gensim/models/doc2vec_inner.pyx":399 * * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -4222,7 +4230,7 @@ static PyObject 
*__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "trunk/gensim/models/doc2vec_inner.pyx":398 + /* "trunk/gensim/models/doc2vec_inner.pyx":400 * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< @@ -4233,7 +4241,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "trunk/gensim/models/doc2vec_inner.pyx":399 + /* "trunk/gensim/models/doc2vec_inner.pyx":401 * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< @@ -4272,17 +4280,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - 
__Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -4331,7 +4339,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -4368,7 +4376,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -4376,7 +4384,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, 
__pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":397 + /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -4459,76 +4467,76 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":400 + /* "trunk/gensim/models/doc2vec_inner.pyx":402 * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int _learn_doctags = learn_doctags */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 400; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":401 + /* "trunk/gensim/models/doc2vec_inner.pyx":403 * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs 
= model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":402 + /* "trunk/gensim/models/doc2vec_inner.pyx":404 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 402; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doctags = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":403 + /* "trunk/gensim/models/doc2vec_inner.pyx":405 * cdef int negative = model.negative * cdef int _learn_doctags = learn_doctags * cdef int 
_learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 403; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":404 + /* "trunk/gensim/models/doc2vec_inner.pyx":406 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 406; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":405 + /* "trunk/gensim/models/doc2vec_inner.pyx":407 * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * cdef REAL_t count, inv_count = 1.0 * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 405; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 407; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":406 + /* "trunk/gensim/models/doc2vec_inner.pyx":408 * cdef int _learn_hidden = learn_hidden * cdef int cbow_mean = model.cbow_mean * cdef REAL_t count, inv_count = 1.0 # <<<<<<<<<<<<<< @@ -4537,43 +4545,43 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_inv_count = 1.0; - /* "trunk/gensim/models/doc2vec_inner.pyx":414 + /* "trunk/gensim/models/doc2vec_inner.pyx":416 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 416; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":415 + /* "trunk/gensim/models/doc2vec_inner.pyx":417 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 415; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":423 + /* "trunk/gensim/models/doc2vec_inner.pyx":425 * cdef int sentence_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k, m */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 423; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 425; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":426 + /* "trunk/gensim/models/doc2vec_inner.pyx":428 * * cdef int i, j, k, m * cdef long result = 0 # 
<<<<<<<<<<<<<< @@ -4582,7 +4590,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":440 + /* "trunk/gensim/models/doc2vec_inner.pyx":442 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -4593,14 +4601,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":441 + /* "trunk/gensim/models/doc2vec_inner.pyx":443 * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 441; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 443; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; @@ -4608,17 +4616,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L3:; - /* "trunk/gensim/models/doc2vec_inner.pyx":442 + /* "trunk/gensim/models/doc2vec_inner.pyx":444 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 442; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":443 + /* "trunk/gensim/models/doc2vec_inner.pyx":445 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -4629,16 +4637,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":444 + /* "trunk/gensim/models/doc2vec_inner.pyx":446 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 446; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_6); @@ -4647,17 +4655,17 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":445 + /* "trunk/gensim/models/doc2vec_inner.pyx":447 * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 445; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":446 + /* "trunk/gensim/models/doc2vec_inner.pyx":448 * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -4668,14 +4676,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":447 + /* "trunk/gensim/models/doc2vec_inner.pyx":449 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 447; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_6)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 449; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_6); __pyx_t_6 = 0; @@ -4683,17 +4691,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":448 + /* "trunk/gensim/models/doc2vec_inner.pyx":450 * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 448; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":449 + /* "trunk/gensim/models/doc2vec_inner.pyx":451 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -4704,16 +4712,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":450 + /* "trunk/gensim/models/doc2vec_inner.pyx":452 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 450; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); @@ -4722,17 +4730,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L6:; - /* "trunk/gensim/models/doc2vec_inner.pyx":451 + /* "trunk/gensim/models/doc2vec_inner.pyx":453 * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 451; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 453; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":453 + /* "trunk/gensim/models/doc2vec_inner.pyx":455 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -4742,23 +4750,23 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":454 + /* "trunk/gensim/models/doc2vec_inner.pyx":456 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 456; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L7; } __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":456 + /* "trunk/gensim/models/doc2vec_inner.pyx":458 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -4768,89 +4776,89 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":457 + /* "trunk/gensim/models/doc2vec_inner.pyx":459 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 457; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":458 + /* "trunk/gensim/models/doc2vec_inner.pyx":460 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 458; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":459 + /* "trunk/gensim/models/doc2vec_inner.pyx":461 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 459; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 461; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":460 + /* "trunk/gensim/models/doc2vec_inner.pyx":462 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_9 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_9 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_9; goto __pyx_L8; } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":463 + /* "trunk/gensim/models/doc2vec_inner.pyx":465 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -4861,29 +4869,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":464 + /* "trunk/gensim/models/doc2vec_inner.pyx":466 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); 
if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_10) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 464; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 466; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -4894,17 +4902,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L9:; - /* "trunk/gensim/models/doc2vec_inner.pyx":465 + /* "trunk/gensim/models/doc2vec_inner.pyx":467 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 465; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":466 + /* "trunk/gensim/models/doc2vec_inner.pyx":468 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -4915,29 +4923,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_4 = (__pyx_t_5 != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":467 + /* "trunk/gensim/models/doc2vec_inner.pyx":469 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_10)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_8); __Pyx_GIVEREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_8, __pyx_n_s_dtype, __pyx_t_6) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 467; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_10, __pyx_t_1, __pyx_t_8); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 469; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -4948,24 +4956,24 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L10:; - /* "trunk/gensim/models/doc2vec_inner.pyx":468 + /* "trunk/gensim/models/doc2vec_inner.pyx":470 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 468; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":470 + /* "trunk/gensim/models/doc2vec_inner.pyx":472 * _neu1 = np.PyArray_DATA(neu1) * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 * for i in range(sentence_len): */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 472; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_7 < __pyx_t_11) != 0)) { __pyx_t_12 = __pyx_t_7; @@ -4974,7 +4982,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":471 + /* "trunk/gensim/models/doc2vec_inner.pyx":473 * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< @@ -4983,7 +4991,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_j = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":472 + /* "trunk/gensim/models/doc2vec_inner.pyx":474 * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4994,19 +5002,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":473 + /* "trunk/gensim/models/doc2vec_inner.pyx":475 * j = 0 * for i in range(sentence_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: * # shrink sentence to leave out word */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 473; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 475; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":474 + /* "trunk/gensim/models/doc2vec_inner.pyx":476 * for i in range(sentence_len): * word = word_vocabs[i] * if word is None: 
# <<<<<<<<<<<<<< @@ -5017,7 +5025,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_t_4 != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":476 + /* "trunk/gensim/models/doc2vec_inner.pyx":478 * if word is None: * # shrink sentence to leave out word * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< @@ -5026,7 +5034,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":477 + /* "trunk/gensim/models/doc2vec_inner.pyx":479 * # shrink sentence to leave out word * sentence_len = sentence_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< @@ -5037,20 +5045,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":479 + /* "trunk/gensim/models/doc2vec_inner.pyx":481 * continue # leaving j unchanged * else: * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[j] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 479; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 479; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; 
(__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":480 + /* "trunk/gensim/models/doc2vec_inner.pyx":482 * else: * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< @@ -5060,49 +5068,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":481 + /* "trunk/gensim/models/doc2vec_inner.pyx":483 * indexes[j] = word.index * if hs: * codelens[j] = len(word.code) # <<<<<<<<<<<<<< * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_12 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 481; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_12); - /* "trunk/gensim/models/doc2vec_inner.pyx":482 + /* "trunk/gensim/models/doc2vec_inner.pyx":484 * if hs: * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[j] = np.PyArray_DATA(word.point) * result += 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if 
(unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 482; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 484; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":483 + /* "trunk/gensim/models/doc2vec_inner.pyx":485 * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * result += 1 * j = j + 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 483; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 485; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L14; } __pyx_L14:; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":484 + /* "trunk/gensim/models/doc2vec_inner.pyx":486 * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) * result += 1 # <<<<<<<<<<<<<< @@ -5111,7 +5119,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_result = (__pyx_v_result + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":485 + /* "trunk/gensim/models/doc2vec_inner.pyx":487 * points[j] = np.PyArray_DATA(word.point) * result += 1 * j = j + 1 # <<<<<<<<<<<<<< @@ -5123,7 +5131,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L11_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":487 + /* "trunk/gensim/models/doc2vec_inner.pyx":489 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5131,17 +5139,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __pyx_t_2 = 0; - __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_8, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_t_15 = NULL; __pyx_t_12 = 0; @@ -5155,7 +5163,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_12 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_12); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -5169,7 +5177,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_GIVEREF(__pyx_t_10); __pyx_t_1 = 0; __pyx_t_10 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_8, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; @@ -5177,9 +5185,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_8 = __pyx_t_6; __Pyx_INCREF(__pyx_t_8); __pyx_t_12 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_12 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = -1; __pyx_t_8 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); - __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_8)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -5187,16 +5195,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence if (likely(PyList_CheckExact(__pyx_t_8))) { if (__pyx_t_12 >= PyList_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_12 >= PyTuple_GET_SIZE(__pyx_t_8)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_8, __pyx_t_12); __Pyx_INCREF(__pyx_t_6); __pyx_t_12++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_8, __pyx_t_12); __pyx_t_12++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -5205,7 +5213,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 487; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -5216,17 +5224,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_i = 
__pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":488 + /* "trunk/gensim/models/doc2vec_inner.pyx":490 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) */ - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 488; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":487 + /* "trunk/gensim/models/doc2vec_inner.pyx":489 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5236,14 +5244,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":490 + /* "trunk/gensim/models/doc2vec_inner.pyx":492 * reduced_windows[i] = item * * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ - __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; if (((__pyx_t_12 < __pyx_t_11) != 0)) { __pyx_t_7 = 
__pyx_t_12; @@ -5252,7 +5260,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_v_doctag_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":491 + /* "trunk/gensim/models/doc2vec_inner.pyx":493 * * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -5263,20 +5271,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":492 + /* "trunk/gensim/models/doc2vec_inner.pyx":494 * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_8 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_8 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 494; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_8); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 492; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_8); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 494; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":493 + /* "trunk/gensim/models/doc2vec_inner.pyx":495 * for i in range(doctag_len): * _doctag_indexes[i] = 
doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -5286,7 +5294,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":496 + /* "trunk/gensim/models/doc2vec_inner.pyx":498 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5300,7 +5308,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":497 + /* "trunk/gensim/models/doc2vec_inner.pyx":499 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5311,7 +5319,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":498 + /* "trunk/gensim/models/doc2vec_inner.pyx":500 * with nogil: * for i in range(sentence_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -5320,7 +5328,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":499 + /* "trunk/gensim/models/doc2vec_inner.pyx":501 * for i in range(sentence_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -5330,7 +5338,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((__pyx_v_j < 0) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":500 + /* "trunk/gensim/models/doc2vec_inner.pyx":502 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5342,7 +5350,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L24:; - /* "trunk/gensim/models/doc2vec_inner.pyx":501 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":503 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -5351,7 +5359,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/doc2vec_inner.pyx":502 + /* "trunk/gensim/models/doc2vec_inner.pyx":504 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -5361,7 +5369,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":503 + /* "trunk/gensim/models/doc2vec_inner.pyx":505 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -5373,7 +5381,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L25:; - /* "trunk/gensim/models/doc2vec_inner.pyx":506 + /* "trunk/gensim/models/doc2vec_inner.pyx":508 * * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -5382,7 +5390,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ memset(__pyx_v__neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":507 + /* "trunk/gensim/models/doc2vec_inner.pyx":509 * # compose l1 (in _neu1) & clear _work * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -5391,7 +5399,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/doc2vec_inner.pyx":508 + /* "trunk/gensim/models/doc2vec_inner.pyx":510 * memset(_neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, 
k): # <<<<<<<<<<<<<< @@ -5402,7 +5410,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":509 + /* "trunk/gensim/models/doc2vec_inner.pyx":511 * count = 0.0 * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -5412,7 +5420,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":510 + /* "trunk/gensim/models/doc2vec_inner.pyx":512 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -5423,7 +5431,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":512 + /* "trunk/gensim/models/doc2vec_inner.pyx":514 * continue * else: * count += ONEF # <<<<<<<<<<<<<< @@ -5432,7 +5440,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":513 + /* "trunk/gensim/models/doc2vec_inner.pyx":515 * else: * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -5444,7 +5452,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_L26_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":514 + /* "trunk/gensim/models/doc2vec_inner.pyx":516 * count += ONEF * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -5455,7 +5463,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":515 + 
/* "trunk/gensim/models/doc2vec_inner.pyx":517 * our_saxpy(&size, &ONEF, &_word_vectors[indexes[m] * size], &ONE, _neu1, &ONE) * for m in range(doctag_len): * count += ONEF # <<<<<<<<<<<<<< @@ -5464,7 +5472,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF); - /* "trunk/gensim/models/doc2vec_inner.pyx":516 + /* "trunk/gensim/models/doc2vec_inner.pyx":518 * for m in range(doctag_len): * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) # <<<<<<<<<<<<<< @@ -5474,7 +5482,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONEF), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); } - /* "trunk/gensim/models/doc2vec_inner.pyx":517 + /* "trunk/gensim/models/doc2vec_inner.pyx":519 * count += ONEF * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): # <<<<<<<<<<<<<< @@ -5484,7 +5492,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":518 + /* "trunk/gensim/models/doc2vec_inner.pyx":520 * our_saxpy(&size, &ONEF, &_doctag_vectors[_doctag_indexes[m] * size], &ONE, _neu1, &ONE) * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< @@ -5496,7 +5504,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L31:; - /* "trunk/gensim/models/doc2vec_inner.pyx":519 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":521 * if count > (0.5): * inv_count = ONEF/count * if cbow_mean: # <<<<<<<<<<<<<< @@ -5506,30 +5514,30 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v_cbow_mean != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":520 + /* "trunk/gensim/models/doc2vec_inner.pyx":522 * inv_count = ONEF/count * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error - * + * if hs: */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v__neu1, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); goto __pyx_L32; } __pyx_L32:; - /* "trunk/gensim/models/doc2vec_inner.pyx":521 + /* "trunk/gensim/models/doc2vec_inner.pyx":523 * if cbow_mean: * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< - * * if hs: + * fast_sentence_dm_hs(points[i], codes[i], codelens[i], */ memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":523 + /* "trunk/gensim/models/doc2vec_inner.pyx":524 + * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error - * * if hs: # <<<<<<<<<<<<<< * fast_sentence_dm_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, @@ -5537,8 +5545,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v_hs != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":524 - * + /* "trunk/gensim/models/doc2vec_inner.pyx":525 + * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: * fast_sentence_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< * _neu1, syn1, _alpha, _work, @@ -5549,7 +5557,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":527 + /* "trunk/gensim/models/doc2vec_inner.pyx":528 * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -5559,7 +5567,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v_negative != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":528 + /* "trunk/gensim/models/doc2vec_inner.pyx":529 * size, _learn_hidden) * if negative: * next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< @@ -5571,7 +5579,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L34:; - /* "trunk/gensim/models/doc2vec_inner.pyx":532 + /* "trunk/gensim/models/doc2vec_inner.pyx":533 * size, _learn_hidden) * * if not cbow_mean: # <<<<<<<<<<<<<< @@ -5581,7 +5589,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((!(__pyx_v_cbow_mean != 0)) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":533 + /* "trunk/gensim/models/doc2vec_inner.pyx":534 * * if not cbow_mean: * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< @@ -5593,7 +5601,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L35:; - /* "trunk/gensim/models/doc2vec_inner.pyx":535 + /* "trunk/gensim/models/doc2vec_inner.pyx":536 * sscal(&size, &inv_count, _work, &ONE) # (does this need BLAS-variants like saxpy?) * # apply accumulated error in work * if _learn_doctags: # <<<<<<<<<<<<<< @@ -5603,7 +5611,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v__learn_doctags != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":536 + /* "trunk/gensim/models/doc2vec_inner.pyx":537 * # apply accumulated error in work * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -5614,7 +5622,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":537 + /* "trunk/gensim/models/doc2vec_inner.pyx":538 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, # <<<<<<<<<<<<<< @@ -5627,7 +5635,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } __pyx_L36:; - /* "trunk/gensim/models/doc2vec_inner.pyx":539 + /* "trunk/gensim/models/doc2vec_inner.pyx":540 * our_saxpy(&size, &_doctag_locks[_doctag_indexes[m]], _work, * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -5637,7 +5645,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = (__pyx_v__learn_words != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":540 + /* "trunk/gensim/models/doc2vec_inner.pyx":541 * &ONE, &_doctag_vectors[_doctag_indexes[m] * size], &ONE) * if _learn_words: * for m in range(j, k): # <<<<<<<<<<<<<< @@ -5648,7 +5656,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_m = __pyx_t_19; - /* "trunk/gensim/models/doc2vec_inner.pyx":541 + /* "trunk/gensim/models/doc2vec_inner.pyx":542 * if _learn_words: * for m in range(j, k): * if m == i: # <<<<<<<<<<<<<< @@ -5658,7 +5666,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_5) { - /* "trunk/gensim/models/doc2vec_inner.pyx":542 + /* "trunk/gensim/models/doc2vec_inner.pyx":543 * for m in range(j, k): * if m == i: * continue # <<<<<<<<<<<<<< @@ -5669,7 +5677,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":544 + /* "trunk/gensim/models/doc2vec_inner.pyx":545 * continue * else: * our_saxpy(&size, &_word_locks[indexes[m]], _work, &ONE, # <<<<<<<<<<<<<< @@ -5686,7 +5694,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":496 + /* "trunk/gensim/models/doc2vec_inner.pyx":498 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5704,7 +5712,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":547 + /* "trunk/gensim/models/doc2vec_inner.pyx":548 * &_word_vectors[indexes[m] * size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -5712,13 +5720,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 547; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 548; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} __Pyx_GOTREF(__pyx_t_8); __pyx_r = __pyx_t_8; __pyx_t_8 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":397 + /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -5750,7 +5758,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence return __pyx_r; } -/* "trunk/gensim/models/doc2vec_inner.pyx":550 +/* "trunk/gensim/models/doc2vec_inner.pyx":551 * * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -5787,7 +5795,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence values[4] = ((PyObject *)Py_None); values[5] = ((PyObject *)Py_None); - /* "trunk/gensim/models/doc2vec_inner.pyx":551 + /* "trunk/gensim/models/doc2vec_inner.pyx":552 * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< @@ -5798,7 +5806,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence values[7] = ((PyObject *)Py_True); values[8] = ((PyObject *)Py_True); - /* "trunk/gensim/models/doc2vec_inner.pyx":552 + /* "trunk/gensim/models/doc2vec_inner.pyx":553 * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< @@ -5837,17 +5845,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + 
__Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -5896,7 +5904,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -5933,7 +5941,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5941,7 +5949,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":550 + /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -6021,125 +6029,125 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __Pyx_INCREF(__pyx_v_doctag_vectors); __Pyx_INCREF(__pyx_v_doctag_locks); - /* "trunk/gensim/models/doc2vec_inner.pyx":553 + /* "trunk/gensim/models/doc2vec_inner.pyx":554 * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int _learn_doctags = learn_doctags */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 553; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 553; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":554 + /* "trunk/gensim/models/doc2vec_inner.pyx":555 * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 554; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":555 + /* "trunk/gensim/models/doc2vec_inner.pyx":556 * cdef int hs = model.hs * cdef int negative = 
model.negative * cdef int _learn_doctags = learn_doctags # <<<<<<<<<<<<<< * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 555; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_doctags); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 556; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_doctags = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":556 + /* "trunk/gensim/models/doc2vec_inner.pyx":557 * cdef int negative = model.negative * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words # <<<<<<<<<<<<<< * cdef int _learn_hidden = learn_hidden * */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 556; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_words); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_words = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":557 + /* "trunk/gensim/models/doc2vec_inner.pyx":558 * cdef int _learn_doctags = learn_doctags * cdef int _learn_words = learn_words * cdef int _learn_hidden = learn_hidden # <<<<<<<<<<<<<< * * cdef REAL_t *_word_vectors */ - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 557; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_v_learn_hidden); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 558; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__learn_hidden = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":565 + /* "trunk/gensim/models/doc2vec_inner.pyx":566 * cdef REAL_t *_work * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 565; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":566 + /* "trunk/gensim/models/doc2vec_inner.pyx":567 * cdef REAL_t *_neu1 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size # <<<<<<<<<<<<<< * cdef int vector_size = model.vector_size * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 566; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_layer1_size = __pyx_t_2; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":567 + /* "trunk/gensim/models/doc2vec_inner.pyx":568 * cdef REAL_t _alpha = alpha * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 568; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 567; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 568; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_vector_size = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":575 + /* "trunk/gensim/models/doc2vec_inner.pyx":576 * cdef int sentence_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * cdef int expected_doctag_len = model.dm_tag_count * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 575; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":576 + /* "trunk/gensim/models/doc2vec_inner.pyx":577 * cdef int doctag_len * cdef int window = model.window * cdef int expected_doctag_len = model.dm_tag_count # <<<<<<<<<<<<<< * * cdef int i, j, k, m, n */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_dm_tag_count); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 576; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 577; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_expected_doctag_len = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":579 + /* "trunk/gensim/models/doc2vec_inner.pyx":580 * * cdef int i, j, k, m, n * cdef long result = 0 # <<<<<<<<<<<<<< @@ -6148,33 +6156,33 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":580 + /* "trunk/gensim/models/doc2vec_inner.pyx":581 * cdef int i, j, k, m, n * cdef long result = 0 * cdef int null_word_index = model.vocab['\0'].index # <<<<<<<<<<<<<< * * # 
For hierarchical softmax */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vocab); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = PyObject_GetItem(__pyx_t_1, __pyx_kp_s__5); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_index); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_null_word_index = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":593 + /* "trunk/gensim/models/doc2vec_inner.pyx":594 * cdef unsigned long long next_random * * doctag_len = min(MAX_SENTENCE_LEN, 
len(doctag_indexes)) # <<<<<<<<<<<<<< * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected nmber of tags */ - __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 594; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_5 < __pyx_t_6) != 0)) { __pyx_t_7 = __pyx_t_5; @@ -6183,7 +6191,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_v_doctag_len = ((int)__pyx_t_7); - /* "trunk/gensim/models/doc2vec_inner.pyx":594 + /* "trunk/gensim/models/doc2vec_inner.pyx":595 * * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< @@ -6193,7 +6201,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = ((__pyx_v_doctag_len != __pyx_v_expected_doctag_len) != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":595 + /* "trunk/gensim/models/doc2vec_inner.pyx":596 * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected nmber of tags # <<<<<<<<<<<<<< @@ -6206,7 +6214,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":598 + /* "trunk/gensim/models/doc2vec_inner.pyx":599 * * # default vectors, locks from syn0/doctag_syn0 * if word_vectors is None: # <<<<<<<<<<<<<< @@ -6217,14 +6225,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":599 + /* "trunk/gensim/models/doc2vec_inner.pyx":600 * # default vectors, locks from 
syn0/doctag_syn0 * if word_vectors is None: * word_vectors = model.syn0 # <<<<<<<<<<<<<< * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 599; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF_SET(__pyx_v_word_vectors, __pyx_t_1); __pyx_t_1 = 0; @@ -6232,17 +6240,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L4:; - /* "trunk/gensim/models/doc2vec_inner.pyx":600 + /* "trunk/gensim/models/doc2vec_inner.pyx":601 * if word_vectors is None: * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) # <<<<<<<<<<<<<< * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 */ - if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 600; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 601; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":601 + /* "trunk/gensim/models/doc2vec_inner.pyx":602 * word_vectors = model.syn0 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: # <<<<<<<<<<<<<< @@ -6253,16 +6261,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 
= (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":602 + /* "trunk/gensim/models/doc2vec_inner.pyx":603 * _word_vectors = (np.PyArray_DATA(word_vectors)) * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 # <<<<<<<<<<<<<< * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 602; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_doctag_syn0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_vectors, __pyx_t_4); @@ -6271,17 +6279,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L5:; - /* "trunk/gensim/models/doc2vec_inner.pyx":603 + /* "trunk/gensim/models/doc2vec_inner.pyx":604 * if doctag_vectors is None: * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) # <<<<<<<<<<<<<< * if word_locks is None: * word_locks = model.syn0_lockf */ - if (!(likely(((__pyx_v_doctag_vectors) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 603; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_vectors) == 
Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_vectors, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 604; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_vectors = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_vectors))); - /* "trunk/gensim/models/doc2vec_inner.pyx":604 + /* "trunk/gensim/models/doc2vec_inner.pyx":605 * doctag_vectors = model.docvecs.doctag_syn0 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: # <<<<<<<<<<<<<< @@ -6292,14 +6300,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":605 + /* "trunk/gensim/models/doc2vec_inner.pyx":606 * _doctag_vectors = (np.PyArray_DATA(doctag_vectors)) * if word_locks is None: * word_locks = model.syn0_lockf # <<<<<<<<<<<<<< * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 605; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF_SET(__pyx_v_word_locks, __pyx_t_4); __pyx_t_4 = 0; @@ -6307,17 +6315,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L6:; - /* "trunk/gensim/models/doc2vec_inner.pyx":606 + /* "trunk/gensim/models/doc2vec_inner.pyx":607 * if word_locks is None: * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) # <<<<<<<<<<<<<< * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf */ - if (!(likely(((__pyx_v_word_locks) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 606; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_word_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_word_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 607; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__word_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_word_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":607 + /* "trunk/gensim/models/doc2vec_inner.pyx":608 * word_locks = model.syn0_lockf * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: # <<<<<<<<<<<<<< @@ -6328,16 +6336,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":608 + /* "trunk/gensim/models/doc2vec_inner.pyx":609 * _word_locks = (np.PyArray_DATA(word_locks)) * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf # <<<<<<<<<<<<<< * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_docvecs); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 608; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_doctag_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF_SET(__pyx_v_doctag_locks, __pyx_t_1); @@ -6346,17 +6354,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L7:; - /* "trunk/gensim/models/doc2vec_inner.pyx":609 + /* "trunk/gensim/models/doc2vec_inner.pyx":610 * if doctag_locks is None: * doctag_locks = model.docvecs.doctag_syn0_lockf * _doctag_locks = (np.PyArray_DATA(doctag_locks)) # <<<<<<<<<<<<<< * * if hs: */ - if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 609; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_doctag_locks) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_doctag_locks, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__doctag_locks = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_doctag_locks))); - /* "trunk/gensim/models/doc2vec_inner.pyx":611 + /* "trunk/gensim/models/doc2vec_inner.pyx":612 * _doctag_locks = (np.PyArray_DATA(doctag_locks)) * * if hs: # <<<<<<<<<<<<<< @@ -6366,23 +6374,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_hs != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":612 + /* "trunk/gensim/models/doc2vec_inner.pyx":613 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 612; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L8; } __pyx_L8:; - /* "trunk/gensim/models/doc2vec_inner.pyx":614 + /* "trunk/gensim/models/doc2vec_inner.pyx":615 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6392,89 +6400,89 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_v_negative != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":615 + /* "trunk/gensim/models/doc2vec_inner.pyx":616 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":616 + /* "trunk/gensim/models/doc2vec_inner.pyx":617 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":617 + /* "trunk/gensim/models/doc2vec_inner.pyx":618 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno 
= __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 617; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_7; - /* "trunk/gensim/models/doc2vec_inner.pyx":618 + /* "trunk/gensim/models/doc2vec_inner.pyx":619 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_4); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_random); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); 
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_10, __pyx_n_s_randint); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = PyNumber_Add(__pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_11 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_4); if (unlikely((__pyx_t_11 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_next_random = __pyx_t_11; goto __pyx_L9; } __pyx_L9:; - /* 
"trunk/gensim/models/doc2vec_inner.pyx":621 + /* "trunk/gensim/models/doc2vec_inner.pyx":622 * * # convert Python structures to primitive types, so we can release the GIL * if work is None: # <<<<<<<<<<<<<< @@ -6485,29 +6493,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":622 + /* "trunk/gensim/models/doc2vec_inner.pyx":623 * # convert Python structures to primitive types, so we can release the GIL * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) * if neu1 is None: */ - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_12) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; - __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 622; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_PyObject_Call(__pyx_t_4, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -6518,17 +6526,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L10:; - /* "trunk/gensim/models/doc2vec_inner.pyx":623 + /* "trunk/gensim/models/doc2vec_inner.pyx":624 * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) */ - if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, 
__pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 624; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); - /* "trunk/gensim/models/doc2vec_inner.pyx":624 + /* "trunk/gensim/models/doc2vec_inner.pyx":625 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) * if neu1 is None: # <<<<<<<<<<<<<< @@ -6539,29 +6547,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_8 = (__pyx_t_9 != 0); if (__pyx_t_8) { - /* "trunk/gensim/models/doc2vec_inner.pyx":625 + /* "trunk/gensim/models/doc2vec_inner.pyx":626 * _work = np.PyArray_DATA(work) * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _neu1 = np.PyArray_DATA(neu1) * */ - __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_12 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_12); - __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_10); __Pyx_GIVEREF(__pyx_t_10); __pyx_t_10 = 0; - __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyDict_New(); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); - __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_REAL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_t_10, __pyx_n_s_dtype, __pyx_t_4) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 625; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_12, __pyx_t_1, __pyx_t_10); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_12); __pyx_t_12 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -6572,24 +6580,24 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L11:; - /* "trunk/gensim/models/doc2vec_inner.pyx":626 + /* "trunk/gensim/models/doc2vec_inner.pyx":627 * if neu1 is None: * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) */ - if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 626; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 627; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); - /* "trunk/gensim/models/doc2vec_inner.pyx":628 + /* "trunk/gensim/models/doc2vec_inner.pyx":629 * _neu1 = np.PyArray_DATA(neu1) * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 * for i in range(sentence_len): */ - __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 628; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; if (((__pyx_t_7 < __pyx_t_6) != 0)) { __pyx_t_5 = __pyx_t_7; @@ -6598,7 +6606,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":629 + /* "trunk/gensim/models/doc2vec_inner.pyx":630 * * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< @@ -6607,7 +6615,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_j = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":630 + /* "trunk/gensim/models/doc2vec_inner.pyx":631 * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) * j = 0 * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6618,19 +6626,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":631 + /* "trunk/gensim/models/doc2vec_inner.pyx":632 * j = 0 * for i in range(sentence_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: * # shrink sentence to leave out word */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 631; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":632 + /* "trunk/gensim/models/doc2vec_inner.pyx":633 * for i in range(sentence_len): * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< @@ -6641,7 +6649,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_t_8 != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":634 + /* "trunk/gensim/models/doc2vec_inner.pyx":635 * if word is None: * # shrink sentence to leave out word * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< @@ -6650,7 +6658,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); - /* 
"trunk/gensim/models/doc2vec_inner.pyx":635 + /* "trunk/gensim/models/doc2vec_inner.pyx":636 * # shrink sentence to leave out word * sentence_len = sentence_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< @@ -6661,20 +6669,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":637 + /* "trunk/gensim/models/doc2vec_inner.pyx":638 * continue # leaving j unchanged * else: * indexes[j] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[j] = len(word.code) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 638; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 638; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v_indexes[__pyx_v_j]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":638 + /* "trunk/gensim/models/doc2vec_inner.pyx":639 * else: * indexes[j] = word.index * if hs: # <<<<<<<<<<<<<< @@ -6684,49 +6692,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":639 + /* "trunk/gensim/models/doc2vec_inner.pyx":640 * indexes[j] = word.index * if hs: * codelens[j] = len(word.code) # <<<<<<<<<<<<<< * codes[j] = 
np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 639; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_4); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v_codelens[__pyx_v_j]) = ((int)__pyx_t_5); - /* "trunk/gensim/models/doc2vec_inner.pyx":640 + /* "trunk/gensim/models/doc2vec_inner.pyx":641 * if hs: * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[j] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 640; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
(__pyx_v_codes[__pyx_v_j]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":641 + /* "trunk/gensim/models/doc2vec_inner.pyx":642 * codelens[j] = len(word.code) * codes[j] = np.PyArray_DATA(word.code) * points[j] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[j] = 1 */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); - if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 641; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_4) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_4, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 642; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_j]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_4))); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; goto __pyx_L15; } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":643 + /* "trunk/gensim/models/doc2vec_inner.pyx":644 * points[j] = np.PyArray_DATA(word.point) * else: * codelens[j] = 1 # <<<<<<<<<<<<<< @@ -6737,7 +6745,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L15:; - /* "trunk/gensim/models/doc2vec_inner.pyx":644 + /* "trunk/gensim/models/doc2vec_inner.pyx":645 * else: * codelens[j] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6746,7 +6754,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_result = (__pyx_v_result + 1); 
- /* "trunk/gensim/models/doc2vec_inner.pyx":645 + /* "trunk/gensim/models/doc2vec_inner.pyx":646 * codelens[j] = 1 * result += 1 * j = j + 1 # <<<<<<<<<<<<<< @@ -6758,7 +6766,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_L12_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":647 + /* "trunk/gensim/models/doc2vec_inner.pyx":648 * j = j + 1 * * for i in range(doctag_len): # <<<<<<<<<<<<<< @@ -6769,20 +6777,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":648 + /* "trunk/gensim/models/doc2vec_inner.pyx":649 * * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 * */ - __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_doctag_indexes, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 649; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); - __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 648; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_t_4); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 649; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; (__pyx_v__doctag_indexes[__pyx_v_i]) = __pyx_t_14; - /* "trunk/gensim/models/doc2vec_inner.pyx":649 + /* "trunk/gensim/models/doc2vec_inner.pyx":650 * for i in range(doctag_len): * 
_doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< @@ -6792,7 +6800,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_v_result = (__pyx_v_result + 1); } - /* "trunk/gensim/models/doc2vec_inner.pyx":652 + /* "trunk/gensim/models/doc2vec_inner.pyx":653 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6806,7 +6814,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":653 + /* "trunk/gensim/models/doc2vec_inner.pyx":654 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6817,7 +6825,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; - /* "trunk/gensim/models/doc2vec_inner.pyx":654 + /* "trunk/gensim/models/doc2vec_inner.pyx":655 * with nogil: * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< @@ -6826,7 +6834,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); - /* "trunk/gensim/models/doc2vec_inner.pyx":655 + /* "trunk/gensim/models/doc2vec_inner.pyx":656 * for i in range(sentence_len): * j = i - window # negative OK: will pad with null word * k = i + window + 1 # past sentence end OK: will pad with null word # <<<<<<<<<<<<<< @@ -6835,7 +6843,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_k = ((__pyx_v_i + __pyx_v_window) + 1); - /* "trunk/gensim/models/doc2vec_inner.pyx":658 + /* "trunk/gensim/models/doc2vec_inner.pyx":659 * * # compose l1 & clear work * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -6846,7 +6854,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_16 = 0; __pyx_t_16 < 
__pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":660 + /* "trunk/gensim/models/doc2vec_inner.pyx":661 * for m in range(doctag_len): * # doc vector(s) * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -6856,7 +6864,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence memcpy((&(__pyx_v__neu1[(__pyx_v_m * __pyx_v_vector_size)])), (&(__pyx_v__doctag_vectors[((__pyx_v__doctag_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":662 + /* "trunk/gensim/models/doc2vec_inner.pyx":663 * memcpy(&_neu1[m * vector_size], &_doctag_vectors[_doctag_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * n = 0 # <<<<<<<<<<<<<< @@ -6865,7 +6873,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ __pyx_v_n = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":663 + /* "trunk/gensim/models/doc2vec_inner.pyx":664 * vector_size * cython.sizeof(REAL_t)) * n = 0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -6876,7 +6884,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_16 = __pyx_v_j; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":665 + /* "trunk/gensim/models/doc2vec_inner.pyx":666 * for m in range(j, k): * # word vectors in window * if m == i: # <<<<<<<<<<<<<< @@ -6886,7 +6894,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":666 + /* "trunk/gensim/models/doc2vec_inner.pyx":667 * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< @@ -6896,7 +6904,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence goto __pyx_L25_continue; } - /* "trunk/gensim/models/doc2vec_inner.pyx":667 + /* "trunk/gensim/models/doc2vec_inner.pyx":668 * if m == i: * continue * if m < 0 or m >= sentence_len: # <<<<<<<<<<<<<< @@ -6914,7 +6922,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_L29_bool_binop_done:; if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":668 + /* "trunk/gensim/models/doc2vec_inner.pyx":669 * continue * if m < 0 or m >= sentence_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< @@ -6926,7 +6934,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":670 + /* "trunk/gensim/models/doc2vec_inner.pyx":671 * window_indexes[n] = null_word_index * else: * window_indexes[n] = indexes[m] # <<<<<<<<<<<<<< @@ -6937,7 +6945,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L28:; - /* "trunk/gensim/models/doc2vec_inner.pyx":671 + /* "trunk/gensim/models/doc2vec_inner.pyx":672 * else: * window_indexes[n] = indexes[m] * n = n + 1 # <<<<<<<<<<<<<< @@ -6948,7 +6956,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_L25_continue:; } - /* "trunk/gensim/models/doc2vec_inner.pyx":672 + /* "trunk/gensim/models/doc2vec_inner.pyx":673 * window_indexes[n] = indexes[m] * n = n + 1 * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -6959,7 +6967,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":673 + /* "trunk/gensim/models/doc2vec_inner.pyx":674 * n = n + 1 * for m in range(2 * window): * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], # <<<<<<<<<<<<<< @@ -6969,7 +6977,7 @@ static 
PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence memcpy((&(__pyx_v__neu1[((__pyx_v_doctag_len + __pyx_v_m) * __pyx_v_vector_size)])), (&(__pyx_v__word_vectors[((__pyx_v_window_indexes[__pyx_v_m]) * __pyx_v_vector_size)])), (__pyx_v_vector_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":675 + /* "trunk/gensim/models/doc2vec_inner.pyx":676 * memcpy(&_neu1[(doctag_len + m) * vector_size], &_word_vectors[window_indexes[m] * vector_size], * vector_size * cython.sizeof(REAL_t)) * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< @@ -6978,7 +6986,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence */ memset(__pyx_v__work, 0, (__pyx_v_layer1_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); - /* "trunk/gensim/models/doc2vec_inner.pyx":677 + /* "trunk/gensim/models/doc2vec_inner.pyx":678 * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< @@ -6988,7 +6996,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v_hs != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":678 + /* "trunk/gensim/models/doc2vec_inner.pyx":679 * * if hs: * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< @@ -7000,7 +7008,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L33:; - /* "trunk/gensim/models/doc2vec_inner.pyx":681 + /* "trunk/gensim/models/doc2vec_inner.pyx":682 * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< @@ -7010,7 +7018,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v_negative != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":682 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":683 * layer1_size, vector_size, _learn_hidden) * if negative: * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< @@ -7022,7 +7030,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L34:; - /* "trunk/gensim/models/doc2vec_inner.pyx":686 + /* "trunk/gensim/models/doc2vec_inner.pyx":687 * layer1_size, vector_size, _learn_hidden) * * if _learn_doctags: # <<<<<<<<<<<<<< @@ -7032,7 +7040,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v__learn_doctags != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":687 + /* "trunk/gensim/models/doc2vec_inner.pyx":688 * * if _learn_doctags: * for m in range(doctag_len): # <<<<<<<<<<<<<< @@ -7043,7 +7051,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_16 = 0; __pyx_t_16 < __pyx_t_15; __pyx_t_16+=1) { __pyx_v_m = __pyx_t_16; - /* "trunk/gensim/models/doc2vec_inner.pyx":688 + /* "trunk/gensim/models/doc2vec_inner.pyx":689 * if _learn_doctags: * for m in range(doctag_len): * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], # <<<<<<<<<<<<<< @@ -7056,7 +7064,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } __pyx_L35:; - /* "trunk/gensim/models/doc2vec_inner.pyx":690 + /* "trunk/gensim/models/doc2vec_inner.pyx":691 * our_saxpy(&vector_size, &_doctag_locks[_doctag_indexes[m]], &_work[m * vector_size], * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) * if _learn_words: # <<<<<<<<<<<<<< @@ -7066,7 +7074,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = (__pyx_v__learn_words != 0); if (__pyx_t_9) { - /* "trunk/gensim/models/doc2vec_inner.pyx":691 + /* "trunk/gensim/models/doc2vec_inner.pyx":692 * &ONE, &_doctag_vectors[_doctag_indexes[m] * vector_size], &ONE) 
* if _learn_words: * for m in range(2 * window): # <<<<<<<<<<<<<< @@ -7077,7 +7085,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence for (__pyx_t_15 = 0; __pyx_t_15 < __pyx_t_6; __pyx_t_15+=1) { __pyx_v_m = __pyx_t_15; - /* "trunk/gensim/models/doc2vec_inner.pyx":692 + /* "trunk/gensim/models/doc2vec_inner.pyx":693 * if _learn_words: * for m in range(2 * window): * our_saxpy(&vector_size, &_word_locks[window_indexes[m]], &_work[(doctag_len + m) * vector_size], # <<<<<<<<<<<<<< @@ -7092,7 +7100,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":652 + /* "trunk/gensim/models/doc2vec_inner.pyx":653 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -7110,7 +7118,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } } - /* "trunk/gensim/models/doc2vec_inner.pyx":695 + /* "trunk/gensim/models/doc2vec_inner.pyx":696 * &ONE, &_word_vectors[window_indexes[m] * vector_size], &ONE) * * return result # <<<<<<<<<<<<<< @@ -7118,13 +7126,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 695; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_4 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0; - /* "trunk/gensim/models/doc2vec_inner.pyx":550 + /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< @@ -7153,7 +7161,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence return __pyx_r; } 
-/* "trunk/gensim/models/doc2vec_inner.pyx":698 +/* "trunk/gensim/models/doc2vec_inner.pyx":699 * * * def init(): # <<<<<<<<<<<<<< @@ -7192,7 +7200,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/doc2vec_inner.pyx":708 + /* "trunk/gensim/models/doc2vec_inner.pyx":709 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -7202,7 +7210,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/doc2vec_inner.pyx":709 + /* "trunk/gensim/models/doc2vec_inner.pyx":710 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -7212,7 +7220,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/doc2vec_inner.pyx":710 + /* "trunk/gensim/models/doc2vec_inner.pyx":711 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -7221,7 +7229,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/doc2vec_inner.pyx":711 + /* "trunk/gensim/models/doc2vec_inner.pyx":712 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -7230,7 +7238,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_size = 1; - /* "trunk/gensim/models/doc2vec_inner.pyx":716 + /* "trunk/gensim/models/doc2vec_inner.pyx":717 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -7240,7 +7248,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/doc2vec_inner.pyx":717 + /* 
"trunk/gensim/models/doc2vec_inner.pyx":718 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -7249,7 +7257,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/doc2vec_inner.pyx":718 + /* "trunk/gensim/models/doc2vec_inner.pyx":719 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -7259,7 +7267,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN (__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/doc2vec_inner.pyx":721 + /* "trunk/gensim/models/doc2vec_inner.pyx":722 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -7268,7 +7276,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_13doc2vec_inner_ONE)); - /* "trunk/gensim/models/doc2vec_inner.pyx":722 + /* "trunk/gensim/models/doc2vec_inner.pyx":723 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -7277,7 +7285,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/doc2vec_inner.pyx":723 + /* "trunk/gensim/models/doc2vec_inner.pyx":724 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -7287,7 +7295,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":724 + /* "trunk/gensim/models/doc2vec_inner.pyx":725 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -7296,7 +7304,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_double; - /* "trunk/gensim/models/doc2vec_inner.pyx":725 + /* "trunk/gensim/models/doc2vec_inner.pyx":726 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -7305,7 +7313,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":726 + /* "trunk/gensim/models/doc2vec_inner.pyx":727 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -7318,7 +7326,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":727 + /* "trunk/gensim/models/doc2vec_inner.pyx":728 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -7328,7 +7336,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN __pyx_t_4 = 
((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/doc2vec_inner.pyx":728 + /* "trunk/gensim/models/doc2vec_inner.pyx":729 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -7337,7 +7345,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float; - /* "trunk/gensim/models/doc2vec_inner.pyx":729 + /* "trunk/gensim/models/doc2vec_inner.pyx":730 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -7346,7 +7354,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_saxpy; - /* "trunk/gensim/models/doc2vec_inner.pyx":730 + /* "trunk/gensim/models/doc2vec_inner.pyx":731 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -7360,7 +7368,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN } /*else*/ { - /* "trunk/gensim/models/doc2vec_inner.pyx":734 + /* "trunk/gensim/models/doc2vec_inner.pyx":735 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -7369,7 +7377,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":735 + /* "trunk/gensim/models/doc2vec_inner.pyx":736 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = 
our_saxpy_noblas # <<<<<<<<<<<<<< @@ -7378,7 +7386,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN */ __pyx_v_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/doc2vec_inner.pyx":736 + /* "trunk/gensim/models/doc2vec_inner.pyx":737 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -7391,7 +7399,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UN goto __pyx_L0; } - /* "trunk/gensim/models/doc2vec_inner.pyx":698 + /* "trunk/gensim/models/doc2vec_inner.pyx":699 * * * def init(): # <<<<<<<<<<<<<< @@ -9546,7 +9554,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 93; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 353; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -9572,31 +9580,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/doc2vec_inner.pyx":460 + 
/* "trunk/gensim/models/doc2vec_inner.pyx":462 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 460; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 462; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); - /* "trunk/gensim/models/doc2vec_inner.pyx":618 + /* "trunk/gensim/models/doc2vec_inner.pyx":619 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24)*np.random.randint(0,2**24) + np.random.randint(0,2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__6 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__6); 
__Pyx_GIVEREF(__pyx_tuple__6); - __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 618; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__7 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__7); __Pyx_GIVEREF(__pyx_tuple__7); @@ -9678,41 +9686,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GIVEREF(__pyx_tuple__14); __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":397 + /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, 
__pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, 
__pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 397, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 399, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":550 + /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, 
__pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 550, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, 
__pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 551, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/doc2vec_inner.pyx":698 + /* "trunk/gensim/models/doc2vec_inner.pyx":699 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__20 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__20); __Pyx_GIVEREF(__pyx_tuple__20); - __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 698, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__21 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__20, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 699, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__21)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; 
__pyx_L1_error:; @@ -10044,48 +10052,48 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":397 + /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 397; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":550 + /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_2 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 550; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":698 + /* "trunk/gensim/models/doc2vec_inner.pyx":699 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_7init, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_2) < 0) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "trunk/gensim/models/doc2vec_inner.pyx":738 + /* "trunk/gensim/models/doc2vec_inner.pyx":739 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 739; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_1))) { @@ -10098,14 +10106,14 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 739; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_2 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 739; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 738; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 739; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":1 diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 2ea8e195b6..8ea164513c 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -338,7 +338,9 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, for i in range(sentence_len): predict_word = word_vocabs[i] if predict_word is None: - codelens[i] = 0 + # shrink sentence to leave out word + sentence_len = sentence_len - 1 + continue # leaving j unchanged else: indexes[i] = predict_word.index if hs: @@ -519,7 +521,6 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 if cbow_mean: sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error - if hs: fast_sentence_dm_hs(points[i], codes[i], codelens[i], _neu1, syn1, _alpha, _work, From 0878db89bc0cb45cc9d356b741aee4268a59ab09 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 9 Jun 2015 23:52:09 -0700 Subject: [PATCH 29/49] expand deterministic tests --- gensim/test/test_doc2vec.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index d89325358e..caf415add5 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -206,17 +206,27 @@ def test_parallel(self): model = doc2vec.Doc2Vec(corpus, workers=workers) self.model_sanity(model) - def test_deterministic_seeding(self): + def test_deterministic_hs(self): """Test doc2vec results identical with identical RNG seed.""" # hs model = doc2vec.Doc2Vec(DocsLeeCorpus(), seed=42, workers=1) model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), seed=42, workers=1) self.models_equal(model, model2) + + def test_deterministic_neg(self): + """Test doc2vec results identical with identical 
RNG seed.""" # neg model = doc2vec.Doc2Vec(DocsLeeCorpus(), hs=0, negative=3, seed=42, workers=1) model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), hs=0, negative=3, seed=42, workers=1) self.models_equal(model, model2) + def test_deterministic_dmc(self): + """Test doc2vec results identical with identical RNG seed.""" + # bigger, dmc + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, seed=42, workers=1) + model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, seed=42, workers=1) + self.models_equal(model, model2) + def models_equal(self, model, model2): # check words/hidden-weights self.assertEqual(len(model.vocab), len(model2.vocab)) From 45c81517236037f8007486d781ed49fbc683dc03 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 04:15:57 -0700 Subject: [PATCH 30/49] comment cleanup; doc_locks in job batches --- gensim/models/doc2vec.py | 93 +++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 35 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 4b24b5a378..cb3162f500 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -211,8 +211,8 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indices, alpha, work=Non class TaggedDocument(namedtuple('TaggedDocument','words tags')): """ A single document, made up of `words` (a list of unicode string tokens) - and `tags` (a list of tokens). Tags may also be one or more unicode string - tokens, but typical practice (which will also be most memory-efficient) is + and `tags` (a list of tokens). Tags may be one or more unicode string + tokens, but typical practice (which will also be most memory-efficient) is for the tags list to include a unique integer id as the only tag. Replaces "sentence as a list of words" from Word2Vec. 
@@ -224,25 +224,37 @@ def __str__(self): class DocvecsArray(object): """ - Default storage of docvecs during training, in a numpy array. + Default storage of doc vectors during/after training, in a numpy array. - Maintains dict mapping string doctag -> int mapping if necessary. - (If all TaggedSentences use only int doctags, this overhead is - avoided.) Supplying a mapfile_path at construction will use a - pair of memory-mapped files as the array backing for syn0/syn0_lockf - values. + As the 'docvecs' property of a Doc2Vec model, allows access and + comparison of document vectors. - (A future alternative implementation, based on another persistence - mechanism like LMDB, LevelDB, or SQLite, should also be possible.) - """ + >>> docvec = d2v_model.docvecs[99] + >>> docvec = d2v_model.docvecs['SENT_99'] # if string tag used in training + >>> sims = d2v_model.docvecs.most_similar(99) + >>> sims = d2v_model.docvecs.most_similar('SENT_99') + >>> sims = d2v_model.docvecs.most_similar(docvec) + + If only plain int tags are presented during training, the dict (of + string tag -> index) and list (of index -> string tag) stay empty, + saving memory. + Supplying a mapfile_path (as by initializing a Doc2Vec model with a + 'docvecs_mapfile' value) will use a pair of memory-mapped + files as the array backing for doctag_syn0/doctag_syn0_lockf values. + + The Doc2Vec model automatically uses this class, but a future alternative + implementation, based on another persistence mechanism like LMDB, LevelDB, + or SQLite, should also be possible. 
+ """ def __init__(self, mapfile_path=None): - self.doctags = {} # string -> Doctag (if necessary) - self.index2doctag = [] # int index -> String (if necessary) + self.doctags = {} # string -> Doctag (only filled if necessary) + self.index2doctag = [] # int index -> String (only filled if necessary) self.count = -1 self.mapfile_path = mapfile_path def note_doctag(self, key, sentence_no, sentence_length): + """Note a document tag during initial corpus scan, for structure sizing.""" if isinstance(key, int): self.count = max(self.count, key+1) else: @@ -254,20 +266,24 @@ def note_doctag(self, key, sentence_no, sentence_length): self.count = max(self.count, len(self.index2doctag)) def indexed_doctags(self, doctag_tokens): + """Return indexes and backing-arrays used in training examples.""" return ([i for i in [self._int_index(index,-1) for index in doctag_tokens] if i > -1], - self.doctag_syn0, doctag_tokens) + self.doctag_syn0, self.doctag_syn0_lockf, doctag_tokens) def trained_items(self, indexed_tuples): - """Persist any changes to the given indices; a no-op for this implementation""" + """Persist any changes made to the given indices (matching tuple previously + returned by indexed_doctags()); a no-op for this implementation""" pass def _int_index(self, index, missing=None): + """Return int index for either string or int index""" if isinstance(index, int): return index else: return self.doctags[index].index if index in self.doctags else missing def _key_index(self, i_index, missing=None): + """Return string index for given int index, if available""" if i_index < len(self.index2doctag): return self.index2doctag[i_index] else: @@ -408,7 +424,10 @@ def n_similarity(self, ds1, ds2): class Doctag(namedtuple('Doctag', 'index, word_count, doc_count')): """A string document tag discovered during the initial vocabulary - scan. (The document-vector equivalent of a Vocab object.)""" + scan. (The document-vector equivalent of a Vocab object.) 
+ + Will not be used if all presented document tags are ints. + """ __slots__ = () def repeat(self, word_count): return self._replace(word_count=self.word_count + word_count, doc_count=self.doc_count + 1) @@ -416,18 +435,18 @@ def repeat(self, word_count): class Doc2Vec(Word2Vec): """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" - def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, + def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, dbow_words=0, dm_mean=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, **kwargs): """ - Initialize the model from an iterable of `sentences`. Each sentence is a - TaggedSentence object that will be used for training. + Initialize the model from an iterable of `documents`. Each document is a + TaggedDocument object that will be used for training. - The `sentences` iterable can be simply a list of TaggedSentence elements, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. + The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, + consider an iterable that streams the documents directly from disk/network. - If you don't supply `sentences`, the model is left uninitialized -- use if + If you don't supply `documents`, the model is left uninitialized -- use if you plan to initialize it in some other way. `dm` defines the training algorithm. By default (`dm=1`), 'distributed memory' (PV-DM) is used. @@ -435,7 +454,8 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, `size` is the dimensionality of the feature vectors. - `window` is the maximum distance between the current and predicted word within a sentence. 
+ `window` is the maximum distance between the predicted word and context words used for prediction + within a document. `alpha` is the initial learning rate (will linearly drop to zero as training progresses). @@ -466,7 +486,7 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, dm_concat mode; default is 1. `dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW - doc-vector training; default is 0 (faster training of doc-vectors only. + doc-vector training; default is 0 (faster training of doc-vectors only). """ Word2Vec.__init__(self, size=size, alpha=alpha, window=window, min_count=min_count, @@ -479,9 +499,9 @@ def __init__(self, sentences=None, size=300, alpha=0.025, window=8, min_count=5, self.docvecs = docvecs if not self.docvecs: self.docvecs = DocvecsArray(docvecs_mapfile) - if sentences is not None: - self.build_vocab(sentences) - self.train(sentences) + if documents is not None: + self.build_vocab(documents) + self.train(documents) def clear_sims(self): Word2Vec.reset_weights(self) @@ -522,11 +542,11 @@ def _vocab_from(self, sentences): def _prepare_sentences(self, sentences): for sentence in sentences: - # avoid calling random_sample() where prob >= 1, to speed things up a little: yield (self._tokens_to_vocabs(sentence.words), self.docvecs.indexed_doctags(sentence.tags)) def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): + """Convert list of tokens to items (Vocabs) from source_dict.""" if source_dict is None: source_dict = self.vocab if sample: @@ -539,16 +559,16 @@ def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): def _get_job_words(self, alpha, work, job, neu1): if self.sg: tally = sum(train_sentence_dbow(self, sentence, doctag_indices, alpha, work, train_words=self.dbow_words, - doctag_vectors=doctag_vectors) - for sentence, (doctag_indices, doctag_vectors, ignored) in job) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + for sentence, 
(doctag_indices, doctag_vectors, doctag_locks, ignored) in job) elif self.dm_concat: tally = sum(train_sentence_dm_concat(self, sentence, doctag_indices, alpha, work, neu1, - doctag_vectors=doctag_vectors) - for sentence, (doctag_indices, doctag_vectors, ignored) in job) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + for sentence, (doctag_indices, doctag_vectors, doctag_locks, ignored) in job) else: tally = sum(train_sentence_dm(self, sentence, doctag_indices, alpha, work, neu1, - doctag_vectors=doctag_vectors) - for sentence, (doctag_indices, doctag_vectors, ignored) in job) + doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) + for sentence, (doctag_indices, doctag_vectors, doctag_locks, ignored) in job) self.docvecs.trained_items(item for s, item in job) return tally @@ -568,6 +588,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): if not self.sg: neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) + print('docv: %f %f ...' % (doctag_vectors[0][0], doctag_vectors[0][1])) for i in range(steps): if self.sg: train_sentence_dbow(self, word_vocabs, doctag_indices, alpha, work, @@ -582,6 +603,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha + print('docv= %f %f ...' 
% (doctag_vectors[0][0], doctag_vectors[0][1])) return doctag_vectors[0] @@ -590,6 +612,7 @@ def __str__(self): @property def compact_name(self): + """Abbreviated name reflecting major configuration parameters.""" segments = [] if self.sg: segments.append('dbow') # PV-DBOW (skip-gram-style) @@ -622,7 +645,7 @@ def compact_name(self): def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors - super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save doctag fields + super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save docvecs in same separate-numpy-file style class TaggedBrownCorpus(object): From a902dd83f2398839a888ebc62a135c29b91a97dd Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 04:17:09 -0700 Subject: [PATCH 31/49] don't clobber weights (ruining inference, among other things) --- gensim/models/doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index cb3162f500..1e0889c6c9 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -504,7 +504,7 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, self.train(documents) def clear_sims(self): - Word2Vec.reset_weights(self) + Word2Vec.clear_sims(self) self.docvecs.clear_sims() def reset_weights(self): From d1426404030f52065bf8b39fd3649652857869db Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 04:40:08 -0700 Subject: [PATCH 32/49] rm stray printing --- gensim/models/doc2vec.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 1e0889c6c9..650447ccb7 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -588,7 +588,6 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): if not self.sg: neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) - print('docv: %f %f ...' 
% (doctag_vectors[0][0], doctag_vectors[0][1])) for i in range(steps): if self.sg: train_sentence_dbow(self, word_vocabs, doctag_indices, alpha, work, @@ -603,7 +602,6 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - print('docv= %f %f ...' % (doctag_vectors[0][0], doctag_vectors[0][1])) return doctag_vectors[0] From 882cddd258016e5b9d06fb92c64e3a927dc58b64 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 14:02:05 -0700 Subject: [PATCH 33/49] inference in sanity checks; cleanup --- gensim/test/test_doc2vec.py | 50 +++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index caf415add5..ae1db52ddc 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -33,12 +33,6 @@ logger = logging.getLogger('gensim.test.test_doc2vec') -class LeeCorpus(object): - def __iter__(self): - with open(datapath('lee_background.cor')) as f: - for line in f: - yield utils.simple_preprocess(line) - class DocsLeeCorpus(object): def __init__(self, string_tags=False): self.string_tags = string_tags @@ -51,6 +45,7 @@ def __iter__(self): for i, line in enumerate(f): yield TaggedDocument(utils.simple_preprocess(line),[self._tag(i)]) +list_corpus = list(DocsLeeCorpus()) sentences = [ ['human', 'interface', 'computer'], @@ -112,13 +107,11 @@ def test_string_doctags(self): self.assertTrue(all(model.docvecs['_*0']==model.docvecs[0])) def test_empty_errors(self): - corpus = DocsLeeCorpus() - # no input => "RuntimeError: you must first build vocabulary before training the model" self.assertRaises(RuntimeError, doc2vec.Doc2Vec, []) # input not empty, but rather completely filtered out - self.assertRaises(RuntimeError, doc2vec.Doc2Vec, corpus, min_count=10000) + 
self.assertRaises(RuntimeError, doc2vec.Doc2Vec, list_corpus, min_count=10000) def model_sanity(self, model): """Any non-trivial model on DocsLeeCorpus can pass these sanity checks""" @@ -126,19 +119,26 @@ def model_sanity(self, model): fire2 = 8 # doc 8 sydney fires tennis1 = 6 # doc 6 tennis + # inferred vector should be top10 close to bulk-trained one + doc0_inferred = model.infer_vector(list(DocsLeeCorpus())[0].words) + sims_to_infer = model.docvecs.most_similar([doc0_inferred]) + self.assertTrue(fire1 in [match[0] for match in sims_to_infer]) + + # fire8 should be top20 close to fire1 sims = model.docvecs.most_similar(fire1,topn=20) sims = [(idx, round(dist,4)) for idx, dist in sims] - if fire2 not in [match[0] for match in sims]: - print(sims) self.assertTrue(fire2 in [match[0] for match in sims]) + # same sims should appear in lookup by vec as by index doc0_vec = model.docvecs[fire1] sims2 = model.docvecs.most_similar(positive=[doc0_vec], topn=21) sims2 = [(idx, round(dist,4)) for idx, dist in sims2] self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is doc itself + # tennis doc should be out-of-place among fire news self.assertEqual(model.docvecs.doesnt_match([fire1, tennis1, fire2]), tennis1) + # fire docs should be closer than fire-tennis self.assertTrue(model.docvecs.similarity(fire1,fire2) > model.docvecs.similarity(fire1,tennis1)) def test_training(self): @@ -157,42 +157,48 @@ def test_training(self): def test_dbow_hs(self): """Test DBOW doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=0, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=0, hs=1, negative=0, min_count=2, iter=20) self.model_sanity(model) def test_dmm_hs(self): """Test DM/mean doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=1, negative=0, + 
min_count=2, iter=20) self.model_sanity(model) def test_dms_hs(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=1, negative=0, + min_count=2, iter=20) self.model_sanity(model) def test_dmc_hs(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=0, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_concat=1, size=24, window=4, hs=1, negative=0, + min_count=2, iter=20) self.model_sanity(model) def test_dbow_neg(self): """Test DBOW doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=0, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=0, hs=0, negative=10, min_count=2, iter=20) self.model_sanity(model) def test_dmm_neg(self): """Test DM/mean doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=1, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=1, size=24, window=4, hs=0, negative=10, + min_count=2, iter=20) self.model_sanity(model) def test_dms_neg(self): """Test DM/sum doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_mean=0, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_mean=0, size=24, window=4, hs=0, negative=10, + min_count=2, iter=20) self.model_sanity(model) def test_dmc_neg(self): """Test DM/concatenate doc2vec training.""" - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=0, negative=10, min_count=2, iter=20) + model = doc2vec.Doc2Vec(list_corpus, dm=1, dm_concat=1, size=24, window=4, hs=0, negative=10, + min_count=2, iter=20) self.model_sanity(model) def test_parallel(self): @@ -223,8 +229,10 @@ def 
test_deterministic_neg(self): def test_deterministic_dmc(self): """Test doc2vec results identical with identical RNG seed.""" # bigger, dmc - model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, seed=42, workers=1) - model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, seed=42, workers=1) + model = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, + seed=42, workers=1) + model2 = doc2vec.Doc2Vec(DocsLeeCorpus(), dm=1, dm_concat=1, size=24, window=4, hs=1, negative=3, + seed=42, workers=1) self.models_equal(model, model2) def models_equal(self, model, model2): From 2f085b648264c3155bca476d8b642d4da4db2099 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 14:12:38 -0700 Subject: [PATCH 34/49] IMDB sentiment experiments --- docs/notebooks/doc2vec-IMDB.ipynb | 1917 +++++++++++++++++++++++++++++ 1 file changed, 1917 insertions(+) create mode 100644 docs/notebooks/doc2vec-IMDB.ipynb diff --git a/docs/notebooks/doc2vec-IMDB.ipynb b/docs/notebooks/doc2vec-IMDB.ipynb new file mode 100644 index 0000000000..296c824836 --- /dev/null +++ b/docs/notebooks/doc2vec-IMDB.ipynb @@ -0,0 +1,1917 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:3774d8aad3c7a1b4a207c1d4313427a78e834e7b8a3686a6cc142de169daf17b" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "gensim doc2vec & IMDB sentiment dataset" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Load corpus" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Fetch and prep exactly as in Mikolov's go.sh shell script. 
(Note this cell tests for existence of required files, so steps won't repeat once the final summary file (`aclImdb/alldata-id.txt`) is available alongside this notebook.)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%%bash\n", + "# adapted from Mikolov's example go.sh script: \n", + "if [ ! -f \"aclImdb/alldata-id.txt\" ]\n", + "then\n", + " if [ ! -d \"aclImdb\" ] \n", + " then\n", + " if [ ! -f \"aclImdb_v1.tar.gz\" ]\n", + " then\n", + " wget -quiet http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", + " fi\n", + " tar xf aclImdb_v1.tar.gz\n", + " fi\n", + " \n", + " #this function will convert text to lowercase and will disconnect punctuation and special symbols from words\n", + " function normalize_text {\n", + " awk '{print tolower($0);}' < $1 | sed -e 's/\\./ \\. /g' -e 's/
/ /g' -e 's/\"/ \" /g' \\\n", + " -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\\!/ \\! /g' -e 's/\\?/ \\? /g' \\\n", + " -e 's/\\;/ \\; /g' -e 's/\\:/ \\: /g' > $1-norm\n", + " }\n", + "\n", + " export LC_ALL=C\n", + " for j in train/pos train/neg test/pos test/neg train/unsup; do\n", + " rm temp\n", + " for i in `ls aclImdb/$j`; do cat aclImdb/$j/$i >> temp; awk 'BEGIN{print;}' >> temp; done\n", + " normalize_text temp\n", + " mv temp-norm aclImdb/$j/norm.txt\n", + " done\n", + " mv aclImdb/train/pos/norm.txt aclImdb/train-pos.txt\n", + " mv aclImdb/train/neg/norm.txt aclImdb/train-neg.txt\n", + " mv aclImdb/test/pos/norm.txt aclImdb/test-pos.txt\n", + " mv aclImdb/test/neg/norm.txt aclImdb/test-neg.txt\n", + " mv aclImdb/train/unsup/norm.txt aclImdb/train-unsup.txt\n", + "\n", + " cat aclImdb/train-pos.txt aclImdb/train-neg.txt aclImdb/test-pos.txt aclImdb/test-neg.txt aclImdb/train-unsup.txt > aclImdb/alldata.txt\n", + " awk 'BEGIN{a=0;}{print \"_*\" a \" \" $0; a++;}' < aclImdb/alldata.txt > aclImdb/alldata-id.txt\n", + "fi" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 2 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import os.path\n", + "assert os.path.isfile(\"aclImdb/alldata-id.txt\"), \"alldata-id.txt unavailable\"" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 3 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The data is small enough to be read into memory. 
" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from gensim.models.doc2vec import TaggedDocument\n", + "from collections import namedtuple\n", + "\n", + "SentimentDocument = namedtuple('SentimentDocument','words tags split sentiment')\n", + "\n", + "alldocs = [] # will hold all docs in original order\n", + "with open('aclImdb/alldata-id.txt') as alldata:\n", + " for line_no, line in enumerate(alldata):\n", + " tokens = line.split()\n", + " words = tokens[1:]\n", + " tags = [line_no] # `tags = [tokens[0]]` would also work at extra memory cost\n", + " split = ['train','test','extra','extra'][line_no//25000] # 25k train, 25k test, 25k extra\n", + " sentiment = [1.0, 0.0, 1.0, 0.0, None, None, None, None][line_no//12500] # [12.5K pos, 12.5K neg]*2 then unknown\n", + " alldocs.append(SentimentDocument(words, tags, split, sentiment))\n", + "\n", + "train_docs = [doc for doc in alldocs if doc.split == 'train']\n", + "test_docs = [doc for doc in alldocs if doc.split == 'test']\n", + "doc_list = alldocs[:] # for reshuffling per pass\n", + "\n", + "print('%d docs: %d train-sentiment, %d test-sentiment' % (len(doc_list), len(train_docs), len(test_docs)))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "100000 docs: 25000 train-sentiment, 25000 test-sentiment\n" + ] + } + ], + "prompt_number": 4 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Set-up Doc2Vec Training & Evaluation Models" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Approximating experiment of Le & Mikolov [\"Distributed Representations of Sentences and Documents\"](http://cs.stanford.edu/~quocle/paragraph_vector.pdf), also with guidance from Mikolov's [example go.sh](https://groups.google.com/d/msg/word2vec-toolkit/Q49FIrNOQRo/J6KG8mUj45sJ):\n", + "\n", + "`./word2vec -train ../alldata-id.txt -output vectors.txt -cbow 0 -size 100 -window 
10 -negative 5 -hs 0 -sample 1e-4 -threads 40 -binary 0 -iter 20 -min-count 1 -sentence-vectors 1`\n", + "\n", + "Parameter choices below vary:\n", + "\n", + "* 100-dimensional vectors, as the 400d vectors of the paper don't seem to offer much benefit on this task\n", + "* similarly, frequent word subsampling seems to decrease sentiment-prediction accuracy, so it's left out\n", + "* `cbow=0` means skip-gram which is equivalent to the paper's 'PV-DBOW' mode, matched in gensim with `dm=0`\n", + "* added to that DBOW model two DM models, one which averages context vectors (`dm_mean`) and one which concatenates them (`dm_concat`, resulting in a much larger model)\n", + "* a `min_count=2` saves quite a bit of model memory, discarding only words that appear in a single doc (and are thus no more expressive than the unique-to-each doc vectors themselves)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from gensim.models import Doc2Vec\n", + "import gensim.models.doc2vec\n", + "from collections import OrderedDict\n", + "import multiprocessing\n", + "\n", + "cores = multiprocessing.cpu_count()\n", + "assert gensim.models.doc2vec.FAST_VERSION > -1, \"this will be painfully slow otherwise\"\n", + "\n", + "simple_models = [\n", + " # PV-DM w/concatenation - window=5 (both sides) approximates paper's 10-word total window size\n", + " Doc2Vec(dm=1,dm_concat=1,size=100,window=5,negative=5,hs=0,min_count=2,workers=cores),\n", + " # PV-DBOW \n", + " Doc2Vec(dm=0,size=100,negative=5,hs=0,min_count=2,workers=cores),\n", + " # PV-DM w/average\n", + " Doc2Vec(dm=1,dm_mean=1,size=100,window=10,negative=5,hs=0,min_count=2,workers=cores),\n", + "]\n", + "\n", + "# speed setup by sharing results of 1st model's vocabulary scan\n", + "simple_models[0].build_vocab(alldocs) # PV-DM/concat requires one special NULL word so it serves as template\n", + "print(simple_models[0].compact_name)\n", + "for model in simple_models[1:]:\n", + " 
model.reset_from(simple_models[0])\n", + " print(model.compact_name)\n", + "\n", + "models_by_name = OrderedDict((model.compact_name, model) for model in simple_models)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "dmc_d100n5w5mc2t4\n", + "dbow_d100n5mc2t4" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "dmm_d100n5w10mc2t4" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 5 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Following the paper, we also evaluate models in pairs. These wrappers return the concatenation of the vectors from each model. (Only the singular models are trained.)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from gensim.test.test_doc2vec import ConcatenatedDoc2Vec\n", + "models_by_name['dbow+dmm'] = ConcatenatedDoc2Vec([models_by_name['dbow_d100n5mc2t4'], models_by_name['dmm_d100n5w10mc2t4']])\n", + "models_by_name['dbow+dmc'] = ConcatenatedDoc2Vec([models_by_name['dbow_d100n5mc2t4'], models_by_name['dmc_d100n5w5mc2t4']])" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 6 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 6 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Predictive Evaluation Methods" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Helper methods for evaluating error rate." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import numpy as np\n", + "import statsmodels.api as sm\n", + "from random import sample\n", + "\n", + "# for timing\n", + "from contextlib import contextmanager\n", + "from timeit import default_timer\n", + "import time \n", + "\n", + "@contextmanager\n", + "def elapsed_timer():\n", + " start = default_timer()\n", + " elapser = lambda: default_timer() - start\n", + " yield lambda: elapser()\n", + " end = default_timer()\n", + " elapser = lambda: end-start\n", + " \n", + "def logistic_predictor_from_data(train_targets, train_regressors):\n", + " logit = sm.Logit(train_targets, train_regressors)\n", + " predictor = logit.fit(disp=0)\n", + " #print(predictor.summary())\n", + " return predictor\n", + "\n", + "def error_rate_for_model(test_model, train_set, test_set, infer=False, infer_steps=3, infer_alpha=0.1, infer_subsample=0.1):\n", + " \"\"\"Report error rate on test_doc sentiments, using supplied model and train_docs\"\"\"\n", + "\n", + " train_targets, train_regressors = zip(*[(doc.sentiment, test_model.docvecs[doc.tags[0]]) for doc in train_set])\n", + " train_regressors = sm.add_constant(train_regressors)\n", + " predictor = logistic_predictor_from_data(train_targets, train_regressors)\n", + "\n", + " test_data = test_set\n", + " if infer:\n", + " if infer_subsample < 1.0:\n", + " test_data = sample(test_data, int(infer_subsample*len(test_data)))\n", + " test_regressors = [test_model.infer_vector(doc.words,steps=infer_steps,alpha=infer_alpha) for doc in test_data]\n", + " else:\n", + " test_regressors = [test_model.docvecs[doc.tags[0]] for doc in test_docs]\n", + " test_regressors = sm.add_constant(test_regressors)\n", + " \n", + " # predict & evaluate\n", + " test_predictions = predictor.predict(test_regressors)\n", + " corrects = sum(np.rint(test_predictions)==[doc.sentiment for doc in test_data])\n", + " errors = len(test_predictions) - corrects\n", + " error_rate = float(errors) / 
len(test_predictions)\n", + " return (error_rate, errors, len(test_predictions), predictor)\n" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 7 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Bulk Training" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from collections import defaultdict\n", + "best_error = defaultdict(lambda :1.0) # to selectively-print only best errors achieved" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 8 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using explicit multiple-pass, alpha-reduction approach as sketched in [gensim doc2vec blog post](http://radimrehurek.com/2014/12/doc2vec-tutorial/) \u2013 with added shuffling of corpus on each pass.\n", + "\n", + "Note that vector training is occurring on *all* documents of the dataset, which includes all TRAIN/TEST/DEV docs.\n", + "\n", + "Evaluation of each model's sentiment-predictive power is repeated after each pass, as an error rate (lower is better), to see the rates-of-relative-improvement. The base numbers reuse the TRAIN and TEST vectors stored in the models for the logistic regression, while the _inferred_ results use newly-inferred TEST vectors. 
\n", + "\n", + "(On a 4-core 2.6Ghz Intel Core i7, these 20 passes training and evaluating 3 main models takes about an hour.)" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from random import shuffle\n", + "import datetime\n", + "\n", + "alpha, min_alpha, passes = (0.025, 0.001, 20)\n", + "alpha_delta = (alpha - min_alpha) / passes\n", + "\n", + "print(\"START %s\" % datetime.datetime.now())\n", + "\n", + "for epoch in range(passes):\n", + " shuffle(doc_list) # shuffling gets best results\n", + " \n", + " for name, train_model in models_by_name.items():\n", + " # train\n", + " duration = 'na'\n", + " train_model.alpha, train_model.min_alpha = (alpha, alpha)\n", + " with elapsed_timer() as elapsed:\n", + " train_model.train(doc_list)\n", + " duration = '%.1f' % elapsed()\n", + " \n", + " # evaluate\n", + " eval_duration = ''\n", + " with elapsed_timer() as eval_elapsed:\n", + " (err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs)\n", + " eval_duration = '%.1f' % eval_elapsed()\n", + " if err < best_error[name]:\n", + " best_error[name] = err\n", + " print(\"%f : %i passes : %s %ss %ss\"%(err,epoch+1,name, duration, eval_duration))\n", + "\n", + " eval_duration = ''\n", + " with elapsed_timer() as eval_elapsed:\n", + " (infer_err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs, infer=True)\n", + " eval_duration = '%.1f' % eval_elapsed()\n", + " if infer_err < best_error[name+'_inferred']:\n", + " best_error[name+'_inferred'] = infer_err\n", + " print(\"%f : %i passes : %s %ss %ss\"%(infer_err,epoch+1,name+'_inferred', duration, eval_duration))\n", + "\n", + " print('completed pass %i at alpha %f'%(epoch+1,alpha))\n", + " alpha -= alpha_delta\n", + " \n", + "print(\"END %s\" % str(datetime.datetime.now()))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "START 
2015-06-10 05:01:59.714002\n", + "0.418360 : 1 passes : dmc_d100n5w5mc2t4 66.0s 1.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.389600 : 1 passes : dmc_d100n5w5mc2t4_inferred 66.0s 10.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.221280 : 1 passes : dbow_d100n5mc2t4 28.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.193600 : 1 passes : dbow_d100n5mc2t4_inferred 28.6s 5.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.276920 : 1 passes : dmm_d100n5w10mc2t4 37.8s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.225200 : 1 passes : dmm_d100n5w10mc2t4_inferred 37.8s 6.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.207720 : 1 passes : dbow+dmm 0.0s 2.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.188400 : 1 passes : dbow+dmm_inferred 0.0s 12.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.221440 : 1 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.218000 : 1 passes : dbow+dmc_inferred 0.0s 17.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 1 at alpha 0.025000\n", + "0.364360 : 2 passes : dmc_d100n5w5mc2t4 59.6s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.368000 : 2 passes : dmc_d100n5w5mc2t4_inferred 59.6s 10.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.140840 : 2 passes : dbow_d100n5mc2t4 28.8s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.166000 : 2 passes : dbow_d100n5mc2t4_inferred 28.8s 5.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + 
"text": [ + "\n", + "0.223720 : 2 passes : dmm_d100n5w10mc2t4 35.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.198000 : 2 passes : dmm_d100n5w10mc2t4_inferred 35.6s 6.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.136240 : 2 passes : dbow+dmm 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.163600 : 2 passes : dbow+dmm_inferred 0.0s 12.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.140840 : 2 passes : dbow+dmc 0.0s 2.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.176400 : 2 passes : dbow+dmc_inferred 0.0s 16.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 2 at alpha 0.023800\n", + "0.331400 : 3 passes : dmc_d100n5w5mc2t4 59.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.357600 : 3 passes : dmc_d100n5w5mc2t4_inferred 59.1s 10.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.122240 : 3 passes : dbow_d100n5mc2t4 29.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.124000 : 3 passes : dbow_d100n5mc2t4_inferred 29.0s 5.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.200240 : 3 passes : dmm_d100n5w10mc2t4 34.8s 1.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.120560 : 3 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.129200 : 3 passes : dbow+dmm_inferred 0.0s 11.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.122480 : 3 passes : dbow+dmc 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.144400 : 3 passes : 
dbow+dmc_inferred 0.0s 16.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 3 at alpha 0.022600\n", + "0.307800 : 4 passes : dmc_d100n5w5mc2t4 56.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.336000 : 4 passes : dmc_d100n5w5mc2t4_inferred 56.1s 11.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.113840 : 4 passes : dbow_d100n5mc2t4 31.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.122400 : 4 passes : dbow_d100n5mc2t4_inferred 31.6s 5.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.187000 : 4 passes : dmm_d100n5w10mc2t4 34.9s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.112880 : 4 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.124400 : 4 passes : dbow+dmm_inferred 0.0s 13.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.114040 : 4 passes : dbow+dmc 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.128800 : 4 passes : dbow+dmc_inferred 0.0s 16.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 4 at alpha 0.021400\n", + "0.283640 : 5 passes : dmc_d100n5w5mc2t4 59.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.325600 : 5 passes : dmc_d100n5w5mc2t4_inferred 59.6s 10.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.109600 : 5 passes : dbow_d100n5mc2t4 32.6s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.112800 : 5 passes : dbow_d100n5mc2t4_inferred 32.6s 6.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + 
"0.180760 : 5 passes : dmm_d100n5w10mc2t4 44.2s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.191200 : 5 passes : dmm_d100n5w10mc2t4_inferred 44.2s 6.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.108600 : 5 passes : dbow+dmm 0.0s 1.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.120000 : 5 passes : dbow+dmm_inferred 0.0s 12.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.109720 : 5 passes : dbow+dmc 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.118000 : 5 passes : dbow+dmc_inferred 0.0s 18.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 5 at alpha 0.020200\n", + "0.270080 : 6 passes : dmc_d100n5w5mc2t4 70.3s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.308000 : 6 passes : dmc_d100n5w5mc2t4_inferred 70.3s 11.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.106120 : 6 passes : dbow_d100n5mc2t4 33.9s 1.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.175800 : 6 passes : dmm_d100n5w10mc2t4 45.1s 1.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.106520 : 6 passes : dbow+dmm 0.0s 1.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.113200 : 6 passes : dbow+dmm_inferred 0.0s 12.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.106600 : 6 passes : dbow+dmc 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 6 at alpha 0.019000" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.256880 : 7 passes : dmc_d100n5w5mc2t4 67.4s 0.8s" + ] + }, + { + 
"output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.283600 : 7 passes : dmc_d100n5w5mc2t4_inferred 67.4s 11.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.105000 : 7 passes : dbow_d100n5mc2t4 34.8s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.172520 : 7 passes : dmm_d100n5w10mc2t4 44.5s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.104240 : 7 passes : dbow+dmm 0.0s 1.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.111200 : 7 passes : dbow+dmm_inferred 0.0s 13.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.106240 : 7 passes : dbow+dmc 0.0s 1.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.116400 : 7 passes : dbow+dmc_inferred 0.0s 16.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 7 at alpha 0.017800\n", + "0.249680 : 8 passes : dmc_d100n5w5mc2t4 71.7s 1.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.266400 : 8 passes : dmc_d100n5w5mc2t4_inferred 71.7s 11.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.109200 : 8 passes : dbow_d100n5mc2t4_inferred 34.7s 6.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.168840 : 8 passes : dmm_d100n5w10mc2t4 46.3s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.103520 : 8 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.105320 : 8 passes : dbow+dmc 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 8 at alpha 0.016600" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", 
+ "0.240200 : 9 passes : dmc_d100n5w5mc2t4 61.9s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.257600 : 9 passes : dmc_d100n5w5mc2t4_inferred 61.9s 10.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.104640 : 9 passes : dbow_d100n5mc2t4 28.5s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.107200 : 9 passes : dbow_d100n5mc2t4_inferred 28.5s 5.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.166800 : 9 passes : dmm_d100n5w10mc2t4 35.7s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.104200 : 9 passes : dbow+dmc 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.107200 : 9 passes : dbow+dmc_inferred 0.0s 16.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 9 at alpha 0.015400\n", + "0.236040 : 10 passes : dmc_d100n5w5mc2t4 68.6s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.253200 : 10 passes : dmc_d100n5w5mc2t4_inferred 68.6s 10.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.102960 : 10 passes : dbow_d100n5mc2t4 34.6s 1.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.095600 : 10 passes : dbow_d100n5mc2t4_inferred 34.6s 6.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.163880 : 10 passes : dmm_d100n5w10mc2t4 45.7s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.108000 : 10 passes : dbow+dmm_inferred 0.0s 12.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.104080 : 10 passes : dbow+dmc 0.0s 2.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.100000 : 10 
passes : dbow+dmc_inferred 0.0s 17.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 10 at alpha 0.014200\n", + "0.229280 : 11 passes : dmc_d100n5w5mc2t4 70.8s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.163640 : 11 passes : dmm_d100n5w10mc2t4 34.9s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.106800 : 11 passes : dbow+dmm_inferred 0.0s 12.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 11 at alpha 0.013000" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.225440 : 12 passes : dmc_d100n5w5mc2t4 58.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.239200 : 12 passes : dmc_d100n5w5mc2t4_inferred 58.0s 10.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.102120 : 12 passes : dbow_d100n5mc2t4 28.8s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.161880 : 12 passes : dmm_d100n5w10mc2t4 35.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.186000 : 12 passes : dmm_d100n5w10mc2t4_inferred 35.1s 6.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.102320 : 12 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.103320 : 12 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 12 at alpha 0.011800" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.223600 : 13 passes : dmc_d100n5w5mc2t4 52.7s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.232400 : 13 passes : dmc_d100n5w5mc2t4_inferred 52.7s 10.2s" + ] + 
}, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.161320 : 13 passes : dmm_d100n5w10mc2t4 34.4s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.102040 : 13 passes : dbow+dmm 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.104000 : 13 passes : dbow+dmm_inferred 0.0s 11.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 13 at alpha 0.010600" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.221040 : 14 passes : dmc_d100n5w5mc2t4 53.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.159560 : 14 passes : dmm_d100n5w10mc2t4 35.2s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.185600 : 14 passes : dmm_d100n5w10mc2t4_inferred 35.2s 7.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 14 at alpha 0.009400" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.217920 : 15 passes : dmc_d100n5w5mc2t4 54.3s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.159200 : 15 passes : dmm_d100n5w10mc2t4 35.3s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.101720 : 15 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 15 at alpha 0.008200" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.217440 : 16 passes : dmc_d100n5w5mc2t4 52.9s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.158640 : 16 passes : dmm_d100n5w10mc2t4 35.9s 1.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.101280 : 16 passes : dbow+dmm 0.0s 1.5s" 
+ ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.098000 : 16 passes : dbow+dmm_inferred 0.0s 12.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 16 at alpha 0.007000" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.215960 : 17 passes : dmc_d100n5w5mc2t4 52.2s 1.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.101200 : 17 passes : dbow+dmm 0.0s 1.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.094800 : 17 passes : dbow+dmm_inferred 0.0s 13.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 17 at alpha 0.005800" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.229200 : 18 passes : dmc_d100n5w5mc2t4_inferred 52.6s 10.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.158440 : 18 passes : dmm_d100n5w10mc2t4 35.4s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.181600 : 18 passes : dmm_d100n5w10mc2t4_inferred 35.4s 6.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 18 at alpha 0.004600" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.215440 : 19 passes : dmc_d100n5w5mc2t4 52.5s 1.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.217600 : 19 passes : dmc_d100n5w5mc2t4_inferred 52.5s 9.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.158280 : 19 passes : dmm_d100n5w10mc2t4 34.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.179600 : 19 passes : dmm_d100n5w10mc2t4_inferred 34.1s 7.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + 
"0.100720 : 19 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.103240 : 19 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 19 at alpha 0.003400" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.215400 : 20 passes : dmc_d100n5w5mc2t4 50.4s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "0.157960 : 20 passes : dmm_d100n5w10mc2t4 33.8s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 20 at alpha 0.002200" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "END 2015-06-10 06:03:58.499051\n" + ] + } + ], + "prompt_number": 9 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Achieved Sentiment-Prediction Accuracy" + ] + }, + { + "cell_type": "code", + "collapsed": true, + "input": [ + "# print best error rates achieved\n", + "errs = [(rate,name) for name, rate in best_error.items()]\n", + "errs.sort(key=lambda pair: pair[0])\n", + "for err in errs:\n", + " print(\"%f %s\"%(err[0],err[1]))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "0.094800 dbow+dmm_inferred\n", + "0.095600 dbow_d100n5mc2t4_inferred\n", + "0.100000 dbow+dmc_inferred\n", + "0.100720 dbow+dmm\n", + "0.102120 dbow_d100n5mc2t4\n", + "0.103240 dbow+dmc\n", + "0.157960 dmm_d100n5w10mc2t4\n", + "0.179600 dmm_d100n5w10mc2t4_inferred\n", + "0.215400 dmc_d100n5w5mc2t4\n", + "0.217600 dmc_d100n5w5mc2t4_inferred\n" + ] + } + ], + "prompt_number": 10 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In my testing, unlike the paper's report, DBOW performs best. Concatenating vectors from different models only offers a small improvement. 
The best results I've seen are still just under 10% error rate, still a ways from the paper's 7.42%. \n" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Examining Results" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Are inferred vectors close to the precalculated ones?" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", + "print('for doc %d...' % doc_id)\n", + "for model in simple_models:\n", + " inferred_docvec = model.infer_vector(alldocs[doc_id].words)\n", + " print('%s: %s' % (model.compact_name, model.docvecs.most_similar([inferred_docvec],topn=3)))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "for doc 59538...\n", + "dmc_d100n5w5mc2t4: [(59538, 0.7505937814712524), (19191, 0.4108924865722656), (59704, 0.40968963503837585)]" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "dbow_d100n5mc2t4: [(59538, 0.9496155977249146), (63920, 0.6974467635154724), (45836, 0.6542057394981384)]\n", + "dmm_d100n5w10mc2t4: [(59538, 0.8298757076263428), (28388, 0.8006762266159058), (70797, 0.7880938053131104)]" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + ], + "prompt_number": 20 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Yes, here the stored vector from 20 epochs of training is usually one of the closest to a freshly-inferred vector for the same words. Note the defaults for inference are very abbreviated \u2013 just 3 steps starting at a high alpha \u2013 and likely need tuning for other applications.)" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Do close documents seem more related than distant ones?" 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import random\n", + "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", + "model = random.choice(simple_models) # and a random model\n", + "sims = model.docvecs.most_similar(doc_id, topn=model.docvecs.count) # get *all* similar documents\n", + "print('TARGET (%d): \u00ab%s\u00bb\\n' % (doc_id, ' '.join(alldocs[doc_id].words)))\n", + "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model.compact_name)\n", + "for label, index in [('MOST',0), ('MEDIAN',len(sims)//2), ('LEAST',len(sims)-1)]:\n", + " print('%s %s: \u00ab%s\u00bb\\n' % (label, sims[index], ' '.join(alldocs[sims[index][0]].words)))\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "TARGET (44092): \u00abthis is awful , you just could't believe it . the score is annoying , the filming is bad , for example , sometimes you see the shadow of the cameraman appearing on some actors faces . the quality of the movie is ultra bad , seems like it was made in the 20ies . it's terrible . there is a bit of blood in the beginning and through the movie but always too dark filmed . no gore no effects . the director made some better one like blood rites . but out there there is a following of the man , 'cause searching to find this cheap flick isn't that hard but you have to pay hard earned cash for it . surely this will get in my top ten of worst horrormovies ever , i don't know if i would call it horror . there is too much talking , you will get bloodthirsty after watching it\u00bb\n", + "\n", + "SIMILAR/DISSIMILAR DOCS PER MODEL dmm_d100n5w10mc2t4:\n", + "\n", + "MOST (13474, 0.7617485523223877): \u00abthis film is so much of a rip-off of the masterpeice \" demons \" and thats the only thing that makes the movie worth watching . 
the acting is terrible , the action scenes are speeded up , the script is almost painful and budget non existent . if you think this film is good then you havn't seen a real horror film , skip this and get a copy of the movie demons .\u00bb\n", + "\n", + "MEDIAN (68465, 0.41534656286239624): \u00abfor over 1000 years beowulf the epic has described beowulf as a mighty hero who killed grendel and grendel's mother . he became a mighty king in his own right after protecting the existing incumbent and his son . at the end of a life of courage and honesty he sets out to fight one last battle , knowing he may be going to his death but willing to protect his people for one last time . during the final battle with the fire spewing serpent , beowulf was losing , but one of his companions remembered his duty and where others deserted beowulf , wiglaf returned to stand by his dying lord , shielding him and dealing a stroke that abated the serpent's fire , enabling beowulf to deal the death stroke to the serpent with his battle knife . finally after the death of the serpent , and the subsequent death of his dear lord from his wounds , wiglaf berates the cowards who deserted their lord and made them feel their shame . a short synopsis of the epic of beowulf . what beowulf is presented in this movie ? a pervert who sleeps with a demon , holds his lands at her behest , abhors his life and spawns a mutant . wrong wrong wrong wrong wrong , so wrong it is not remotely funny . the main thing that bothers me is that those who see the film will think it is in any way accurate ( even in the fight against grendel epic beowulf stated he scorns to carry sword or shield he does mention his shirt of mail ! 
) and a tale of unblemished heroism that has lasted fire and reformation over 10 centuries gets buried by a below average cgi flick with a bigger advertising budget .\u00bb\n", + "\n", + "LEAST (79375, -0.08628208190202713): \u00abas i watched the survivors , i couldn't help but wonder what was going through the mind of director michael ritchie when he was presented with the script . outside of the enormous gaps in plot and development , he had to see some humor in it somewhere to cast two direct opposites of the comedy spectrum to helm this project . there had to be a mission or a reason in ritchie's mind when he decided that robin williams , a fast-talking comedian that can sometimes be uncontrollable , and walter matthau , a slow-methodical comedian that appeals to the \" every man \" , would be his key players . i wish i could have been a fly on the wall during this opening meeting because this little fly would have spoken up and mentioned that this pairing would doom the script , and possibly put a black mark on both of these actor's careers . i wouldn't just stop there , i would tear this film to pieces trying to get others to explain to me the subsequent ending and missing tone . the only element that i would be content with would be the casting of jerry reed , who honestly brought some humor and intelligence to this scarred film . the survivors was not a film , but instead an attempt to allow two comedians the opportunity to express themselves coupled with heavy firepower . nothing more , nothing less . could somebody , anybody , please help me out with the story surrounding the survivors ? from the zigzag opening centered around the parrot and robin williams' job to the incident at matthau's gas station ( a plot point never mentioned or concluded ) , ritchie spends no time developing anything . his choice of direction is simply to allow williams to be as \" zany \" as possible and see how matthau reacts to it . 
if it weren't for jerry reed this film would have been nearly an hour and a half of forced jokes , gunshots , and awkward moments . the story was pointless . in most instances i can find bits and pieces of a story which keeps my attention allowing me to be curious about how the ending will resolve itself . for there to be this resolution , there has to be a conflict . ritchie attempts to create one with the entire \" survival of the fittest \" byline , but even that idea is never fully announced . i felt like a ping-pong ball in this film , constantly going back and forth between williams and matthau hoping that i would land on something that scored a point , but alas , this was the game that would never end . ritchie even takes us into the wilderness in attempts to bring more laughs and eventually draw an ending , but again , nothing happens . nothing is explained , nothing is developed , nothing is linear . williams goes into the woods to be trained in survival , yet for the amount of time he was there it was as if he was unable to learn anything . also , where did he get the funds to buy the house out in the woods ? then , without giving anything away , there was that pathetic ending . what happened ? i use big words there because there was not one iota of a conclusion . enemies became friends , friends became enemies , and before words could be spoken the ending credits appeared . i would like to announce this here , but i believe michael ritchie could not even handle the simplest of tasks with this film . the direction was horrible because ritchie could not control his actors . it was obvious as you watched williams and matthau on screen that there were getting no advice or pointers from the man behind the camera . ritchie didn't stop williams during his rants ( which at times were never relevant to the film ) and did not help matthau react to the insanity that williams was bringing to the table . 
what should have been the best part of this film was easily the most painful to watch . williams and matthau , in this critic's eye , possibly could be ranked as the worst comic pairing in cinema . matthau's form of comedy is completely , if not 100% , different to williams' shenanigans . while in some film cases this would work to a movie's advantage , for the survivors , it did not . there were no characters for these two comedians to enter into . i sat during the entire hour and a half watching robin williams be robin williams and the same for walter matthau . i could not see any semblance of a character between the two of them . both seemed to jump from one trait to the next . neither seemed to have a complete hold or knowledge of who they were attempting to portray . this is half due to the flimsy story , but mainly i place the blame on ritchie . with williams and matthau at the helm , this had the beginnings of a hilarious possible gut-busting , laugh-out-loud comedy that would be a staple in the film community , but ritchie , in my eyes , could not handle it . he relied to heavily on his actor's comic \" personas \" instead of actually building characters for them . overall , this was a very sad excuse for a film . i have read some other reviews that speak highly of the comedy in this film while do speak similarly of the lacking story , but for me everything was broken . there were no characters , there was no direction , there was obviously no story , and our two central actors didn't work for their money , but just read through their lines and gave a measly 30% to the final product . the only plus i give this film is the accomplishment of jerry reed . he was worth watching . the scene between his wife and i was nearly close to perfection . i think it was the only time that i found myself chuckling through this entire film . 
ritchie could not handle this film and in the end the survivors is probably a film that neither williams or matthau wants to remember . grade : ** out of *****\u00bb\n", + "\n" + ] + } + ], + "prompt_number": 101 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Yes, in terms of reviewer tone, movie genre, etc... the MOST similar docs usually seem more like the TARGET than the MEDIAN or LEAST.)" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Do the word vectors show useful similarities?" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "word_models = simple_models[:]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 86 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from IPython.display import HTML\n", + "# pick a random word with a suitable number of occurrences\n", + "while True:\n", + " word = random.choice(word_models[0].index2word)\n", + " if word_models[0].vocab[word].count > 10:\n", + " break\n", + "# or just pick a word from the relevant domain:\n", + "# word = 'plot'\n", + "similars_per_model = [str(model.most_similar(word,topn=20)).replace('), ','),<br>\\n') for model in word_models]\n", + "similar_table = (\"<table><tr><th>\" +\n", + " \"</th><th>\".join([model.compact_name for model in word_models]) + \n", + " \"</th></tr><tr><td>\" +\n", + " \"</td><td>\".join(similars_per_model) +\n", + " \"</td></tr></table>\")\n", + "print(\"most similar words for '%s' (%d occurences)\" % (word, simple_models[0].vocab[word].count))\n", + "HTML(similar_table)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "most similar words for 'reaction' (912 occurences)\n" + ] + }, + { + "html": [ + "
dmc_d100n5w5mc2t4dbow_d100n5mc2t4dmm_d100n5w10mc2t4
[('response', 0.7310047745704651),
\n", + "('reactions', 0.71299809217453),
\n", + "('obliviousness', 0.5911725163459778),
\n", + "('objection', 0.5809897184371948),
\n", + "('responses', 0.5774205327033997),
\n", + "('suggestion', 0.5519779324531555),
\n", + "('correlation', 0.5490224361419678),
\n", + "('rationality', 0.5477378964424133),
\n", + "('conclusion', 0.5427832007408142),
\n", + "('aversion', 0.5426162481307983),
\n", + "('reply', 0.5408373475074768),
\n", + "('introduction', 0.5372020602226257),
\n", + "('contribution', 0.5326882600784302),
\n", + "('magnetism', 0.5296876430511475),
\n", + "('aggressiveness', 0.527334988117218),
\n", + "('soliloquy', 0.5229986906051636),
\n", + "('precursor', 0.5208104848861694),
\n", + "('apprehension', 0.5201228857040405),
\n", + "('v-card', 0.5191075801849365),
\n", + "('inclination', 0.5187612771987915)]
[('yol', 0.40934062004089355),
\n", + "('motels', 0.4083479046821594),
\n", + "('discharged', 0.38723981380462646),
\n", + "('legalized', 0.38527610898017883),
\n", + "('frazee', 0.37536871433258057),
\n", + "(\"short's\", 0.3726074993610382),
\n", + "('humberfloob', 0.3696143627166748),
\n", + "(\"'chicago\", 0.3695574998855591),
\n", + "('partnered', 0.36848723888397217),
\n", + "('archard', 0.36810845136642456),
\n", + "('tenko', 0.3663099408149719),
\n", + "('concessions', 0.3662480115890503),
\n", + "('policier', 0.36383259296417236),
\n", + "('aide-de-camp', 0.3637371063232422),
\n", + "('cates', 0.3626943826675415),
\n", + "('punk', 0.36106568574905396),
\n", + "('robin', 0.35913658142089844),
\n", + "('geysers', 0.35744380950927734),
\n", + "('cheated', 0.3571518659591675),
\n", + "('surveying', 0.35693296790122986)]
[('response', 0.7806841135025024),
\n", + "('reactions', 0.7511624693870544),
\n", + "('introduction', 0.6644599437713623),
\n", + "('dismay', 0.6599670648574829),
\n", + "('face', 0.6541042327880859),
\n", + "('contribution', 0.6497777700424194),
\n", + "('decision', 0.6441717743873596),
\n", + "('stomach', 0.6292012929916382),
\n", + "('attraction', 0.6280107498168945),
\n", + "('objection', 0.6244252920150757),
\n", + "('reference', 0.6200262308120728),
\n", + "('attachment', 0.617138147354126),
\n", + "('inclination', 0.6141039133071899),
\n", + "('approach', 0.6132383942604065),
\n", + "('responses', 0.6101058125495911),
\n", + "('suggestion', 0.6090033054351807),
\n", + "('counterpoint', 0.6075911521911621),
\n", + "('counter-balance', 0.6052888035774231),
\n", + "('reply', 0.5980963110923767),
\n", + "('transition', 0.5962440967559814)]
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 93, + "text": [ + "" + ] + } + ], + "prompt_number": 93 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Do the DBOW words look meaningless? That's because the gensim DBOW model doesn't train word vectors \u2013 they remain at their random initialized values \u2013 unless you ask with the `dbow_words=1` initialization parameter. The DBOW doc vectors can be trained faster \u2013 and are even better on tasks like IMDB sentiment-prediction \u2013 *without* simultaneous word-training.) \n", + "\n", + "Words from DM models tend to show meaningfully similar words when there are many examples in the training data (as with 'plot' or 'actor'). (All DM modes inherently involve word vector training concurrent with doc vector training.)" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Are the word vectors from this dataset any good at analogies?" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "# assuming something like\n", + "# https://word2vec.googlecode.com/svn/trunk/questions-words.txt \n", + "# is in local directory\n", + "for model in word_models:\n", + " sections = model.accuracy('questions-words.txt')\n", + " correct, incorrect = (len(sum((s['correct'] for s in sections), [])), len(sum((s['incorrect'] for s in sections),[])))\n", + " print('%s: %0.2f%% correct (%d of %d)' % (model.compact_name, float(correct*100)/(correct+incorrect), correct, correct+incorrect))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "dmc_d100n5w5mc2t4: 27.28% correct (5462 of 20024)\n", + "dbow_d100n5mc2t4: 0.00% correct (0 of 20024)" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "dmm_d100n5w10mc2t4: 27.50% correct (5506 of 20024)" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n" + ] + } + 
], + "prompt_number": 91 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even though this is a tiny, domain-specific dataset, it shows some meagher capability on the general word analogies \u2013 at least for the DM/concat and DM/mean models which actually train word vectors. (The untrained random-initialized words of the DBOW model of course fail miserably.)" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Slop" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To mix the Google dataset (if locally available) into the word tests..." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from gensim.models import Word2Vec\n", + "w2v_g100b = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz',binary=True)\n", + "w2v_g100b.compact_name = 'w2v_g100b'\n", + "word_models.append(w2v_g100b)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get copious logging output from above steps..." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import logging\n", + "logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)\n", + "rootLogger = logging.getLogger()\n", + "rootLogger.setLevel(logging.INFO)" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To auto-reload python code while developing..." 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%load_ext autoreload\n", + "%autoreload 2" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 1 + } + ], + "metadata": {} + } + ] +} \ No newline at end of file From cb13723ccaa20347e35ff26489774e3beee48b8e Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 16:07:39 -0700 Subject: [PATCH 35/49] np.allclose for float checks --- gensim/test/test_doc2vec.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index ae1db52ddc..49ad101a04 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -126,14 +126,14 @@ def model_sanity(self, model): # fire8 should be top20 close to fire1 sims = model.docvecs.most_similar(fire1,topn=20) - sims = [(idx, round(dist,4)) for idx, dist in sims] self.assertTrue(fire2 in [match[0] for match in sims]) # same sims should appear in lookup by vec as by index doc0_vec = model.docvecs[fire1] sims2 = model.docvecs.most_similar(positive=[doc0_vec], topn=21) - sims2 = [(idx, round(dist,4)) for idx, dist in sims2] - self.assertEqual(sims, sims2[1:]) # ignore first element of sims2, which is doc itself + sims2 = sims2[1:] # ignore first element of sims2, which is doc itself + self.assertEqual(list(zip(*sims))[0], list(zip(*sims2))[0]) # same doc ids + self.assertTrue(np.allclose(list(zip(*sims))[1], list(zip(*sims2))[1])) # close-enough dists # tennis doc should be out-of-place among fire news self.assertEqual(model.docvecs.doesnt_match([fire1, tennis1, fire2]), tennis1) From ec47ec691c1fc75de73a39235b35964c022a37bc Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 16:14:51 -0700 Subject: [PATCH 36/49] notebook tweaks --- docs/notebooks/doc2vec-IMDB.ipynb | 1404 +++++++---------------------- 1 file changed, 330 insertions(+), 1074 deletions(-) diff --git a/docs/notebooks/doc2vec-IMDB.ipynb 
b/docs/notebooks/doc2vec-IMDB.ipynb index 296c824836..b8fac4d476 100644 --- a/docs/notebooks/doc2vec-IMDB.ipynb +++ b/docs/notebooks/doc2vec-IMDB.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:3774d8aad3c7a1b4a207c1d4313427a78e834e7b8a3686a6cc142de169daf17b" + "signature": "sha256:0399a84128f6ba9801e5c6a287bec7a0644784b6892896de60db48144d68daa8" }, "nbformat": 3, "nbformat_minor": 0, @@ -75,7 +75,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 2 + "prompt_number": 1 }, { "cell_type": "code", @@ -87,7 +87,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 3 + "prompt_number": 2 }, { "cell_type": "markdown", @@ -132,7 +132,7 @@ ] } ], - "prompt_number": 4 + "prompt_number": 3 }, { "cell_type": "heading", @@ -155,7 +155,7 @@ "* 100-dimensional vectors, as the 400d vectors of the paper don't seem to offer much benefit on this task\n", "* similarly, frequent word subsampling seems to decrease sentiment-prediction accuracy, so it's left out\n", "* `cbow=0` means skip-gram which is equivalent to the paper's 'PV-DBOW' mode, matched in gensim with `dm=0`\n", - "* added to that DBOW model two DM models, one which averages context vectors (`dm_mean`) and one which concatenates them (`dm_concat`, resulting in a much larger model)\n", + "* added to that DBOW model are two DM models, one which averages context vectors (`dm_mean`) and one which concatenates them (`dm_concat`, resulting in a much larger, slower, more data-hungry model)\n", "* a `min_count=2` saves quite a bit of model memory, discarding only words that appear in a single doc (and are thus no more expressive than the unique-to-each doc vectors themselves)" ] }, @@ -216,7 +216,7 @@ ] } ], - "prompt_number": 5 + "prompt_number": 4 }, { "cell_type": "markdown", @@ -236,16 +236,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 6 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [], - "language": 
"python", - "metadata": {}, - "outputs": [], - "prompt_number": 6 + "prompt_number": 5 }, { "cell_type": "heading", @@ -315,7 +306,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 7 + "prompt_number": 6 }, { "cell_type": "heading", @@ -325,18 +316,6 @@ "Bulk Training" ] }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from collections import defaultdict\n", - "best_error = defaultdict(lambda :1.0) # to selectively-print only best errors achieved" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 8 - }, { "cell_type": "markdown", "metadata": {}, @@ -350,6 +329,18 @@ "(On a 4-core 2.6Ghz Intel Core i7, these 20 passes training and evaluating 3 main models takes about an hour.)" ] }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from collections import defaultdict\n", + "best_error = defaultdict(lambda :1.0) # to selectively-print only best errors achieved" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 7 + }, { "cell_type": "code", "collapsed": false, @@ -402,8 +393,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "START 2015-06-10 05:01:59.714002\n", - "0.418360 : 1 passes : dmc_d100n5w5mc2t4 66.0s 1.3s" + "START 2015-06-10 15:45:49.141886\n", + "0.419480 : 1 passes : dmc_d100n5w5mc2t4 86.9s 1.4s" ] }, { @@ -411,7 +402,7 @@ "stream": "stdout", "text": [ "\n", - "0.389600 : 1 passes : dmc_d100n5w5mc2t4_inferred 66.0s 10.8s" + "0.384000 : 1 passes : dmc_d100n5w5mc2t4_inferred 86.9s 12.0s" ] }, { @@ -419,7 +410,7 @@ "stream": "stdout", "text": [ "\n", - "0.221280 : 1 passes : dbow_d100n5mc2t4 28.6s 0.8s" + "0.220440 : 1 passes : dbow_d100n5mc2t4 34.1s 0.8s" ] }, { @@ -427,7 +418,7 @@ "stream": "stdout", "text": [ "\n", - "0.193600 : 1 passes : dbow_d100n5mc2t4_inferred 28.6s 5.3s" + "0.216000 : 1 passes : dbow_d100n5mc2t4_inferred 34.1s 5.5s" ] }, { @@ -435,7 +426,7 @@ "stream": "stdout", "text": [ "\n", - "0.276920 : 1 
passes : dmm_d100n5w10mc2t4 37.8s 0.8s" + "0.269200 : 1 passes : dmm_d100n5w10mc2t4 46.7s 0.8s" ] }, { @@ -443,7 +434,7 @@ "stream": "stdout", "text": [ "\n", - "0.225200 : 1 passes : dmm_d100n5w10mc2t4_inferred 37.8s 6.8s" + "0.214800 : 1 passes : dmm_d100n5w10mc2t4_inferred 46.7s 7.3s" ] }, { @@ -451,7 +442,7 @@ "stream": "stdout", "text": [ "\n", - "0.207720 : 1 passes : dbow+dmm 0.0s 2.2s" + "0.210280 : 1 passes : dbow+dmm 0.0s 2.7s" ] }, { @@ -459,7 +450,7 @@ "stream": "stdout", "text": [ "\n", - "0.188400 : 1 passes : dbow+dmm_inferred 0.0s 12.9s" + "0.192000 : 1 passes : dbow+dmm_inferred 0.0s 14.3s" ] }, { @@ -467,7 +458,7 @@ "stream": "stdout", "text": [ "\n", - "0.221440 : 1 passes : dbow+dmc 0.0s 1.5s" + "0.219520 : 1 passes : dbow+dmc 0.0s 2.2s" ] }, { @@ -475,7 +466,7 @@ "stream": "stdout", "text": [ "\n", - "0.218000 : 1 passes : dbow+dmc_inferred 0.0s 17.1s" + "0.220000 : 1 passes : dbow+dmc_inferred 0.0s 18.8s" ] }, { @@ -484,15 +475,7 @@ "text": [ "\n", "completed pass 1 at alpha 0.025000\n", - "0.364360 : 2 passes : dmc_d100n5w5mc2t4 59.6s 0.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.368000 : 2 passes : dmc_d100n5w5mc2t4_inferred 59.6s 10.7s" + "0.365280 : 2 passes : dmc_d100n5w5mc2t4 61.0s 0.8s" ] }, { @@ -500,7 +483,7 @@ "stream": "stdout", "text": [ "\n", - "0.140840 : 2 passes : dbow_d100n5mc2t4 28.8s 1.7s" + "0.337200 : 2 passes : dmc_d100n5w5mc2t4_inferred 61.0s 10.6s" ] }, { @@ -508,7 +491,7 @@ "stream": "stdout", "text": [ "\n", - "0.166000 : 2 passes : dbow_d100n5mc2t4_inferred 28.8s 5.3s" + "0.142400 : 2 passes : dbow_d100n5mc2t4 29.3s 1.4s" ] }, { @@ -516,7 +499,7 @@ "stream": "stdout", "text": [ "\n", - "0.223720 : 2 passes : dmm_d100n5w10mc2t4 35.6s 0.8s" + "0.159600 : 2 passes : dbow_d100n5mc2t4_inferred 29.3s 5.5s" ] }, { @@ -524,7 +507,7 @@ "stream": "stdout", "text": [ "\n", - "0.198000 : 2 passes : dmm_d100n5w10mc2t4_inferred 35.6s 6.6s" + "0.220720 : 2 passes : dmm_d100n5w10mc2t4 
35.1s 0.8s" ] }, { @@ -532,7 +515,7 @@ "stream": "stdout", "text": [ "\n", - "0.136240 : 2 passes : dbow+dmm 0.0s 1.6s" + "0.138480 : 2 passes : dbow+dmm 0.0s 1.9s" ] }, { @@ -540,7 +523,7 @@ "stream": "stdout", "text": [ "\n", - "0.163600 : 2 passes : dbow+dmm_inferred 0.0s 12.1s" + "0.163200 : 2 passes : dbow+dmm_inferred 0.0s 13.1s" ] }, { @@ -548,7 +531,7 @@ "stream": "stdout", "text": [ "\n", - "0.140840 : 2 passes : dbow+dmc 0.0s 2.2s" + "0.141840 : 2 passes : dbow+dmc 0.0s 2.2s" ] }, { @@ -556,7 +539,7 @@ "stream": "stdout", "text": [ "\n", - "0.176400 : 2 passes : dbow+dmc_inferred 0.0s 16.2s" + "0.172400 : 2 passes : dbow+dmc_inferred 0.0s 18.1s" ] }, { @@ -565,15 +548,7 @@ "text": [ "\n", "completed pass 2 at alpha 0.023800\n", - "0.331400 : 3 passes : dmc_d100n5w5mc2t4 59.1s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.357600 : 3 passes : dmc_d100n5w5mc2t4_inferred 59.1s 10.7s" + "0.332200 : 3 passes : dmc_d100n5w5mc2t4 61.5s 0.8s" ] }, { @@ -581,7 +556,7 @@ "stream": "stdout", "text": [ "\n", - "0.122240 : 3 passes : dbow_d100n5mc2t4 29.0s 0.8s" + "0.122920 : 3 passes : dbow_d100n5mc2t4 28.9s 0.8s" ] }, { @@ -589,7 +564,7 @@ "stream": "stdout", "text": [ "\n", - "0.124000 : 3 passes : dbow_d100n5mc2t4_inferred 29.0s 5.5s" + "0.128000 : 3 passes : dbow_d100n5mc2t4_inferred 28.9s 5.5s" ] }, { @@ -597,7 +572,7 @@ "stream": "stdout", "text": [ "\n", - "0.200240 : 3 passes : dmm_d100n5w10mc2t4 34.8s 1.3s" + "0.202280 : 3 passes : dmm_d100n5w10mc2t4 35.1s 1.4s" ] }, { @@ -605,7 +580,7 @@ "stream": "stdout", "text": [ "\n", - "0.120560 : 3 passes : dbow+dmm 0.0s 1.5s" + "0.121240 : 3 passes : dbow+dmm 0.0s 1.6s" ] }, { @@ -613,7 +588,7 @@ "stream": "stdout", "text": [ "\n", - "0.129200 : 3 passes : dbow+dmm_inferred 0.0s 11.9s" + "0.138400 : 3 passes : dbow+dmm_inferred 0.0s 12.4s" ] }, { @@ -621,7 +596,7 @@ "stream": "stdout", "text": [ "\n", - "0.122480 : 3 passes : dbow+dmc 0.0s 1.6s" + "0.121560 : 3 passes : 
dbow+dmc 0.0s 1.6s" ] }, { @@ -629,7 +604,7 @@ "stream": "stdout", "text": [ "\n", - "0.144400 : 3 passes : dbow+dmc_inferred 0.0s 16.7s" + "0.148800 : 3 passes : dbow+dmc_inferred 0.0s 17.3s" ] }, { @@ -638,7 +613,7 @@ "text": [ "\n", "completed pass 3 at alpha 0.022600\n", - "0.307800 : 4 passes : dmc_d100n5w5mc2t4 56.1s 0.8s" + "0.312160 : 4 passes : dmc_d100n5w5mc2t4 59.4s 1.4s" ] }, { @@ -646,7 +621,7 @@ "stream": "stdout", "text": [ "\n", - "0.336000 : 4 passes : dmc_d100n5w5mc2t4_inferred 56.1s 11.1s" + "0.322000 : 4 passes : dmc_d100n5w5mc2t4_inferred 59.4s 11.0s" ] }, { @@ -654,7 +629,7 @@ "stream": "stdout", "text": [ "\n", - "0.113840 : 4 passes : dbow_d100n5mc2t4 31.6s 0.8s" + "0.114560 : 4 passes : dbow_d100n5mc2t4 32.2s 0.9s" ] }, { @@ -662,7 +637,7 @@ "stream": "stdout", "text": [ "\n", - "0.122400 : 4 passes : dbow_d100n5mc2t4_inferred 31.6s 5.3s" + "0.123200 : 4 passes : dbow_d100n5mc2t4_inferred 32.2s 5.6s" ] }, { @@ -670,7 +645,7 @@ "stream": "stdout", "text": [ "\n", - "0.187000 : 4 passes : dmm_d100n5w10mc2t4 34.9s 0.8s" + "0.190120 : 4 passes : dmm_d100n5w10mc2t4 41.9s 0.8s" ] }, { @@ -678,7 +653,7 @@ "stream": "stdout", "text": [ "\n", - "0.112880 : 4 passes : dbow+dmm 0.0s 1.7s" + "0.214000 : 4 passes : dmm_d100n5w10mc2t4_inferred 41.9s 6.7s" ] }, { @@ -686,7 +661,7 @@ "stream": "stdout", "text": [ "\n", - "0.124400 : 4 passes : dbow+dmm_inferred 0.0s 13.2s" + "0.114680 : 4 passes : dbow+dmm 0.0s 2.1s" ] }, { @@ -694,7 +669,7 @@ "stream": "stdout", "text": [ "\n", - "0.114040 : 4 passes : dbow+dmc 0.0s 1.6s" + "0.122400 : 4 passes : dbow+dmm_inferred 0.0s 12.5s" ] }, { @@ -702,7 +677,7 @@ "stream": "stdout", "text": [ "\n", - "0.128800 : 4 passes : dbow+dmc_inferred 0.0s 16.8s" + "0.114280 : 4 passes : dbow+dmc 0.0s 1.5s" ] }, { @@ -710,8 +685,7 @@ "stream": "stdout", "text": [ "\n", - "completed pass 4 at alpha 0.021400\n", - "0.283640 : 5 passes : dmc_d100n5w5mc2t4 59.6s 0.8s" + "0.134400 : 4 passes : dbow+dmc_inferred 0.0s 17.6s" ] }, { 
@@ -719,7 +693,8 @@ "stream": "stdout", "text": [ "\n", - "0.325600 : 5 passes : dmc_d100n5w5mc2t4_inferred 59.6s 10.8s" + "completed pass 4 at alpha 0.021400\n", + "0.290560 : 5 passes : dmc_d100n5w5mc2t4 60.4s 1.0s" ] }, { @@ -727,7 +702,7 @@ "stream": "stdout", "text": [ "\n", - "0.109600 : 5 passes : dbow_d100n5mc2t4 32.6s 0.9s" + "0.109080 : 5 passes : dbow_d100n5mc2t4 33.6s 1.4s" ] }, { @@ -735,39 +710,114 @@ "stream": "stdout", "text": [ "\n", - "0.112800 : 5 passes : dbow_d100n5mc2t4_inferred 32.6s 6.3s" + "0.108800 : 5 passes : dbow_d100n5mc2t4_inferred 33.6s 5.6s" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "0.180760 : 5 passes : dmm_d100n5w10mc2t4 44.2s 0.8s" + "\n" ] }, { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.191200 : 5 passes : dmm_d100n5w10mc2t4_inferred 44.2s 6.5s" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "pyerr", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin_alpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0melapsed_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0melapsed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m 
\u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0mduration\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'%.1f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0melapsed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/scratch/Documents/dev2015/gensim_venv/src/gensim-develop/gensim/models/word2vec.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, sentences, total_words, word_count, chunksize)\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mjob_no\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_sentences\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"putting job #%i in the queue, qsize=%i\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mjob_no\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 528\u001b[0;31m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 529\u001b[0m 
\u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"reached the end of input; waiting to finish %i outstanding jobs\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/Users/scratch/miniconda3/envs/gensim_cenv/lib/python3.4/queue.py\u001b[0m in \u001b[0;36mput\u001b[0;34m(self, item, block, timeout)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxsize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_full\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/Users/scratch/miniconda3/envs/gensim_cenv/lib/python3.4/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] - }, + } + ], + "prompt_number": 8 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Achieved Sentiment-Prediction Accuracy" + ] + }, + { + "cell_type": "code", + "collapsed": true, + "input": [ + "# print best error rates achieved\n", + "errs = [(rate,name) for name, rate in best_error.items()]\n", + "errs.sort(key=lambda pair: pair[0])\n", + "for err in errs:\n", + " print(\"%f %s\"%(err[0],err[1]))" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "0.108600 : 5 passes : dbow+dmm 0.0s 1.8s" + "0.108800 dbow_d100n5mc2t4_inferred\n", + "0.109080 dbow_d100n5mc2t4\n", + "0.114280 dbow+dmc\n", + "0.114680 dbow+dmm\n", + "0.122400 dbow+dmm_inferred\n", + "0.134400 dbow+dmc_inferred\n", + "0.190120 dmm_d100n5w10mc2t4\n", + "0.214000 dmm_d100n5w10mc2t4_inferred\n", + "0.290560 dmc_d100n5w5mc2t4\n", + "0.322000 dmc_d100n5w5mc2t4_inferred\n" ] - }, + } + ], + "prompt_number": 9 + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "In my testing, unlike the paper's report, DBOW performs best. Concatenating vectors from different models only offers a small predictive improvement. The best results I've seen are still just under 10% error rate, still a ways from the paper's 7.42%.\n" + ] + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Examining Results" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Are inferred vectors close to the precalculated ones?" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", + "print('for doc %d...' % doc_id)\n", + "for model in simple_models:\n", + " inferred_docvec = model.infer_vector(alldocs[doc_id].words)\n", + " print('%s: %s' % (model.compact_name, model.docvecs.most_similar([inferred_docvec],topn=3)))" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "0.120000 : 5 passes : dbow+dmm_inferred 0.0s 12.6s" + "for doc 81518...\n", + "dmc_d100n5w5mc2t4: [(81518, 0.6632639169692993), (82236, 0.5500479340553284), (11391, 0.5488752126693726)]" ] }, { @@ -775,7 +825,7 @@ "stream": "stdout", "text": [ "\n", - "0.109720 : 5 passes : dbow+dmc 0.0s 1.7s" + "dbow_d100n5mc2t4: [(81518, 0.9144332408905029), (61723, 0.6625540256500244), (60474, 0.6582651138305664)]" ] }, { @@ -783,1006 +833,202 @@ "stream": "stdout", "text": [ "\n", - "0.118000 : 5 passes : dbow+dmc_inferred 0.0s 18.0s" + "dmm_d100n5w10mc2t4: [(81518, 0.8487115502357483), (61608, 0.8010116815567017), (82548, 0.8009110689163208)]" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "completed pass 5 at alpha 0.020200\n", - "0.270080 : 6 passes : dmc_d100n5w5mc2t4 70.3s 0.9s" + "\n" ] - }, + } + ], + "prompt_number": 10 + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "(Yes, here the stored vector from 20 epochs of training is usually one of the closest to a freshly-inferred vector for the same words. Note the defaults for inference are very abbreviated \u2013 just 3 steps starting at a high alpha \u2013 and likely need tuning for other applications.)" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Do close documents seem more related than distant ones?" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import random\n", + "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", + "model = random.choice(simple_models) # and a random model\n", + "sims = model.docvecs.most_similar(doc_id, topn=model.docvecs.count) # get *all* similar documents\n", + "print('TARGET (%d): \u00ab%s\u00bb\\n' % (doc_id, ' '.join(alldocs[doc_id].words)))\n", + "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model.compact_name)\n", + "for label, index in [('MOST',0), ('MEDIAN',len(sims)//2), ('LEAST',len(sims)-1)]:\n", + " print('%s %s: \u00ab%s\u00bb\\n' % (label, sims[index], ' '.join(alldocs[sims[index][0]].words)))\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ + "TARGET (74201): \u00ablook , i have a strong stomach , but i have no use for torture porn in my entertainment . a few weeks ago , i saw a preview of this film that gave no indication of it's true nature . this , plus the intriguing poster led me to believe this was going to be a brooding drama in the hitchcock tradition . instead , i found myself watching the ugliest , most disgusting film i have ever attended of my own free will . i should have guessed when the cashier gave me a funny look while getting my ticket . the first thing i realized was the script was by an amateur . the scenes and dialog jumping around with no thought or subtlety . 
like other reviewers , i knew who the torturing villain was within the first few minutes . but , i still had to sit through several scenes of dismemberment and pain , which made me sick . sick that i had spent money to watch this disaster . i can't imagine why lindsay lohan would agree to be in this production . there are other venues to stretch her acting talents . neal mcdonough and julia ormond's rent must have been due . the story , such as it is has at it's core an interesting premise . a top director might have made a respectable film out of it with a total rewrite , without the gore and more atmosphere . this movie is an absolute , total disaster . no one involved has anything to be proud of .\u00bb\n", "\n", - "0.308000 : 6 passes : dmc_d100n5w5mc2t4_inferred 70.3s 11.3s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ + "SIMILAR/DISSIMILAR DOCS PER MODEL dbow_d100n5mc2t4:\n", "\n", - "0.106120 : 6 passes : dbow_d100n5mc2t4 33.9s 1.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ + "MOST (80740, 0.745945394039154): \u00abwhat a waste of film stock . overly atmospheric . dafoe and walken mailed their performances in . argento couldn't find a stamp . the dialogue seemed like improvisation , which i hope it was , because nobody should have been paid for it . even the possible saving grace of sex and nude scenes were uninspired .\u00bb\n", "\n", - "0.175800 : 6 passes : dmm_d100n5w10mc2t4 45.1s 1.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ + "MEDIAN (41578, 0.4059261083602905): \u00abawful film . terrible acting , cheesy , totally unrealistic , embarrassing to anyone who has played the game . for a start that guy is not a hooker , he would be snapped in two . as for ''i score , that's my job'' well no it's not . for the the uneducated american audience it might come across as a good film . for me , well , that's a few hours of my life i'll never get back . 
i read through the reviews and came across one where the guy sounded like he knew what he was talking about . then i read - ''and while american rugby may never reach the level of talent that new zealand or south africa has , third in the world is also nothing to hang your head about'' all i can say is , lmfao ! keep playing your american football and baseball , leave the real sports to the big boys .\u00bb\n", "\n", - "0.106520 : 6 passes : dbow+dmm 0.0s 1.8s" + "LEAST (31799, 0.05202261731028557): \u00abok , so in any wile e . coyote-road runner cartoons , we know that wec is going to set up all sorts of traps for rr , but always maim himself in various ways . that certainly happens in \" beep , beep \" . predictable ? i guess that it is , but when you think about it , these cartoons show how the more you try to harm someone else , the more you get harmed ; sort of like how daffy duck always tries to undermine bugs bunny's integrity but bugs sees around it . overall , this is another classic from the termite terrace crowd . sometimes , i think that if we really had wanted to ease cold war tensions , we could have just let the soviet union see looney tunes cartoons ; i'm sure that they would have loved them . another great one . ps : i learned on \" jeopardy ! \" that wile e . coyote's middle name is ethelbert .\u00bb\n", + "\n" ] - }, + } + ], + "prompt_number": 16 + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "(Somewhat, in terms of reviewer tone, movie genre, etc... the MOST cosine-similar docs usually seem more like the TARGET than the MEDIAN or LEAST.)" + ] + }, + { + "cell_type": "heading", + "level": 3, + "metadata": {}, + "source": [ + "Do the word vectors show useful similarities?" 
+ ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "word_models = simple_models[:]" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 12 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from IPython.display import HTML\n", + "# pick a random word with a suitable number of occurences\n", + "while True:\n", + " word = random.choice(word_models[0].index2word)\n", + " if word_models[0].vocab[word].count > 10:\n", + " break\n", + "# or just pick a word from the relevant domain:\n", + "# word = 'plot'\n", + "similars_per_model = [str(model.most_similar(word,topn=20)).replace('), ','),
\\n') for model in word_models]\n", + "similar_table = (\"
\" +\n", + " \"\".join([model.compact_name for model in word_models]) + \n", + " \"
\" +\n", + " \"\".join(similars_per_model) +\n", + " \"
\")\n", + "print(\"most similar words for '%s' (%d occurences)\" % (word, simple_models[0].vocab[word].count))\n", + "HTML(similar_table)" + ], + "language": "python", + "metadata": {}, + "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ - "\n", - "0.113200 : 6 passes : dbow+dmm_inferred 0.0s 12.2s" + "most similar words for 'abundance' (146 occurences)\n" ] }, { - "output_type": "stream", - "stream": "stdout", + "html": [ + "
dmc_d100n5w5mc2t4dbow_d100n5mc2t4dmm_d100n5w10mc2t4
[('array', 0.6298288702964783),
\n", + "('quantity', 0.5925834774971008),
\n", + "('overabundance', 0.5884098410606384),
\n", + "('meaninglessness', 0.588019073009491),
\n", + "('assemblage', 0.5832504034042358),
\n", + "('totality', 0.582420825958252),
\n", + "('vapidity', 0.5777873992919922),
\n", + "('extremity', 0.5740029811859131),
\n", + "('excess', 0.5700401067733765),
\n", + "('arsenal', 0.5695343017578125),
\n", + "('iqs', 0.5652327537536621),
\n", + "('8-9', 0.5581415891647339),
\n", + "('assortment', 0.5561405420303345),
\n", + "('tons', 0.5546265840530396),
\n", + "('torrents', 0.5545516014099121),
\n", + "('ultimatums', 0.5542378425598145),
\n", + "('amount', 0.5540366172790527),
\n", + "('quantities', 0.5512855052947998),
\n", + "('roster', 0.5505189895629883),
\n", + "('litany', 0.549481987953186)]
[('bespattered', 0.4104897975921631),
\n", + "('borel', 0.39388203620910645),
\n", + "(\"'devil'\", 0.3879944086074829),
\n", + "('train', 0.38379138708114624),
\n", + "('nagoya', 0.377510130405426),
\n", + "('gencon', 0.37605035305023193),
\n", + "('geometric', 0.3748994469642639),
\n", + "('un-funniest', 0.3717661201953888),
\n", + "('psychotherapy', 0.3682197332382202),
\n", + "('casted', 0.36680951714515686),
\n", + "('high-tailing', 0.3651661276817322),
\n", + "(\"ensign's\", 0.35965579748153687),
\n", + "('rocque', 0.3588852286338806),
\n", + "('publishers', 0.3534497022628784),
\n", + "('pseudo-comic', 0.35254913568496704),
\n", + "('mignard', 0.3523959219455719),
\n", + "('pritchert', 0.3517644703388214),
\n", + "('ours', 0.3500378429889679),
\n", + "('bayonne', 0.3490917384624481),
\n", + "('soup\u00e7on', 0.34788060188293457)]
[('overabundance', 0.8075829744338989),
\n", + "('assemblage', 0.8018349409103394),
\n", + "('assortment', 0.7681021094322205),
\n", + "('ounce', 0.762627363204956),
\n", + "('array', 0.7543219327926636),
\n", + "('excess', 0.7436821460723877),
\n", + "('amalgam', 0.7408836483955383),
\n", + "('unheard', 0.740603506565094),
\n", + "('over-abundance', 0.7144984602928162),
\n", + "('over-load', 0.7117946743965149),
\n", + "('overdose', 0.7113994359970093),
\n", + "('infestation', 0.7078109383583069),
\n", + "('amalgamation', 0.7040037512779236),
\n", + "('exemplar', 0.688831090927124),
\n", + "('accumulation', 0.686922550201416),
\n", + "('onslaught', 0.685982346534729),
\n", + "('exhibition', 0.6785738468170166),
\n", + "('arsenal', 0.6754549741744995),
\n", + "('involuntary', 0.6744215488433838),
\n", + "('oodles', 0.6684854626655579)]
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 13, "text": [ - "\n", - "0.106600 : 6 passes : dbow+dmc 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 6 at alpha 0.019000" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.256880 : 7 passes : dmc_d100n5w5mc2t4 67.4s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.283600 : 7 passes : dmc_d100n5w5mc2t4_inferred 67.4s 11.3s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.105000 : 7 passes : dbow_d100n5mc2t4 34.8s 0.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.172520 : 7 passes : dmm_d100n5w10mc2t4 44.5s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.104240 : 7 passes : dbow+dmm 0.0s 1.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.111200 : 7 passes : dbow+dmm_inferred 0.0s 13.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.106240 : 7 passes : dbow+dmc 0.0s 1.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.116400 : 7 passes : dbow+dmc_inferred 0.0s 16.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 7 at alpha 0.017800\n", - "0.249680 : 8 passes : dmc_d100n5w5mc2t4 71.7s 1.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.266400 : 8 passes : dmc_d100n5w5mc2t4_inferred 71.7s 11.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.109200 : 8 passes : dbow_d100n5mc2t4_inferred 34.7s 6.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.168840 : 8 passes : dmm_d100n5w10mc2t4 46.3s 0.8s" - ] - }, - { - "output_type": "stream", - 
"stream": "stdout", - "text": [ - "\n", - "0.103520 : 8 passes : dbow+dmm 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.105320 : 8 passes : dbow+dmc 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 8 at alpha 0.016600" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.240200 : 9 passes : dmc_d100n5w5mc2t4 61.9s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.257600 : 9 passes : dmc_d100n5w5mc2t4_inferred 61.9s 10.3s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.104640 : 9 passes : dbow_d100n5mc2t4 28.5s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.107200 : 9 passes : dbow_d100n5mc2t4_inferred 28.5s 5.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.166800 : 9 passes : dmm_d100n5w10mc2t4 35.7s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.104200 : 9 passes : dbow+dmc 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.107200 : 9 passes : dbow+dmc_inferred 0.0s 16.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 9 at alpha 0.015400\n", - "0.236040 : 10 passes : dmc_d100n5w5mc2t4 68.6s 0.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.253200 : 10 passes : dmc_d100n5w5mc2t4_inferred 68.6s 10.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.102960 : 10 passes : dbow_d100n5mc2t4 34.6s 1.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.095600 : 10 passes : dbow_d100n5mc2t4_inferred 34.6s 6.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - 
"0.163880 : 10 passes : dmm_d100n5w10mc2t4 45.7s 0.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.108000 : 10 passes : dbow+dmm_inferred 0.0s 12.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.104080 : 10 passes : dbow+dmc 0.0s 2.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.100000 : 10 passes : dbow+dmc_inferred 0.0s 17.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 10 at alpha 0.014200\n", - "0.229280 : 11 passes : dmc_d100n5w5mc2t4 70.8s 0.9s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.163640 : 11 passes : dmm_d100n5w10mc2t4 34.9s 1.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.106800 : 11 passes : dbow+dmm_inferred 0.0s 12.1s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 11 at alpha 0.013000" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.225440 : 12 passes : dmc_d100n5w5mc2t4 58.0s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.239200 : 12 passes : dmc_d100n5w5mc2t4_inferred 58.0s 10.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.102120 : 12 passes : dbow_d100n5mc2t4 28.8s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.161880 : 12 passes : dmm_d100n5w10mc2t4 35.1s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.186000 : 12 passes : dmm_d100n5w10mc2t4_inferred 35.1s 6.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.102320 : 12 passes : dbow+dmm 0.0s 1.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.103320 : 12 passes : dbow+dmc 0.0s 
1.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 12 at alpha 0.011800" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.223600 : 13 passes : dmc_d100n5w5mc2t4 52.7s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.232400 : 13 passes : dmc_d100n5w5mc2t4_inferred 52.7s 10.2s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.161320 : 13 passes : dmm_d100n5w10mc2t4 34.4s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.102040 : 13 passes : dbow+dmm 0.0s 1.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.104000 : 13 passes : dbow+dmm_inferred 0.0s 11.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 13 at alpha 0.010600" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.221040 : 14 passes : dmc_d100n5w5mc2t4 53.6s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.159560 : 14 passes : dmm_d100n5w10mc2t4 35.2s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.185600 : 14 passes : dmm_d100n5w10mc2t4_inferred 35.2s 7.1s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 14 at alpha 0.009400" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.217920 : 15 passes : dmc_d100n5w5mc2t4 54.3s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.159200 : 15 passes : dmm_d100n5w10mc2t4 35.3s 0.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.101720 : 15 passes : dbow+dmm 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 15 at 
alpha 0.008200" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.217440 : 16 passes : dmc_d100n5w5mc2t4 52.9s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.158640 : 16 passes : dmm_d100n5w10mc2t4 35.9s 1.4s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.101280 : 16 passes : dbow+dmm 0.0s 1.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.098000 : 16 passes : dbow+dmm_inferred 0.0s 12.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 16 at alpha 0.007000" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.215960 : 17 passes : dmc_d100n5w5mc2t4 52.2s 1.4s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.101200 : 17 passes : dbow+dmm 0.0s 1.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.094800 : 17 passes : dbow+dmm_inferred 0.0s 13.3s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 17 at alpha 0.005800" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.229200 : 18 passes : dmc_d100n5w5mc2t4_inferred 52.6s 10.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.158440 : 18 passes : dmm_d100n5w10mc2t4 35.4s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.181600 : 18 passes : dmm_d100n5w10mc2t4_inferred 35.4s 6.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 18 at alpha 0.004600" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.215440 : 19 passes : dmc_d100n5w5mc2t4 52.5s 1.3s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.217600 : 
19 passes : dmc_d100n5w5mc2t4_inferred 52.5s 9.6s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.158280 : 19 passes : dmm_d100n5w10mc2t4 34.1s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.179600 : 19 passes : dmm_d100n5w10mc2t4_inferred 34.1s 7.0s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.100720 : 19 passes : dbow+dmm 0.0s 1.7s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.103240 : 19 passes : dbow+dmc 0.0s 1.5s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 19 at alpha 0.003400" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.215400 : 20 passes : dmc_d100n5w5mc2t4 50.4s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "0.157960 : 20 passes : dmm_d100n5w10mc2t4 33.8s 0.8s" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "completed pass 20 at alpha 0.002200" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "END 2015-06-10 06:03:58.499051\n" - ] - } - ], - "prompt_number": 9 - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Achieved Sentiment-Prediction Accuracy" - ] - }, - { - "cell_type": "code", - "collapsed": true, - "input": [ - "# print best error rates achieved\n", - "errs = [(rate,name) for name, rate in best_error.items()]\n", - "errs.sort(key=lambda pair: pair[0])\n", - "for err in errs:\n", - " print(\"%f %s\"%(err[0],err[1]))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "0.094800 dbow+dmm_inferred\n", - "0.095600 dbow_d100n5mc2t4_inferred\n", - "0.100000 dbow+dmc_inferred\n", - "0.100720 dbow+dmm\n", - "0.102120 dbow_d100n5mc2t4\n", - "0.103240 dbow+dmc\n", - 
"0.157960 dmm_d100n5w10mc2t4\n", - "0.179600 dmm_d100n5w10mc2t4_inferred\n", - "0.215400 dmc_d100n5w5mc2t4\n", - "0.217600 dmc_d100n5w5mc2t4_inferred\n" - ] - } - ], - "prompt_number": 10 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "In my testing, unlike the paper's report, DBOW performs best. Concatenating vectors from different models only offers a small improvement. The best results I've seen are still just under 10% error rate, still a ways from the paper's 7.42%. \n" - ] - }, - { - "cell_type": "heading", - "level": 2, - "metadata": {}, - "source": [ - "Examining Results" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Are inferred vectors close to the precalculated ones?" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", - "print('for doc %d...' % doc_id)\n", - "for model in simple_models:\n", - " inferred_docvec = model.infer_vector(alldocs[doc_id].words)\n", - " print('%s: %s' % (model.compact_name, model.docvecs.most_similar([inferred_docvec],topn=3)))" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "for doc 59538...\n", - "dmc_d100n5w5mc2t4: [(59538, 0.7505937814712524), (19191, 0.4108924865722656), (59704, 0.40968963503837585)]" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n", - "dbow_d100n5mc2t4: [(59538, 0.9496155977249146), (63920, 0.6974467635154724), (45836, 0.6542057394981384)]\n", - "dmm_d100n5w10mc2t4: [(59538, 0.8298757076263428), (28388, 0.8006762266159058), (70797, 0.7880938053131104)]" - ] - }, - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "\n" - ] - } - ], - "prompt_number": 20 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Yes, here the stored vector from 20 epochs of training is 
usually one of the closest to a freshly-inferred vector for the same words. Note the defaults for inference are very abbreviated \u2013 just 3 steps starting at a high alpha \u2013 and likely need tuning for other applications.)" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Do close documents seem more related than distant ones?" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "import random\n", - "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", - "model = random.choice(simple_models) # and a random model\n", - "sims = model.docvecs.most_similar(doc_id, topn=model.docvecs.count) # get *all* similar documents\n", - "print('TARGET (%d): \u00ab%s\u00bb\\n' % (doc_id, ' '.join(alldocs[doc_id].words)))\n", - "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model.compact_name)\n", - "for label, index in [('MOST',0), ('MEDIAN',len(sims)//2), ('LEAST',len(sims)-1)]:\n", - " print('%s %s: \u00ab%s\u00bb\\n' % (label, sims[index], ' '.join(alldocs[sims[index][0]].words)))\n" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "TARGET (44092): \u00abthis is awful , you just could't believe it . the score is annoying , the filming is bad , for example , sometimes you see the shadow of the cameraman appearing on some actors faces . the quality of the movie is ultra bad , seems like it was made in the 20ies . it's terrible . there is a bit of blood in the beginning and through the movie but always too dark filmed . no gore no effects . the director made some better one like blood rites . but out there there is a following of the man , 'cause searching to find this cheap flick isn't that hard but you have to pay hard earned cash for it . surely this will get in my top ten of worst horrormovies ever , i don't know if i would call it horror . 
there is too much talking , you will get bloodthirsty after watching it\u00bb\n", - "\n", - "SIMILAR/DISSIMILAR DOCS PER MODEL dmm_d100n5w10mc2t4:\n", - "\n", - "MOST (13474, 0.7617485523223877): \u00abthis film is so much of a rip-off of the masterpeice \" demons \" and thats the only thing that makes the movie worth watching . the acting is terrible , the action scenes are speeded up , the script is almost painful and budget non existent . if you think this film is good then you havn't seen a real horror film , skip this and get a copy of the movie demons .\u00bb\n", - "\n", - "MEDIAN (68465, 0.41534656286239624): \u00abfor over 1000 years beowulf the epic has described beowulf as a mighty hero who killed grendel and grendel's mother . he became a mighty king in his own right after protecting the existing incumbent and his son . at the end of a life of courage and honesty he sets out to fight one last battle , knowing he may be going to his death but willing to protect his people for one last time . during the final battle with the fire spewing serpent , beowulf was losing , but one of his companions remembered his duty and where others deserted beowulf , wiglaf returned to stand by his dying lord , shielding him and dealing a stroke that abated the serpent's fire , enabling beowulf to deal the death stroke to the serpent with his battle knife . finally after the death of the serpent , and the subsequent death of his dear lord from his wounds , wiglaf berates the cowards who deserted their lord and made them feel their shame . a short synopsis of the epic of beowulf . what beowulf is presented in this movie ? a pervert who sleeps with a demon , holds his lands at her behest , abhors his life and spawns a mutant . wrong wrong wrong wrong wrong , so wrong it is not remotely funny . 
the main thing that bothers me is that those who see the film will think it is in any way accurate ( even in the fight against grendel epic beowulf stated he scorns to carry sword or shield he does mention his shirt of mail ! ) and a tale of unblemished heroism that has lasted fire and reformation over 10 centuries gets buried by a below average cgi flick with a bigger advertising budget .\u00bb\n", - "\n", - "LEAST (79375, -0.08628208190202713): \u00abas i watched the survivors , i couldn't help but wonder what was going through the mind of director michael ritchie when he was presented with the script . outside of the enormous gaps in plot and development , he had to see some humor in it somewhere to cast two direct opposites of the comedy spectrum to helm this project . there had to be a mission or a reason in ritchie's mind when he decided that robin williams , a fast-talking comedian that can sometimes be uncontrollable , and walter matthau , a slow-methodical comedian that appeals to the \" every man \" , would be his key players . i wish i could have been a fly on the wall during this opening meeting because this little fly would have spoken up and mentioned that this pairing would doom the script , and possibly put a black mark on both of these actor's careers . i wouldn't just stop there , i would tear this film to pieces trying to get others to explain to me the subsequent ending and missing tone . the only element that i would be content with would be the casting of jerry reed , who honestly brought some humor and intelligence to this scarred film . the survivors was not a film , but instead an attempt to allow two comedians the opportunity to express themselves coupled with heavy firepower . nothing more , nothing less . could somebody , anybody , please help me out with the story surrounding the survivors ? 
from the zigzag opening centered around the parrot and robin williams' job to the incident at matthau's gas station ( a plot point never mentioned or concluded ) , ritchie spends no time developing anything . his choice of direction is simply to allow williams to be as \" zany \" as possible and see how matthau reacts to it . if it weren't for jerry reed this film would have been nearly an hour and a half of forced jokes , gunshots , and awkward moments . the story was pointless . in most instances i can find bits and pieces of a story which keeps my attention allowing me to be curious about how the ending will resolve itself . for there to be this resolution , there has to be a conflict . ritchie attempts to create one with the entire \" survival of the fittest \" byline , but even that idea is never fully announced . i felt like a ping-pong ball in this film , constantly going back and forth between williams and matthau hoping that i would land on something that scored a point , but alas , this was the game that would never end . ritchie even takes us into the wilderness in attempts to bring more laughs and eventually draw an ending , but again , nothing happens . nothing is explained , nothing is developed , nothing is linear . williams goes into the woods to be trained in survival , yet for the amount of time he was there it was as if he was unable to learn anything . also , where did he get the funds to buy the house out in the woods ? then , without giving anything away , there was that pathetic ending . what happened ? i use big words there because there was not one iota of a conclusion . enemies became friends , friends became enemies , and before words could be spoken the ending credits appeared . i would like to announce this here , but i believe michael ritchie could not even handle the simplest of tasks with this film . the direction was horrible because ritchie could not control his actors . 
it was obvious as you watched williams and matthau on screen that there were getting no advice or pointers from the man behind the camera . ritchie didn't stop williams during his rants ( which at times were never relevant to the film ) and did not help matthau react to the insanity that williams was bringing to the table . what should have been the best part of this film was easily the most painful to watch . williams and matthau , in this critic's eye , possibly could be ranked as the worst comic pairing in cinema . matthau's form of comedy is completely , if not 100% , different to williams' shenanigans . while in some film cases this would work to a movie's advantage , for the survivors , it did not . there were no characters for these two comedians to enter into . i sat during the entire hour and a half watching robin williams be robin williams and the same for walter matthau . i could not see any semblance of a character between the two of them . both seemed to jump from one trait to the next . neither seemed to have a complete hold or knowledge of who they were attempting to portray . this is half due to the flimsy story , but mainly i place the blame on ritchie . with williams and matthau at the helm , this had the beginnings of a hilarious possible gut-busting , laugh-out-loud comedy that would be a staple in the film community , but ritchie , in my eyes , could not handle it . he relied to heavily on his actor's comic \" personas \" instead of actually building characters for them . overall , this was a very sad excuse for a film . i have read some other reviews that speak highly of the comedy in this film while do speak similarly of the lacking story , but for me everything was broken . there were no characters , there was no direction , there was obviously no story , and our two central actors didn't work for their money , but just read through their lines and gave a measly 30% to the final product . 
the only plus i give this film is the accomplishment of jerry reed . he was worth watching . the scene between his wife and i was nearly close to perfection . i think it was the only time that i found myself chuckling through this entire film . ritchie could not handle this film and in the end the survivors is probably a film that neither williams or matthau wants to remember . grade : ** out of *****\u00bb\n", - "\n" - ] - } - ], - "prompt_number": 101 - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "(Yes, in terms of reviewer tone, movie genre, etc... the MOST similar docs usually seem more like the TARGET than the MEDIAN or LEAST.)" - ] - }, - { - "cell_type": "heading", - "level": 3, - "metadata": {}, - "source": [ - "Do the word vectors show useful similarities?" - ] - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "word_models = simple_models[:]" - ], - "language": "python", - "metadata": {}, - "outputs": [], - "prompt_number": 86 - }, - { - "cell_type": "code", - "collapsed": false, - "input": [ - "from IPython.display import HTML\n", - "# pick a random word with a suitable number of occurences\n", - "while True:\n", - " word = random.choice(word_models[0].index2word)\n", - " if word_models[0].vocab[word].count > 10:\n", - " break\n", - "# or just pick a word from the relevant domain:\n", - "# word = 'plot'\n", - "similars_per_model = [str(model.most_similar(word,topn=20)).replace('), ','),
\\n') for model in word_models]\n", - "similar_table = (\"
\" +\n", - " \"\".join([model.compact_name for model in word_models]) + \n", - " \"
\" +\n", - " \"\".join(similars_per_model) +\n", - " \"
\")\n", - "print(\"most similar words for '%s' (%d occurences)\" % (word, simple_models[0].vocab[word].count))\n", - "HTML(similar_table)" - ], - "language": "python", - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "stream": "stdout", - "text": [ - "most similar words for 'reaction' (912 occurences)\n" - ] - }, - { - "html": [ - "
dmc_d100n5w5mc2t4dbow_d100n5mc2t4dmm_d100n5w10mc2t4
[('response', 0.7310047745704651),
\n", - "('reactions', 0.71299809217453),
\n", - "('obliviousness', 0.5911725163459778),
\n", - "('objection', 0.5809897184371948),
\n", - "('responses', 0.5774205327033997),
\n", - "('suggestion', 0.5519779324531555),
\n", - "('correlation', 0.5490224361419678),
\n", - "('rationality', 0.5477378964424133),
\n", - "('conclusion', 0.5427832007408142),
\n", - "('aversion', 0.5426162481307983),
\n", - "('reply', 0.5408373475074768),
\n", - "('introduction', 0.5372020602226257),
\n", - "('contribution', 0.5326882600784302),
\n", - "('magnetism', 0.5296876430511475),
\n", - "('aggressiveness', 0.527334988117218),
\n", - "('soliloquy', 0.5229986906051636),
\n", - "('precursor', 0.5208104848861694),
\n", - "('apprehension', 0.5201228857040405),
\n", - "('v-card', 0.5191075801849365),
\n", - "('inclination', 0.5187612771987915)]
[('yol', 0.40934062004089355),
\n", - "('motels', 0.4083479046821594),
\n", - "('discharged', 0.38723981380462646),
\n", - "('legalized', 0.38527610898017883),
\n", - "('frazee', 0.37536871433258057),
\n", - "(\"short's\", 0.3726074993610382),
\n", - "('humberfloob', 0.3696143627166748),
\n", - "(\"'chicago\", 0.3695574998855591),
\n", - "('partnered', 0.36848723888397217),
\n", - "('archard', 0.36810845136642456),
\n", - "('tenko', 0.3663099408149719),
\n", - "('concessions', 0.3662480115890503),
\n", - "('policier', 0.36383259296417236),
\n", - "('aide-de-camp', 0.3637371063232422),
\n", - "('cates', 0.3626943826675415),
\n", - "('punk', 0.36106568574905396),
\n", - "('robin', 0.35913658142089844),
\n", - "('geysers', 0.35744380950927734),
\n", - "('cheated', 0.3571518659591675),
\n", - "('surveying', 0.35693296790122986)]
[('response', 0.7806841135025024),
\n", - "('reactions', 0.7511624693870544),
\n", - "('introduction', 0.6644599437713623),
\n", - "('dismay', 0.6599670648574829),
\n", - "('face', 0.6541042327880859),
\n", - "('contribution', 0.6497777700424194),
\n", - "('decision', 0.6441717743873596),
\n", - "('stomach', 0.6292012929916382),
\n", - "('attraction', 0.6280107498168945),
\n", - "('objection', 0.6244252920150757),
\n", - "('reference', 0.6200262308120728),
\n", - "('attachment', 0.617138147354126),
\n", - "('inclination', 0.6141039133071899),
\n", - "('approach', 0.6132383942604065),
\n", - "('responses', 0.6101058125495911),
\n", - "('suggestion', 0.6090033054351807),
\n", - "('counterpoint', 0.6075911521911621),
\n", - "('counter-balance', 0.6052888035774231),
\n", - "('reply', 0.5980963110923767),
\n", - "('transition', 0.5962440967559814)]
" - ], - "metadata": {}, - "output_type": "pyout", - "prompt_number": 93, - "text": [ - "" + "" ] } ], - "prompt_number": 93 + "prompt_number": 13 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Do the DBOW words look meaningless? That's because the gensim DBOW model doesn't train word vectors \u2013 they remain at their random initialized values \u2013 unless you ask with the `dbow_words=1` initialization parameter. The DBOW doc vectors can be trained faster \u2013 and are even better on tasks like IMDB sentiment-prediction \u2013 *without* simultaneous word-training.) \n", + "Do the DBOW words look meaningless? That's because the gensim DBOW model doesn't train word vectors \u2013 they remain at their random initialized values \u2013 unless you ask with the `dbow_words=1` initialization parameter. The DBOW doc vectors train faster \u2013 and are even better on tasks like IMDB sentiment-prediction \u2013 *without* simultaneous word-training. \n", "\n", "Words from DM models tend to show meaningfully similar words when there are many examples in the training data (as with 'plot' or 'actor'). 
(All DM modes inherently involve word vector training concurrent with doc vector training.)" ] @@ -1802,6 +1048,7 @@ "# assuming something like\n", "# https://word2vec.googlecode.com/svn/trunk/questions-words.txt \n", "# is in local directory\n", + "# note: this takes many minutes\n", "for model in word_models:\n", " sections = model.accuracy('questions-words.txt')\n", " correct, incorrect = (len(sum((s['correct'] for s in sections), [])), len(sum((s['incorrect'] for s in sections),[])))\n", @@ -1814,7 +1061,7 @@ "output_type": "stream", "stream": "stdout", "text": [ - "dmc_d100n5w5mc2t4: 27.28% correct (5462 of 20024)\n", + "dmc_d100n5w5mc2t4: 23.76% correct (4758 of 20024)\n", "dbow_d100n5mc2t4: 0.00% correct (0 of 20024)" ] }, @@ -1823,7 +1070,7 @@ "stream": "stdout", "text": [ "\n", - "dmm_d100n5w10mc2t4: 27.50% correct (5506 of 20024)" + "dmm_d100n5w10mc2t4: 26.57% correct (5320 of 20024)" ] }, { @@ -1834,7 +1081,7 @@ ] } ], - "prompt_number": 91 + "prompt_number": 14 }, { "cell_type": "markdown", @@ -1851,6 +1098,16 @@ "Slop" ] }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "This cell left intentionally erroneous. 
" + ], + "language": "python", + "metadata": {}, + "outputs": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -1907,8 +1164,7 @@ ], "language": "python", "metadata": {}, - "outputs": [], - "prompt_number": 1 + "outputs": [] } ], "metadata": {} From 6c2c4e97a1a0ea40d7c2d42daa3f5f4405e1547e Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 17:34:49 -0700 Subject: [PATCH 37/49] nest with:s for py2.6 --- gensim/test/test_doc2vec.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 49ad101a04..a5d452f2b0 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -312,20 +312,20 @@ def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): # read sentences to temp {sentence -> (id,split) dict, to correlate with dictionary.txt info_by_sentence = {} - with open(os.path.join(dirname, 'datasetSentences.txt'), 'r') as sentences, \ - open(os.path.join(dirname, 'datasetSplit.txt'), 'r') as splits: - next(sentences) # legend - next(splits) # legend - for sentence_line, split_line in izip(sentences, splits): - (id, text) = sentence_line.split('\t') - id = int(id) - text = text.rstrip() - for junk, fix in sentence_fixups: - text = text.replace(junk, fix) - (id2, split_i) = split_line.split(',') - assert id == int(id2) - if text not in info_by_sentence: # discard duplicates - info_by_sentence[text] = (id, int(split_i)) + with open(os.path.join(dirname, 'datasetSentences.txt'), 'r') as sentences: + with open(os.path.join(dirname, 'datasetSplit.txt'), 'r') as splits: + next(sentences) # legend + next(splits) # legend + for sentence_line, split_line in izip(sentences, splits): + (id, text) = sentence_line.split('\t') + id = int(id) + text = text.rstrip() + for junk, fix in sentence_fixups: + text = text.replace(junk, fix) + (id2, split_i) = split_line.split(',') + assert id == int(id2) + if text not in info_by_sentence: # discard 
duplicates + info_by_sentence[text] = (id, int(split_i)) # read all phrase text phrases = [None] * 239232 # known size of phrases From 125e8ef49f7b354e71d816ff7ad9be6dff3117d0 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 18:58:02 -0700 Subject: [PATCH 38/49] minimize imports, simplify logging --- gensim/test/test_doc2vec.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index a5d452f2b0..ab9832c702 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -24,14 +24,10 @@ from gensim import utils, matutils from gensim.models import doc2vec -from gensim.models import word2vec -from gensim.models.doc2vec import TaggedDocument module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder datapath = lambda fname: os.path.join(module_path, 'test_data', fname) -logger = logging.getLogger('gensim.test.test_doc2vec') - class DocsLeeCorpus(object): def __init__(self, string_tags=False): @@ -43,7 +39,7 @@ def _tag(self, i): def __iter__(self): with open(datapath('lee_background.cor')) as f: for i, line in enumerate(f): - yield TaggedDocument(utils.simple_preprocess(line),[self._tag(i)]) + yield doc2vec.TaggedDocument(utils.simple_preprocess(line),[self._tag(i)]) list_corpus = list(DocsLeeCorpus()) @@ -59,7 +55,7 @@ def __iter__(self): ['graph', 'minors', 'survey'] ] -sentences = [TaggedDocument(words,[i]) for i, words in enumerate(sentences)] +sentences = [doc2vec.TaggedDocument(words,[i]) for i, words in enumerate(sentences)] def testfile(): @@ -248,7 +244,7 @@ def models_equal(self, model, model2): self.assertEqual(len(model.docvecs.index2doctag), len(model2.docvecs.index2doctag)) self.assertTrue(np.allclose(model.docvecs.doctag_syn0, model2.docvecs.doctag_syn0)) -#endclass TestWord2VecModel +#endclass TestDoc2VecModel # following code is useful for reproducing paragraph-vectors paper sentiment 
experiments @@ -292,7 +288,7 @@ def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): http://nlp.stanford.edu/~socherr/stanfordSentimentTreebank.zip has been expanded. It's not too big, so compose entirely into memory. """ - logger.info("loading corpus from %s" % dirname) + logging.info("loading corpus from %s" % dirname) # many mangled chars in sentences (datasetSentences.txt) chars_sst_mangled = ['à', 'á', 'â', 'ã', 'æ', 'ç', 'è', 'é', 'í', @@ -358,7 +354,7 @@ def read_su_sentiment_rotten_tomatoes(dirname, lowercase=True): assert len([phrase for phrase in phrases if phrase.split == 'test']) == 2210 # 'test' assert len([phrase for phrase in phrases if phrase.split == 'dev']) == 1100 # 'dev' - logger.info("loaded corpus with %i sentences and %i phrases from %s" + logging.info("loaded corpus with %i sentences and %i phrases from %s" % (len(info_by_sentence), len(phrases), dirname)) return phrases From e445e3e79e3d77fced16b4551768a9ef2cfc91ed Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 20:54:18 -0700 Subject: [PATCH 39/49] touch w/ comment --- gensim/test/test_doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index ab9832c702..25d2178543 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -266,7 +266,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): return np.concatenate([model.infer_vector(document,alpha,min_alpha,steps) for model in self.models]) def train(self, ignored): - pass + pass # train subcomponents individually class ConcatenatedDocvecs(object): def __init__(self, models): From f5b4e30dbc3ea71520b61e9d586cc946e2388ae0 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 21:07:07 -0700 Subject: [PATCH 40/49] rm stray import breaking py2.6 build --- gensim/test/test_doc2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_doc2vec.py 
b/gensim/test/test_doc2vec.py index 25d2178543..74c44d14d2 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -18,7 +18,7 @@ import bz2 from six import iteritems, iterkeys from six.moves import xrange, zip as izip -from collections import namedtuple, Counter +from collections import namedtuple import numpy as np From 53d864537ffd901bcbc4f7221db8f6208b0f92ef Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 10 Jun 2015 22:44:54 -0700 Subject: [PATCH 41/49] wget --quiet (two dashes) --- docs/notebooks/doc2vec-IMDB.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/notebooks/doc2vec-IMDB.ipynb b/docs/notebooks/doc2vec-IMDB.ipynb index b8fac4d476..39dc6726d5 100644 --- a/docs/notebooks/doc2vec-IMDB.ipynb +++ b/docs/notebooks/doc2vec-IMDB.ipynb @@ -43,7 +43,7 @@ " then\n", " if [ ! -f \"aclImdb_v1.tar.gz\" ]\n", " then\n", - " wget -quiet http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", + " wget --quiet http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\n", " fi\n", " tar xf aclImdb_v1.tar.gz\n", " fi\n", @@ -1170,4 +1170,4 @@ "metadata": {} } ] -} \ No newline at end of file +} From 09a30b3c7f0e5c702affb0faf5e2f5d28d6bbe44 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 16 Jun 2015 01:44:28 -0700 Subject: [PATCH 42/49] comments; sentence->document; ipynb tweaks --- docs/notebooks/doc2vec-IMDB.ipynb | 987 +++++++++++++++++++++++++----- gensim/models/doc2vec.py | 230 +++---- gensim/models/doc2vec_inner.c | 582 +++++++++--------- gensim/models/doc2vec_inner.pyx | 136 ++-- gensim/models/word2vec.py | 6 +- 5 files changed, 1307 insertions(+), 634 deletions(-) diff --git a/docs/notebooks/doc2vec-IMDB.ipynb b/docs/notebooks/doc2vec-IMDB.ipynb index 39dc6726d5..4f59ffb8de 100644 --- a/docs/notebooks/doc2vec-IMDB.ipynb +++ b/docs/notebooks/doc2vec-IMDB.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": 
"sha256:0399a84128f6ba9801e5c6a287bec7a0644784b6892896de60db48144d68daa8" + "signature": "sha256:fc80920786d62c0737c8530d8458e059e9c5f1a95cfefb7c34beafd875b34aa6" }, "nbformat": 3, "nbformat_minor": 0, @@ -182,12 +182,12 @@ "\n", "# speed setup by sharing results of 1st model's vocabulary scan\n", "simple_models[0].build_vocab(alldocs) # PV-DM/concat requires one special NULL word so it serves as template\n", - "print(simple_models[0].compact_name)\n", + "print(simple_models[0])\n", "for model in simple_models[1:]:\n", " model.reset_from(simple_models[0])\n", - " print(model.compact_name)\n", + " print(model)\n", "\n", - "models_by_name = OrderedDict((model.compact_name, model) for model in simple_models)" + "models_by_name = OrderedDict((str(model), model) for model in simple_models)" ], "language": "python", "metadata": {}, @@ -196,8 +196,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "dmc_d100n5w5mc2t4\n", - "dbow_d100n5mc2t4" + "Doc2Vec(dm/c,d100,n5,w5,mc2,t4)\n", + "Doc2Vec(dbow,d100,n5,mc2,t4)" ] }, { @@ -205,7 +205,7 @@ "stream": "stdout", "text": [ "\n", - "dmm_d100n5w10mc2t4" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t4)" ] }, { @@ -230,8 +230,8 @@ "collapsed": false, "input": [ "from gensim.test.test_doc2vec import ConcatenatedDoc2Vec\n", - "models_by_name['dbow+dmm'] = ConcatenatedDoc2Vec([models_by_name['dbow_d100n5mc2t4'], models_by_name['dmm_d100n5w10mc2t4']])\n", - "models_by_name['dbow+dmc'] = ConcatenatedDoc2Vec([models_by_name['dbow_d100n5mc2t4'], models_by_name['dmc_d100n5w5mc2t4']])" + "models_by_name['dbow+dmm'] = ConcatenatedDoc2Vec([simple_models[1], simple_models[2]])\n", + "models_by_name['dbow+dmc'] = ConcatenatedDoc2Vec([simple_models[1], simple_models[0]])" ], "language": "python", "metadata": {}, @@ -369,17 +369,22 @@ " with elapsed_timer() as eval_elapsed:\n", " (err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs)\n", " eval_duration = '%.1f' % eval_elapsed()\n", - " if err < 
best_error[name]:\n", + " best_indicator = ' '\n", + " if err <= best_error[name]:\n", " best_error[name] = err\n", - " print(\"%f : %i passes : %s %ss %ss\"%(err,epoch+1,name, duration, eval_duration))\n", + " best_indicator = '*' \n", + " print(\"%s%f : %i passes : %s %ss %ss\"%(best_indicator,err,epoch+1,name, duration, eval_duration))\n", "\n", - " eval_duration = ''\n", - " with elapsed_timer() as eval_elapsed:\n", - " (infer_err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs, infer=True)\n", - " eval_duration = '%.1f' % eval_elapsed()\n", - " if infer_err < best_error[name+'_inferred']:\n", - " best_error[name+'_inferred'] = infer_err\n", - " print(\"%f : %i passes : %s %ss %ss\"%(infer_err,epoch+1,name+'_inferred', duration, eval_duration))\n", + " if epoch == 0 or (epoch % 5) == 0:\n", + " eval_duration = ''\n", + " with elapsed_timer() as eval_elapsed:\n", + " (infer_err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs, infer=True)\n", + " eval_duration = '%.1f' % eval_elapsed()\n", + " best_indicator = ' '\n", + " if infer_err < best_error[name+'_inferred']:\n", + " best_error[name+'_inferred'] = infer_err\n", + " best_indicator = '*'\n", + " print(\"%s%f : %i passes : %s %ss %ss\"%(best_indicator,infer_err,epoch+1,name+'_inferred', duration, eval_duration))\n", "\n", " print('completed pass %i at alpha %f'%(epoch+1,alpha))\n", " alpha -= alpha_delta\n", @@ -393,8 +398,375 @@ "output_type": "stream", "stream": "stdout", "text": [ - "START 2015-06-10 15:45:49.141886\n", - "0.419480 : 1 passes : dmc_d100n5w5mc2t4 86.9s 1.4s" + "START 2015-06-15 23:42:20.225229\n", + "*0.412640 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 58.4s 1.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.401200 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 58.4s 10.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + 
"\n", + "*0.218280 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.195600 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.0s 5.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.273280 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.5s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.214000 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.5s 6.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.204680 : 1 passes : dbow+dmm 0.0s 2.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.182400 : 1 passes : dbow+dmm_inferred 0.0s 11.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.216240 : 1 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.222000 : 1 passes : dbow+dmc_inferred 0.0s 16.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 1 at alpha 0.025000\n", + "*0.358040 : 2 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 57.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.140320 : 2 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.223920 : 2 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.0s 1.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.136520 : 2 passes : dbow+dmm 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.141120 : 2 passes : dbow+dmc 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 2 at alpha 0.023800\n", + "*0.325440 : 3 passes : 
Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 62.6s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.124000 : 3 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.198680 : 3 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.121760 : 3 passes : dbow+dmm 0.0s 2.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.125120 : 3 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 3 at alpha 0.022600\n", + "*0.300600 : 4 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.0s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.115760 : 4 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.188680 : 4 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.6s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.114760 : 4 passes : dbow+dmm 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.115440 : 4 passes : dbow+dmc 0.0s 2.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 4 at alpha 0.021400\n", + "*0.281360 : 5 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 55.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.112000 : 5 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.4s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.182360 : 5 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.5s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.111800 : 5 passes : dbow+dmm 0.0s 1.4s" + ] + }, + { + 
"output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.111560 : 5 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 5 at alpha 0.020200\n", + "*0.266200 : 6 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.7s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.272000 : 6 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 54.7s 11.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.110560 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.112800 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 29.1s 5.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.178520 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 33.7s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.199200 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 33.7s 6.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.109040 : 6 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.118800 : 6 passes : dbow+dmm_inferred 0.0s 12.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.110400 : 6 passes : dbow+dmc 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.118400 : 6 passes : dbow+dmc_inferred 0.0s 16.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 6 at alpha 0.019000\n", + "*0.254600 : 7 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.107920 : 7 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.5s 0.8s" + ] + }, + { + "output_type": 
"stream", + "stream": "stdout", + "text": [ + "\n", + "*0.175560 : 7 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.0s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.107880 : 7 passes : dbow+dmm 0.0s 2.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.107760 : 7 passes : dbow+dmc 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 7 at alpha 0.017800\n", + "*0.246160 : 8 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.5s 0.9s" ] }, { @@ -402,7 +774,7 @@ "stream": "stdout", "text": [ "\n", - "0.384000 : 1 passes : dmc_d100n5w5mc2t4_inferred 86.9s 12.0s" + "*0.106640 : 8 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" ] }, { @@ -410,7 +782,7 @@ "stream": "stdout", "text": [ "\n", - "0.220440 : 1 passes : dbow_d100n5mc2t4 34.1s 0.8s" + "*0.173720 : 8 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" ] }, { @@ -418,7 +790,7 @@ "stream": "stdout", "text": [ "\n", - "0.216000 : 1 passes : dbow_d100n5mc2t4_inferred 34.1s 5.5s" + "*0.106640 : 8 passes : dbow+dmm 0.0s 2.4s" ] }, { @@ -426,7 +798,7 @@ "stream": "stdout", "text": [ "\n", - "0.269200 : 1 passes : dmm_d100n5w10mc2t4 46.7s 0.8s" + "*0.106320 : 8 passes : dbow+dmc 0.0s 1.7s" ] }, { @@ -434,7 +806,8 @@ "stream": "stdout", "text": [ "\n", - "0.214800 : 1 passes : dmm_d100n5w10mc2t4_inferred 46.7s 7.3s" + "completed pass 8 at alpha 0.016600\n", + "*0.239160 : 9 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 55.4s 0.8s" ] }, { @@ -442,7 +815,7 @@ "stream": "stdout", "text": [ "\n", - "0.210280 : 1 passes : dbow+dmm 0.0s 2.7s" + "*0.104120 : 9 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.8s 0.8s" ] }, { @@ -450,7 +823,7 @@ "stream": "stdout", "text": [ "\n", - "0.192000 : 1 passes : dbow+dmm_inferred 0.0s 14.3s" + "*0.170400 : 9 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" ] }, { @@ -458,7 +831,7 @@ "stream": "stdout", "text": [ "\n", - "0.219520 : 1 passes : dbow+dmc 0.0s 
2.2s" + "*0.103840 : 9 passes : dbow+dmm 0.0s 1.5s" ] }, { @@ -466,7 +839,7 @@ "stream": "stdout", "text": [ "\n", - "0.220000 : 1 passes : dbow+dmc_inferred 0.0s 18.8s" + "*0.104920 : 9 passes : dbow+dmc 0.0s 1.5s" ] }, { @@ -474,8 +847,8 @@ "stream": "stdout", "text": [ "\n", - "completed pass 1 at alpha 0.025000\n", - "0.365280 : 2 passes : dmc_d100n5w5mc2t4 61.0s 0.8s" + "completed pass 9 at alpha 0.015400\n", + "*0.233320 : 10 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.4s 1.3s" ] }, { @@ -483,7 +856,7 @@ "stream": "stdout", "text": [ "\n", - "0.337200 : 2 passes : dmc_d100n5w5mc2t4_inferred 61.0s 10.6s" + "*0.104120 : 10 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" ] }, { @@ -491,7 +864,7 @@ "stream": "stdout", "text": [ "\n", - "0.142400 : 2 passes : dbow_d100n5mc2t4 29.3s 1.4s" + "*0.170000 : 10 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.7s" ] }, { @@ -499,7 +872,7 @@ "stream": "stdout", "text": [ "\n", - "0.159600 : 2 passes : dbow_d100n5mc2t4_inferred 29.3s 5.5s" + " 0.104080 : 10 passes : dbow+dmm 0.0s 1.5s" ] }, { @@ -507,7 +880,7 @@ "stream": "stdout", "text": [ "\n", - "0.220720 : 2 passes : dmm_d100n5w10mc2t4 35.1s 0.8s" + "*0.104600 : 10 passes : dbow+dmc 0.0s 1.5s" ] }, { @@ -515,7 +888,8 @@ "stream": "stdout", "text": [ "\n", - "0.138480 : 2 passes : dbow+dmm 0.0s 1.9s" + "completed pass 10 at alpha 0.014200\n", + "*0.228680 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 52.8s 1.4s" ] }, { @@ -523,7 +897,7 @@ "stream": "stdout", "text": [ "\n", - "0.163200 : 2 passes : dbow+dmm_inferred 0.0s 13.1s" + "*0.222400 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 52.8s 10.3s" ] }, { @@ -531,7 +905,7 @@ "stream": "stdout", "text": [ "\n", - "0.141840 : 2 passes : dbow+dmc 0.0s 2.2s" + "*0.103280 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.9s" ] }, { @@ -539,7 +913,7 @@ "stream": "stdout", "text": [ "\n", - "0.172400 : 2 passes : dbow+dmc_inferred 0.0s 18.1s" + "*0.106000 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.1s 
5.3s" ] }, { @@ -547,8 +921,7 @@ "stream": "stdout", "text": [ "\n", - "completed pass 2 at alpha 0.023800\n", - "0.332200 : 3 passes : dmc_d100n5w5mc2t4 61.5s 0.8s" + "*0.167280 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.7s" ] }, { @@ -556,7 +929,7 @@ "stream": "stdout", "text": [ "\n", - "0.122920 : 3 passes : dbow_d100n5mc2t4 28.9s 0.8s" + " 0.206800 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.3s 6.5s" ] }, { @@ -564,7 +937,7 @@ "stream": "stdout", "text": [ "\n", - "0.128000 : 3 passes : dbow_d100n5mc2t4_inferred 28.9s 5.5s" + "*0.101800 : 11 passes : dbow+dmm 0.0s 2.2s" ] }, { @@ -572,7 +945,7 @@ "stream": "stdout", "text": [ "\n", - "0.202280 : 3 passes : dmm_d100n5w10mc2t4 35.1s 1.4s" + "*0.115600 : 11 passes : dbow+dmm_inferred 0.0s 12.1s" ] }, { @@ -580,7 +953,7 @@ "stream": "stdout", "text": [ "\n", - "0.121240 : 3 passes : dbow+dmm 0.0s 1.6s" + "*0.102920 : 11 passes : dbow+dmc 0.0s 1.4s" ] }, { @@ -588,7 +961,7 @@ "stream": "stdout", "text": [ "\n", - "0.138400 : 3 passes : dbow+dmm_inferred 0.0s 12.4s" + "*0.100400 : 11 passes : dbow+dmc_inferred 0.0s 15.9s" ] }, { @@ -596,7 +969,8 @@ "stream": "stdout", "text": [ "\n", - "0.121560 : 3 passes : dbow+dmc 0.0s 1.6s" + "completed pass 11 at alpha 0.013000\n", + "*0.225600 : 12 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.3s 0.8s" ] }, { @@ -604,7 +978,7 @@ "stream": "stdout", "text": [ "\n", - "0.148800 : 3 passes : dbow+dmc_inferred 0.0s 17.3s" + " 0.104040 : 12 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" ] }, { @@ -612,8 +986,7 @@ "stream": "stdout", "text": [ "\n", - "completed pass 3 at alpha 0.022600\n", - "0.312160 : 4 passes : dmc_d100n5w5mc2t4 59.4s 1.4s" + "*0.165160 : 12 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.9s 0.8s" ] }, { @@ -621,7 +994,7 @@ "stream": "stdout", "text": [ "\n", - "0.322000 : 4 passes : dmc_d100n5w5mc2t4_inferred 59.4s 11.0s" + " 0.102720 : 12 passes : dbow+dmm 0.0s 2.0s" ] }, { @@ -629,7 +1002,7 @@ "stream": "stdout", "text": [ "\n", - 
"0.114560 : 4 passes : dbow_d100n5mc2t4 32.2s 0.9s" + " 0.103360 : 12 passes : dbow+dmc 0.0s 1.5s" ] }, { @@ -637,7 +1010,8 @@ "stream": "stdout", "text": [ "\n", - "0.123200 : 4 passes : dbow_d100n5mc2t4_inferred 32.2s 5.6s" + "completed pass 12 at alpha 0.011800\n", + "*0.223720 : 13 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.6s 0.8s" ] }, { @@ -645,7 +1019,7 @@ "stream": "stdout", "text": [ "\n", - "0.190120 : 4 passes : dmm_d100n5w10mc2t4 41.9s 0.8s" + " 0.103520 : 13 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.4s 0.8s" ] }, { @@ -653,7 +1027,7 @@ "stream": "stdout", "text": [ "\n", - "0.214000 : 4 passes : dmm_d100n5w10mc2t4_inferred 41.9s 6.7s" + " 0.165320 : 13 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.0s 0.8s" ] }, { @@ -661,7 +1035,7 @@ "stream": "stdout", "text": [ "\n", - "0.114680 : 4 passes : dbow+dmm 0.0s 2.1s" + " 0.102080 : 13 passes : dbow+dmm 0.0s 2.1s" ] }, { @@ -669,7 +1043,7 @@ "stream": "stdout", "text": [ "\n", - "0.122400 : 4 passes : dbow+dmm_inferred 0.0s 12.5s" + "*0.102480 : 13 passes : dbow+dmc 0.0s 1.5s" ] }, { @@ -677,7 +1051,8 @@ "stream": "stdout", "text": [ "\n", - "0.114280 : 4 passes : dbow+dmc 0.0s 1.5s" + "completed pass 13 at alpha 0.010600\n", + "*0.221680 : 14 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.8s 0.8s" ] }, { @@ -685,7 +1060,7 @@ "stream": "stdout", "text": [ "\n", - "0.134400 : 4 passes : dbow+dmc_inferred 0.0s 17.6s" + "*0.102440 : 14 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.5s 0.9s" ] }, { @@ -693,8 +1068,7 @@ "stream": "stdout", "text": [ "\n", - "completed pass 4 at alpha 0.021400\n", - "0.290560 : 5 passes : dmc_d100n5w5mc2t4 60.4s 1.0s" + "*0.164480 : 14 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 40.0s 0.8s" ] }, { @@ -702,7 +1076,7 @@ "stream": "stdout", "text": [ "\n", - "0.109080 : 5 passes : dbow_d100n5mc2t4 33.6s 1.4s" + " 0.102120 : 14 passes : dbow+dmm 0.0s 1.5s" ] }, { @@ -710,27 +1084,302 @@ "stream": "stdout", "text": [ "\n", - "0.108800 : 5 passes : dbow_d100n5mc2t4_inferred 33.6s 5.6s" + " 
0.103640 : 14 passes : dbow+dmc 0.0s 1.5s" ] }, { "output_type": "stream", "stream": "stdout", "text": [ - "\n" + "\n", + "completed pass 14 at alpha 0.009400\n", + "*0.220560 : 15 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 52.6s 1.4s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.102040 : 15 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.163160 : 15 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 37.6s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102160 : 15 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102880 : 15 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 15 at alpha 0.008200\n", + "*0.218400 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 56.1s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.233600 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 56.1s 11.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102840 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.9s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.106000 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.0s 5.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.161920 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.190800 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.3s 6.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102280 : 16 passes : dbow+dmm 0.0s 1.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + 
"*0.109200 : 16 passes : dbow+dmm_inferred 0.0s 12.0s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102840 : 16 passes : dbow+dmc 0.0s 2.2s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.114800 : 16 passes : dbow+dmc_inferred 0.0s 15.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 16 at alpha 0.007000\n", + " 0.219000 : 17 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.9s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102960 : 17 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.9s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.161480 : 17 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.8s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102120 : 17 passes : dbow+dmm 0.0s 1.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.102040 : 17 passes : dbow+dmc 0.0s 2.1s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 17 at alpha 0.005800\n", + " 0.219600 : 18 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.1s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102400 : 18 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.0s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.161680 : 18 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.8s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.101680 : 18 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102120 : 18 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 18 at alpha 0.004600\n", + " 0.218920 : 19 passes : 
Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.5s 1.3s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102320 : 19 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.161600 : 19 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.101640 : 19 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102160 : 19 passes : dbow+dmc 0.0s 1.6s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 19 at alpha 0.003400\n", + "*0.218200 : 20 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.2s 0.8s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102560 : 20 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.9s 1.4s" ] }, { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "pyerr", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmin_alpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0melapsed_timer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0melapsed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m 
\u001b[0mtrain_model\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdoc_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 18\u001b[0m \u001b[0mduration\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'%.1f'\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0melapsed\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/scratch/Documents/dev2015/gensim_venv/src/gensim-develop/gensim/models/word2vec.py\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(self, sentences, total_words, word_count, chunksize)\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mjob_no\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgrouper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_prepare_sentences\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msentences\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchunksize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 527\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"putting job #%i in the queue, qsize=%i\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mjob_no\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 528\u001b[0;31m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mjob\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 529\u001b[0m 
\u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minfo\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"reached the end of input; waiting to finish %i outstanding jobs\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mjobs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mqsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 530\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mworkers\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/scratch/miniconda3/envs/gensim_cenv/lib/python3.4/queue.py\u001b[0m in \u001b[0;36mput\u001b[0;34m(self, item, block, timeout)\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxsize\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 136\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_full\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 137\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - 
"\u001b[0;32m/Users/scratch/miniconda3/envs/gensim_cenv/lib/python3.4/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.161360 : 20 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.6s 0.7s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "*0.101360 : 20 passes : dbow+dmm 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + " 0.102560 : 20 passes : dbow+dmc 0.0s 1.5s" + ] + }, + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "completed pass 20 at alpha 0.002200\n", + "END 2015-06-16 00:27:00.604456\n" ] } ], @@ -761,16 +1410,16 @@ "output_type": "stream", "stream": "stdout", "text": [ - "0.108800 dbow_d100n5mc2t4_inferred\n", - "0.109080 dbow_d100n5mc2t4\n", - "0.114280 dbow+dmc\n", - "0.114680 dbow+dmm\n", - "0.122400 dbow+dmm_inferred\n", - "0.134400 dbow+dmc_inferred\n", - "0.190120 dmm_d100n5w10mc2t4\n", - "0.214000 dmm_d100n5w10mc2t4_inferred\n", - "0.290560 dmc_d100n5w5mc2t4\n", - "0.322000 dmc_d100n5w5mc2t4_inferred\n" + "0.100400 
dbow+dmc_inferred\n", + "0.101360 dbow+dmm\n", + "0.102040 Doc2Vec(dbow,d100,n5,mc2,t4)\n", + "0.102040 dbow+dmc\n", + "0.106000 Doc2Vec(dbow,d100,n5,mc2,t4)_inferred\n", + "0.109200 dbow+dmm_inferred\n", + "0.161360 Doc2Vec(dm/m,d100,n5,w10,mc2,t4)\n", + "0.190800 Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred\n", + "0.218200 Doc2Vec(dm/c,d100,n5,w5,mc2,t4)\n", + "0.222400 Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred\n" ] } ], @@ -807,7 +1456,7 @@ "print('for doc %d...' % doc_id)\n", "for model in simple_models:\n", " inferred_docvec = model.infer_vector(alldocs[doc_id].words)\n", - " print('%s: %s' % (model.compact_name, model.docvecs.most_similar([inferred_docvec],topn=3)))" + " print('%s:\\n %s' % (model, model.docvecs.most_similar([inferred_docvec],topn=3)))" ], "language": "python", "metadata": {}, @@ -816,8 +1465,9 @@ "output_type": "stream", "stream": "stdout", "text": [ - "for doc 81518...\n", - "dmc_d100n5w5mc2t4: [(81518, 0.6632639169692993), (82236, 0.5500479340553284), (11391, 0.5488752126693726)]" + "for doc 10937...\n", + "Doc2Vec(dm/c,d100,n5,w5,mc2,t4):\n", + " [(10937, 0.6842625141143799), (7308, 0.42190566658973694), (10839, 0.4074726700782776)]" ] }, { @@ -825,7 +1475,8 @@ "stream": "stdout", "text": [ "\n", - "dbow_d100n5mc2t4: [(81518, 0.9144332408905029), (61723, 0.6625540256500244), (60474, 0.6582651138305664)]" + "Doc2Vec(dbow,d100,n5,mc2,t4):\n", + " [(10937, 0.9522888660430908), (12203, 0.5845203399658203), (35262, 0.575614869594574)]" ] }, { @@ -833,7 +1484,8 @@ "stream": "stdout", "text": [ "\n", - "dmm_d100n5w10mc2t4: [(81518, 0.8487115502357483), (61608, 0.8010116815567017), (82548, 0.8009110689163208)]" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t4):\n", + " [(10937, 0.8651494979858398), (11717, 0.8156246542930603), (58074, 0.8120745420455933)]" ] }, { @@ -844,7 +1496,7 @@ ] } ], - "prompt_number": 10 + "prompt_number": 11 }, { "cell_type": "markdown", @@ -870,7 +1522,7 @@ "model = random.choice(simple_models) # and a random model\n", "sims = 
model.docvecs.most_similar(doc_id, topn=model.docvecs.count) # get *all* similar documents\n", "print('TARGET (%d): \u00ab%s\u00bb\\n' % (doc_id, ' '.join(alldocs[doc_id].words)))\n", - "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model.compact_name)\n", + "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model)\n", "for label, index in [('MOST',0), ('MEDIAN',len(sims)//2), ('LEAST',len(sims)-1)]:\n", " print('%s %s: \u00ab%s\u00bb\\n' % (label, sims[index], ' '.join(alldocs[sims[index][0]].words)))\n" ], @@ -881,20 +1533,20 @@ "output_type": "stream", "stream": "stdout", "text": [ - "TARGET (74201): \u00ablook , i have a strong stomach , but i have no use for torture porn in my entertainment . a few weeks ago , i saw a preview of this film that gave no indication of it's true nature . this , plus the intriguing poster led me to believe this was going to be a brooding drama in the hitchcock tradition . instead , i found myself watching the ugliest , most disgusting film i have ever attended of my own free will . i should have guessed when the cashier gave me a funny look while getting my ticket . the first thing i realized was the script was by an amateur . the scenes and dialog jumping around with no thought or subtlety . like other reviewers , i knew who the torturing villain was within the first few minutes . but , i still had to sit through several scenes of dismemberment and pain , which made me sick . sick that i had spent money to watch this disaster . i can't imagine why lindsay lohan would agree to be in this production . there are other venues to stretch her acting talents . neal mcdonough and julia ormond's rent must have been due . the story , such as it is has at it's core an interesting premise . a top director might have made a respectable film out of it with a total rewrite , without the gore and more atmosphere . this movie is an absolute , total disaster . 
no one involved has anything to be proud of .\u00bb\n", + "TARGET (90609): \u00absomehow in line with \" calendar girls \" and \" mrs henderson presents \" as it deals with the sex life of elderly ladies , \" irina palm \" is the story of the slightly-more-than-middle-aged maggie who has to raise a large sum of money in order to save her grandchild from dying , takes a job as a w*nker in a sex club ( minimal physical touch , no nude scenes , all done in the best taste ) ( . . . imaginable under the circumstances , that is ) - and finds that she has a rare talent for just that sort of work . i liked it . the story is given every conceivable , foreseeable twist and turn - a romance with the sex bar proprietor who just had to sample her talent on the sly ; her friends who are dying to be let in on the particulars , but still are too prudish not to turn their backs on her ; her son finding out and flying into a rage , and the reconciliation with her hostile daughter-in-law when she learns about maggie's sacrifice - all predictable , but still : i liked it . perhaps because everybody in the film puts out great performance . miki manoljovic is very good as the sex bar owner who falls in love with his unlikely ace employee , kevin bishop is frighteningly good as the loving , mild-mannered son who cannot really see his way through to understand his mother ( which son could , given her line of work ? ) , and marianne faithful , that rarely seen blast from the past ( my past , at least ) is certainly a far cry from her ophelia in 1969 ( yes , i do know that she's been doing bits and bobs in between , but somehow i've missed them ) . 
marianne faithful's slow , slightly hazy style is recognizable still , and i'd say she carries this film through in a very touching way - no pun intended .\u00bb\n", "\n", - "SIMILAR/DISSIMILAR DOCS PER MODEL dbow_d100n5mc2t4:\n", + "SIMILAR/DISSIMILAR DOCS PER MODEL Doc2Vec(dm/m,d100,n5,w10,mc2,t4):\n", "\n", - "MOST (80740, 0.745945394039154): \u00abwhat a waste of film stock . overly atmospheric . dafoe and walken mailed their performances in . argento couldn't find a stamp . the dialogue seemed like improvisation , which i hope it was , because nobody should have been paid for it . even the possible saving grace of sex and nude scenes were uninspired .\u00bb\n", + "MOST (36095, 0.6738423109054565): \u00abwalter matthau is wonderful as the \" philandering \" dentist dr . julian winston whose frequent fibs to girlfriend goldie provide textbook proof of the dangers of lying . goldie hawn's touching kook toni simmons certainly deserved to win her oscar . ingrid bergman's work as the stiff-as-starch nurse stephanie is also touching to watch as she comes out of her shell , slowly and nervously . this is a great movie to watch in the springtime , or any time for that matter . it's very underrated ; i never heard about it until i found it in the video store , and what a find !\u00bb\n", "\n", - "MEDIAN (41578, 0.4059261083602905): \u00abawful film . terrible acting , cheesy , totally unrealistic , embarrassing to anyone who has played the game . for a start that guy is not a hooker , he would be snapped in two . as for ''i score , that's my job'' well no it's not . for the the uneducated american audience it might come across as a good film . for me , well , that's a few hours of my life i'll never get back . i read through the reviews and came across one where the guy sounded like he knew what he was talking about . 
then i read - ''and while american rugby may never reach the level of talent that new zealand or south africa has , third in the world is also nothing to hang your head about'' all i can say is , lmfao ! keep playing your american football and baseball , leave the real sports to the big boys .\u00bb\n", + "MEDIAN (84045, 0.33689069747924805): \u00abembarrassingly bad , low-budget italian-made war movie set in holland in the dying days of wwii . a tedious , plodding storyline concerning a plot to steal some diamonds from a german hq , awful acting and dreadful editing make this movie a prize turkey from the opening scene right through to the cringeworthy oh-so-60s `romantic' ending which will have you reaching for the puke bucket - that is if you haven't already reached for the `off' button long before . the worst performances come from john ireland as captain o'connor and the blonde female lead , whose name escapes me . she plays the `love interest' to our rugged leading man . perhaps it wasn't entirely her fault , as back then female romantic leads , especially in action movies , were often written as weak , wishy-washy , sobbing , super-sensitive emotional jellyfish . this one is no exception , and the second , supporting female character is just as bad . simply nauseating to watch . even the action scenes ( which are few and far between , except towards the end ) are boring and predictable . most ludicrously , in the climatic battle , we have rugged leading man and his two mates holed up in some sandbagged bunker and effortlessly gunning down endless attacking germans right , left and centre . the germans of course are all terrible shots and even seem to be eager cannon-fodder , as they make little or no effort to take cover , dying spectacularly in droves amid much flailing of arms and comic-book shouts of `aaaaargh' . this is glorification of war at its very worst . 
then suddenly - right in the middle of the battle - the resistance guy pulls up completely unmolested in a stolen german jeep and trots effortlessly along a convenient trench - which leads directly into the bunker and which somehow hundreds of germans approaching from all sides have failed to spot - and calmly joins our heroes inside the bunker . another scene of crass stupidity that really must be seen to be believed has captain o'connor flying over the german lines in a reconnaissance plane which , with the help of some clumsily-inserted old newsreel footage , is suddenly and miraculously transformed into a heavy bomber disgorging its massive payload from wide-open bomb bays and pulverising the germans beneath , before once more instantaneously reverting to being a small reconnaissance plane again . the concept of an ongoing truce between the resistance fighters and the occupying german army also seems ludicrous to me , yet this is a central theme of the movie . the english title of the film was obviously inspired by `the dirty dozen' ( which was made around the same time ) but it doesn't deserve to be mentioned in the same breath . the original italian title of this film ( dalle ardenne all' inferno ) , the sleeve notes for the english language video release are also grossly misleading . this film has absolutely nothing to do with the battle of the ardennes . the ardennes isn't even in holland - it's a part of belgium - which indicates that the film-makers' knowledge of world war ii events and geography was just as limited as their ability to make even a half-decent film . don't waste two hours of your valuable time on this rubbish . one of the worst movies i've had the misfortune to sit through - and i've sat through some garbage ! !\u00bb\n", "\n", - "LEAST (31799, 0.05202261731028557): \u00abok , so in any wile e . coyote-road runner cartoons , we know that wec is going to set up all sorts of traps for rr , but always maim himself in various ways . 
that certainly happens in \" beep , beep \" . predictable ? i guess that it is , but when you think about it , these cartoons show how the more you try to harm someone else , the more you get harmed ; sort of like how daffy duck always tries to undermine bugs bunny's integrity but bugs sees around it . overall , this is another classic from the termite terrace crowd . sometimes , i think that if we really had wanted to ease cold war tensions , we could have just let the soviet union see looney tunes cartoons ; i'm sure that they would have loved them . another great one . ps : i learned on \" jeopardy ! \" that wile e . coyote's middle name is ethelbert .\u00bb\n", + "LEAST (44173, -0.15633562207221985): \u00abfrom what i understand , fox was embarrassed they released a pg-13 alien/predator movie not so long ago . it was not well received by any means . not exactly sure where to go next , seeing as they thought anderson was the best director for the franchise and they had produced a true sci-fi gem , fox turned to it's small , but knowledgeable group of monkeys for answers . these monkeys were by no means veterans of writing sci-fi flicks , but had seen burton's planet of the apes remake and house of the dead . their first task : hire actors . fox gave them a reasonable budget but the monkeys wanted to save the money . they hired fifteen tv actors shortly after . now , the script . the monkeys wanted to save more of the budget so they wrote the movie themselves . leaving out important aspects of the two franchises was the easy part . thinking of great new lines for the general audience to remember years down the line - that was more difficult . they butted heads awhile and came up with a truly award-winning screenplay equipped with clich\u00e9 characters , idiotic decisions an gaping plot holes . fox was pleased thus far with the results but wanted to see what was to become of the centerpieces to the film - the aliens and predators . 
the monkeys again wanted to save money in the budget so they decided to trash the great robotics used in the otherwise terrible avp original and go with the man-in-the-suit alien seen in the old films . the actors playing the aliens had trouble fitting into the suits as they weren't properly sized by the monkeys so they jiggled their plastic heads throughout the film with honor . as for the predators , the monkeys decided one predator was enough this time around ( again , saving budget ) to fight the hordes of aliens that seemingly come out of nowhere . but what about the effects , you ask ? come on now , people . they may be monkeys but they clearly knew cgi would play a key role in the film . without diving into the budget , the monkeys used a standard final cut program and cut and pasted some very nice fire and spark effects throughout . putting red and green filters over the camera lens provided some excellent predator visions . the setting was something the monkeys thought long and hard about . if this was to be on earth , in colorado of all places , they needed to make it realistic . this was where they admitted they might have made a mistake . see , the monkeys didn't have proper training in this department so they thought turning the lights off in the city and having the movie play out in the dead of night and in the rain was the right thing to do . they simply forgot people like to see the creatures instead of looking at shadows and rain the whole film . to add insult to injury , the monkeys accidentally filmed all the fight scenes incredibly close so no one could see what was fighting or who it was . but again , rookie mistake . the rating . fox told the monkeys to make the movie r-rated . that was easy . without showing how many of the injuries or deaths actually happened , the monkeys made a habit of showing the carnage after the fact . 
it was simple : the viewers got the gore they desired and the monkeys didn't have to film the majority of action shots involving that violence . some of the actors originally had questions concerning the screenplay . why does a blue liquid the predator has endless amounts of magically disintegrate whatever he wants it to and nothing more than that ? why is an ex-convict driving around in a police car the entire movie ? why did the monkeys forget to show a full body shot of the aliens ? why does a clock play a more memorable role than any of the main characters ? the list of questions just kept growing but the monkeys ignored them and finished their masterpiece . fox was thrilled with their work . so thrilled that they opened the movie nationwide on christmas day and even spent a few bucks advertising it the week before it came out . the monkeys had successfully made another installment in these cherished franchises . but some ask , what ever happened to the budget the monkeys forgot to use ? they put it towards their next film : aliens vs . predator vs . hulk hogan . they knew the general public would be upset with the title but they have since released this statement : \" to the people- do not worry about our upcoming film . it will be rated r and will have violence . \" and everyone lived happily ever after . the end .\u00bb\n", "\n" ] } ], - "prompt_number": 16 + "prompt_number": 13 }, { "cell_type": "markdown", @@ -920,12 +1572,13 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 12 + "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ + "import random\n", "from IPython.display import HTML\n", "# pick a random word with a suitable number of occurences\n", "while True:\n", @@ -936,7 +1589,7 @@ "# word = 'plot'\n", "similars_per_model = [str(model.most_similar(word,topn=20)).replace('), ','),
\\n') for model in word_models]\n", "similar_table = (\"
\" +\n", - " \"\".join([model.compact_name for model in word_models]) + \n", + " \"\".join([str(model) for model in word_models]) + \n", " \"
\" +\n", " \"\".join(similars_per_model) +\n", " \"
\")\n", @@ -950,85 +1603,85 @@ "output_type": "stream", "stream": "stdout", "text": [ - "most similar words for 'abundance' (146 occurences)\n" + "most similar words for 'comedy/drama' (38 occurences)\n" ] }, { "html": [ - "
dmc_d100n5w5mc2t4dbow_d100n5mc2t4dmm_d100n5w10mc2t4
[('array', 0.6298288702964783),
\n", - "('quantity', 0.5925834774971008),
\n", - "('overabundance', 0.5884098410606384),
\n", - "('meaninglessness', 0.588019073009491),
\n", - "('assemblage', 0.5832504034042358),
\n", - "('totality', 0.582420825958252),
\n", - "('vapidity', 0.5777873992919922),
\n", - "('extremity', 0.5740029811859131),
\n", - "('excess', 0.5700401067733765),
\n", - "('arsenal', 0.5695343017578125),
\n", - "('iqs', 0.5652327537536621),
\n", - "('8-9', 0.5581415891647339),
\n", - "('assortment', 0.5561405420303345),
\n", - "('tons', 0.5546265840530396),
\n", - "('torrents', 0.5545516014099121),
\n", - "('ultimatums', 0.5542378425598145),
\n", - "('amount', 0.5540366172790527),
\n", - "('quantities', 0.5512855052947998),
\n", - "('roster', 0.5505189895629883),
\n", - "('litany', 0.549481987953186)]
[('bespattered', 0.4104897975921631),
\n", - "('borel', 0.39388203620910645),
\n", - "(\"'devil'\", 0.3879944086074829),
\n", - "('train', 0.38379138708114624),
\n", - "('nagoya', 0.377510130405426),
\n", - "('gencon', 0.37605035305023193),
\n", - "('geometric', 0.3748994469642639),
\n", - "('un-funniest', 0.3717661201953888),
\n", - "('psychotherapy', 0.3682197332382202),
\n", - "('casted', 0.36680951714515686),
\n", - "('high-tailing', 0.3651661276817322),
\n", - "(\"ensign's\", 0.35965579748153687),
\n", - "('rocque', 0.3588852286338806),
\n", - "('publishers', 0.3534497022628784),
\n", - "('pseudo-comic', 0.35254913568496704),
\n", - "('mignard', 0.3523959219455719),
\n", - "('pritchert', 0.3517644703388214),
\n", - "('ours', 0.3500378429889679),
\n", - "('bayonne', 0.3490917384624481),
\n", - "('soup\u00e7on', 0.34788060188293457)]
[('overabundance', 0.8075829744338989),
\n", - "('assemblage', 0.8018349409103394),
\n", - "('assortment', 0.7681021094322205),
\n", - "('ounce', 0.762627363204956),
\n", - "('array', 0.7543219327926636),
\n", - "('excess', 0.7436821460723877),
\n", - "('amalgam', 0.7408836483955383),
\n", - "('unheard', 0.740603506565094),
\n", - "('over-abundance', 0.7144984602928162),
\n", - "('over-load', 0.7117946743965149),
\n", - "('overdose', 0.7113994359970093),
\n", - "('infestation', 0.7078109383583069),
\n", - "('amalgamation', 0.7040037512779236),
\n", - "('exemplar', 0.688831090927124),
\n", - "('accumulation', 0.686922550201416),
\n", - "('onslaught', 0.685982346534729),
\n", - "('exhibition', 0.6785738468170166),
\n", - "('arsenal', 0.6754549741744995),
\n", - "('involuntary', 0.6744215488433838),
\n", - "('oodles', 0.6684854626655579)]
" + "
Doc2Vec(dm/c,d100,n5,w5,mc2,t4)Doc2Vec(dbow,d100,n5,mc2,t4)Doc2Vec(dm/m,d100,n5,w10,mc2,t4)
[('comedy', 0.7096928358078003),
\n", + "('drama', 0.6825233101844788),
\n", + "('dramedy', 0.6664647459983826),
\n", + "('thriller', 0.6615678071975708),
\n", + "('horror/comedy', 0.6410363912582397),
\n", + "('adventure', 0.6175029277801514),
\n", + "('chiller', 0.5992485284805298),
\n", + "('melodrama', 0.5929774045944214),
\n", + "('romance', 0.576662540435791),
\n", + "('romp', 0.5749073028564453),
\n", + "('science-fiction', 0.5690299868583679),
\n", + "('farce', 0.5652514100074768),
\n", + "('weeper', 0.5628592371940613),
\n", + "('drama/comedy', 0.5627389550209045),
\n", + "('whodunit', 0.5624251961708069),
\n", + "('sci-fi', 0.5603950023651123),
\n", + "('mockumentary', 0.5558925271034241),
\n", + "('biopic', 0.5510786771774292),
\n", + "('sitcom', 0.5482240915298462),
\n", + "('road-movie', 0.5472671985626221)]
[('adrenaline-pumping', 0.45663613080978394),
\n", + "('kipling', 0.4251996576786041),
\n", + "('appears', 0.4016043245792389),
\n", + "('five-second', 0.3902825713157654),
\n", + "(\"fmv's\", 0.3895858824253082),
\n", + "('aardvarks', 0.3821781873703003),
\n", + "('promulgated', 0.3801535367965698),
\n", + "('inert', 0.37795290350914),
\n", + "('floorboards', 0.37393927574157715),
\n", + "(\"skeletor's\", 0.37129074335098267),
\n", + "('generate', 0.36898282170295715),
\n", + "('open-ended', 0.36304017901420593),
\n", + "('`i', 0.36297476291656494),
\n", + "('inching', 0.3623065948486328),
\n", + "('digestive', 0.36074209213256836),
\n", + "('yoji', 0.36062514781951904),
\n", + "('bergman', 0.36027780175209045),
\n", + "(\"hodder's\", 0.35910874605178833),
\n", + "('40-something', 0.355363667011261),
\n", + "('bushido', 0.35415762662887573)]
[('comedy-drama', 0.6464394330978394),
\n", + "('thriller', 0.6074070930480957),
\n", + "('comedy', 0.597672700881958),
\n", + "('dramedy', 0.5864953994750977),
\n", + "('action-drama', 0.5831291079521179),
\n", + "('actioner', 0.5727459192276001),
\n", + "('potboiler', 0.5611938238143921),
\n", + "('road-movie', 0.5596767663955688),
\n", + "('weeper', 0.5421388149261475),
\n", + "('romcom', 0.5401057004928589),
\n", + "('chiller', 0.5400895476341248),
\n", + "('drama', 0.5357507467269897),
\n", + "('flick', 0.5278905630111694),
\n", + "('action/thriller', 0.5270429253578186),
\n", + "('diversion', 0.5243188738822937),
\n", + "('action-comedy', 0.5170137882232666),
\n", + "('confection', 0.5149624347686768),
\n", + "('telemovie', 0.5138946771621704),
\n", + "('yarn', 0.5052254796028137),
\n", + "('farce', 0.5036333799362183)]
" ], "metadata": {}, "output_type": "pyout", - "prompt_number": 13, + "prompt_number": 16, "text": [ - "" + "" ] } ], - "prompt_number": 13 + "prompt_number": 16 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Do the DBOW words look meaningless? That's because the gensim DBOW model doesn't train word vectors \u2013 they remain at their random initialized values \u2013 unless you ask with the `dbow_words=1` initialization parameter. The DBOW doc vectors train faster \u2013 and are even better on tasks like IMDB sentiment-prediction \u2013 *without* simultaneous word-training. \n", + "Do the DBOW words look meaningless? That's because the gensim DBOW model doesn't train word vectors \u2013 they remain at their random initialized values \u2013 unless you ask with the `dbow_words=1` initialization parameter. Concurrent word-training slows DBOW mode significantly, and offers little improvement (and sometimes a little worsening) of the error rate on this IMDB sentiment-prediction task. \n", "\n", "Words from DM models tend to show meaningfully similar words when there are many examples in the training data (as with 'plot' or 'actor'). 
(All DM modes inherently involve word vector training concurrent with doc vector training.)" ] @@ -1052,7 +1705,7 @@ "for model in word_models:\n", " sections = model.accuracy('questions-words.txt')\n", " correct, incorrect = (len(sum((s['correct'] for s in sections), [])), len(sum((s['incorrect'] for s in sections),[])))\n", - " print('%s: %0.2f%% correct (%d of %d)' % (model.compact_name, float(correct*100)/(correct+incorrect), correct, correct+incorrect))" + " print('%s: %0.2f%% correct (%d of %d)' % (model, float(correct*100)/(correct+incorrect), correct, correct+incorrect))" ], "language": "python", "metadata": {}, @@ -1061,8 +1714,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "dmc_d100n5w5mc2t4: 23.76% correct (4758 of 20024)\n", - "dbow_d100n5mc2t4: 0.00% correct (0 of 20024)" + "Doc2Vec(dm/c,d100,n5,w5,mc2,t4): 28.13% correct (5650 of 20086)\n", + "Doc2Vec(dbow,d100,n5,mc2,t4): 0.01% correct (2 of 20086)" ] }, { @@ -1070,7 +1723,7 @@ "stream": "stdout", "text": [ "\n", - "dmm_d100n5w10mc2t4: 26.57% correct (5320 of 20024)" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t4): 27.49% correct (5522 of 20086)" ] }, { @@ -1081,7 +1734,7 @@ ] } ], - "prompt_number": 14 + "prompt_number": 18 }, { "cell_type": "markdown", diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 650447ccb7..f0581b62ae 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -14,7 +14,7 @@ Initialize a model with e.g.:: ->>> model = Doc2Vec(sentences, size=100, window=8, min_count=5, workers=4) +>>> model = Doc2Vec(documents, size=100, window=8, min_count=5, workers=4) Persist a model to disk with:: @@ -56,28 +56,31 @@ from six import string_types, integer_types try: - from gensim.models.doc2vec_inner import train_sentence_dbow, train_sentence_dm, train_sentence_dm_concat,\ + from gensim.models.doc2vec_inner import train_document_dbow, train_document_dm, train_document_dm_concat,\ FAST_VERSION except: # failed... 
fall back to plain numpy (20-80x slower training than the above) FAST_VERSION = -1 - def train_sentence_dbow(model, word_vocabs, doctag_indices, alpha, work=None, + def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ - Update distributed bag of words model by training on a single sentence. + Update distributed bag of words model ("PV-DBOW") by training on a single document. - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Doc2Vec.train()`. + Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - If train_words is True, simultaneously train word-to-word (not just doc-to-word) + The document is provided as `word_vocabs`, a list of Vocab objects which provide + indexes into the word_vector array, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. (See `_prepare_items()`.) + + If `train_words` is True, simultaneously train word-to-word (not just doc-to-word) examples, exactly as per Word2Vec skip-gram training. (Without this option, word vectors are neither consulted nor updated during DBOW doc vector training.) - If learn_words is True, training examples will cause word vectors to be - updated. If learn_hidden is True, training examples will update the internal - hidden layer weights. + Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to + prevent learning-updates to those respective model weights, as if using the + (partially-)frozen model to infer other compatible vectors. This is the non-optimized, Python version. If you have cython installed, gensim will use the optimized version from doc2vec_inner instead. 
@@ -90,24 +93,35 @@ def train_sentence_dbow(model, word_vocabs, doctag_indices, alpha, work=None, if train_words and learn_words: train_sentence_sg(model, word_vocabs, alpha, work) # TODO: adapt for word_vectors/word_locks - for doctag_index in doctag_indices: + for doctag_index in doctag_indexes: for word in word_vocabs: if word is None: - continue # OOV word in the input sentence => skip + continue # OOV word in the input document => skip train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags, learn_hidden=learn_hidden, context_vectors=doctag_vectors, context_locks=doctag_locks) return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm(model, word_vocabs, doctag_indices, alpha, work=None, neu1=None, + def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ - Update distributed memory model by training on a single sentence. + Update distributed memory model ("PV-DM") by training on a single document. + + Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This + method implements the DM model with a projection (input) layer that is + either the sum or mean of the context vectors, depending on the model's + `dm_mean` configuration field. See `train_document_dm_concat()` for the DM model + with a concatenated input layer. + + The document is provided as `word_vocabs`, a list of Vocab objects which provide + indexes into the word_vectors array, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. (See `_prepare_items()`.) - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Doc2Vec.train()`.
+ Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to + prevent learning-updates to those respective model weights, as if using the + (partially-)frozen model to infer other compatible vectors. This is the non-optimized, Python version. If you have a C compiler, gensim will use the optimized version from doc2vec_inner instead. @@ -122,41 +136,48 @@ def train_sentence_dm(model, word_vocabs, doctag_indices, alpha, work=None, neu1 if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - doctag_sum = np_sum(doctag_vectors[doctag_indices], axis=0) - doctag_len = len(doctag_indices) + doctag_sum = np_sum(doctag_vectors[doctag_indexes], axis=0) + doctag_len = len(doctag_indexes) for pos, word in enumerate(word_vocabs): if word is None: - continue # OOV word in the input sentence => skip + continue # OOV word in the input document => skip reduced_window = random.randint(model.window) # `b` in the original doc2vec code start = max(0, pos - model.window + reduced_window) window_pos = enumerate(word_vocabs[start : pos + model.window + 1 - reduced_window], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(word_vectors[word2_indices], axis=0) + doctag_sum # 1 x layer1_size - if word2_indices and model.cbow_mean: - l1 /= (len(word2_indices) + doctag_len) - neu1e = train_cbow_pair(model, word, word2_indices, l1, alpha, learn_vectors=False, learn_hidden=True) - if word2_indices and not model.cbow_mean: - neu1e /= (len(word2_indices) + doctag_len) + word2_indexes = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] + l1 = np_sum(word_vectors[word2_indexes], axis=0) + doctag_sum # 1 x layer1_size + if word2_indexes and model.cbow_mean: + l1 /= (len(word2_indexes) + doctag_len) + neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha, learn_vectors=False, learn_hidden=True) + if word2_indexes and not model.cbow_mean: + neu1e /=
(len(word2_indexes) + doctag_len) if learn_doctags: - doctag_vectors[doctag_indices] += \ - neu1e * np_repeat(doctag_locks[doctag_indices],model.vector_size).reshape(-1,model.vector_size) + doctag_vectors[doctag_indexes] += \ + neu1e * np_repeat(doctag_locks[doctag_indexes],model.vector_size).reshape(-1,model.vector_size) if learn_words: - word_vectors[word2_indices] += \ - neu1e * np_repeat(word_locks[word2_indices],model.vector_size).reshape(-1,model.vector_size) + word_vectors[word2_indexes] += \ + neu1e * np_repeat(word_locks[word2_indexes],model.vector_size).reshape(-1,model.vector_size) return len([word for word in word_vocabs if word is not None]) - def train_sentence_dm_concat(model, word_vocabs, doctag_indices, alpha, work=None, neu1=None, + def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): """ - Update distributed memory model by training on a single sentence, using a + Update distributed memory model ("PV-DM") by training on a single document, using a concatenation of the context window word vectors (rather than a sum or average). - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Doc2Vec.train()`. + Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. + + The document is provided as `word_vocabs`, a list of Vocab objects which provide + indexes into the word_vectors array, and `doctag_indexes`, which provide indexes + into the doctag_vectors array. (See `_prepare_items()`.) + + Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to + prevent learning-updates to those respective model weights, as if using the + (partially-)frozen model to infer other compatible vectors. This is the non-optimized, Python version.
If you have a C compiler, gensim will use the optimized version from doc2vec_inner instead. @@ -171,41 +192,41 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indices, alpha, work=Non if doctag_locks is None: doctag_locks = model.docvecs.doctag_syn0_lockf - doctag_len = len(doctag_indices) + doctag_len = len(doctag_indexes) if doctag_len != model.dm_tag_count: return 0 # skip doc without expected doctag(s) null_word = model.vocab['\0'] pre_pad_count = model.window post_pad_count = model.window - padded_sentence_indices = ( + padded_document_indexes = ( (pre_pad_count * [null_word.index]) # pre-padding + [word.index for word in word_vocabs if word is not None] # elide out-of-Vocabulary words + (post_pad_count * [null_word.index]) # post-padding ) - for pos in range(pre_pad_count, len(padded_sentence_indices) - post_pad_count): - word_context_indices = ( - padded_sentence_indices[pos - pre_pad_count : pos] # preceding words - + padded_sentence_indices[pos + 1 : pos + 1 + post_pad_count] # following words + for pos in range(pre_pad_count, len(padded_document_indexes) - post_pad_count): + word_context_indexes = ( + padded_document_indexes[pos - pre_pad_count : pos] # preceding words + + padded_document_indexes[pos + 1 : pos + 1 + post_pad_count] # following words ) - word_context_len = len(word_context_indices) - predict_word = model.vocab[model.index2word[padded_sentence_indices[pos]]] + word_context_len = len(word_context_indexes) + predict_word = model.vocab[model.index2word[padded_document_indexes[pos]]] # numpy advanced-indexing copies; concatenate, flatten to 1d - l1 = concatenate((doctag_vectors[doctag_indices], word_vectors[word_context_indices])).ravel() + l1 = concatenate((doctag_vectors[doctag_indexes], word_vectors[word_context_indexes])).ravel() neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, learn_hidden=learn_hidden, learn_vectors=False) # filter by locks and shape for addition to source vectors - e_locks = 
concatenate((doctag_locks[doctag_indices], word_locks[word_context_indices])) + e_locks = concatenate((doctag_locks[doctag_indexes], word_locks[word_context_indexes])) neu1e_r = (neu1e.reshape(-1,model.vector_size) * np_repeat(e_locks,model.vector_size).reshape(-1,model.vector_size)) if learn_doctags: - np_add.at(doctag_vectors, doctag_indices, neu1e_r[:doctag_len]) + np_add.at(doctag_vectors, doctag_indexes, neu1e_r[:doctag_len]) if learn_words: - np_add.at(word_vectors, word_context_indices, neu1e_r[doctag_len:]) + np_add.at(word_vectors, word_context_indexes, neu1e_r[doctag_len:]) - return len(padded_sentence_indices) - pre_pad_count - post_pad_count + return len(padded_document_indexes) - pre_pad_count - post_pad_count class TaggedDocument(namedtuple('TaggedDocument','words tags')): @@ -253,15 +274,15 @@ def __init__(self, mapfile_path=None): self.count = -1 self.mapfile_path = mapfile_path - def note_doctag(self, key, sentence_no, sentence_length): + def note_doctag(self, key, document_no, document_length): """Note a document tag during initial corpus scan, for structure sizing.""" if isinstance(key, int): self.count = max(self.count, key+1) else: if key in self.doctags: - self.doctags[key] = self.doctags[key].repeat(sentence_length) + self.doctags[key] = self.doctags[key].repeat(document_length) else: - self.doctags[key] = Doctag(sentence_no, sentence_length, 1) + self.doctags[key] = Doctag(document_no, document_length, 1) self.index2doctag.append(key) self.count = max(self.count, len(self.index2doctag)) @@ -271,7 +292,7 @@ def indexed_doctags(self, doctag_tokens): self.doctag_syn0, self.doctag_syn0_lockf, doctag_tokens) def trained_items(self, indexed_tuples): - """Persist any changes made to the given indices (matching tuple previously + """Persist any changes made to the given indexes (matching tuple previously returned by indexed_doctags()); a no-op for this implementation""" pass @@ -438,9 +459,9 @@ class Doc2Vec(Word2Vec): def __init__(self, 
documents=None, size=300, alpha=0.025, window=8, min_count=5, sample=0, seed=1, workers=1, min_alpha=0.0001, dm=1, hs=1, negative=0, dbow_words=0, dm_mean=0, dm_concat=0, dm_tag_count=1, - docvecs=None, docvecs_mapfile=None, **kwargs): + docvecs=None, docvecs_mapfile=None, comment=None, **kwargs): """ - Initialize the model from an iterable of `documents`. Each sentence is a + Initialize the model from an iterable of `documents`. Each document is a TaggedDocument object that will be used for training. The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, @@ -482,7 +503,7 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, is no longer the size of one (sampled or arithmatically combined) word vector, but the size of the tag(s) and all words in the context strung together. - `dm_tag_count` = expected constant number of sentence tags per sentence, when using + `dm_tag_count` = expected constant number of document tags per document, when using dm_concat mode; default is 1. 
`dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW @@ -499,6 +520,7 @@ def __init__(self, documents=None, size=300, alpha=0.025, window=8, min_count=5, self.docvecs = docvecs if not self.docvecs: self.docvecs = DocvecsArray(docvecs_mapfile) + self.comment = comment if documents is not None: self.build_vocab(documents) self.train(documents) @@ -520,30 +542,30 @@ def reset_from(self, other_model): self.docvecs.borrow_from(other_model.docvecs) Word2Vec.reset_from(self, other_model) - def _vocab_from(self, sentences): - sentence_no, vocab = -1, {} + def _vocab_from(self, documents): + document_no, vocab = -1, {} total_words = 0 - for sentence_no, sentence in enumerate(sentences): - if sentence_no % 10000 == 0: - logger.info("PROGRESS: at item #%i, processed %i words and %i word types" % - (sentence_no, total_words, len(vocab))) - sentence_length = len(sentence.words) - for tag in sentence.tags: - self.docvecs.note_doctag(tag, sentence_no, sentence_length) - for word in sentence.words: + for document_no, document in enumerate(documents): + if document_no % 10000 == 0: + logger.info("PROGRESS: at document #%i, processed %i words and %i word types" % + (document_no, total_words, len(vocab))) + document_length = len(document.words) + for tag in document.tags: + self.docvecs.note_doctag(tag, document_no, document_length) + for word in document.words: total_words += 1 if word in vocab: vocab[word].count += 1 else: vocab[word] = Vocab(count=1) - logger.info("collected %i word types from a corpus of %i words and %i items" % - (len(vocab), total_words, sentence_no + 1)) + logger.info("collected %i word types from a corpus of %i words and %i documents" % + (len(vocab), total_words, document_no + 1)) return vocab - def _prepare_sentences(self, sentences): - for sentence in sentences: - yield (self._tokens_to_vocabs(sentence.words), - self.docvecs.indexed_doctags(sentence.tags)) + def _prepare_items(self, documents): + for document in 
documents: + yield (self._tokens_to_vocabs(document.words), + self.docvecs.indexed_doctags(document.tags)) def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): """Convert list of tokens to items (Vocabs) from source_dict.""" @@ -558,17 +580,17 @@ def _tokens_to_vocabs(self, tokens, sample=True, source_dict=None): def _get_job_words(self, alpha, work, job, neu1): if self.sg: - tally = sum(train_sentence_dbow(self, sentence, doctag_indices, alpha, work, train_words=self.dbow_words, + tally = sum(train_document_dbow(self, word_vocabs, doctag_indexes, alpha, work, train_words=self.dbow_words, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for sentence, (doctag_indices, doctag_vectors, doctag_locks, ignored) in job) + for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) elif self.dm_concat: - tally = sum(train_sentence_dm_concat(self, sentence, doctag_indices, alpha, work, neu1, + tally = sum(train_document_dm_concat(self, word_vocabs, doctag_indexes, alpha, work, neu1, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for sentence, (doctag_indices, doctag_vectors, doctag_locks, ignored) in job) + for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) else: - tally = sum(train_sentence_dm(self, sentence, doctag_indices, alpha, work, neu1, + tally = sum(train_document_dm(self, word_vocabs, doctag_indexes, alpha, work, neu1, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) - for sentence, (doctag_indices, doctag_vectors, doctag_locks, ignored) in job) + for word_vocabs, (doctag_indexes, doctag_vectors, doctag_locks, ignored) in job) self.docvecs.trained_items(item for s, item in job) return tally @@ -581,7 +603,7 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): doctag_vectors = empty((1, self.vector_size), dtype=REAL) doctag_vectors[0] = self.seeded_vector(' '.join(document)) doctag_locks = ones(1, dtype=REAL) - doctag_indices = [0] + 
doctag_indexes = [0] word_vocabs = self._tokens_to_vocabs(document) work = zeros(self.layer1_size, dtype=REAL) @@ -590,15 +612,15 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): for i in range(steps): if self.sg: - train_sentence_dbow(self, word_vocabs, doctag_indices, alpha, work, + train_document_dbow(self, word_vocabs, doctag_indexes, alpha, work, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) elif self.dm_concat: - train_sentence_dm_concat(self, word_vocabs, doctag_indices, alpha, work, neu1, + train_document_dm_concat(self, word_vocabs, doctag_indexes, alpha, work, neu1, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) else: - train_sentence_dm(self, word_vocabs, doctag_indices, alpha, work, neu1, + train_document_dm(self, word_vocabs, doctag_indexes, alpha, work, neu1, learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks) alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha @@ -606,26 +628,24 @@ def infer_vector(self, document, alpha=0.1, min_alpha=0.0001, steps=5): return doctag_vectors[0] def __str__(self): - return "Doc2Vec(%id, sg=%i, hs=%i, negative=%i, dm_concat=%i)" % (self.vector_size, self.sg, self.hs, self.negative, self.dm_concat) - - @property - def compact_name(self): """Abbreviated name reflecting major configuration paramaters.""" segments = [] + if self.comment: + segments.append('"%s"' % self.comment) if self.sg: - segments.append('dbow') # PV-DBOW (skip-gram-style) if self.dbow_words: - segments.append('w') # also training words - else: - segments.append('dm') # PV-DM... + segments.append('dbow+w') # also training words + else: + segments.append('dbow') # PV-DBOW (skip-gram-style) + + else: # PV-DM... 
if self.dm_concat: - segments.append('c') # ...with concatenative context layer + segments.append('dm/c') # ...with concatenative context layer else: if self.cbow_mean: - segments.append('m') + segments.append('dm/m') else: - segments.append('s') - segments.append('_') + segments.append('dm/s') segments.append('d%d' % self.vector_size) # dimensions if self.negative: segments.append('n%d' % self.negative) # negative samples @@ -636,10 +656,10 @@ def compact_name(self): if self.min_count > 1: segments.append('mc%d' % self.min_count) if self.sample > 0: - segments.append('s%d' % self.sample) + segments.append('s%E' % self.sample) if self.workers > 1: segments.append('t%d' % self.workers) - return ''.join(segments) + return 'Doc2Vec(%s)' % ','.join(segments) def save(self, *args, **kwargs): kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors @@ -647,8 +667,8 @@ def save(self, *args, **kwargs): class TaggedBrownCorpus(object): - """Iterate over sentences from the Brown corpus (part of NLTK data), yielding - each sentence out as a TaggedSentence object.""" + """Iterate over documents from the Brown corpus (part of NLTK data), yielding + each document out as a TaggedDocument object.""" def __init__(self, dirname): self.dirname = dirname @@ -659,33 +679,33 @@ def __iter__(self): continue for item_no, line in enumerate(utils.smart_open(fname)): line = utils.to_unicode(line) - # each file line is a single sentence in the Brown corpus + # each file line is a single document in the Brown corpus # each token is WORD/POS_TAG token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] # ignore words with non-alphabetic tags like ",", "!" 
etc (punctuation, weird stuff) words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty sentences + if not words: # don't bother sending out empty documents continue - yield TaggedSentence(words, ['%s_SENT_%s' % (fname, item_no)]) + yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) -class TaggedLineSentence(object): - """Simple format: one sentence = one line = one TaggedDocument object. +class TaggedLineDocument(object): + """Simple format: one document = one line = one TaggedDocument object. Words are expected to be already preprocessed and separated by whitespace, - tags are constructed automatically from the sentence line number.""" + tags are constructed automatically from the document line number.""" def __init__(self, source): """ `source` can be either a string (filename) or a file object. Example:: - sentences = TaggedLineSentence('myfile.txt') + documents = TaggedLineDocument('myfile.txt') Or for compressed files:: - sentences = TaggedLineSentence('compressed_text.txt.bz2') - sentences = TaggedLineSentence('compressed_text.txt.gz') + documents = TaggedLineDocument('compressed_text.txt.bz2') + documents = TaggedLineDocument('compressed_text.txt.gz') """ self.source = source diff --git a/gensim/models/doc2vec_inner.c b/gensim/models/doc2vec_inner.c index 1032adb4fe..65a888fc56 100644 --- a/gensim/models/doc2vec_inner.c +++ b/gensim/models/doc2vec_inner.c @@ -653,7 +653,7 @@ typedef npy_longdouble __pyx_t_5numpy_longdouble_t; * REAL = np.float32 * ctypedef np.float32_t REAL_t # <<<<<<<<<<<<<< * - * DEF MAX_SENTENCE_LEN = 10000 + * DEF MAX_DOCUMENT_LEN = 10000 */ typedef __pyx_t_5numpy_float32_t __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t; #if CYTHON_CCOMPLEX @@ -716,7 +716,7 @@ typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; typedef npy_cdouble __pyx_t_5numpy_complex_t; /* "trunk/gensim/models/doc2vec_inner.pyx":28 - * DEF MAX_SENTENCE_LEN = 
10000 + * DEF MAX_DOCUMENT_LEN = 10000 * * ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< * ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil @@ -1151,12 +1151,12 @@ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gen static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_float(int const *, float const *, int const *, float const *, int const *); /*proto*/ static __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, unsigned PY_LONG_LONG, int, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t 
*, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *, int const , int const , int); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.doc2vec_inner" int __pyx_module_is_main_trunk__gensim__models__doc2vec_inner = 0; @@ -1165,9 +1165,9 @@ static PyObject *__pyx_builtin_range; static PyObject *__pyx_builtin_enumerate; static PyObject *__pyx_builtin_ValueError; static PyObject *__pyx_builtin_RuntimeError; -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject 
*__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject 
*__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks); /* proto */ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_6init(CYTHON_UNUSED PyObject *__pyx_self); /* proto */ static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ @@ -1265,9 +1265,9 @@ static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; static char __pyx_k_RuntimeError[] = "RuntimeError"; static char __pyx_k_dm_tag_count[] = "dm_tag_count"; static char __pyx_k_doctag_locks[] = "doctag_locks"; +static char __pyx_k_document_len[] = "document_len"; static char __pyx_k_learn_hidden[] = "learn_hidden"; static char __pyx_k_predict_word[] = "predict_word"; -static char __pyx_k_sentence_len[] = "sentence_len"; static char __pyx_k_word_locks_2[] = "_word_locks"; static char __pyx_k_word_vectors[] = "word_vectors"; static char __pyx_k_learn_doctags[] = "learn_doctags"; @@ -1286,10 +1286,10 @@ static char __pyx_k_doctag_indexes_2[] = "_doctag_indexes"; static char __pyx_k_doctag_vectors_2[] = "_doctag_vectors"; static char __pyx_k_doctag_syn0_lockf[] = "doctag_syn0_lockf"; static char __pyx_k_scipy_linalg_blas[] = "scipy.linalg.blas"; -static char __pyx_k_train_sentence_dm[] = "train_sentence_dm"; +static char __pyx_k_train_document_dm[] = "train_document_dm"; static char __pyx_k_expected_doctag_len[] = "expected_doctag_len"; -static char __pyx_k_train_sentence_dbow[] = "train_sentence_dbow"; -static char __pyx_k_train_sentence_dm_concat[] = "train_sentence_dm_concat"; +static char __pyx_k_train_document_dbow[] = "train_document_dbow"; +static char __pyx_k_train_document_dm_concat[] = "train_document_dm_concat"; static char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray 
is not C contiguous"; static char __pyx_k_Users_scratch_Documents_dev2015[] = "/Users/scratch/Documents/dev2015/gensim_venv/src/trunk/gensim/models/doc2vec_inner.pyx"; static char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; @@ -1326,6 +1326,7 @@ static PyObject *__pyx_n_s_doctag_syn0; static PyObject *__pyx_n_s_doctag_syn0_lockf; static PyObject *__pyx_n_s_doctag_vectors; static PyObject *__pyx_n_s_doctag_vectors_2; +static PyObject *__pyx_n_s_document_len; static PyObject *__pyx_n_s_docvecs; static PyObject *__pyx_n_s_dtype; static PyObject *__pyx_n_s_enumerate; @@ -1376,7 +1377,6 @@ static PyObject *__pyx_n_s_saxpy; static PyObject *__pyx_n_s_scipy_linalg_blas; static PyObject *__pyx_n_s_scopy; static PyObject *__pyx_n_s_sdot; -static PyObject *__pyx_n_s_sentence_len; static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_snrm2; static PyObject *__pyx_n_s_sscal; @@ -1387,9 +1387,9 @@ static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_table; static PyObject *__pyx_n_s_table_len; static PyObject *__pyx_n_s_test; -static PyObject *__pyx_n_s_train_sentence_dbow; -static PyObject *__pyx_n_s_train_sentence_dm; -static PyObject *__pyx_n_s_train_sentence_dm_concat; +static PyObject *__pyx_n_s_train_document_dbow; +static PyObject *__pyx_n_s_train_document_dm; +static PyObject *__pyx_n_s_train_document_dm_concat; static PyObject *__pyx_n_s_train_words; static PyObject *__pyx_n_s_train_words_2; static PyObject *__pyx_n_s_trunk_gensim_models_doc2vec_inne; @@ -1614,12 +1614,12 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_our_saxpy_noblas(int /* "trunk/gensim/models/doc2vec_inner.pyx":82 * * - * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *context_vectors, REAL_t *syn1, const int size, */ -static void 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1781,7 +1781,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs /* "trunk/gensim/models/doc2vec_inner.pyx":82 * * - * cdef void fast_sentence_dbow_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dbow_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *context_vectors, REAL_t *syn1, const int size, */ @@ -1792,12 +1792,12 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs /* 
"trunk/gensim/models/doc2vec_inner.pyx":107 * * - * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, * REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_vectors, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_context_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, int __pyx_v_learn_context, int __pyx_v_learn_hidden, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_context_locks) { PY_LONG_LONG __pyx_v_row1; 
PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; @@ -2062,7 +2062,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":107 * * - * cdef unsigned long long fast_sentence_dbow_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, * REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ @@ -2075,12 +2075,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":147 * * - * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dm_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int __pyx_v_learn_hidden) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int 
__pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; @@ -2201,7 +2201,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ /* "trunk/gensim/models/doc2vec_inner.pyx":147 * * - * cdef void fast_sentence_dm_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dm_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ @@ -2212,12 +2212,12 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs(_ /* "trunk/gensim/models/doc2vec_inner.pyx":170 * * - * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dm_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int __pyx_v_learn_hidden) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; @@ -2433,7 +2433,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast * * return next_random # <<<<<<<<<<<<<< * - * cdef void fast_sentence_dmc_hs( + * cdef void fast_document_dmc_hs( */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; @@ -2441,7 +2441,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":170 * * - * cdef unsigned long long fast_sentence_dm_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dm_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ @@ -2454,12 +2454,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":206 * return next_random * - * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dmc_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ -static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, 
__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { +static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int __pyx_v_word_code_len, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; @@ -2580,7 +2580,7 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( /* "trunk/gensim/models/doc2vec_inner.pyx":206 * return next_random * - * cdef void fast_sentence_dmc_hs( # <<<<<<<<<<<<<< + * cdef void fast_document_dmc_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, * REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, */ @@ -2591,12 +2591,12 @@ static void __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs( /* "trunk/gensim/models/doc2vec_inner.pyx":230 * * - * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dmc_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(int const 
__pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_predict_word_index, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *__pyx_v_work, int const __pyx_v_layer1_size, CYTHON_UNUSED int const __pyx_v_vector_size, int __pyx_v_learn_hidden) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t __pyx_v_f; @@ -2820,7 +2820,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* "trunk/gensim/models/doc2vec_inner.pyx":230 * * - * cdef unsigned long long fast_sentence_dmc_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_document_dmc_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, * REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, */ @@ -2833,15 +2833,15 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast /* 
"trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow = {"train_sentence_dbow", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_document_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_document_dbow = {"train_document_dbow", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_document_dbow, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_document_dbow(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; PyObject *__pyx_v_doctag_indexes = 0; @@ -2860,7 +2860,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("train_sentence_dbow (wrapper)", 0); + __Pyx_RefNannySetupContext("train_document_dbow (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = 
{&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_train_words,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; @@ -2868,7 +2868,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":269 * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -2879,7 +2879,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":270 - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -2917,17 +2917,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -2976,7 +2976,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -3013,18 +3013,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dbow", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dbow", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
268; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_document_dbow(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_train_words, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -3034,7 +3034,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, 
PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_document_dbow(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_train_words, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__train_words; @@ -3052,7 +3052,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; - int __pyx_v_sentence_len; + int __pyx_v_document_len; int __pyx_v_doctag_len; int __pyx_v_window; int __pyx_v_i; @@ -3092,7 +3092,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("train_sentence_dbow", 0); + __Pyx_RefNannySetupContext("train_document_dbow", 0); __Pyx_INCREF(__pyx_v_work); __Pyx_INCREF(__pyx_v_word_vectors); __Pyx_INCREF(__pyx_v_word_locks); @@ -3180,7 +3180,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * - * cdef int codelens[MAX_SENTENCE_LEN] + * cdef int codelens[MAX_DOCUMENT_LEN] */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); @@ -3189,7 +3189,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_size = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":292 - * cdef int sentence_len + * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * @@ -3494,7 +3494,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if work is None: * work = zeros(model.layer1_size, dtype=REAL) # <<<<<<<<<<<<<< * _work = np.PyArray_DATA(work) - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) */ __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_zeros); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); @@ -3526,8 +3526,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if work is None: * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) # <<<<<<<<<<<<<< - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ if (!(likely(((__pyx_v_work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__work = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_work))); @@ -3535,8 +3535,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":335 * work = zeros(model.layer1_size, dtype=REAL) * _work = np.PyArray_DATA(work) - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< - * doctag_len = 
min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * */ __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} @@ -3546,14 +3546,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ } else { __pyx_t_12 = __pyx_t_11; } - __pyx_v_sentence_len = ((int)__pyx_t_12); + __pyx_v_document_len = ((int)__pyx_t_12); /* "trunk/gensim/models/doc2vec_inner.pyx":336 * _work = np.PyArray_DATA(work) - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * - * for i in range(sentence_len): + * for i in range(document_len): */ __pyx_t_12 = PyObject_Length(__pyx_v_doctag_indexes); if (unlikely(__pyx_t_12 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; @@ -3565,22 +3565,22 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_doctag_len = ((int)__pyx_t_7); /* "trunk/gensim/models/doc2vec_inner.pyx":338 - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * predict_word = word_vocabs[i] * if predict_word is None: */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":339 * - * for i in range(sentence_len): + * for i in range(document_len): * predict_word = 
word_vocabs[i] # <<<<<<<<<<<<<< * if predict_word is None: - * # shrink sentence to leave out word + * # shrink document to leave out word */ __pyx_t_10 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_10 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_10); @@ -3588,11 +3588,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_t_10 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":340 - * for i in range(sentence_len): + * for i in range(document_len): * predict_word = word_vocabs[i] * if predict_word is None: # <<<<<<<<<<<<<< - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 */ __pyx_t_5 = (__pyx_v_predict_word == Py_None); __pyx_t_4 = (__pyx_t_5 != 0); @@ -3600,16 +3600,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":342 * if predict_word is None: - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + * # shrink document to leave out word + * document_len = document_len - 1 # <<<<<<<<<<<<<< * continue # leaving j unchanged * else: */ - __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + __pyx_v_document_len = (__pyx_v_document_len - 1); /* "trunk/gensim/models/doc2vec_inner.pyx":343 - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: * indexes[i] = predict_word.index @@ -3711,7 +3711,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * result += 1 * if _train_words: # <<<<<<<<<<<<<< * # single randint() call avoids a big thread-synchronization slowdown - * for i, item in 
enumerate(np.random.randint(0, window, sentence_len)): + * for i, item in enumerate(np.random.randint(0, window, document_len)): */ __pyx_t_4 = (__pyx_v__train_words != 0); if (__pyx_t_4) { @@ -3719,7 +3719,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":355 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * for i, item in enumerate(np.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * for i in range(doctag_len): */ @@ -3734,7 +3734,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __pyx_t_15 = NULL; __pyx_t_7 = 0; @@ -3811,7 +3811,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":356 * # single randint() call avoids a big thread-synchronization slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * for i, item in enumerate(np.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] @@ -3822,7 +3822,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":355 * if _train_words: * # single randint() call avoids a big thread-synchronization slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * for i, item in enumerate(np.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * for i in range(doctag_len): */ @@ -3833,7 +3833,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_L14:; /* "trunk/gensim/models/doc2vec_inner.pyx":357 - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * for i, item in enumerate(np.random.randint(0, window, document_len)): * reduced_windows[i] = item * for i in range(doctag_len): # <<<<<<<<<<<<<< * _doctag_indexes[i] = doctag_indexes[i] @@ -3861,16 +3861,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * - * # release GIL & train on the sentence + * # release GIL & train on the document */ __pyx_v_result = (__pyx_v_result + 1); } /* "trunk/gensim/models/doc2vec_inner.pyx":362 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * if codelens[i] == 0: */ { @@ -3881,19 +3881,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /*try:*/ { /* "trunk/gensim/models/doc2vec_inner.pyx":363 - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * if codelens[i] == 0: * continue */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* 
"trunk/gensim/models/doc2vec_inner.pyx":364 * with nogil: - * for i in range(sentence_len): + * for i in range(document_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< * continue * if _train_words: # simultaneous skip-gram wordvec-training @@ -3902,7 +3902,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":365 - * for i in range(sentence_len): + * for i in range(document_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< * if _train_words: # simultaneous skip-gram wordvec-training @@ -3945,7 +3945,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if j < 0: * j = 0 # <<<<<<<<<<<<<< * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: + * if k > document_len: */ __pyx_v_j = 0; goto __pyx_L26; @@ -3956,36 +3956,36 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< - * if k > sentence_len: - * k = sentence_len + * if k > document_len: + * k = document_len */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); /* "trunk/gensim/models/doc2vec_inner.pyx":371 * j = 0 * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len + * if k > document_len: # <<<<<<<<<<<<<< + * k = document_len * for j in range(j, k): */ - __pyx_t_4 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + __pyx_t_4 = ((__pyx_v_k > __pyx_v_document_len) != 0); if (__pyx_t_4) { /* "trunk/gensim/models/doc2vec_inner.pyx":372 * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: - * k = sentence_len # <<<<<<<<<<<<<< + * if k > document_len: + * k = document_len # <<<<<<<<<<<<<< * for j in range(j, k): * if j == i or codelens[j] == 0: */ - __pyx_v_k = __pyx_v_sentence_len; + __pyx_v_k = __pyx_v_document_len; goto __pyx_L27; } __pyx_L27:; /* 
"trunk/gensim/models/doc2vec_inner.pyx":373 - * if k > sentence_len: - * k = sentence_len + * if k > document_len: + * k = document_len * for j in range(j, k): # <<<<<<<<<<<<<< * if j == i or codelens[j] == 0: * continue @@ -3995,7 +3995,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __pyx_v_j = __pyx_t_18; /* "trunk/gensim/models/doc2vec_inner.pyx":374 - * k = sentence_len + * k = document_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< * continue @@ -4027,7 +4027,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * continue * if hs: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], */ __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { @@ -4035,21 +4035,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":378 * if hs: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], # <<<<<<<<<<<<<< * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), 
(__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__word_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); goto __pyx_L33; } __pyx_L33:; /* "trunk/gensim/models/doc2vec_inner.pyx":380 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], * _alpha, _work, _learn_words, _learn_hidden, _word_locks) * if negative: # <<<<<<<<<<<<<< * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, + * next_random = fast_document_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, */ __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { @@ -4057,11 +4057,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":382 * if negative: * # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< + * next_random = fast_document_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, # <<<<<<<<<<<<<< * indexes[i], indexes[j], _alpha, _work, next_random, * _learn_words, _learn_hidden, _word_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); + __pyx_v_next_random = 
__pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__word_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_words, __pyx_v__learn_hidden, __pyx_v__word_locks); goto __pyx_L34; } __pyx_L34:; @@ -4076,7 +4076,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * # docvec-training * for j in range(doctag_len): # <<<<<<<<<<<<<< * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], */ __pyx_t_18 = __pyx_v_doctag_len; for (__pyx_t_19 = 0; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { @@ -4086,7 +4086,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ * # docvec-training * for j in range(doctag_len): * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) */ __pyx_t_4 = (__pyx_v_hs != 0); @@ -4095,20 +4095,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":389 * for j in range(doctag_len): * if hs: - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], # <<<<<<<<<<<<<< + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], # <<<<<<<<<<<<<< * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_hs((__pyx_v_points[__pyx_v_i]), 
(__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doctag_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__doctag_vectors, __pyx_v_syn1, __pyx_v_size, (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); goto __pyx_L37; } __pyx_L37:; /* "trunk/gensim/models/doc2vec_inner.pyx":391 - * fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + * fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, + * next_random = fast_document_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, * indexes[i], _doctag_indexes[j], _alpha, _work, next_random, */ __pyx_t_4 = (__pyx_v_negative != 0); @@ -4117,11 +4117,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":392 * _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) * if negative: - * next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< + * next_random = fast_document_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, # <<<<<<<<<<<<<< * indexes[i], _doctag_indexes[j], _alpha, _work, next_random, * _learn_doctags, _learn_hidden, _doctag_locks) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dbow_neg(__pyx_v_negative, 
__pyx_v_table, __pyx_v_table_len, __pyx_v__doctag_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v__doctag_vectors, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v__doctag_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v__work, __pyx_v_next_random, __pyx_v__learn_doctags, __pyx_v__learn_hidden, __pyx_v__doctag_locks); goto __pyx_L38; } __pyx_L38:; @@ -4132,9 +4132,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":362 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * if codelens[i] == 0: */ /*finally:*/ { @@ -4165,7 +4165,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -4178,7 +4178,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_16); - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dbow", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dbow", __pyx_clineno, 
__pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_predict_word); @@ -4196,15 +4196,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_train_sentence_ /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm = {"train_sentence_dm", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_document_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_document_dm = {"train_document_dm", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_document_dm, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_document_dm(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; PyObject *__pyx_v_doctag_indexes = 0; @@ -4223,7 +4223,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - 
__Pyx_RefNannySetupContext("train_sentence_dm (wrapper)", 0); + __Pyx_RefNannySetupContext("train_document_dm (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; @@ -4232,7 +4232,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":400 * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -4242,7 +4242,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":401 - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -4280,17 +4280,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 
13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -4339,7 +4339,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -4376,18 +4376,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + 
__Pyx_RaiseArgtupleInvalid("train_document_dm", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_document_dm(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -4397,7 +4397,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject 
*__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_document_dm(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__learn_doctags; @@ -4418,7 +4418,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_reduced_windows[10000]; - int __pyx_v_sentence_len; + int __pyx_v_document_len; int __pyx_v_doctag_len; int __pyx_v_window; int __pyx_v_i; @@ -4459,7 +4459,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("train_sentence_dm", 0); + __Pyx_RefNannySetupContext("train_document_dm", 0); __Pyx_INCREF(__pyx_v_work); __Pyx_INCREF(__pyx_v_neu1); __Pyx_INCREF(__pyx_v_word_vectors); @@ -4560,7 +4560,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * - * cdef int codelens[MAX_SENTENCE_LEN] + * cdef int codelens[MAX_DOCUMENT_LEN] */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 417; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -4569,7 +4569,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_size = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":425 - * cdef int sentence_len + * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * @@ -4961,7 +4961,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) */ if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 470; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); @@ -4969,9 +4969,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":472 * _neu1 = np.PyArray_DATA(neu1) * - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 - * for i in range(sentence_len): + * for i in range(document_len): */ __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 472; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_11 = 10000; @@ -4980,34 +4980,34 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence } else { __pyx_t_12 = __pyx_t_11; } - __pyx_v_sentence_len = ((int)__pyx_t_12); + __pyx_v_document_len = ((int)__pyx_t_12); /* "trunk/gensim/models/doc2vec_inner.pyx":473 * - * sentence_len = min(MAX_SENTENCE_LEN, 
len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] */ __pyx_v_j = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":474 - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) * j = 0 - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * word = word_vocabs[i] * if word is None: */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":475 * j = 0 - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: - * # shrink sentence to leave out word + * # shrink document to leave out word */ __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 475; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); @@ -5015,11 +5015,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_t_6 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":476 - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 */ __pyx_t_4 = (__pyx_v_word == Py_None); __pyx_t_5 = (__pyx_t_4 != 0); @@ -5027,16 +5027,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":478 * if word is None: - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + 
* # shrink document to leave out word + * document_len = document_len - 1 # <<<<<<<<<<<<<< * continue # leaving j unchanged * else: */ - __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + __pyx_v_document_len = (__pyx_v_document_len - 1); /* "trunk/gensim/models/doc2vec_inner.pyx":479 - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: * indexes[j] = word.index @@ -5124,7 +5124,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * result += 1 * j = j + 1 # <<<<<<<<<<<<<< * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * for i, item in enumerate(np.random.randint(0, window, document_len)): */ __pyx_v_j = (__pyx_v_j + 1); } @@ -5134,7 +5134,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":489 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * for i, item in enumerate(np.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ @@ -5149,7 +5149,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = __Pyx_PyInt_From_int(__pyx_v_document_len); if (unlikely(!__pyx_t_10)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 489; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_10); __pyx_t_15 = NULL; __pyx_t_12 = 0; @@ -5226,10 +5226,10 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":490 * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): + * for i, item in enumerate(np.random.randint(0, window, document_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) */ __pyx_t_14 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_14 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 490; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_14; @@ -5237,7 +5237,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":489 * j = j + 1 * # single randint() call avoids a big thread-sync slowdown - * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< + * for i, item in enumerate(np.random.randint(0, window, document_len)): # <<<<<<<<<<<<<< * reduced_windows[i] = item * */ @@ -5247,7 +5247,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":492 * reduced_windows[i] = item * - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] */ @@ -5262,7 +5262,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":493 * - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len 
= min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): # <<<<<<<<<<<<<< * _doctag_indexes[i] = doctag_indexes[i] * result += 1 @@ -5272,7 +5272,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":494 - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * for i in range(doctag_len): * _doctag_indexes[i] = doctag_indexes[i] # <<<<<<<<<<<<<< * result += 1 @@ -5289,16 +5289,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * - * # release GIL & train on the sentence + * # release GIL & train on the document */ __pyx_v_result = (__pyx_v_result + 1); } /* "trunk/gensim/models/doc2vec_inner.pyx":498 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window + reduced_windows[i] */ { @@ -5309,19 +5309,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /*try:*/ { /* "trunk/gensim/models/doc2vec_inner.pyx":499 - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * j = i - window + reduced_windows[i] * if j < 0: */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":500 * with nogil: - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< * if j < 0: * j = 0 @@ -5329,7 +5329,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); /* "trunk/gensim/models/doc2vec_inner.pyx":501 - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< * j = 0 @@ -5343,7 +5343,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if j < 0: * j = 0 # <<<<<<<<<<<<<< * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: + * if k > document_len: */ __pyx_v_j = 0; goto __pyx_L24; @@ -5354,29 +5354,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< - * if k > sentence_len: - * k = sentence_len + * if k > document_len: + * k = document_len */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); /* "trunk/gensim/models/doc2vec_inner.pyx":504 * j = 0 * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: # <<<<<<<<<<<<<< - * k = sentence_len + * if k > document_len: # <<<<<<<<<<<<<< + * k = document_len * */ - __pyx_t_5 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); + __pyx_t_5 = ((__pyx_v_k > __pyx_v_document_len) != 0); if (__pyx_t_5) { /* "trunk/gensim/models/doc2vec_inner.pyx":505 * k = i + window + 1 - reduced_windows[i] - * if k > sentence_len: - * k = sentence_len # <<<<<<<<<<<<<< + * if k > document_len: + * k = document_len # <<<<<<<<<<<<<< * * # compose l1 (in _neu1) & clear _work */ - __pyx_v_k = __pyx_v_sentence_len; + __pyx_v_k = __pyx_v_document_len; goto __pyx_L25; } __pyx_L25:; @@ -5531,7 +5531,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error # <<<<<<<<<<<<<< * if hs: - * fast_sentence_dm_hs(points[i], codes[i], codelens[i], + * fast_document_dm_hs(points[i], codes[i], codelens[i], */ memset(__pyx_v__work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t)))); @@ -5539,7 +5539,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dm_hs(points[i], codes[i], codelens[i], + * fast_document_dm_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, */ __pyx_t_5 = (__pyx_v_hs != 0); @@ -5548,11 +5548,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":525 * memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error * if hs: - * fast_sentence_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< + * fast_document_dm_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< * _neu1, syn1, _alpha, _work, * size, _learn_hidden) */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); goto __pyx_L33; } __pyx_L33:; @@ -5561,7 +5561,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence * _neu1, syn1, _alpha, _work, * size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< - * next_random = 
fast_sentence_dm_neg(negative, table, table_len, next_random, + * next_random = fast_document_dm_neg(negative, table, table_len, next_random, * _neu1, syn1neg, indexes[i], _alpha, _work, */ __pyx_t_5 = (__pyx_v_negative != 0); @@ -5570,11 +5570,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":529 * size, _learn_hidden) * if negative: - * next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< + * next_random = fast_document_dm_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< * _neu1, syn1neg, indexes[i], _alpha, _work, * size, _learn_hidden) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dm_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_size, __pyx_v__learn_hidden); goto __pyx_L34; } __pyx_L34:; @@ -5696,9 +5696,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":498 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window + reduced_windows[i] */ /*finally:*/ { @@ -5729,7 +5729,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, 
word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -5742,7 +5742,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_15); __Pyx_XDECREF(__pyx_t_16); - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dm", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); @@ -5761,15 +5761,15 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_2train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ /* Python wrapper */ -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ -static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat = {"train_sentence_dm_concat", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, METH_VARARGS|METH_KEYWORDS, 0}; -static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_document_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ 
+static PyMethodDef __pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_document_dm_concat = {"train_document_dm_concat", (PyCFunction)__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_document_dm_concat(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { PyObject *__pyx_v_model = 0; PyObject *__pyx_v_word_vocabs = 0; PyObject *__pyx_v_doctag_indexes = 0; @@ -5788,7 +5788,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence int __pyx_clineno = 0; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations - __Pyx_RefNannySetupContext("train_sentence_dm_concat (wrapper)", 0); + __Pyx_RefNannySetupContext("train_document_dm_concat (wrapper)", 0); { static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_model,&__pyx_n_s_word_vocabs,&__pyx_n_s_doctag_indexes,&__pyx_n_s_alpha,&__pyx_n_s_work,&__pyx_n_s_neu1,&__pyx_n_s_learn_doctags,&__pyx_n_s_learn_words,&__pyx_n_s_learn_hidden,&__pyx_n_s_word_vectors,&__pyx_n_s_word_locks,&__pyx_n_s_doctag_vectors,&__pyx_n_s_doctag_locks,0}; PyObject* values[13] = {0,0,0,0,0,0,0,0,0,0,0,0,0}; @@ -5797,7 +5797,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":552 * - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, # <<<<<<<<<<<<<< * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): * cdef int hs = model.hs @@ -5807,7 +5807,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence values[8] = ((PyObject *)Py_True); /* "trunk/gensim/models/doc2vec_inner.pyx":553 - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, 
work=None, neu1=None, + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -5845,17 +5845,17 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_word_vocabs)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_doctag_indexes)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (kw_args > 0) { @@ -5904,7 +5904,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, 
pos_args, "train_sentence_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_document_dm_concat") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -5941,18 +5941,18 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_document_dm_concat", 0, 4, 13, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; - __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, __pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); + __pyx_r = __pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(__pyx_self, __pyx_v_model, __pyx_v_word_vocabs, __pyx_v_doctag_indexes, __pyx_v_alpha, __pyx_v_work, __pyx_v_neu1, __pyx_v_learn_doctags, __pyx_v_learn_words, __pyx_v_learn_hidden, __pyx_v_word_vectors, 
__pyx_v_word_locks, __pyx_v_doctag_vectors, __pyx_v_doctag_locks); /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -5962,7 +5962,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence return __pyx_r; } -static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { +static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_document_dm_concat(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_model, PyObject *__pyx_v_word_vocabs, PyObject *__pyx_v_doctag_indexes, PyObject *__pyx_v_alpha, PyObject *__pyx_v_work, PyObject *__pyx_v_neu1, PyObject *__pyx_v_learn_doctags, PyObject *__pyx_v_learn_words, PyObject *__pyx_v_learn_hidden, PyObject *__pyx_v_word_vectors, PyObject *__pyx_v_word_locks, PyObject *__pyx_v_doctag_vectors, PyObject *__pyx_v_doctag_locks) { int __pyx_v_hs; int __pyx_v_negative; int __pyx_v__learn_doctags; @@ -5981,7 +5981,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_5numpy_uint32_t __pyx_v_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v__doctag_indexes[10000]; __pyx_t_5numpy_uint32_t __pyx_v_window_indexes[10000]; - int __pyx_v_sentence_len; 
+ int __pyx_v_document_len; int __pyx_v_doctag_len; int __pyx_v_window; int __pyx_v_expected_doctag_len; @@ -6021,7 +6021,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; - __Pyx_RefNannySetupContext("train_sentence_dm_concat", 0); + __Pyx_RefNannySetupContext("train_document_dm_concat", 0); __Pyx_INCREF(__pyx_v_work); __Pyx_INCREF(__pyx_v_neu1); __Pyx_INCREF(__pyx_v_word_vectors); @@ -6113,7 +6113,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * cdef int layer1_size = model.layer1_size * cdef int vector_size = model.vector_size # <<<<<<<<<<<<<< * - * cdef int codelens[MAX_SENTENCE_LEN] + * cdef int codelens[MAX_DOCUMENT_LEN] */ __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_vector_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 568; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -6122,7 +6122,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_v_vector_size = __pyx_t_2; /* "trunk/gensim/models/doc2vec_inner.pyx":576 - * cdef int sentence_len + * cdef int document_len * cdef int doctag_len * cdef int window = model.window # <<<<<<<<<<<<<< * cdef int expected_doctag_len = model.dm_tag_count @@ -6178,7 +6178,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":594 * cdef unsigned long long next_random * - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) # <<<<<<<<<<<<<< * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected nmber of tags */ @@ -6193,7 +6193,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":595 * - * doctag_len = 
min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: # <<<<<<<<<<<<<< * return 0 # skip doc without expected nmber of tags * @@ -6202,7 +6202,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence if (__pyx_t_8) { /* "trunk/gensim/models/doc2vec_inner.pyx":596 - * doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + * doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) * if doctag_len != expected_doctag_len: * return 0 # skip doc without expected nmber of tags # <<<<<<<<<<<<<< * @@ -6585,7 +6585,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * neu1 = zeros(model.layer1_size, dtype=REAL) * _neu1 = np.PyArray_DATA(neu1) # <<<<<<<<<<<<<< * - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) */ if (!(likely(((__pyx_v_neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v_neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 627; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__neu1 = ((__pyx_t_5trunk_6gensim_6models_13doc2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v_neu1))); @@ -6593,9 +6593,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":629 * _neu1 = np.PyArray_DATA(neu1) * - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) # <<<<<<<<<<<<<< * j = 0 - * for i in range(sentence_len): + * for i in range(document_len): */ __pyx_t_7 = PyObject_Length(__pyx_v_word_vocabs); if (unlikely(__pyx_t_7 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_6 = 10000; @@ -6604,34 +6604,34 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence } else { 
__pyx_t_5 = __pyx_t_6; } - __pyx_v_sentence_len = ((int)__pyx_t_5); + __pyx_v_document_len = ((int)__pyx_t_5); /* "trunk/gensim/models/doc2vec_inner.pyx":630 * - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) * j = 0 # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] */ __pyx_v_j = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":631 - * sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + * document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) * j = 0 - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * word = word_vocabs[i] * if word is None: */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for (__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":632 * j = 0 - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] # <<<<<<<<<<<<<< * if word is None: - * # shrink sentence to leave out word + * # shrink document to leave out word */ __pyx_t_4 = __Pyx_GetItemInt(__pyx_v_word_vocabs, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_4 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 632; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_4); @@ -6639,11 +6639,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_4 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":633 - * for i in range(sentence_len): + * for i in range(document_len): * word = word_vocabs[i] * if word is None: # <<<<<<<<<<<<<< - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 */ __pyx_t_8 = (__pyx_v_word == Py_None); __pyx_t_9 = (__pyx_t_8 != 0); @@ -6651,16 +6651,16 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":635 * if word is None: - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 # <<<<<<<<<<<<<< + * # shrink document to leave out word + * document_len = document_len - 1 # <<<<<<<<<<<<<< * continue # leaving j unchanged * else: */ - __pyx_v_sentence_len = (__pyx_v_sentence_len - 1); + __pyx_v_document_len = (__pyx_v_document_len - 1); /* "trunk/gensim/models/doc2vec_inner.pyx":636 - * # shrink sentence to leave out word - * sentence_len = sentence_len - 1 + * # shrink document to leave out word + * document_len = document_len - 1 * continue # leaving j unchanged # <<<<<<<<<<<<<< * else: * indexes[j] = word.index @@ -6795,16 +6795,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * _doctag_indexes[i] = doctag_indexes[i] * result += 1 # <<<<<<<<<<<<<< * - * # release GIL & train on the sentence + * # release GIL & train on the document */ __pyx_v_result = (__pyx_v_result + 1); } /* "trunk/gensim/models/doc2vec_inner.pyx":653 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window # negative OK: will pad with null word */ { @@ -6815,29 +6815,29 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /*try:*/ { /* "trunk/gensim/models/doc2vec_inner.pyx":654 - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: - * for i in range(sentence_len): # <<<<<<<<<<<<<< + * for i in range(document_len): # <<<<<<<<<<<<<< * j = i - window # negative OK: will pad with null word - * k = i + window + 1 # past sentence end OK: will pad with null word + * k = i + window + 1 # past document end OK: will pad with null word */ - __pyx_t_2 = __pyx_v_sentence_len; + __pyx_t_2 = __pyx_v_document_len; for 
(__pyx_t_13 = 0; __pyx_t_13 < __pyx_t_2; __pyx_t_13+=1) { __pyx_v_i = __pyx_t_13; /* "trunk/gensim/models/doc2vec_inner.pyx":655 * with nogil: - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window # negative OK: will pad with null word # <<<<<<<<<<<<<< - * k = i + window + 1 # past sentence end OK: will pad with null word + * k = i + window + 1 # past document end OK: will pad with null word * */ __pyx_v_j = (__pyx_v_i - __pyx_v_window); /* "trunk/gensim/models/doc2vec_inner.pyx":656 - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window # negative OK: will pad with null word - * k = i + window + 1 # past sentence end OK: will pad with null word # <<<<<<<<<<<<<< + * k = i + window + 1 # past document end OK: will pad with null word # <<<<<<<<<<<<<< * * # compose l1 & clear work */ @@ -6889,7 +6889,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * # word vectors in window * if m == i: # <<<<<<<<<<<<<< * continue - * if m < 0 or m >= sentence_len: + * if m < 0 or m >= document_len: */ __pyx_t_9 = ((__pyx_v_m == __pyx_v_i) != 0); if (__pyx_t_9) { @@ -6898,7 +6898,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * # word vectors in window * if m == i: * continue # <<<<<<<<<<<<<< - * if m < 0 or m >= sentence_len: + * if m < 0 or m >= document_len: * window_indexes[n] = null_word_index */ goto __pyx_L25_continue; @@ -6907,7 +6907,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":668 * if m == i: * continue - * if m < 0 or m >= sentence_len: # <<<<<<<<<<<<<< + * if m < 0 or m >= document_len: # <<<<<<<<<<<<<< * window_indexes[n] = null_word_index * else: */ @@ -6917,14 +6917,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __pyx_t_9 = __pyx_t_8; goto __pyx_L29_bool_binop_done; } - __pyx_t_8 = ((__pyx_v_m >= 
__pyx_v_sentence_len) != 0); + __pyx_t_8 = ((__pyx_v_m >= __pyx_v_document_len) != 0); __pyx_t_9 = __pyx_t_8; __pyx_L29_bool_binop_done:; if (__pyx_t_9) { /* "trunk/gensim/models/doc2vec_inner.pyx":669 * continue - * if m < 0 or m >= sentence_len: + * if m < 0 or m >= document_len: * window_indexes[n] = null_word_index # <<<<<<<<<<<<<< * else: * window_indexes[n] = indexes[m] @@ -6990,7 +6990,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) # work to accumulate l1 error * * if hs: # <<<<<<<<<<<<<< - * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], + * fast_document_dmc_hs(points[i], codes[i], codelens[i], * _neu1, syn1, _alpha, _work, */ __pyx_t_9 = (__pyx_v_hs != 0); @@ -6999,11 +6999,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":679 * * if hs: - * fast_sentence_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< + * fast_document_dmc_hs(points[i], codes[i], codelens[i], # <<<<<<<<<<<<<< * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) */ - __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); + __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v__neu1, __pyx_v_syn1, __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); goto __pyx_L33; } __pyx_L33:; @@ -7012,7 +7012,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence * _neu1, syn1, _alpha, _work, * layer1_size, vector_size, _learn_hidden) * if negative: # <<<<<<<<<<<<<< - * next_random = 
fast_sentence_dmc_neg(negative, table, table_len, next_random, + * next_random = fast_document_dmc_neg(negative, table, table_len, next_random, * _neu1, syn1neg, indexes[i], _alpha, _work, */ __pyx_t_9 = (__pyx_v_negative != 0); @@ -7021,11 +7021,11 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":683 * layer1_size, vector_size, _learn_hidden) * if negative: - * next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< + * next_random = fast_document_dmc_neg(negative, table, table_len, next_random, # <<<<<<<<<<<<<< * _neu1, syn1neg, indexes[i], _alpha, _work, * layer1_size, vector_size, _learn_hidden) */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_sentence_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_13doc2vec_inner_fast_document_dmc_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_next_random, __pyx_v__neu1, __pyx_v_syn1neg, (__pyx_v_indexes[__pyx_v_i]), __pyx_v__alpha, __pyx_v__work, __pyx_v_layer1_size, __pyx_v_vector_size, __pyx_v__learn_hidden); goto __pyx_L34; } __pyx_L34:; @@ -7102,9 +7102,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":653 * - * # release GIL & train on the sentence + * # release GIL & train on the document * with nogil: # <<<<<<<<<<<<<< - * for i in range(sentence_len): + * for i in range(document_len): * j = i - window # negative OK: will pad with null word */ /*finally:*/ { @@ -7135,7 +7135,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * - * def 
train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ @@ -7146,7 +7146,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_13doc2vec_inner_4train_sentence __Pyx_XDECREF(__pyx_t_4); __Pyx_XDECREF(__pyx_t_10); __Pyx_XDECREF(__pyx_t_12); - __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_sentence_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_AddTraceback("trunk.gensim.models.doc2vec_inner.train_document_dm_concat", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XDECREF(__pyx_v_word); @@ -9467,6 +9467,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_doctag_syn0_lockf, __pyx_k_doctag_syn0_lockf, sizeof(__pyx_k_doctag_syn0_lockf), 0, 0, 1, 1}, {&__pyx_n_s_doctag_vectors, __pyx_k_doctag_vectors, sizeof(__pyx_k_doctag_vectors), 0, 0, 1, 1}, {&__pyx_n_s_doctag_vectors_2, __pyx_k_doctag_vectors_2, sizeof(__pyx_k_doctag_vectors_2), 0, 0, 1, 1}, + {&__pyx_n_s_document_len, __pyx_k_document_len, sizeof(__pyx_k_document_len), 0, 0, 1, 1}, {&__pyx_n_s_docvecs, __pyx_k_docvecs, sizeof(__pyx_k_docvecs), 0, 0, 1, 1}, {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, {&__pyx_n_s_enumerate, __pyx_k_enumerate, sizeof(__pyx_k_enumerate), 0, 0, 1, 1}, @@ -9517,7 +9518,6 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_scipy_linalg_blas, __pyx_k_scipy_linalg_blas, sizeof(__pyx_k_scipy_linalg_blas), 0, 0, 1, 1}, {&__pyx_n_s_scopy, __pyx_k_scopy, sizeof(__pyx_k_scopy), 0, 0, 1, 1}, {&__pyx_n_s_sdot, __pyx_k_sdot, sizeof(__pyx_k_sdot), 0, 0, 1, 1}, - {&__pyx_n_s_sentence_len, __pyx_k_sentence_len, sizeof(__pyx_k_sentence_len), 0, 0, 1, 1}, {&__pyx_n_s_size, 
__pyx_k_size, sizeof(__pyx_k_size), 0, 0, 1, 1}, {&__pyx_n_s_snrm2, __pyx_k_snrm2, sizeof(__pyx_k_snrm2), 0, 0, 1, 1}, {&__pyx_n_s_sscal, __pyx_k_sscal, sizeof(__pyx_k_sscal), 0, 0, 1, 1}, @@ -9528,9 +9528,9 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_n_s_table, __pyx_k_table, sizeof(__pyx_k_table), 0, 0, 1, 1}, {&__pyx_n_s_table_len, __pyx_k_table_len, sizeof(__pyx_k_table_len), 0, 0, 1, 1}, {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, - {&__pyx_n_s_train_sentence_dbow, __pyx_k_train_sentence_dbow, sizeof(__pyx_k_train_sentence_dbow), 0, 0, 1, 1}, - {&__pyx_n_s_train_sentence_dm, __pyx_k_train_sentence_dm, sizeof(__pyx_k_train_sentence_dm), 0, 0, 1, 1}, - {&__pyx_n_s_train_sentence_dm_concat, __pyx_k_train_sentence_dm_concat, sizeof(__pyx_k_train_sentence_dm_concat), 0, 0, 1, 1}, + {&__pyx_n_s_train_document_dbow, __pyx_k_train_document_dbow, sizeof(__pyx_k_train_document_dbow), 0, 0, 1, 1}, + {&__pyx_n_s_train_document_dm, __pyx_k_train_document_dm, sizeof(__pyx_k_train_document_dm), 0, 0, 1, 1}, + {&__pyx_n_s_train_document_dm_concat, __pyx_k_train_document_dm_concat, sizeof(__pyx_k_train_document_dm_concat), 0, 0, 1, 1}, {&__pyx_n_s_train_words, __pyx_k_train_words, sizeof(__pyx_k_train_words), 0, 0, 1, 1}, {&__pyx_n_s_train_words_2, __pyx_k_train_words_2, sizeof(__pyx_k_train_words_2), 0, 0, 1, 1}, {&__pyx_n_s_trunk_gensim_models_doc2vec_inne, __pyx_k_trunk_gensim_models_doc2vec_inne, sizeof(__pyx_k_trunk_gensim_models_doc2vec_inne), 0, 0, 1, 1}, @@ -9677,38 +9677,38 @@ static int __Pyx_InitCachedConstants(void) { /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - 
__pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__14 = PyTuple_Pack(46, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_train_words, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_train_words_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_learn_doctags_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, 
__pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_predict_word, __pyx_n_s_item, __pyx_n_s_k); if (unlikely(!__pyx_tuple__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__14); __Pyx_GIVEREF(__pyx_tuple__14); - __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__15 = (PyObject*)__Pyx_PyCode_New(13, 0, 46, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__14, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_document_dbow, 268, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, 
__pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__16 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_cbow_mean, __pyx_n_s_count, __pyx_n_s_inv_count, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_reduced_windows, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_tuple__16); __Pyx_GIVEREF(__pyx_tuple__16); - __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm, 399, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__17 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_document_dm, 399, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, 
__pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_sentence_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__18 = PyTuple_Pack(50, __pyx_n_s_model, __pyx_n_s_word_vocabs, __pyx_n_s_doctag_indexes, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_learn_doctags, __pyx_n_s_learn_words, __pyx_n_s_learn_hidden, __pyx_n_s_word_vectors, __pyx_n_s_word_locks, __pyx_n_s_doctag_vectors, __pyx_n_s_doctag_locks, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_learn_doctags_2, __pyx_n_s_learn_words_2, __pyx_n_s_learn_hidden_2, __pyx_n_s_word_vectors_2, __pyx_n_s_doctag_vectors_2, __pyx_n_s_word_locks_2, __pyx_n_s_doctag_locks_2, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_layer1_size, __pyx_n_s_vector_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_doctag_indexes_2, __pyx_n_s_window_indexes, __pyx_n_s_document_len, __pyx_n_s_doctag_len, __pyx_n_s_window, __pyx_n_s_expected_doctag_len, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_m, __pyx_n_s_n, __pyx_n_s_result, __pyx_n_s_null_word_index, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word); if (unlikely(!__pyx_tuple__18)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__18); __Pyx_GIVEREF(__pyx_tuple__18); - __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, 
__pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_dm_concat, 551, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__19 = (PyObject*)__Pyx_PyCode_New(13, 0, 50, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__18, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_document_dm_concat, 551, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} /* "trunk/gensim/models/doc2vec_inner.pyx":699 * @@ -10043,37 +10043,37 @@ PyMODINIT_FUNC PyInit_doc2vec_inner(void) /* "trunk/gensim/models/doc2vec_inner.pyx":268 * * - * def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< + * def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # <<<<<<<<<<<<<< * train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_sentence_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_1train_document_dbow, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dbow, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dbow, 
__pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 268; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":399 * * - * def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_sentence_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_3train_document_dm, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 399; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":551 * * - * def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< + * def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, # <<<<<<<<<<<<<< * learn_doctags=True, learn_words=True, learn_hidden=True, * word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): */ - __pyx_t_2 = 
PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_sentence_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_13doc2vec_inner_5train_document_dm_concat, NULL, __pyx_n_s_trunk_gensim_models_doc2vec_inne); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_document_dm_concat, __pyx_t_2) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 551; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; /* "trunk/gensim/models/doc2vec_inner.pyx":699 diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 8ea164513c..af8c77aee9 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -23,7 +23,7 @@ from scipy.linalg.blas import fblas REAL = np.float32 ctypedef np.float32_t REAL_t -DEF MAX_SENTENCE_LEN = 10000 +DEF MAX_DOCUMENT_LEN = 10000 ctypedef void (*scopy_ptr) (const int *N, const float *X, const int *incX, float *Y, const int *incY) nogil ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil @@ -79,7 +79,7 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] -cdef void fast_sentence_dbow_hs( +cdef void fast_document_dbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *context_vectors, REAL_t *syn1, const int size, const 
np.uint32_t context_index, const REAL_t alpha, REAL_t *work, int learn_context, int learn_hidden, @@ -104,7 +104,7 @@ cdef void fast_sentence_dbow_hs( our_saxpy(&size, &context_locks[context_index], work, &ONE, &context_vectors[row1], &ONE) -cdef unsigned long long fast_sentence_dbow_neg( +cdef unsigned long long fast_document_dbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, REAL_t *context_vectors, REAL_t *syn1neg, const int size, const np.uint32_t word_index, const np.uint32_t context_index, const REAL_t alpha, REAL_t *work, @@ -144,7 +144,7 @@ cdef unsigned long long fast_sentence_dbow_neg( return next_random -cdef void fast_sentence_dm_hs( +cdef void fast_document_dm_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, const int size, int learn_hidden) nogil: @@ -167,7 +167,7 @@ cdef void fast_sentence_dm_hs( our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) -cdef unsigned long long fast_sentence_dm_neg( +cdef unsigned long long fast_document_dm_neg( const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long next_random, REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, const int size, int learn_hidden) nogil: @@ -203,7 +203,7 @@ cdef unsigned long long fast_sentence_dm_neg( return next_random -cdef void fast_sentence_dmc_hs( +cdef void fast_document_dmc_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int word_code_len, REAL_t *neu1, REAL_t *syn1, const REAL_t alpha, REAL_t *work, const int layer1_size, const int vector_size, int learn_hidden) nogil: @@ -227,7 +227,7 @@ cdef void fast_sentence_dmc_hs( our_saxpy(&layer1_size, &g, neu1, &ONE, &syn1[row2], &ONE) -cdef unsigned long long fast_sentence_dmc_neg( +cdef unsigned long long fast_document_dmc_neg( const int negative, np.uint32_t *table, unsigned long long table_len, unsigned long long 
next_random, REAL_t *neu1, REAL_t *syn1neg, const int predict_word_index, const REAL_t alpha, REAL_t *work, const int layer1_size, const int vector_size, int learn_hidden) nogil: @@ -265,7 +265,7 @@ cdef unsigned long long fast_sentence_dmc_neg( return next_random -def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, +def train_document_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs @@ -283,11 +283,11 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, cdef REAL_t _alpha = alpha cdef int size = model.layer1_size - cdef int codelens[MAX_SENTENCE_LEN] - cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] - cdef int sentence_len + cdef int codelens[MAX_DOCUMENT_LEN] + cdef np.uint32_t indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t _doctag_indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t reduced_windows[MAX_DOCUMENT_LEN] + cdef int document_len cdef int doctag_len cdef int window = model.window @@ -296,8 +296,8 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, # For hierarchical softmax cdef REAL_t *syn1 - cdef np.uint32_t *points[MAX_SENTENCE_LEN] - cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + cdef np.uint32_t *points[MAX_DOCUMENT_LEN] + cdef np.uint8_t *codes[MAX_DOCUMENT_LEN] # For negative sampling cdef REAL_t *syn1neg @@ -332,14 +332,14 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, if work is None: work = zeros(model.layer1_size, dtype=REAL) _work = np.PyArray_DATA(work) - sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) - doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) + doctag_len = 
min(MAX_DOCUMENT_LEN, len(doctag_indexes)) - for i in range(sentence_len): + for i in range(document_len): predict_word = word_vocabs[i] if predict_word is None: - # shrink sentence to leave out word - sentence_len = sentence_len - 1 + # shrink document to leave out word + document_len = document_len - 1 continue # leaving j unchanged else: indexes[i] = predict_word.index @@ -352,15 +352,15 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, result += 1 if _train_words: # single randint() call avoids a big thread-synchronization slowdown - for i, item in enumerate(np.random.randint(0, window, sentence_len)): + for i, item in enumerate(np.random.randint(0, window, document_len)): reduced_windows[i] = item for i in range(doctag_len): _doctag_indexes[i] = doctag_indexes[i] result += 1 - # release GIL & train on the sentence + # release GIL & train on the document with nogil: - for i in range(sentence_len): + for i in range(document_len): if codelens[i] == 0: continue if _train_words: # simultaneous skip-gram wordvec-training @@ -368,35 +368,35 @@ def train_sentence_dbow(model, word_vocabs, doctag_indexes, alpha, work=None, if j < 0: j = 0 k = i + window + 1 - reduced_windows[i] - if k > sentence_len: - k = sentence_len + if k > document_len: + k = document_len for j in range(j, k): if j == i or codelens[j] == 0: continue if hs: # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], + fast_document_dbow_hs(points[i], codes[i], codelens[i], _word_vectors, syn1, size, indexes[j], _alpha, _work, _learn_words, _learn_hidden, _word_locks) if negative: # we reuse the DBOW function, as it is equivalent to skip-gram for this purpose - next_random = fast_sentence_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, + next_random = fast_document_dbow_neg(negative, table, table_len, _word_vectors, syn1neg, size, 
indexes[i], indexes[j], _alpha, _work, next_random, _learn_words, _learn_hidden, _word_locks) # docvec-training for j in range(doctag_len): if hs: - fast_sentence_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], + fast_document_dbow_hs(points[i], codes[i], codelens[i], _doctag_vectors, syn1, size, _doctag_indexes[j], _alpha, _work, _learn_doctags, _learn_hidden, _doctag_locks) if negative: - next_random = fast_sentence_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, + next_random = fast_document_dbow_neg(negative, table, table_len, _doctag_vectors, syn1neg, size, indexes[i], _doctag_indexes[j], _alpha, _work, next_random, _learn_doctags, _learn_hidden, _doctag_locks) return result -def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, +def train_document_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs @@ -416,11 +416,11 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 cdef REAL_t _alpha = alpha cdef int size = model.layer1_size - cdef int codelens[MAX_SENTENCE_LEN] - cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] - cdef int sentence_len + cdef int codelens[MAX_DOCUMENT_LEN] + cdef np.uint32_t indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t _doctag_indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t reduced_windows[MAX_DOCUMENT_LEN] + cdef int document_len cdef int doctag_len cdef int window = model.window @@ -429,8 +429,8 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 # For hierarchical softmax cdef REAL_t *syn1 - cdef np.uint32_t *points[MAX_SENTENCE_LEN] - cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + cdef np.uint32_t 
*points[MAX_DOCUMENT_LEN] + cdef np.uint8_t *codes[MAX_DOCUMENT_LEN] # For negative sampling cdef REAL_t *syn1neg @@ -469,13 +469,13 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 neu1 = zeros(model.layer1_size, dtype=REAL) _neu1 = np.PyArray_DATA(neu1) - sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) j = 0 - for i in range(sentence_len): + for i in range(document_len): word = word_vocabs[i] if word is None: - # shrink sentence to leave out word - sentence_len = sentence_len - 1 + # shrink document to leave out word + document_len = document_len - 1 continue # leaving j unchanged else: indexes[j] = word.index @@ -486,23 +486,23 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 result += 1 j = j + 1 # single randint() call avoids a big thread-sync slowdown - for i, item in enumerate(np.random.randint(0, window, sentence_len)): + for i, item in enumerate(np.random.randint(0, window, document_len)): reduced_windows[i] = item - doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) for i in range(doctag_len): _doctag_indexes[i] = doctag_indexes[i] result += 1 - # release GIL & train on the sentence + # release GIL & train on the document with nogil: - for i in range(sentence_len): + for i in range(document_len): j = i - window + reduced_windows[i] if j < 0: j = 0 k = i + window + 1 - reduced_windows[i] - if k > sentence_len: - k = sentence_len + if k > document_len: + k = document_len # compose l1 (in _neu1) & clear _work memset(_neu1, 0, size * cython.sizeof(REAL_t)) @@ -522,11 +522,11 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 sscal(&size, &inv_count, _neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
memset(_work, 0, size * cython.sizeof(REAL_t)) # work to accumulate l1 error if hs: - fast_sentence_dm_hs(points[i], codes[i], codelens[i], + fast_document_dm_hs(points[i], codes[i], codelens[i], _neu1, syn1, _alpha, _work, size, _learn_hidden) if negative: - next_random = fast_sentence_dm_neg(negative, table, table_len, next_random, + next_random = fast_document_dm_neg(negative, table, table_len, next_random, _neu1, syn1neg, indexes[i], _alpha, _work, size, _learn_hidden) @@ -548,7 +548,7 @@ def train_sentence_dm(model, word_vocabs, doctag_indexes, alpha, work=None, neu1 return result -def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, +def train_document_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): cdef int hs = model.hs @@ -567,11 +567,11 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non cdef int layer1_size = model.layer1_size cdef int vector_size = model.vector_size - cdef int codelens[MAX_SENTENCE_LEN] - cdef np.uint32_t indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t _doctag_indexes[MAX_SENTENCE_LEN] - cdef np.uint32_t window_indexes[MAX_SENTENCE_LEN] - cdef int sentence_len + cdef int codelens[MAX_DOCUMENT_LEN] + cdef np.uint32_t indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t _doctag_indexes[MAX_DOCUMENT_LEN] + cdef np.uint32_t window_indexes[MAX_DOCUMENT_LEN] + cdef int document_len cdef int doctag_len cdef int window = model.window cdef int expected_doctag_len = model.dm_tag_count @@ -582,8 +582,8 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non # For hierarchical softmax cdef REAL_t *syn1 - cdef np.uint32_t *points[MAX_SENTENCE_LEN] - cdef np.uint8_t *codes[MAX_SENTENCE_LEN] + cdef np.uint32_t *points[MAX_DOCUMENT_LEN] + cdef np.uint8_t *codes[MAX_DOCUMENT_LEN] # For negative sampling 
cdef REAL_t *syn1neg @@ -591,7 +591,7 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non cdef unsigned long long table_len cdef unsigned long long next_random - doctag_len = min(MAX_SENTENCE_LEN, len(doctag_indexes)) + doctag_len = min(MAX_DOCUMENT_LEN, len(doctag_indexes)) if doctag_len != expected_doctag_len: return 0 # skip doc without expected nmber of tags @@ -626,13 +626,13 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non neu1 = zeros(model.layer1_size, dtype=REAL) _neu1 = np.PyArray_DATA(neu1) - sentence_len = min(MAX_SENTENCE_LEN, len(word_vocabs)) + document_len = min(MAX_DOCUMENT_LEN, len(word_vocabs)) j = 0 - for i in range(sentence_len): + for i in range(document_len): word = word_vocabs[i] if word is None: - # shrink sentence to leave out word - sentence_len = sentence_len - 1 + # shrink document to leave out word + document_len = document_len - 1 continue # leaving j unchanged else: indexes[j] = word.index @@ -649,11 +649,11 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non _doctag_indexes[i] = doctag_indexes[i] result += 1 - # release GIL & train on the sentence + # release GIL & train on the document with nogil: - for i in range(sentence_len): + for i in range(document_len): j = i - window # negative OK: will pad with null word - k = i + window + 1 # past sentence end OK: will pad with null word + k = i + window + 1 # past document end OK: will pad with null word # compose l1 & clear work for m in range(doctag_len): @@ -665,7 +665,7 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non # word vectors in window if m == i: continue - if m < 0 or m >= sentence_len: + if m < 0 or m >= document_len: window_indexes[n] = null_word_index else: window_indexes[n] = indexes[m] @@ -676,11 +676,11 @@ def train_sentence_dm_concat(model, word_vocabs, doctag_indexes, alpha, work=Non memset(_work, 0, layer1_size * cython.sizeof(REAL_t)) 
# work to accumulate l1 error if hs: - fast_sentence_dmc_hs(points[i], codes[i], codelens[i], + fast_document_dmc_hs(points[i], codes[i], codelens[i], _neu1, syn1, _alpha, _work, layer1_size, vector_size, _learn_hidden) if negative: - next_random = fast_sentence_dmc_neg(negative, table, table_len, next_random, + next_random = fast_document_dmc_neg(negative, table, table_len, next_random, _neu1, syn1neg, indexes[i], _alpha, _work, layer1_size, vector_size, _learn_hidden) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 42fc77983e..a1a0453e63 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -450,8 +450,8 @@ def reset_from(self, other_model): self.table = other_model.table self.reset_weights() - def _prepare_sentences(self, sentences): - for sentence in sentences: + def _prepare_items(self, items): + for sentence in items: # avoid calling random_sample() where prob >= 1, to speed things up a little: sampled = [self.vocab[word] for word in sentence if word in self.vocab and (self.vocab[word].sample_probability >= 1.0 or @@ -523,7 +523,7 @@ def worker_train(): thread.start() # convert input strings to Vocab objects (eliding OOV/downsampled words), and start filling the jobs queue - for job_no, job in enumerate(utils.grouper(self._prepare_sentences(sentences), chunksize)): + for job_no, job in enumerate(utils.grouper(self._prepare_items(sentences), chunksize)): logger.debug("putting job #%i in the queue, qsize=%i" % (job_no, jobs.qsize())) jobs.put(job) logger.info("reached the end of input; waiting to finish %i outstanding jobs" % jobs.qsize()) From f88beab17f29c69979c562f7e47f325169e6dc5d Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 24 Jun 2015 02:47:49 -0700 Subject: [PATCH 43/49] recursive SaveLoad for DocvecsArray numpys --- gensim/models/doc2vec.py | 6 +- gensim/models/word2vec.py | 10 ++- gensim/utils.py | 144 +++++++++++++++++++++++++------------- 3 files changed, 107 insertions(+), 53 deletions(-) diff 
--git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index f0581b62ae..895546b107 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -243,7 +243,7 @@ def __str__(self): return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) -class DocvecsArray(object): +class DocvecsArray(utils.SaveLoad): """ Default storage of doc vectors during/after training, in a numpy array. @@ -661,10 +661,6 @@ def __str__(self): segments.append('t%d' % self.workers) return 'Doc2Vec(%s)' % ','.join(segments) - def save(self, *args, **kwargs): - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors - super(Doc2Vec, self).save(*args, **kwargs) ### TODO: save docvecs in same separate-numpy-file style - class TaggedBrownCorpus(object): """Iterate over documents from the Brown corpus (part of NLTK data), yielding diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index a1a0453e63..8329ff21c5 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -993,10 +993,18 @@ def __str__(self): def save(self, *args, **kwargs): - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) # don't bother storing the cached normalized vectors + # don't bother storing the cached normalized vectors, recalculable table + kwargs['ignore'] = kwargs.get('ignore', ['syn0norm','table']) super(Word2Vec, self).save(*args, **kwargs) save.__doc__ = utils.SaveLoad.save.__doc__ + @classmethod + def load(cls, *args, **kwargs): + model = super(Word2Vec, cls).load(*args, **kwargs) + if model.negative and not model.table: + model.make_table() # rebuild table if missing + return model + load.__doc__ = utils.SaveLoad.load.__doc__ class BrownCorpus(object): """Iterate over sentences from the Brown corpus (part of NLTK data).""" diff --git a/gensim/utils.py b/gensim/utils.py index 9e28d52b6b..09d35dc552 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -247,56 +247,78 @@ def load(cls, fname, mmap=None): 
""" logger.info("loading %s object from %s" % (cls.__name__, fname)) - if fname.endswith('.gz') or fname.endswith('.bz2'): - compress = True - subname = lambda *args: '.'.join([fname] + list(args) + ['npz']) - else: - compress = False - subname = lambda *args: '.'.join([fname] + list(args) + ['npy']) - + compress, subname = SaveLoad._adapt_by_suffix(fname) mmap_error = lambda x, y: IOError( 'Cannot mmap compressed object %s in file %s. ' % (x, y) + 'Use `load(fname, mmap=None)` or uncompress files manually.') obj = unpickle(fname) - for attrib in getattr(obj, '__numpys', []): + obj._load_specials(fname, mmap, compress, subname) + return obj + + + def _load_specials(self, fname, mmap, compress, subname): + """ + Loads any attributes that were stored specially, and gives the same + opportunity to recursively included SaveLoad instances. + + """ + for attrib in getattr(self, '__recursive_saveloads', []): + cfname = '.'.join((fname, attrib)) + logger.info("loading %s recursively from %s.* with mmap=%s" % ( + attrib, cfname, mmap)) + getattr(self, attrib)._load_specials(cfname, mmap, compress, subname) + + for attrib in getattr(self, '__numpys', []): logger.info("loading %s from %s with mmap=%s" % ( - attrib, subname(attrib), mmap)) + attrib, subname(fname, attrib), mmap)) if compress: if mmap: - raise mmap_error(attrib, subname(attrib)) + raise mmap_error(attrib, subname(fname, attrib)) - val = numpy.load(subname(attrib))['val'] + val = numpy.load(subname(fname, attrib))['val'] else: - val = numpy.load(subname(attrib), mmap_mode=mmap) + val = numpy.load(subname(fname, attrib), mmap_mode=mmap) - setattr(obj, attrib, val) + setattr(self, attrib, val) - for attrib in getattr(obj, '__scipys', []): + for attrib in getattr(self, '__scipys', []): logger.info("loading %s from %s with mmap=%s" % ( - attrib, subname(attrib), mmap)) - sparse = unpickle(subname(attrib)) + attrib, subname(fname, attrib), mmap)) + sparse = unpickle(subname(fname, attrib)) if compress: if mmap: - 
raise mmap_error(attrib, subname(attrib)) + raise mmap_error(fname, attrib, subname(fname, attrib)) - with numpy.load(subname(attrib, 'sparse')) as f: + with numpy.load(subname(fname, attrib, 'sparse')) as f: sparse.data = f['data'] sparse.indptr = f['indptr'] sparse.indices = f['indices'] else: - sparse.data = numpy.load(subname(attrib, 'data'), mmap_mode=mmap) - sparse.indptr = numpy.load(subname(attrib, 'indptr'), mmap_mode=mmap) - sparse.indices = numpy.load(subname(attrib, 'indices'), mmap_mode=mmap) + sparse.data = numpy.load(subname(fname, attrib, 'data'), mmap_mode=mmap) + sparse.indptr = numpy.load(subname(fname, attrib, 'indptr'), mmap_mode=mmap) + sparse.indices = numpy.load(subname(fname, attrib, 'indices'), mmap_mode=mmap) - setattr(obj, attrib, sparse) + setattr(self, attrib, sparse) - for attrib in getattr(obj, '__ignoreds', []): + for attrib in getattr(self, '__ignoreds', []): logger.info("setting ignored attribute %s to None" % (attrib)) - setattr(obj, attrib, None) - return obj + setattr(self, attrib, None) + + + @staticmethod + def _adapt_by_suffix(fname): + """Give appropriate compress setting and filename formula""" + if fname.endswith('.gz') or fname.endswith('.bz2'): + compress = True + subname = lambda *args: '.'.join(list(args) + ['npz']) + else: + compress = False + subname = lambda *args: '.'.join(list(args) + ['npy']) + return (compress, subname) + def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): @@ -324,14 +346,39 @@ def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, "saving %s object under %s, separately %s" % ( self.__class__.__name__, fname, separately)) - if fname.endswith('.gz') or fname.endswith('.bz2'): - compress = True - subname = lambda *args: '.'.join([fname] + list(args) + ['npz']) - else: - compress = False - subname = lambda *args: '.'.join([fname] + list(args) + ['npy']) + compress, subname = SaveLoad._adapt_by_suffix(fname) + + restores = 
self._save_specials(fname, separately, sep_limit, ignore, pickle_protocol, + compress, subname) + try: + pickle(self, fname, protocol=pickle_protocol) + finally: + # restore attribs handled specially + for obj, asides in restores: + for attrib, val in iteritems(asides): + setattr(obj, attrib, val) + + + def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, compress, subname): + """ + Save aside any attributes that need to be handled separately, including + by recursion any attributes that are themselves SaveLoad instances. + + Returns a list of (obj, {attrib: value, ...}) settings that the caller + should use to restore each object's attributes that were set aside + during the default pickle(). + + """ + asides = {} + recursive_saveloads = [] + restores = [] + for attrib, val in iteritems(self.__dict__): + if isinstance(val, SaveLoad): + recursive_saveloads.append(attrib) + cfname = '.'.join((fname,attrib)) + restores.extend(val._save_specials(cfname, separately, sep_limit, ignore, + pickle_protocol,compress, subname)) - tmp = {} sparse_matrices = (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix) if separately is None: separately = [] @@ -344,43 +391,43 @@ def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, # whatever's in `separately` or `ignore` at this point won't get pickled for attrib in separately + list(ignore): if hasattr(self, attrib): - tmp[attrib] = getattr(self, attrib) + asides[attrib] = getattr(self, attrib) delattr(self, attrib) try: numpys, scipys, ignoreds = [], [], [] - for attrib, val in iteritems(tmp): + for attrib, val in iteritems(asides): if isinstance(val, numpy.ndarray) and attrib not in ignore: numpys.append(attrib) logger.info("storing numpy array '%s' to %s" % ( - attrib, subname(attrib))) + attrib, subname(fname, attrib))) if compress: - numpy.savez_compressed(subname(attrib), val=numpy.ascontiguousarray(val)) + numpy.savez_compressed(subname(fname, attrib), val=numpy.ascontiguousarray(val)) 
else: - numpy.save(subname(attrib), numpy.ascontiguousarray(val)) + numpy.save(subname(fname, attrib), numpy.ascontiguousarray(val)) elif isinstance(val, (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)) and attrib not in ignore: scipys.append(attrib) logger.info("storing scipy.sparse array '%s' under %s" % ( - attrib, subname(attrib))) + attrib, subname(fname, attrib))) if compress: - numpy.savez_compressed(subname(attrib, 'sparse'), + numpy.savez_compressed(subname(fname, attrib, 'sparse'), data=val.data, indptr=val.indptr, indices=val.indices) else: - numpy.save(subname(attrib, 'data'), val.data) - numpy.save(subname(attrib, 'indptr'), val.indptr) - numpy.save(subname(attrib, 'indices'), val.indices) + numpy.save(subname(fname, attrib, 'data'), val.data) + numpy.save(subname(fname, attrib, 'indptr'), val.indptr) + numpy.save(subname(fname, attrib, 'indices'), val.indices) data, indptr, indices = val.data, val.indptr, val.indices val.data, val.indptr, val.indices = None, None, None try: # store array-less object - pickle(val, subname(attrib), protocol=pickle_protocol) + pickle(val, subname(fname, attrib), protocol=pickle_protocol) finally: val.data, val.indptr, val.indices = data, indptr, indices else: @@ -390,11 +437,14 @@ def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, self.__dict__['__numpys'] = numpys self.__dict__['__scipys'] = scipys self.__dict__['__ignoreds'] = ignoreds - pickle(self, fname, protocol=pickle_protocol) - finally: - # restore the attributes - for attrib, val in iteritems(tmp): + self.__dict__['__recursive_saveloads'] = recursive_saveloads + except: + # restore the attributes if exception-interrupted + for attrib, val in iteritems(asides): setattr(self, attrib, val) + raise + return restores + [(self, asides)] + def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): From 19faaab6dac7e6b7bb80664ec5bcb2d9342b9be5 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: 
Wed, 24 Jun 2015 03:29:51 -0700 Subject: [PATCH 44/49] don't (try to) share __doc__ --- gensim/models/word2vec.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 8329ff21c5..bf76a28c4e 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -998,14 +998,15 @@ def save(self, *args, **kwargs): super(Word2Vec, self).save(*args, **kwargs) save.__doc__ = utils.SaveLoad.save.__doc__ + @classmethod def load(cls, *args, **kwargs): model = super(Word2Vec, cls).load(*args, **kwargs) if model.negative and not model.table: model.make_table() # rebuild table if missing return model - load.__doc__ = utils.SaveLoad.load.__doc__ + class BrownCorpus(object): """Iterate over sentences from the Brown corpus (part of NLTK data).""" def __init__(self, dirname): From a1ed4902bc7fbc23fe9b8095f90778e4dbf0c450 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 24 Jun 2015 03:52:24 -0700 Subject: [PATCH 45/49] reorder to respect ignores; move mmap_error (fixes unit tests) --- gensim/utils.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/gensim/utils.py b/gensim/utils.py index 09d35dc552..fe1c2e9744 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -249,10 +249,6 @@ def load(cls, fname, mmap=None): compress, subname = SaveLoad._adapt_by_suffix(fname) - mmap_error = lambda x, y: IOError( - 'Cannot mmap compressed object %s in file %s. ' % (x, y) + - 'Use `load(fname, mmap=None)` or uncompress files manually.') - obj = unpickle(fname) obj._load_specials(fname, mmap, compress, subname) return obj @@ -264,6 +260,11 @@ def _load_specials(self, fname, mmap, compress, subname): opportunity to recursively included SaveLoad instances. """ + + mmap_error = lambda x, y: IOError( + 'Cannot mmap compressed object %s in file %s. 
' % (x, y) + + 'Use `load(fname, mmap=None)` or uncompress files manually.') + for attrib in getattr(self, '__recursive_saveloads', []): cfname = '.'.join((fname, attrib)) logger.info("loading %s recursively from %s.* with mmap=%s" % ( @@ -290,7 +291,7 @@ def _load_specials(self, fname, mmap, compress, subname): sparse = unpickle(subname(fname, attrib)) if compress: if mmap: - raise mmap_error(fname, attrib, subname(fname, attrib)) + raise mmap_error(attrib, subname(fname, attrib)) with numpy.load(subname(fname, attrib, 'sparse')) as f: sparse.data = f['data'] @@ -370,15 +371,6 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, """ asides = {} - recursive_saveloads = [] - restores = [] - for attrib, val in iteritems(self.__dict__): - if isinstance(val, SaveLoad): - recursive_saveloads.append(attrib) - cfname = '.'.join((fname,attrib)) - restores.extend(val._save_specials(cfname, separately, sep_limit, ignore, - pickle_protocol,compress, subname)) - sparse_matrices = (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix) if separately is None: separately = [] @@ -394,6 +386,15 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, asides[attrib] = getattr(self, attrib) delattr(self, attrib) + recursive_saveloads = [] + restores = [] + for attrib, val in iteritems(self.__dict__): + if isinstance(val, SaveLoad): + recursive_saveloads.append(attrib) + cfname = '.'.join((fname,attrib)) + restores.extend(val._save_specials(cfname, separately, sep_limit, ignore, + pickle_protocol,compress, subname)) + try: numpys, scipys, ignoreds = [], [], [] for attrib, val in iteritems(asides): From d02b5743c10bb937f8e47f11692b5ff6bbf74b57 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 24 Jun 2015 04:22:22 -0700 Subject: [PATCH 46/49] =?UTF-8?q?only=20swap=20dot/saxpy=20=E2=80=93=20red?= =?UTF-8?q?uce=20redundancy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
gensim/models/word2vec_inner.c | 4375 +++++++----------------------- gensim/models/word2vec_inner.pyx | 464 +--- 2 files changed, 978 insertions(+), 3861 deletions(-) diff --git a/gensim/models/word2vec_inner.c b/gensim/models/word2vec_inner.c index f369134f5c..7c0390435d 100644 --- a/gensim/models/word2vec_inner.c +++ b/gensim/models/word2vec_inner.c @@ -765,45 +765,27 @@ typedef double (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_snrm2_ptr)(int * ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil # <<<<<<<<<<<<<< * - * ctypedef void (*fast_sentence_sg_hs_ptr) ( + * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x */ typedef void (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_sscal_ptr)(int const *, float const *, float const *, int const *); -/* "trunk/gensim/models/word2vec_inner.pyx":34 - * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil - * - * ctypedef void (*fast_sentence_sg_hs_ptr) ( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, - */ -typedef void (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs_ptr)(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); - -/* "trunk/gensim/models/word2vec_inner.pyx":39 - * const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil +/* "trunk/gensim/models/word2vec_inner.pyx":50 * - * ctypedef unsigned long long (*fast_sentence_sg_neg_ptr) ( # <<<<<<<<<<<<<< - * const int 
negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ -typedef unsigned PY_LONG_LONG (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg_ptr)(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); - -/* "trunk/gensim/models/word2vec_inner.pyx":45 - * unsigned long long next_random) nogil + * # function implementations swapped based on BLAS detected + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil # <<<<<<<<<<<<<< + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil * - * ctypedef void (*fast_sentence_cbow_hs_ptr) ( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -typedef void (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs_ptr)(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); +typedef __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_our_dot_ptr)(int const *, float const *, int const 
*, float const *, int const *); /* "trunk/gensim/models/word2vec_inner.pyx":51 - * int i, int j, int k, int cbow_mean) nogil + * # function implementations swapped based on BLAS detected + * ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil + * ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil # <<<<<<<<<<<<<< * - * ctypedef unsigned long long (*fast_sentence_cbow_neg_ptr) ( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * cdef our_dot_ptr our_dot */ -typedef unsigned PY_LONG_LONG (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg_ptr)(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); +typedef void (*__pyx_t_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_ptr)(int const *, float const *, float const *, int const *, float *, int const *); /* --- Runtime support code (head) --- */ #ifndef CYTHON_REFNANNY @@ -1160,25 +1142,19 @@ static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_sdot_ptr __pyx_v_5trunk_6 static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_dsdot_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot; static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_snrm2_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_snrm2; static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_sscal_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal; 
-static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs; -static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg; -static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs; -static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg; static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[1000]; static int __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE; static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); /*proto*/ -static 
unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_our_dot_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot; +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_ptr __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy; +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_double(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float(int const *, float const *, int const *, float const *, int const *); /*proto*/ +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ 
+static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int 
const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.word2vec_inner" int __pyx_module_is_main_trunk__gensim__models__word2vec_inner = 0; @@ -1375,167 +1351,192 @@ static PyObject *__pyx_codeobj__12; static PyObject *__pyx_codeobj__14; static PyObject *__pyx_codeobj__16; -/* "trunk/gensim/models/word2vec_inner.pyx":76 - * cdef REAL_t ONEF = 1.0 +/* "trunk/gensim/models/word2vec_inner.pyx":57 + * + * # for when fblas.sdot returns a double + * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return dsdot(N, X, incX, Y, incY) * - * cdef void fast_sentence0_sg_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t 
__pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_double(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_r; - /* "trunk/gensim/models/word2vec_inner.pyx":82 + /* "trunk/gensim/models/word2vec_inner.pyx":58 + * # for when fblas.sdot returns a double + * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + * return dsdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< * - * cdef long long a, b - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef REAL_t f, g + * # for when fblas.sdot returns a float + */ + __pyx_r = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); + goto __pyx_L0; + + /* "trunk/gensim/models/word2vec_inner.pyx":57 + * + * # for when fblas.sdot returns a double + * cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return dsdot(N, X, incX, Y, incY) * */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":85 - * cdef REAL_t f, g + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/word2vec_inner.pyx":61 + * + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return sdot(N, X, incX, Y, incY) * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelen): - * row2 = word_point[b] * size */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":86 +static 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float(int const *__pyx_v_N, float const *__pyx_v_X, int const *__pyx_v_incX, float const *__pyx_v_Y, int const *__pyx_v_incY) { + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_r; + + /* "trunk/gensim/models/word2vec_inner.pyx":62 + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + * return sdot(N, X, incX, Y, incY) # <<<<<<<<<<<<<< * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelen): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * # for when no blas available */ - __pyx_t_1 = __pyx_v_codelen; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; + __pyx_r = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot(__pyx_v_N, __pyx_v_X, __pyx_v_incX, __pyx_v_Y, __pyx_v_incY)); + goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":87 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelen): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: + /* "trunk/gensim/models/word2vec_inner.pyx":61 + * + * # for when fblas.sdot returns a float + * cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * return sdot(N, X, incX, Y, incY) + * */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":88 - * for b in range(codelen): - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue + /* function exit code */ + 
__pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/word2vec_inner.pyx":65 + * + * # for when no blas available + * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * # not a true full dot()-implementation: just enough for our cases + * cdef int i */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); - /* "trunk/gensim/models/word2vec_inner.pyx":89 - * row2 = word_point[b] * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] +static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas(int const *__pyx_v_N, float const *__pyx_v_X, CYTHON_UNUSED int const *__pyx_v_incX, float const *__pyx_v_Y, CYTHON_UNUSED int const *__pyx_v_incY) { + int __pyx_v_i; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_a; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_r; + int __pyx_t_1; + + /* "trunk/gensim/models/word2vec_inner.pyx":69 + * cdef int i + * cdef REAL_t a + * a = 0.0 # <<<<<<<<<<<<<< + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { + __pyx_v_a = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/word2vec_inner.pyx":90 - * f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - * if f 
<= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha + /* "trunk/gensim/models/word2vec_inner.pyx":70 + * cdef REAL_t a + * a = 0.0 + * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< + * a += X[i] * Y[i] + * return a */ - goto __pyx_L3_continue; - } + __pyx_t_1 = (__pyx_v_N[0]); + for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { - /* "trunk/gensim/models/word2vec_inner.pyx":91 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":71 + * a = 0.0 + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] # <<<<<<<<<<<<<< + * return a + * */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); + __pyx_v_a = (__pyx_v_a + ((__pyx_v_X[__pyx_v_i]) * (__pyx_v_Y[__pyx_v_i]))); + } - /* "trunk/gensim/models/word2vec_inner.pyx":92 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":72 + * for i from 0 <= i < N[0] by 1: + * a += X[i] * Y[i] + * return a # <<<<<<<<<<<<<< + * + * # for when no blas available */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); + __pyx_r = __pyx_v_a; + goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":93 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * saxpy(&size, &ONEF, work, &ONE, 
&syn0[row1], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":65 + * + * # for when no blas available + * cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * # not a true full dot()-implementation: just enough for our cases + * cdef int i */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":94 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + /* function exit code */ + __pyx_L0:; + return __pyx_r; +} + +/* "trunk/gensim/models/word2vec_inner.pyx":75 * + * # for when no blas available + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * cdef int i + * for i from 0 <= i < N[0] by 1: */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L3_continue:; - } - /* "trunk/gensim/models/word2vec_inner.pyx":95 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const *__pyx_v_N, float const *__pyx_v_alpha, float const *__pyx_v_X, int const *__pyx_v_incX, float *__pyx_v_Y, int const *__pyx_v_incY) { + int __pyx_v_i; + int __pyx_t_1; + + /* 
"trunk/gensim/models/word2vec_inner.pyx":77 + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: + * cdef int i + * for i from 0 <= i < N[0] by 1: # <<<<<<<<<<<<<< + * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] + * + */ + __pyx_t_1 = (__pyx_v_N[0]); + for (__pyx_v_i = 0; __pyx_v_i < __pyx_t_1; __pyx_v_i+=1) { + + /* "trunk/gensim/models/word2vec_inner.pyx":78 + * cdef int i + * for i from 0 <= i < N[0] by 1: + * Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))]) = (((__pyx_v_alpha[0]) * (__pyx_v_X[(__pyx_v_i * (__pyx_v_incX[0]))])) + (__pyx_v_Y[(__pyx_v_i * (__pyx_v_incY[0]))])); + } - /* "trunk/gensim/models/word2vec_inner.pyx":76 - * cdef REAL_t ONEF = 1.0 + /* "trunk/gensim/models/word2vec_inner.pyx":75 * - * cdef void fast_sentence0_sg_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, + * # for when no blas available + * cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: # <<<<<<<<<<<<<< + * cdef int i + * for i from 0 <= i < N[0] by 1: */ /* function exit code */ } -/* "trunk/gensim/models/word2vec_inner.pyx":98 +/* "trunk/gensim/models/word2vec_inner.pyx":81 * * - * cdef void fast_sentence1_sg_hs( # <<<<<<<<<<<<<< + * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void 
__pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work) { +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1546,7 +1547,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs int __pyx_t_3; int __pyx_t_4; - /* "trunk/gensim/models/word2vec_inner.pyx":104 + /* "trunk/gensim/models/word2vec_inner.pyx":87 * * cdef long long a, b * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< @@ -1555,7 +1556,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":107 + /* "trunk/gensim/models/word2vec_inner.pyx":90 * cdef REAL_t f, g * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -1564,38 +1565,38 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs */ 
memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":108 + /* "trunk/gensim/models/word2vec_inner.pyx":91 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): # <<<<<<<<<<<<<< * row2 = word_point[b] * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) */ __pyx_t_1 = __pyx_v_codelen; for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_b = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":109 + /* "trunk/gensim/models/word2vec_inner.pyx":92 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelen): * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":110 + /* "trunk/gensim/models/word2vec_inner.pyx":93 * for b in range(codelen): * row2 = word_point[b] * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* 
"trunk/gensim/models/word2vec_inner.pyx":111 + /* "trunk/gensim/models/word2vec_inner.pyx":94 * row2 = word_point[b] * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1611,8 +1612,8 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":112 - * f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":95 + * f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -1621,57 +1622,57 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs goto __pyx_L3_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":113 + /* "trunk/gensim/models/word2vec_inner.pyx":96 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":114 + /* "trunk/gensim/models/word2vec_inner.pyx":97 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - 
__pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/word2vec_inner.pyx":115 + /* "trunk/gensim/models/word2vec_inner.pyx":98 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":116 + /* "trunk/gensim/models/word2vec_inner.pyx":99 * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), 
(&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); __pyx_L3_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":117 - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":100 + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":98 + /* "trunk/gensim/models/word2vec_inner.pyx":81 * * - * cdef void fast_sentence1_sg_hs( # <<<<<<<<<<<<<< + * cdef void fast_sentence_sg_hs( # <<<<<<<<<<<<<< * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, * REAL_t *syn0, REAL_t *syn1, const int size, */ @@ -1679,2339 +1680,117 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs /* function exit code */ } -/* "trunk/gensim/models/word2vec_inner.pyx":120 +/* "trunk/gensim/models/word2vec_inner.pyx":103 * * - * cdef void fast_sentence2_sg_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, 
REAL_t *syn1, const int size, + * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_b; +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random) { PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; + unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; + __pyx_t_5numpy_uint32_t __pyx_v_target_index; + int __pyx_v_d; + unsigned PY_LONG_LONG __pyx_r; + long 
__pyx_t_1; + int __pyx_t_2; int __pyx_t_3; - PY_LONG_LONG __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; + int __pyx_t_4; - /* "trunk/gensim/models/word2vec_inner.pyx":126 + /* "trunk/gensim/models/word2vec_inner.pyx":110 * - * cdef long long a, b + * cdef long long a * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef REAL_t f, g - * + * cdef unsigned long long modulo = 281474976710655ULL + * cdef REAL_t f, g, label */ __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":129 - * cdef REAL_t f, g - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * for b in range(codelen): + /* "trunk/gensim/models/word2vec_inner.pyx":111 + * cdef long long a + * cdef long long row1 = word2_index * size, row2 + * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< + * cdef REAL_t f, g, label + * cdef np.uint32_t target_index */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_v_modulo = 281474976710655ULL; - /* "trunk/gensim/models/word2vec_inner.pyx":130 + /* "trunk/gensim/models/word2vec_inner.pyx":116 + * cdef int d * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * for b in range(codelen): - * row2 = word_point[b] * size + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< + * + * for d in range(negative+1): */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":131 - * for a in range(size): - * work[a] = 0.0 - * for b in range(codelen): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = 0.0 + /* "trunk/gensim/models/word2vec_inner.pyx":118 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * + * for d in range(negative+1): # <<<<<<<<<<<<<< + * if d 
== 0: + * target_index = word_index */ - __pyx_t_1 = __pyx_v_codelen; + __pyx_t_1 = (__pyx_v_negative + 1); for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":132 - * work[a] = 0.0 - * for b in range(codelen): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); + __pyx_v_d = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":133 - * for b in range(codelen): - * row2 = word_point[b] * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] + /* "trunk/gensim/models/word2vec_inner.pyx":119 + * + * for d in range(negative+1): + * if d == 0: # <<<<<<<<<<<<<< + * target_index = word_index + * label = ONEF */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); + __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":134 - * row2 = word_point[b] * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: + /* "trunk/gensim/models/word2vec_inner.pyx":120 + * for d in range(negative+1): + * if d == 0: + * target_index = word_index # <<<<<<<<<<<<<< + * label = ONEF + * else: */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; + __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/word2vec_inner.pyx":135 - * f = 0.0 - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue + /* "trunk/gensim/models/word2vec_inner.pyx":121 + * if d == 0: + * target_index = word_index + * label = ONEF # <<<<<<<<<<<<<< + * else: + * target_index = table[(next_random >> 16) % table_len] */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_syn0[(__pyx_v_row1 + 
__pyx_v_a)]) * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); + __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; + goto __pyx_L5; } + /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":136 - * for a in range(size): - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] + /* "trunk/gensim/models/word2vec_inner.pyx":123 + * label = ONEF + * else: + * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< + * next_random = (next_random * 25214903917ULL + 11) & modulo + * if target_index == word_index: */ - __pyx_t_6 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_6) { - } else { - __pyx_t_5 = __pyx_t_6; - goto __pyx_L10_bool_binop_done; - } - __pyx_t_6 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_5 = __pyx_t_6; - __pyx_L10_bool_binop_done:; - if (__pyx_t_5) { + __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/word2vec_inner.pyx":137 - * f += syn0[row1 + a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha + /* "trunk/gensim/models/word2vec_inner.pyx":124 + * else: + * target_index = table[(next_random >> 16) % table_len] + * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< + * if target_index == word_index: + * continue */ - goto __pyx_L5_continue; - } + __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/word2vec_inner.pyx":138 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - */ - __pyx_v_f = 
(__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":139 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1[row2 + a] - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":140 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1[row2 + a] - * for a in range(size): - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; - - /* "trunk/gensim/models/word2vec_inner.pyx":141 - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - * work[a] += g * syn1[row2 + a] # <<<<<<<<<<<<<< - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) = ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":142 - * for a in range(size): - * work[a] += g * syn1[row2 + a] - * for a in range(size): # <<<<<<<<<<<<<< - * syn1[row2 + a] += g * syn0[row1 + a] - * for a in range(size): - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_4 = 0; __pyx_t_4 < __pyx_t_3; __pyx_t_4+=1) { - __pyx_v_a = __pyx_t_4; - - /* "trunk/gensim/models/word2vec_inner.pyx":143 - * work[a] += g * syn1[row2 + a] - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] # <<<<<<<<<<<<<< - * for a in range(size): - * syn0[row1 + a] += work[a] - */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1[__pyx_t_7]) = ((__pyx_v_syn1[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]))); - } - __pyx_L5_continue:; - } - - /* 
"trunk/gensim/models/word2vec_inner.pyx":144 - * for a in range(size): - * syn1[row2 + a] += g * syn0[row1 + a] - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[row1 + a] += work[a] - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":145 - * syn1[row2 + a] += g * syn0[row1 + a] - * for a in range(size): - * syn0[row1 + a] += work[a] # <<<<<<<<<<<<<< - * - * - */ - __pyx_t_4 = (__pyx_v_row1 + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_4]) = ((__pyx_v_syn0[__pyx_t_4]) + (__pyx_v_work[__pyx_v_a])); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":120 - * - * - * cdef void fast_sentence2_sg_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - * REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/word2vec_inner.pyx":148 - * - * - * cdef unsigned long long fast_sentence0_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random) { - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - long __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "trunk/gensim/models/word2vec_inner.pyx":155 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":156 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/word2vec_inner.pyx":161 - * cdef int d - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":163 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_1 = (__pyx_v_negative + 1); - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_d = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":164 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":165 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - 
__pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/word2vec_inner.pyx":166 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L5; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":168 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/word2vec_inner.pyx":169 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/word2vec_inner.pyx":170 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":171 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":172 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - __pyx_L5:; - - /* "trunk/gensim/models/word2vec_inner.pyx":174 - * label = 0.0 - * - * 
row2 = target_index * size # <<<<<<<<<<<<<< - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":175 - * - * row2 = target_index * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); - - /* "trunk/gensim/models/word2vec_inner.pyx":176 - * row2 = target_index * size - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L8_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L8_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":177 - * f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":178 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* 
"trunk/gensim/models/word2vec_inner.pyx":179 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":180 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* "trunk/gensim/models/word2vec_inner.pyx":181 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":183 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* 
"trunk/gensim/models/word2vec_inner.pyx":185 - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef unsigned long long fast_sentence1_sg_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/word2vec_inner.pyx":148 - * - * - * cdef unsigned long long fast_sentence0_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/word2vec_inner.pyx":187 - * return next_random - * - * cdef unsigned long long fast_sentence1_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random) { - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - long 
__pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - - /* "trunk/gensim/models/word2vec_inner.pyx":194 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":195 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/word2vec_inner.pyx":200 - * cdef int d - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":202 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * - * if d == 0: - */ - __pyx_t_1 = (__pyx_v_negative + 1); - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_d = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":204 - * for d in range(negative+1): - * - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":205 - * - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/word2vec_inner.pyx":206 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L5; - } - /*else*/ { - - /* 
"trunk/gensim/models/word2vec_inner.pyx":208 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/word2vec_inner.pyx":209 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/word2vec_inner.pyx":210 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":211 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":212 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - __pyx_L5:; - - /* "trunk/gensim/models/word2vec_inner.pyx":214 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":215 - * - * row2 = target_index * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # 
<<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); - - /* "trunk/gensim/models/word2vec_inner.pyx":216 - * row2 = target_index * size - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L8_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L8_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":217 - * f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L3_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":218 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":219 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* 
"trunk/gensim/models/word2vec_inner.pyx":220 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* "trunk/gensim/models/word2vec_inner.pyx":221 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< - * - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":223 - * saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - * - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* "trunk/gensim/models/word2vec_inner.pyx":225 - * saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef unsigned long long fast_sentence2_sg_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/word2vec_inner.pyx":187 - * return next_random - * - * cdef unsigned long long 
fast_sentence1_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/word2vec_inner.pyx":227 - * return next_random - * - * cdef unsigned long long fast_sentence2_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_row1; - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - int __pyx_v_d; - unsigned PY_LONG_LONG __pyx_r; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - long __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - - /* "trunk/gensim/models/word2vec_inner.pyx":234 - * - * cdef long long a - * cdef long long row1 = word2_index * size, row2 # <<<<<<<<<<<<<< - * cdef unsigned long long modulo = 
281474976710655ULL - * cdef REAL_t f, g, label - */ - __pyx_v_row1 = (__pyx_v_word2_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":235 - * cdef long long a - * cdef long long row1 = word2_index * size, row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, label - * cdef np.uint32_t target_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/word2vec_inner.pyx":240 - * cdef int d - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":241 - * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":243 - * work[a] = 0.0 - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * - * if d == 0: - */ - __pyx_t_3 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_3; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/word2vec_inner.pyx":245 - * for d in range(negative+1): - * - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_4 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":246 - * - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/word2vec_inner.pyx":247 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L7; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":249 - * label = ONEF - * 
else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/word2vec_inner.pyx":250 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/word2vec_inner.pyx":251 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":252 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 - * - */ - goto __pyx_L5_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":253 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - __pyx_L7:; - - /* "trunk/gensim/models/word2vec_inner.pyx":255 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":256 - * - * row2 = target_index * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* 
"trunk/gensim/models/word2vec_inner.pyx":257 - * row2 = target_index * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":258 - * f = 0.0 - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]) * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":259 - * for a in range(size): - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_6 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_6) { - } else { - __pyx_t_4 = __pyx_t_6; - goto __pyx_L12_bool_binop_done; - } - __pyx_t_6 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_6; - __pyx_L12_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":260 - * f += syn0[row1 + a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - */ - goto __pyx_L5_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":261 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * for a in range(size): - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":262 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # 
<<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] - */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":263 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":264 - * g = (label - f) * alpha - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * for a in range(size): - * syn1neg[row2 + a] += g * syn0[row1 + a] - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) = ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":265 - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): # <<<<<<<<<<<<<< - * syn1neg[row2 + a] += g * syn0[row1 + a] - * - */ - __pyx_t_5 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_5; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":266 - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): - * syn1neg[row2 + a] += g * syn0[row1 + a] # <<<<<<<<<<<<<< - * - * for a in range(size): - */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1neg[__pyx_t_7]) = ((__pyx_v_syn1neg[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn0[(__pyx_v_row1 + __pyx_v_a)]))); - } - __pyx_L5_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":268 - * syn1neg[row2 + a] += g * syn0[row1 + a] - * - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[row1 + a] += work[a] - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":269 - * - * for a in 
range(size): - * syn0[row1 + a] += work[a] # <<<<<<<<<<<<<< - * - * return next_random - */ - __pyx_t_7 = (__pyx_v_row1 + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":271 - * syn0[row1 + a] += work[a] - * - * return next_random # <<<<<<<<<<<<<< - * - * cdef void fast_sentence0_cbow_hs( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/word2vec_inner.pyx":227 - * return next_random - * - * cdef unsigned long long fast_sentence2_sg_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, - * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - */ - - /* function exit code */ - __pyx_L0:; - return __pyx_r; -} - -/* "trunk/gensim/models/word2vec_inner.pyx":273 - * return next_random - * - * cdef void fast_sentence0_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; - int __pyx_v_m; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - PY_LONG_LONG __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":284 - * cdef int m - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":285 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":286 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":287 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":288 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":290 - * continue - * else: - * count += ONEF # 
<<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - - /* "trunk/gensim/models/word2vec_inner.pyx":291 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":292 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L9_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":293 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/word2vec_inner.pyx":294 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - 
__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - goto __pyx_L8; - } - __pyx_L8:; - - /* "trunk/gensim/models/word2vec_inner.pyx":296 - * sscal(&size, &inv_count, neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":297 - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_b = __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":298 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":299 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); - - /* "trunk/gensim/models/word2vec_inner.pyx":300 - * row2 = word_point[b] * size - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< 
- * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L14_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L14_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":301 - * f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L11_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":302 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":303 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":304 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* 
"trunk/gensim/models/word2vec_inner.pyx":305 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * - * for m in range(j, k): - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L11_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":307 - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":308 - * - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":309 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - */ - goto __pyx_L16_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":311 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< - * - * cdef void fast_sentence1_cbow_hs( - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * 
__pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L16_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":273 - * return next_random - * - * cdef void fast_sentence0_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/word2vec_inner.pyx":313 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - * - * cdef void fast_sentence1_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean) { - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; - int __pyx_v_m; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - PY_LONG_LONG __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":324 - * cdef int m 
- * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":325 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":326 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":327 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":328 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":330 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - - /* "trunk/gensim/models/word2vec_inner.pyx":331 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if 
cbow_mean and count > (0.5): - * inv_count = ONEF/count - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":332 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count , neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L9_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":333 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count , neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/word2vec_inner.pyx":334 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count , neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - goto __pyx_L8; - } - __pyx_L8:; - - /* "trunk/gensim/models/word2vec_inner.pyx":336 - * sscal(&size, &inv_count , neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] * size - */ 
- memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":337 - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { - __pyx_v_b = __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":338 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * for b in range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":339 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); - - /* "trunk/gensim/models/word2vec_inner.pyx":340 - * row2 = word_point[b] * size - * f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L14_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L14_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":341 - * f = sdot(&size, neu1, &ONE, &syn1[row2], 
&ONE) - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L11_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":342 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":343 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":344 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - - /* "trunk/gensim/models/word2vec_inner.pyx":345 - * g = (1 - word_code[b] - f) * alpha - * saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * - * for m in range(j, k): - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), 
(&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L11_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":347 - * saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - * - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":348 - * - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":349 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - */ - goto __pyx_L16_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":351 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< - * - * cdef void fast_sentence2_cbow_hs( - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L16_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":313 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - * - * cdef void fast_sentence1_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const 
int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/word2vec_inner.pyx":353 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * - * cdef void fast_sentence2_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean) { - PY_LONG_LONG __pyx_v_a; - PY_LONG_LONG __pyx_v_b; - PY_LONG_LONG __pyx_v_row2; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; - int __pyx_v_m; - int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - PY_LONG_LONG __pyx_t_8; - - /* "trunk/gensim/models/word2vec_inner.pyx":364 - * cdef int m - * - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] = 0.0 - * count = 0.0 - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":365 - * - * for a in range(size): - * neu1[a] = 0.0 # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - 
(__pyx_v_neu1[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":366 - * for a in range(size): - * neu1[a] = 0.0 - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":367 - * neu1[a] = 0.0 - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/word2vec_inner.pyx":368 - * count = 0.0 - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L8_bool_binop_done; - } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L8_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":369 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L5_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":371 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - - /* "trunk/gensim/models/word2vec_inner.pyx":372 - * else: - * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[indexes[m] * size + a] - * if cbow_mean and count > (0.5): - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":373 - * count += 
ONEF - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + (__pyx_v_syn0[(((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a)])); - } - } - __pyx_L5_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":374 - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] /= count - */ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L13_bool_binop_done; - } - __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L13_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":375 - * neu1[a] += syn0[indexes[m] * size + a] - * if cbow_mean and count > (0.5): - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] /= count - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":376 - * if cbow_mean and count > (0.5): - * for a in range(size): - * neu1[a] /= count # <<<<<<<<<<<<<< - * - * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) / __pyx_v_count); - } - goto __pyx_L12; - } - __pyx_L12:; - - /* "trunk/gensim/models/word2vec_inner.pyx":378 - * neu1[a] /= count - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * for b in range(codelens[i]): - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":379 - * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< - * for b in range(codelens[i]): - * row2 = word_point[b] 
* size - */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":380 - * for a in range(size): - * work[a] = 0.0 - * for b in range(codelens[i]): # <<<<<<<<<<<<<< - * row2 = word_point[b] * size - * f = 0.0 - */ - __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_b = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":381 - * work[a] = 0.0 - * for b in range(codelens[i]): - * row2 = word_point[b] * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): - */ - __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - - /* "trunk/gensim/models/word2vec_inner.pyx":382 - * for b in range(codelens[i]): - * row2 = word_point[b] * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":383 - * row2 = word_point[b] * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/word2vec_inner.pyx":384 - * f = 0.0 - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] # <<<<<<<<<<<<<< - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_neu1[__pyx_v_a]) * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":385 - * for a in range(size): - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - */ - __pyx_t_5 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto 
__pyx_L24_bool_binop_done; - } - __pyx_t_5 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L24_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":386 - * f += neu1[a] * syn1[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue # <<<<<<<<<<<<<< - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - */ - goto __pyx_L19_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":387 - * if f <= -MAX_EXP or f >= MAX_EXP: - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - */ - __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - - /* "trunk/gensim/models/word2vec_inner.pyx":388 - * continue - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1[row2 + a] - */ - __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - - /* "trunk/gensim/models/word2vec_inner.pyx":389 - * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1[row2 + a] - * for a in range(size): - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/word2vec_inner.pyx":390 - * g = (1 - word_code[b] - f) * alpha - * for a in range(size): - * work[a] += g * syn1[row2 + a] # <<<<<<<<<<<<<< - * for a in range(size): - * syn1[row2 + a] += g * neu1[a] - */ - __pyx_t_8 = __pyx_v_a; - (__pyx_v_work[__pyx_t_8]) = ((__pyx_v_work[__pyx_t_8]) + (__pyx_v_g * (__pyx_v_syn1[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":391 - * for a in range(size): - * work[a] 
+= g * syn1[row2 + a] - * for a in range(size): # <<<<<<<<<<<<<< - * syn1[row2 + a] += g * neu1[a] - * - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_7 = 0; __pyx_t_7 < __pyx_t_3; __pyx_t_7+=1) { - __pyx_v_a = __pyx_t_7; - - /* "trunk/gensim/models/word2vec_inner.pyx":392 - * work[a] += g * syn1[row2 + a] - * for a in range(size): - * syn1[row2 + a] += g * neu1[a] # <<<<<<<<<<<<<< - * - * for m in range(j, k): - */ - __pyx_t_8 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1[__pyx_t_8]) = ((__pyx_v_syn1[__pyx_t_8]) + (__pyx_v_g * (__pyx_v_neu1[__pyx_v_a]))); - } - __pyx_L19_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":394 - * syn1[row2 + a] += g * neu1[a] - * - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; - - /* "trunk/gensim/models/word2vec_inner.pyx":395 - * - * for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { - } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L33_bool_binop_done; - } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L33_bool_binop_done:; - if (__pyx_t_4) { - - /* "trunk/gensim/models/word2vec_inner.pyx":396 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * for a in range(size): - */ - goto __pyx_L30_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":398 - * continue - * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[indexes[m] * size + a] += work[a] - * - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":399 - * else: - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< - * 
- * cdef unsigned long long fast_sentence0_cbow_neg( - */ - __pyx_t_7 = (((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } - } - __pyx_L30_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":353 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * - * cdef void fast_sentence2_cbow_hs( # <<<<<<<<<<<<<< - * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - */ - - /* function exit code */ -} - -/* "trunk/gensim/models/word2vec_inner.pyx":401 - * syn0[indexes[m] * size + a] += work[a] - * - * cdef unsigned long long fast_sentence0_cbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - */ - -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random) { - PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; - 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - __pyx_t_5numpy_uint32_t __pyx_v_word_index; - int __pyx_v_d; - int __pyx_v_m; - unsigned PY_LONG_LONG __pyx_r; - int __pyx_t_1; - int __pyx_t_2; - int __pyx_t_3; - int __pyx_t_4; - long __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":409 - * cdef long long a - * cdef long long row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label - * cdef np.uint32_t target_index, word_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/word2vec_inner.pyx":414 - * cdef int d, m - * - * word_index = indexes[i] # <<<<<<<<<<<<<< - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - - /* "trunk/gensim/models/word2vec_inner.pyx":416 - * word_index = indexes[i] - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * count = 0.0 - * for m in range(j, k): - */ - memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":417 - * - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 # <<<<<<<<<<<<<< - * for m in range(j, k): - * if m == i or codelens[m] == 0: - */ - __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":418 - * memset(neu1, 0, size * cython.sizeof(REAL_t)) - * count = 0.0 - * for m in range(j, k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":419 - * count = 0.0 - 
* for m in range(j, k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L6_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L6_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":420 - * for m in range(j, k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * count += ONEF - */ - goto __pyx_L3_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":422 - * continue - * else: - * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - */ - __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - - /* "trunk/gensim/models/word2vec_inner.pyx":423 - * else: - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L3_continue:; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":424 - * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) - */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 
= __pyx_t_4; - __pyx_L9_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":425 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/word2vec_inner.pyx":426 - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< - * - * memset(work, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - goto __pyx_L8; - } - __pyx_L8:; - - /* "trunk/gensim/models/word2vec_inner.pyx":428 - * sscal(&size, &inv_count, neu1, &ONE) - * - * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): - */ - memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - - /* "trunk/gensim/models/word2vec_inner.pyx":430 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_5 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/word2vec_inner.pyx":431 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":432 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/word2vec_inner.pyx":433 - * if d == 
0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L13; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":435 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/word2vec_inner.pyx":436 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/word2vec_inner.pyx":437 + /* "trunk/gensim/models/word2vec_inner.pyx":125 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -4021,17 +1800,17 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":438 + /* "trunk/gensim/models/word2vec_inner.pyx":126 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * */ - goto __pyx_L11_continue; + goto __pyx_L3_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":439 + /* "trunk/gensim/models/word2vec_inner.pyx":127 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -4040,29 +1819,29 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ 
__pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); } - __pyx_L13:; + __pyx_L5:; - /* "trunk/gensim/models/word2vec_inner.pyx":441 + /* "trunk/gensim/models/word2vec_inner.pyx":129 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":442 + /* "trunk/gensim/models/word2vec_inner.pyx":130 * * row2 = target_index * size - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":443 + /* "trunk/gensim/models/word2vec_inner.pyx":131 * row2 = target_index * size - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -4071,129 +1850,86 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L16_bool_binop_done; + goto __pyx_L8_bool_binop_done; } 
__pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L16_bool_binop_done:; + __pyx_L8_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":444 - * f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":132 + * f = our_dot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha */ - goto __pyx_L11_continue; + goto __pyx_L3_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":445 + /* "trunk/gensim/models/word2vec_inner.pyx":133 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":446 + /* "trunk/gensim/models/word2vec_inner.pyx":134 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/word2vec_inner.pyx":447 + /* "trunk/gensim/models/word2vec_inner.pyx":135 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, 
&syn0[row1], &ONE, &syn1neg[row2], &ONE) * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":448 + /* "trunk/gensim/models/word2vec_inner.pyx":136 * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * - * for m in range(j,k): + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L11_continue:; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_L3_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":450 - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - * - * for m in range(j,k): # <<<<<<<<<<<<<< - * if m == i or codelens[m] == 0: - * continue - */ - __pyx_t_1 = __pyx_v_k; - for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_m = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":451 + /* 
"trunk/gensim/models/word2vec_inner.pyx":138 + * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * - * for m in range(j,k): - * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< - * continue - * else: - */ - __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L21_bool_binop_done; - } - __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L21_bool_binop_done:; - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":452 - * for m in range(j,k): - * if m == i or codelens[m] == 0: - * continue # <<<<<<<<<<<<<< - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - */ - goto __pyx_L18_continue; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":454 - * continue - * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - } - __pyx_L18_continue:; - } + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":456 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":140 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< * - * cdef unsigned long long fast_sentence1_cbow_neg( + * */ 
__pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":401 - * syn0[indexes[m] * size + a] += work[a] + /* "trunk/gensim/models/word2vec_inner.pyx":103 * - * cdef unsigned long long fast_sentence0_cbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * + * cdef unsigned long long fast_sentence_sg_neg( # <<<<<<<<<<<<<< + * const int negative, np.uint32_t *table, unsigned long long table_len, + * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ /* function exit code */ @@ -4201,53 +1937,30 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":458 - * return next_random +/* "trunk/gensim/models/word2vec_inner.pyx":143 * - * cdef unsigned long long fast_sentence1_cbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * + * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random) { +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean) { + PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; - unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; - __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; - __pyx_t_5numpy_uint32_t __pyx_v_target_index; - __pyx_t_5numpy_uint32_t __pyx_v_word_index; - int __pyx_v_d; int __pyx_v_m; - unsigned PY_LONG_LONG __pyx_r; int __pyx_t_1; int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - long __pyx_t_5; - - /* "trunk/gensim/models/word2vec_inner.pyx":466 - * cdef long long a - * cdef long long row2 - * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label - * cdef np.uint32_t target_index, word_index - */ - __pyx_v_modulo = 281474976710655ULL; - - /* "trunk/gensim/models/word2vec_inner.pyx":471 - * cdef int d, m - * - * word_index = indexes[i] # <<<<<<<<<<<<<< - * - * 
memset(neu1, 0, size * cython.sizeof(REAL_t)) - */ - __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); + PY_LONG_LONG __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":473 - * word_index = indexes[i] + /* "trunk/gensim/models/word2vec_inner.pyx":154 + * cdef int m * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 @@ -4255,7 +1968,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":474 + /* "trunk/gensim/models/word2vec_inner.pyx":155 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -4264,7 +1977,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/word2vec_inner.pyx":475 + /* "trunk/gensim/models/word2vec_inner.pyx":156 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -4275,7 +1988,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":476 + /* "trunk/gensim/models/word2vec_inner.pyx":157 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -4293,7 +2006,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":477 + /* "trunk/gensim/models/word2vec_inner.pyx":158 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -4304,33 +2017,33 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas } /*else*/ { - /* 
"trunk/gensim/models/word2vec_inner.pyx":479 + /* "trunk/gensim/models/word2vec_inner.pyx":160 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - /* "trunk/gensim/models/word2vec_inner.pyx":480 + /* "trunk/gensim/models/word2vec_inner.pyx":161 * else: * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * if cbow_mean and count > (0.5): * inv_count = ONEF/count */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } __pyx_L3_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":481 + /* "trunk/gensim/models/word2vec_inner.pyx":162 * count += ONEF - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
*/ __pyx_t_4 = (__pyx_v_cbow_mean != 0); if (__pyx_t_4) { @@ -4343,19 +2056,19 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L9_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":482 - * saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":163 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * */ __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/word2vec_inner.pyx":483 + /* "trunk/gensim/models/word2vec_inner.pyx":164 * if cbow_mean and count > (0.5): * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< * * memset(work, 0, size * cython.sizeof(REAL_t)) */ @@ -4364,127 +2077,47 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":485 - * sscal(&size, &inv_count, neu1, &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":166 + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< - * - * for d in range(negative+1): + * for b in range(codelens[i]): + * row2 = word_point[b] * size */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":487 - * memset(work, 0, size * cython.sizeof(REAL_t)) - * - * for d in range(negative+1): # <<<<<<<<<<<<<< - * if d == 0: - * target_index = word_index - */ - __pyx_t_5 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { - __pyx_v_d = __pyx_t_1; - - /* "trunk/gensim/models/word2vec_inner.pyx":488 - * - * for d in range(negative+1): - * if d == 0: # <<<<<<<<<<<<<< - * target_index = word_index - * label = ONEF - */ - __pyx_t_3 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":489 - * for d in range(negative+1): - * if d == 0: - * target_index = word_index # <<<<<<<<<<<<<< - * label = ONEF - * else: - */ - __pyx_v_target_index = __pyx_v_word_index; - - /* "trunk/gensim/models/word2vec_inner.pyx":490 - * if d == 0: - * target_index = word_index - * label = ONEF # <<<<<<<<<<<<<< - * else: - * target_index = table[(next_random >> 16) % table_len] - */ - __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L13; - } - /*else*/ { - - /* "trunk/gensim/models/word2vec_inner.pyx":492 - * label = ONEF - * else: - * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - */ - __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - - /* "trunk/gensim/models/word2vec_inner.pyx":493 - * else: - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< - * if target_index == word_index: - * continue - */ - __pyx_v_next_random = 
(((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - - /* "trunk/gensim/models/word2vec_inner.pyx":494 - * target_index = table[(next_random >> 16) % table_len] - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: # <<<<<<<<<<<<<< - * continue - * label = 0.0 - */ - __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_3) { - - /* "trunk/gensim/models/word2vec_inner.pyx":495 - * next_random = (next_random * 25214903917ULL + 11) & modulo - * if target_index == word_index: - * continue # <<<<<<<<<<<<<< - * label = 0.0 + /* "trunk/gensim/models/word2vec_inner.pyx":167 * + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): # <<<<<<<<<<<<<< + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) */ - goto __pyx_L11_continue; - } - - /* "trunk/gensim/models/word2vec_inner.pyx":496 - * if target_index == word_index: - * continue - * label = 0.0 # <<<<<<<<<<<<<< - * - * row2 = target_index * size - */ - __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } - __pyx_L13:; + __pyx_t_1 = (__pyx_v_codelens[__pyx_v_i]); + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { + __pyx_v_b = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":498 - * label = 0.0 - * - * row2 = target_index * size # <<<<<<<<<<<<<< - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":168 + * memset(work, 0, size * cython.sizeof(REAL_t)) + * for b in range(codelens[i]): + * row2 = word_point[b] * size # <<<<<<<<<<<<<< + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: */ - __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); + __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":499 - * - * row2 = target_index * size - * f = sdot(&size, neu1, &ONE, 
&syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":169 + * for b in range(codelens[i]): + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)__pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE))); + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":500 - * row2 = target_index * size - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":170 + * row2 = word_point[b] * size + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] @@ -4493,65 +2126,65 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L16_bool_binop_done; + goto __pyx_L14_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L16_bool_binop_done:; + __pyx_L14_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":501 - * f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":171 + * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha + * g = (1 - word_code[b] - f) * alpha */ 
goto __pyx_L11_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":502 + /* "trunk/gensim/models/word2vec_inner.pyx":172 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":503 + /* "trunk/gensim/models/word2vec_inner.pyx":173 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha # <<<<<<<<<<<<<< - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) */ - __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); + __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/word2vec_inner.pyx":504 + /* "trunk/gensim/models/word2vec_inner.pyx":174 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + 
__pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":505 - * g = (label - f) * alpha - * saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":175 + * g = (1 - word_code[b] - f) * alpha + * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * - * for m in range(j,k): + * for m in range(j, k): */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); __pyx_L11_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":507 - * saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + /* "trunk/gensim/models/word2vec_inner.pyx":177 + * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * - * for m in range(j,k): # <<<<<<<<<<<<<< + * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ @@ -4559,9 +2192,9 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":508 + /* "trunk/gensim/models/word2vec_inner.pyx":178 * - * for m in range(j,k): + * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * 
continue * else: @@ -4570,74 +2203,62 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L21_bool_binop_done; + goto __pyx_L19_bool_binop_done; } __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L21_bool_binop_done:; + __pyx_L19_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":509 - * for m in range(j,k): + /* "trunk/gensim/models/word2vec_inner.pyx":179 + * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) */ - goto __pyx_L18_continue; + goto __pyx_L16_continue; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":511 + /* "trunk/gensim/models/word2vec_inner.pyx":181 * continue * else: - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< + * * - * return next_random */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } - __pyx_L18_continue:; + __pyx_L16_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":513 - * saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - * - * return next_random # 
<<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":143 * - * cdef unsigned long long fast_sentence2_cbow_neg( - */ - __pyx_r = __pyx_v_next_random; - goto __pyx_L0; - - /* "trunk/gensim/models/word2vec_inner.pyx":458 - * return next_random * - * cdef unsigned long long fast_sentence1_cbow_neg( # <<<<<<<<<<<<<< - * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, + * cdef void fast_sentence_cbow_hs( # <<<<<<<<<<<<<< + * const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], + * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ /* function exit code */ - __pyx_L0:; - return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":515 - * return next_random +/* "trunk/gensim/models/word2vec_inner.pyx":184 + * * - * cdef unsigned long long fast_sentence2_cbow_neg( # <<<<<<<<<<<<<< + * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random) { - PY_LONG_LONG 
__pyx_v_a; +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_g; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_count; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_inv_count; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_label; __pyx_t_5numpy_uint32_t __pyx_v_target_index; __pyx_t_5numpy_uint32_t __pyx_v_word_index; @@ -4645,15 +2266,12 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas int __pyx_v_m; unsigned PY_LONG_LONG __pyx_r; int __pyx_t_1; - PY_LONG_LONG __pyx_t_2; + int __pyx_t_2; int __pyx_t_3; int __pyx_t_4; - int __pyx_t_5; - int __pyx_t_6; - PY_LONG_LONG __pyx_t_7; - long __pyx_t_8; + long __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":523 + /* "trunk/gensim/models/word2vec_inner.pyx":192 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< @@ -4662,205 +2280,164 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_modulo = 281474976710655ULL; - /* 
"trunk/gensim/models/word2vec_inner.pyx":528 + /* "trunk/gensim/models/word2vec_inner.pyx":197 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< * - * for a in range(size): + * memset(neu1, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "trunk/gensim/models/word2vec_inner.pyx":530 + /* "trunk/gensim/models/word2vec_inner.pyx":199 * word_index = indexes[i] * - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] = 0.0 - * count = 0.0 - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":531 - * - * for a in range(size): - * neu1[a] = 0.0 # <<<<<<<<<<<<<< + * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * count = 0.0 * for m in range(j, k): */ - (__pyx_v_neu1[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":532 - * for a in range(size): - * neu1[a] = 0.0 + /* "trunk/gensim/models/word2vec_inner.pyx":200 + * + * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< * for m in range(j, k): * if m == i or codelens[m] == 0: */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/word2vec_inner.pyx":533 - * neu1[a] = 0.0 + /* "trunk/gensim/models/word2vec_inner.pyx":201 + * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":534 + /* "trunk/gensim/models/word2vec_inner.pyx":202 * 
count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L8_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L6_bool_binop_done; } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L8_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L6_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":535 + /* "trunk/gensim/models/word2vec_inner.pyx":203 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: * count += ONEF */ - goto __pyx_L5_continue; + goto __pyx_L3_continue; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":537 + /* "trunk/gensim/models/word2vec_inner.pyx":205 * continue * else: * count += ONEF # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * if cbow_mean and count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - /* "trunk/gensim/models/word2vec_inner.pyx":538 + /* "trunk/gensim/models/word2vec_inner.pyx":206 * else: * count += ONEF - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] += syn0[indexes[m] * size + a] + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< * if cbow_mean and count > (0.5): + * inv_count = ONEF/count */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":539 - * count += ONEF - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): 
- * for a in range(size): - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) + (__pyx_v_syn0[(((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a)])); - } + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } - __pyx_L5_continue:; + __pyx_L3_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":540 - * for a in range(size): - * neu1[a] += syn0[indexes[m] * size + a] + /* "trunk/gensim/models/word2vec_inner.pyx":207 + * count += ONEF + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< - * for a in range(size): - * neu1[a] /= count + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
*/ - __pyx_t_5 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_5) { + __pyx_t_4 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L13_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L9_bool_binop_done; } - __pyx_t_5 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L13_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L9_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":541 - * neu1[a] += syn0[indexes[m] * size + a] + /* "trunk/gensim/models/word2vec_inner.pyx":208 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) * if cbow_mean and count > (0.5): - * for a in range(size): # <<<<<<<<<<<<<< - * neu1[a] /= count + * inv_count = ONEF/count # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - /* "trunk/gensim/models/word2vec_inner.pyx":542 + /* "trunk/gensim/models/word2vec_inner.pyx":209 * if cbow_mean and count > (0.5): - * for a in range(size): - * neu1[a] /= count # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< * - * for a in range(size): + * memset(work, 0, size * cython.sizeof(REAL_t)) */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_neu1[__pyx_t_7]) = ((__pyx_v_neu1[__pyx_t_7]) / __pyx_v_count); - } - goto __pyx_L12; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + goto __pyx_L8; } - __pyx_L12:; - - /* "trunk/gensim/models/word2vec_inner.pyx":544 - * neu1[a] /= count - * - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] = 0.0 - * - */ - __pyx_t_1 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":545 + /* "trunk/gensim/models/word2vec_inner.pyx":211 + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * - * for a in range(size): - * work[a] = 0.0 # <<<<<<<<<<<<<< + * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< * * for d in range(negative+1): */ - (__pyx_v_work[__pyx_v_a]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - } + memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":547 - * work[a] = 0.0 + /* "trunk/gensim/models/word2vec_inner.pyx":213 + * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< * if d == 0: * target_index = word_index */ - __pyx_t_8 = (__pyx_v_negative + 1); - for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_8; __pyx_t_1+=1) { + __pyx_t_5 = (__pyx_v_negative + 1); + for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "trunk/gensim/models/word2vec_inner.pyx":548 + /* "trunk/gensim/models/word2vec_inner.pyx":214 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< * target_index = word_index * label = ONEF */ - __pyx_t_4 = ((__pyx_v_d == 0) != 0); - if (__pyx_t_4) { 
+ __pyx_t_3 = ((__pyx_v_d == 0) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":549 + /* "trunk/gensim/models/word2vec_inner.pyx":215 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -4869,7 +2446,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/word2vec_inner.pyx":550 + /* "trunk/gensim/models/word2vec_inner.pyx":216 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -4877,11 +2454,11 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas * target_index = table[(next_random >> 16) % table_len] */ __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L21; + goto __pyx_L13; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":552 + /* "trunk/gensim/models/word2vec_inner.pyx":218 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -4890,7 +2467,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/word2vec_inner.pyx":553 + /* "trunk/gensim/models/word2vec_inner.pyx":219 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -4899,27 +2476,27 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/word2vec_inner.pyx":554 + /* "trunk/gensim/models/word2vec_inner.pyx":220 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< * continue * label = 0.0 
*/ - __pyx_t_4 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); - if (__pyx_t_4) { + __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":555 + /* "trunk/gensim/models/word2vec_inner.pyx":221 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * */ - goto __pyx_L19_continue; + goto __pyx_L11_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":556 + /* "trunk/gensim/models/word2vec_inner.pyx":222 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -4928,218 +2505,158 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); } - __pyx_L21:; + __pyx_L13:; - /* "trunk/gensim/models/word2vec_inner.pyx":558 + /* "trunk/gensim/models/word2vec_inner.pyx":224 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< - * f = 0.0 - * for a in range(size): + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + * if f <= -MAX_EXP or f >= MAX_EXP: */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":559 + /* "trunk/gensim/models/word2vec_inner.pyx":225 * * row2 = target_index * size - * f = 0.0 # <<<<<<<<<<<<<< - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] - */ - __pyx_v_f = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - - /* "trunk/gensim/models/word2vec_inner.pyx":560 - * row2 = target_index * size - * f = 0.0 - * for a in range(size): # <<<<<<<<<<<<<< - * f += neu1[a] * syn1neg[row2 + a] - * if f <= -MAX_EXP or f >= MAX_EXP: - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":561 - * f = 0.0 - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] # <<<<<<<<<<<<<< 
+ * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * if f <= -MAX_EXP or f >= MAX_EXP: * continue */ - __pyx_v_f = (__pyx_v_f + ((__pyx_v_neu1[__pyx_v_a]) * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } + __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":562 - * for a in range(size): - * f += neu1[a] * syn1neg[row2 + a] + /* "trunk/gensim/models/word2vec_inner.pyx":226 + * row2 = target_index * size + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] */ - __pyx_t_5 = ((__pyx_v_f <= -6.0) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_f <= -6.0) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = __pyx_t_5; - goto __pyx_L26_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L16_bool_binop_done; } - __pyx_t_5 = ((__pyx_v_f >= 6.0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L26_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L16_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":563 - * f += neu1[a] * syn1neg[row2 + a] + /* "trunk/gensim/models/word2vec_inner.pyx":227 + * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha */ - goto __pyx_L19_continue; + goto __pyx_L11_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":564 + /* "trunk/gensim/models/word2vec_inner.pyx":228 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< * g = 
(label - f) * alpha - * for a in range(size): + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":565 + /* "trunk/gensim/models/word2vec_inner.pyx":229 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/word2vec_inner.pyx":566 + /* "trunk/gensim/models/word2vec_inner.pyx":230 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha - * for a in range(size): # <<<<<<<<<<<<<< - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): - */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":567 - * g = (label - f) * alpha - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] # <<<<<<<<<<<<<< - * for a in range(size): - * syn1neg[row2 + a] += g * neu1[a] - */ - __pyx_t_7 = __pyx_v_a; - (__pyx_v_work[__pyx_t_7]) = ((__pyx_v_work[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_syn1neg[(__pyx_v_row2 + __pyx_v_a)]))); - } - - /* "trunk/gensim/models/word2vec_inner.pyx":568 - * for a in range(size): - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): # <<<<<<<<<<<<<< - * syn1neg[row2 + a] += g * neu1[a] + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * */ - __pyx_t_3 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_3; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), 
(&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":569 - * work[a] += g * syn1neg[row2 + a] - * for a in range(size): - * syn1neg[row2 + a] += g * neu1[a] # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":231 + * g = (label - f) * alpha + * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * - * for m in range(j, k): + * for m in range(j,k): */ - __pyx_t_7 = (__pyx_v_row2 + __pyx_v_a); - (__pyx_v_syn1neg[__pyx_t_7]) = ((__pyx_v_syn1neg[__pyx_t_7]) + (__pyx_v_g * (__pyx_v_neu1[__pyx_v_a]))); - } - __pyx_L19_continue:; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_L11_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":571 - * syn1neg[row2 + a] += g * neu1[a] + /* "trunk/gensim/models/word2vec_inner.pyx":233 + * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * - * for m in range(j, k): # <<<<<<<<<<<<<< + * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue */ __pyx_t_1 = __pyx_v_k; - for (__pyx_t_3 = __pyx_v_j; __pyx_t_3 < __pyx_t_1; __pyx_t_3+=1) { - __pyx_v_m = __pyx_t_3; + for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { + __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":572 + /* "trunk/gensim/models/word2vec_inner.pyx":234 * - * for m in range(j, k): + * for m in range(j,k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< * continue * else: */ - __pyx_t_5 = ((__pyx_v_m == __pyx_v_i) != 0); - if (!__pyx_t_5) { + __pyx_t_4 = ((__pyx_v_m == __pyx_v_i) != 0); + if (!__pyx_t_4) { } else { - __pyx_t_4 = 
__pyx_t_5; - goto __pyx_L35_bool_binop_done; + __pyx_t_3 = __pyx_t_4; + goto __pyx_L21_bool_binop_done; } - __pyx_t_5 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); - __pyx_t_4 = __pyx_t_5; - __pyx_L35_bool_binop_done:; - if (__pyx_t_4) { + __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); + __pyx_t_3 = __pyx_t_4; + __pyx_L21_bool_binop_done:; + if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":573 - * for m in range(j, k): + /* "trunk/gensim/models/word2vec_inner.pyx":235 + * for m in range(j,k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * for a in range(size): + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) */ - goto __pyx_L32_continue; + goto __pyx_L18_continue; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":575 + /* "trunk/gensim/models/word2vec_inner.pyx":237 * continue * else: - * for a in range(size): # <<<<<<<<<<<<<< - * syn0[indexes[m] * size + a] += work[a] - * - */ - __pyx_t_6 = __pyx_v_size; - for (__pyx_t_2 = 0; __pyx_t_2 < __pyx_t_6; __pyx_t_2+=1) { - __pyx_v_a = __pyx_t_2; - - /* "trunk/gensim/models/word2vec_inner.pyx":576 - * else: - * for a in range(size): - * syn0[indexes[m] * size + a] += work[a] # <<<<<<<<<<<<<< + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_t_7 = (((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size) + __pyx_v_a); - (__pyx_v_syn0[__pyx_t_7]) = ((__pyx_v_syn0[__pyx_t_7]) + (__pyx_v_work[__pyx_v_a])); - } + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } - __pyx_L32_continue:; + __pyx_L18_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":578 - * syn0[indexes[m] * size + a] += work[a] + /* 
"trunk/gensim/models/word2vec_inner.pyx":239 + * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< * - * def train_sentence_sg(model, sentence, alpha, _work): + * */ __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":515 - * return next_random + /* "trunk/gensim/models/word2vec_inner.pyx":184 * - * cdef unsigned long long fast_sentence2_cbow_neg( # <<<<<<<<<<<<<< + * + * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< * const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ @@ -5149,8 +2666,8 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":580 - * return next_random +/* "trunk/gensim/models/word2vec_inner.pyx":242 + * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -5193,21 +2710,21 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_1train_sentenc case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto 
__pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; @@ -5224,7 +2741,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_1train_sentenc } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.word2vec_inner.train_sentence_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -5288,82 +2805,82 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_sg", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":581 + /* "trunk/gensim/models/word2vec_inner.pyx":243 * * def 
train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 581; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":582 + /* "trunk/gensim/models/word2vec_inner.pyx":244 * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 582; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 
== (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":584 + /* "trunk/gensim/models/word2vec_inner.pyx":246 * cdef int negative = model.negative * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t _alpha = alpha */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 584; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 584; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":586 + /* "trunk/gensim/models/word2vec_inner.pyx":248 * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 586; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if 
(unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":587 + /* "trunk/gensim/models/word2vec_inner.pyx":249 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 587; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":593 + /* "trunk/gensim/models/word2vec_inner.pyx":255 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 593; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":596 + /* "trunk/gensim/models/word2vec_inner.pyx":258 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -5372,7 +2889,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":609 + /* "trunk/gensim/models/word2vec_inner.pyx":271 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -5382,23 +2899,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":610 + /* "trunk/gensim/models/word2vec_inner.pyx":272 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 610; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, 
__pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":612 + /* "trunk/gensim/models/word2vec_inner.pyx":274 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -5408,106 +2925,106 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":613 + /* "trunk/gensim/models/word2vec_inner.pyx":275 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 613; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":614 + /* 
"trunk/gensim/models/word2vec_inner.pyx":276 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 614; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":615 + /* "trunk/gensim/models/word2vec_inner.pyx":277 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = 
PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 615; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":616 + /* "trunk/gensim/models/word2vec_inner.pyx":278 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":619 + /* "trunk/gensim/models/word2vec_inner.pyx":281 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, 
len(sentence)) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 619; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":620 + /* "trunk/gensim/models/word2vec_inner.pyx":282 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 620; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -5516,7 +3033,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":622 + /* "trunk/gensim/models/word2vec_inner.pyx":284 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5527,19 +3044,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":623 + /* "trunk/gensim/models/word2vec_inner.pyx":285 * * for i in 
range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 623; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":624 + /* "trunk/gensim/models/word2vec_inner.pyx":286 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -5550,7 +3067,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":625 + /* "trunk/gensim/models/word2vec_inner.pyx":287 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -5562,20 +3079,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":627 + /* "trunk/gensim/models/word2vec_inner.pyx":289 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 627; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == 
(npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 627; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":628 + /* "trunk/gensim/models/word2vec_inner.pyx":290 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -5585,49 +3102,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":629 + /* "trunk/gensim/models/word2vec_inner.pyx":291 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 629; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":630 + /* "trunk/gensim/models/word2vec_inner.pyx":292 * if hs: * codelens[i] = 
len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 630; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":631 + /* "trunk/gensim/models/word2vec_inner.pyx":293 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 631; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 631; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":633 + /* "trunk/gensim/models/word2vec_inner.pyx":295 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -5638,7 +3155,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":634 + /* "trunk/gensim/models/word2vec_inner.pyx":296 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -5650,7 +3167,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":636 + /* "trunk/gensim/models/word2vec_inner.pyx":298 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5658,17 +3175,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -5682,7 +3199,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); 
__Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -5696,7 +3213,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -5704,9 +3221,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -5714,16 +3231,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); 
__pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -5732,7 +3249,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -5743,17 +3260,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":637 + /* "trunk/gensim/models/word2vec_inner.pyx":299 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 637; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":636 + /* "trunk/gensim/models/word2vec_inner.pyx":298 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -5763,7 +3280,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":640 + /* "trunk/gensim/models/word2vec_inner.pyx":302 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5777,7 +3294,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":641 + /* "trunk/gensim/models/word2vec_inner.pyx":303 * # release GIL & train on the sentence * 
with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -5788,7 +3305,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":642 + /* "trunk/gensim/models/word2vec_inner.pyx":304 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -5798,7 +3315,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":643 + /* "trunk/gensim/models/word2vec_inner.pyx":305 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -5808,7 +3325,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":644 + /* "trunk/gensim/models/word2vec_inner.pyx":306 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -5817,7 +3334,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":645 + /* "trunk/gensim/models/word2vec_inner.pyx":307 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -5827,7 +3344,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_j < 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":646 + /* "trunk/gensim/models/word2vec_inner.pyx":308 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -5839,7 +3356,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":647 + /* 
"trunk/gensim/models/word2vec_inner.pyx":309 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -5848,7 +3365,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":648 + /* "trunk/gensim/models/word2vec_inner.pyx":310 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -5858,7 +3375,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":649 + /* "trunk/gensim/models/word2vec_inner.pyx":311 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -5870,7 +3387,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":650 + /* "trunk/gensim/models/word2vec_inner.pyx":312 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -5881,7 +3398,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/word2vec_inner.pyx":651 + /* "trunk/gensim/models/word2vec_inner.pyx":313 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -5899,7 +3416,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L22_bool_binop_done:; if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":652 + /* "trunk/gensim/models/word2vec_inner.pyx":314 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< @@ -5909,7 +3426,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence goto __pyx_L19_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":653 + /* "trunk/gensim/models/word2vec_inner.pyx":315 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< @@ -5919,19 +3436,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":654 + /* "trunk/gensim/models/word2vec_inner.pyx":316 * continue * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) # <<<<<<<<<<<<<< * if negative: * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work); + __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work); goto __pyx_L24; } __pyx_L24:; - /* "trunk/gensim/models/word2vec_inner.pyx":655 + /* "trunk/gensim/models/word2vec_inner.pyx":317 * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) * if negative: # <<<<<<<<<<<<<< @@ -5941,14 +3458,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":656 + /* "trunk/gensim/models/word2vec_inner.pyx":318 * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) * if negative: * next_random = 
fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) # <<<<<<<<<<<<<< * * return result */ - __pyx_v_next_random = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random); goto __pyx_L25; } __pyx_L25:; @@ -5958,7 +3475,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":640 + /* "trunk/gensim/models/word2vec_inner.pyx":302 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -5976,7 +3493,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":658 + /* "trunk/gensim/models/word2vec_inner.pyx":320 * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) * * return result # <<<<<<<<<<<<<< @@ -5984,14 +3501,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 658; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 
0; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":580 - * return next_random + /* "trunk/gensim/models/word2vec_inner.pyx":242 + * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs @@ -6016,7 +3533,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":661 +/* "trunk/gensim/models/word2vec_inner.pyx":323 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6062,26 +3579,26 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = 
PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -6100,7 +3617,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.word2vec_inner.train_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -6164,95 +3681,95 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_cbow", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":662 + /* "trunk/gensim/models/word2vec_inner.pyx":324 * * def train_sentence_cbow(model, sentence, 
alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 662; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 662; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":663 + /* "trunk/gensim/models/word2vec_inner.pyx":325 * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 663; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 663; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if 
(unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":664 + /* "trunk/gensim/models/word2vec_inner.pyx":326 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 664; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 664; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":666 + /* "trunk/gensim/models/word2vec_inner.pyx":328 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 666; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 666; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":669 + /* "trunk/gensim/models/word2vec_inner.pyx":331 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 669; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":670 + /* "trunk/gensim/models/word2vec_inner.pyx":332 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 670; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 670; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":676 + /* "trunk/gensim/models/word2vec_inner.pyx":338 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 676; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 676; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":679 + /* "trunk/gensim/models/word2vec_inner.pyx":341 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -6261,7 +3778,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":692 + /* 
"trunk/gensim/models/word2vec_inner.pyx":354 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -6271,23 +3788,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":693 + /* "trunk/gensim/models/word2vec_inner.pyx":355 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 693; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 693; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":695 + /* "trunk/gensim/models/word2vec_inner.pyx":357 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -6297,116 +3814,116 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":696 + /* "trunk/gensim/models/word2vec_inner.pyx":358 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = 
(np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 696; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":697 + /* "trunk/gensim/models/word2vec_inner.pyx":359 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 697; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":698 + /* "trunk/gensim/models/word2vec_inner.pyx":360 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 698; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":699 + /* "trunk/gensim/models/word2vec_inner.pyx":361 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno 
= 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 
PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":702 + /* "trunk/gensim/models/word2vec_inner.pyx":364 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 702; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":703 + /* "trunk/gensim/models/word2vec_inner.pyx":365 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * sentence_len = 
min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 703; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "trunk/gensim/models/word2vec_inner.pyx":704 + /* "trunk/gensim/models/word2vec_inner.pyx":366 * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 704; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -6415,7 +3932,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":706 + /* "trunk/gensim/models/word2vec_inner.pyx":368 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6426,19 +3943,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":707 + /* "trunk/gensim/models/word2vec_inner.pyx":369 * * for i in range(sentence_len): * word = sentence[i] 
# <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 707; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":708 + /* "trunk/gensim/models/word2vec_inner.pyx":370 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -6449,7 +3966,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":709 + /* "trunk/gensim/models/word2vec_inner.pyx":371 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -6461,20 +3978,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":711 + /* "trunk/gensim/models/word2vec_inner.pyx":373 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 711; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 711; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":712 + /* "trunk/gensim/models/word2vec_inner.pyx":374 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -6484,49 +4001,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":713 + /* "trunk/gensim/models/word2vec_inner.pyx":375 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 713; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":714 + /* "trunk/gensim/models/word2vec_inner.pyx":376 * if hs: * codelens[i] = len(word.code) * codes[i] = 
np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 714; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 714; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":715 + /* "trunk/gensim/models/word2vec_inner.pyx":377 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 715; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":717 + /* "trunk/gensim/models/word2vec_inner.pyx":379 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -6537,7 +4054,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":718 + /* "trunk/gensim/models/word2vec_inner.pyx":380 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -6549,7 +4066,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":720 + /* "trunk/gensim/models/word2vec_inner.pyx":382 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -6557,17 +4074,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -6581,7 +4098,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -6595,7 
+4112,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -6603,9 +4120,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -6613,16 +4130,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -6631,7 +4148,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 720; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -6642,17 +4159,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":721 + /* "trunk/gensim/models/word2vec_inner.pyx":383 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 721; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":720 + /* "trunk/gensim/models/word2vec_inner.pyx":382 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -6662,7 +4179,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":724 + /* "trunk/gensim/models/word2vec_inner.pyx":386 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6676,7 +4193,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":725 + /* "trunk/gensim/models/word2vec_inner.pyx":387 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -6687,7 
+4204,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":726 + /* "trunk/gensim/models/word2vec_inner.pyx":388 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -6697,7 +4214,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":727 + /* "trunk/gensim/models/word2vec_inner.pyx":389 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -6707,7 +4224,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":728 + /* "trunk/gensim/models/word2vec_inner.pyx":390 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -6716,7 +4233,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":729 + /* "trunk/gensim/models/word2vec_inner.pyx":391 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -6726,7 +4243,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_j < 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":730 + /* "trunk/gensim/models/word2vec_inner.pyx":392 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -6738,7 +4255,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":731 + /* "trunk/gensim/models/word2vec_inner.pyx":393 * if j < 0: * j = 0 * k = i + window 
+ 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -6747,7 +4264,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":732 + /* "trunk/gensim/models/word2vec_inner.pyx":394 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -6757,7 +4274,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":733 + /* "trunk/gensim/models/word2vec_inner.pyx":395 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -6769,7 +4286,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":734 + /* "trunk/gensim/models/word2vec_inner.pyx":396 * if k > sentence_len: * k = sentence_len * if hs: # <<<<<<<<<<<<<< @@ -6779,19 +4296,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":735 + /* "trunk/gensim/models/word2vec_inner.pyx":397 * k = sentence_len * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) # <<<<<<<<<<<<<< * if negative: * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean); + 
__pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean); goto __pyx_L19; } __pyx_L19:; - /* "trunk/gensim/models/word2vec_inner.pyx":736 + /* "trunk/gensim/models/word2vec_inner.pyx":398 * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) * if negative: # <<<<<<<<<<<<<< @@ -6801,14 +4318,14 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":737 + /* "trunk/gensim/models/word2vec_inner.pyx":399 * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) * if negative: * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) # <<<<<<<<<<<<<< * * return result */ - __pyx_v_next_random = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random); goto __pyx_L20; } __pyx_L20:; @@ -6816,7 +4333,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* "trunk/gensim/models/word2vec_inner.pyx":724 + /* "trunk/gensim/models/word2vec_inner.pyx":386 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -6834,7 +4351,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* "trunk/gensim/models/word2vec_inner.pyx":739 + /* "trunk/gensim/models/word2vec_inner.pyx":401 * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) * * return result # <<<<<<<<<<<<<< @@ -6842,13 +4359,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 739; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":661 + /* "trunk/gensim/models/word2vec_inner.pyx":323 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -6874,7 +4391,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":742 +/* "trunk/gensim/models/word2vec_inner.pyx":404 * * * def init(): # <<<<<<<<<<<<<< @@ -6913,7 +4430,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":754 + /* "trunk/gensim/models/word2vec_inner.pyx":414 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -6923,7 +4440,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/word2vec_inner.pyx":755 + /* "trunk/gensim/models/word2vec_inner.pyx":415 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -6933,7 +4450,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":756 + /* "trunk/gensim/models/word2vec_inner.pyx":416 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -6942,7 +4459,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/word2vec_inner.pyx":757 + /* "trunk/gensim/models/word2vec_inner.pyx":417 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -6951,7 +4468,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_size = 1; - /* "trunk/gensim/models/word2vec_inner.pyx":762 + /* "trunk/gensim/models/word2vec_inner.pyx":422 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -6961,7 +4478,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":763 + /* "trunk/gensim/models/word2vec_inner.pyx":423 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -6970,7 +4487,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / 
((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/word2vec_inner.pyx":764 + /* "trunk/gensim/models/word2vec_inner.pyx":424 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -6980,7 +4497,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/word2vec_inner.pyx":767 + /* "trunk/gensim/models/word2vec_inner.pyx":427 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -6989,67 +4506,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":768 + /* "trunk/gensim/models/word2vec_inner.pyx":428 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< * if (abs(d_res - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence0_sg_hs + * our_dot = our_dot_double */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/word2vec_inner.pyx":769 + /* "trunk/gensim/models/word2vec_inner.pyx":429 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< - * fast_sentence_sg_hs = fast_sentence0_sg_hs - * fast_sentence_sg_neg = fast_sentence0_sg_neg + * our_dot = 
our_dot_double + * our_saxpy = saxpy */ __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":770 + /* "trunk/gensim/models/word2vec_inner.pyx":430 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence0_sg_hs # <<<<<<<<<<<<<< - * fast_sentence_sg_neg = fast_sentence0_sg_neg - * fast_sentence_cbow_hs = fast_sentence0_cbow_hs - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_hs; - - /* "trunk/gensim/models/word2vec_inner.pyx":771 - * if (abs(d_res - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence0_sg_hs - * fast_sentence_sg_neg = fast_sentence0_sg_neg # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs = fast_sentence0_cbow_hs - * fast_sentence_cbow_neg = fast_sentence0_cbow_neg - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_sg_neg; - - /* "trunk/gensim/models/word2vec_inner.pyx":772 - * fast_sentence_sg_hs = fast_sentence0_sg_hs - * fast_sentence_sg_neg = fast_sentence0_sg_neg - * fast_sentence_cbow_hs = fast_sentence0_cbow_hs # <<<<<<<<<<<<<< - * fast_sentence_cbow_neg = fast_sentence0_cbow_neg + * our_dot = our_dot_double # <<<<<<<<<<<<<< + * our_saxpy = saxpy * return 0 # double */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_hs; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_double; - /* "trunk/gensim/models/word2vec_inner.pyx":773 - * fast_sentence_sg_neg = fast_sentence0_sg_neg - * fast_sentence_cbow_hs = fast_sentence0_cbow_hs - * fast_sentence_cbow_neg = fast_sentence0_cbow_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":431 + * if (abs(d_res - expected) < 0.0001): + * 
our_dot = our_dot_double + * our_saxpy = saxpy # <<<<<<<<<<<<<< * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence0_cbow_neg; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":774 - * fast_sentence_cbow_hs = fast_sentence0_cbow_hs - * fast_sentence_cbow_neg = fast_sentence0_cbow_neg + /* "trunk/gensim/models/word2vec_inner.pyx":432 + * our_dot = our_dot_double + * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence1_sg_hs + * our_dot = our_dot_float */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(__pyx_int_0); @@ -7057,55 +4556,37 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":775 - * fast_sentence_cbow_neg = fast_sentence0_cbow_neg + /* "trunk/gensim/models/word2vec_inner.pyx":433 + * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< - * fast_sentence_sg_hs = fast_sentence1_sg_hs - * fast_sentence_sg_neg = fast_sentence1_sg_neg + * our_dot = our_dot_float + * our_saxpy = saxpy */ __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":776 + /* "trunk/gensim/models/word2vec_inner.pyx":434 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence1_sg_hs # <<<<<<<<<<<<<< - * fast_sentence_sg_neg = fast_sentence1_sg_neg - * fast_sentence_cbow_hs = fast_sentence1_cbow_hs - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_hs; - - /* 
"trunk/gensim/models/word2vec_inner.pyx":777 - * elif (abs(p_res[0] - expected) < 0.0001): - * fast_sentence_sg_hs = fast_sentence1_sg_hs - * fast_sentence_sg_neg = fast_sentence1_sg_neg # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs = fast_sentence1_cbow_hs - * fast_sentence_cbow_neg = fast_sentence1_cbow_neg - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_sg_neg; - - /* "trunk/gensim/models/word2vec_inner.pyx":778 - * fast_sentence_sg_hs = fast_sentence1_sg_hs - * fast_sentence_sg_neg = fast_sentence1_sg_neg - * fast_sentence_cbow_hs = fast_sentence1_cbow_hs # <<<<<<<<<<<<<< - * fast_sentence_cbow_neg = fast_sentence1_cbow_neg + * our_dot = our_dot_float # <<<<<<<<<<<<<< + * our_saxpy = saxpy * return 1 # float */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_hs; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float; - /* "trunk/gensim/models/word2vec_inner.pyx":779 - * fast_sentence_sg_neg = fast_sentence1_sg_neg - * fast_sentence_cbow_hs = fast_sentence1_cbow_hs - * fast_sentence_cbow_neg = fast_sentence1_cbow_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":435 + * elif (abs(p_res[0] - expected) < 0.0001): + * our_dot = our_dot_float + * our_saxpy = saxpy # <<<<<<<<<<<<<< * return 1 # float * else: */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence1_cbow_neg; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":780 - * fast_sentence_cbow_hs = fast_sentence1_cbow_hs - * fast_sentence_cbow_neg = fast_sentence1_cbow_neg + /* "trunk/gensim/models/word2vec_inner.pyx":436 + * our_dot = our_dot_float + * 
our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< * else: * # neither => use cython loops, no BLAS @@ -7117,45 +4598,27 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":784 + /* "trunk/gensim/models/word2vec_inner.pyx":440 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - * fast_sentence_sg_hs = fast_sentence2_sg_hs # <<<<<<<<<<<<<< - * fast_sentence_sg_neg = fast_sentence2_sg_neg - * fast_sentence_cbow_hs = fast_sentence2_cbow_hs - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_hs; - - /* "trunk/gensim/models/word2vec_inner.pyx":785 - * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - * fast_sentence_sg_hs = fast_sentence2_sg_hs - * fast_sentence_sg_neg = fast_sentence2_sg_neg # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs = fast_sentence2_cbow_hs - * fast_sentence_cbow_neg = fast_sentence2_cbow_neg - */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_sg_neg; - - /* "trunk/gensim/models/word2vec_inner.pyx":786 - * fast_sentence_sg_hs = fast_sentence2_sg_hs - * fast_sentence_sg_neg = fast_sentence2_sg_neg - * fast_sentence_cbow_hs = fast_sentence2_cbow_hs # <<<<<<<<<<<<<< - * fast_sentence_cbow_neg = fast_sentence2_cbow_neg + * our_dot = our_dot_noblas # <<<<<<<<<<<<<< + * our_saxpy = our_saxpy_noblas * return 2 */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_hs; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":787 - * 
fast_sentence_sg_neg = fast_sentence2_sg_neg - * fast_sentence_cbow_hs = fast_sentence2_cbow_hs - * fast_sentence_cbow_neg = fast_sentence2_cbow_neg # <<<<<<<<<<<<<< + /* "trunk/gensim/models/word2vec_inner.pyx":441 + * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< * return 2 * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence2_cbow_neg; + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":788 - * fast_sentence_cbow_hs = fast_sentence2_cbow_hs - * fast_sentence_cbow_neg = fast_sentence2_cbow_neg + /* "trunk/gensim/models/word2vec_inner.pyx":442 + * our_dot = our_dot_noblas + * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< * * FAST_VERSION = init() # initialize the module @@ -7166,7 +4629,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":742 + /* "trunk/gensim/models/word2vec_inner.pyx":404 * * * def init(): # <<<<<<<<<<<<<< @@ -9282,8 +6745,8 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {0, 0, 0, 0, 0, 0, 0} }; static int __Pyx_InitCachedBuiltins(void) { - __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 86; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 636; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -9295,31 +6758,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":616 + /* "trunk/gensim/models/word2vec_inner.pyx":278 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 616; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/word2vec_inner.pyx":699 + /* "trunk/gensim/models/word2vec_inner.pyx":361 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 699; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -9389,41 +6852,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "trunk/gensim/models/word2vec_inner.pyx":580 - * return next_random + /* "trunk/gensim/models/word2vec_inner.pyx":242 + * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(28, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, 
__pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(28, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 28, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 580, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 28, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 242, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* 
"trunk/gensim/models/word2vec_inner.pyx":661 + /* "trunk/gensim/models/word2vec_inner.pyx":323 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(31, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__13 = PyTuple_Pack(31, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 31, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, 
__pyx_n_s_train_sentence_cbow, 661, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 31, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_cbow, 323, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/word2vec_inner.pyx":742 + /* "trunk/gensim/models/word2vec_inner.pyx":404 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 742, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, 
__pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 404, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -9589,115 +7052,115 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_REAL, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 22; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":57 - * int i, int j, int k, int cbow_mean, unsigned long long next_random) nogil + /* "trunk/gensim/models/word2vec_inner.pyx":34 + * ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x # <<<<<<<<<<<<<< * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_scopy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_scopy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, 
__pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 57; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_scopy = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_scopy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":58 + /* "trunk/gensim/models/word2vec_inner.pyx":35 * * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x # <<<<<<<<<<<<<< * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_saxpy); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 58; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 35; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_saxpy_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":59 + /* "trunk/gensim/models/word2vec_inner.pyx":36 * cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) # <<<<<<<<<<<<<< * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 59; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sdot = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_sdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":60 + /* "trunk/gensim/models/word2vec_inner.pyx":37 * cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) # <<<<<<<<<<<<<< * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sdot); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 60; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_dsdot_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":61 + /* "trunk/gensim/models/word2vec_inner.pyx":38 * cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) # <<<<<<<<<<<<<< * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x - * cdef fast_sentence_sg_hs_ptr fast_sentence_sg_hs + * */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_snrm2); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename 
= __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 38; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_snrm2 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_snrm2_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":62 + /* "trunk/gensim/models/word2vec_inner.pyx":39 * cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) * cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) * cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x # <<<<<<<<<<<<<< - * cdef fast_sentence_sg_hs_ptr fast_sentence_sg_hs - * cdef fast_sentence_sg_neg_ptr fast_sentence_sg_neg + * + * DEF EXP_TABLE_SIZE = 1000 */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_fblas); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sscal); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_sscal); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_2, __pyx_n_s_cpointer); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 39; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_sscal_ptr)PyCObject_AsVoidPtr(__pyx_t_1)); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":73 + /* "trunk/gensim/models/word2vec_inner.pyx":46 * cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE * * cdef int ONE = 1 # <<<<<<<<<<<<<< @@ -9706,57 +7169,57 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE = 1; - /* "trunk/gensim/models/word2vec_inner.pyx":74 + /* "trunk/gensim/models/word2vec_inner.pyx":47 * * cdef int ONE = 1 * cdef REAL_t ONEF = 1.0 # <<<<<<<<<<<<<< * - * cdef void fast_sentence0_sg_hs( + * # function implementations swapped based on BLAS detected */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "trunk/gensim/models/word2vec_inner.pyx":580 - * return next_random + /* "trunk/gensim/models/word2vec_inner.pyx":242 + * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 580; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":661 + /* "trunk/gensim/models/word2vec_inner.pyx":323 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 661; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":742 + /* "trunk/gensim/models/word2vec_inner.pyx":404 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, 
__pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 742; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":790 + /* "trunk/gensim/models/word2vec_inner.pyx":444 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -9769,14 +7232,14 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - 
__pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 790; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "trunk/gensim/models/word2vec_inner.pyx":1 diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index 4b35472149..c0a26f86c5 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -31,39 +31,12 @@ ctypedef double (*dsdot_ptr) (const int *N, const float *X, const int *incX, con ctypedef double (*snrm2_ptr) (const int *N, const float *X, const int *incX) nogil ctypedef void (*sscal_ptr) (const int *N, const float *alpha, const float *X, const int *incX) nogil -ctypedef void (*fast_sentence_sg_hs_ptr) ( - const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil - -ctypedef unsigned long long (*fast_sentence_sg_neg_ptr) ( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random) nogil - -ctypedef void (*fast_sentence_cbow_hs_ptr) ( - const np.uint32_t *word_point, const np.uint8_t *word_code, int 
codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean) nogil - -ctypedef unsigned long long (*fast_sentence_cbow_neg_ptr) ( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random) nogil - cdef scopy_ptr scopy=PyCObject_AsVoidPtr(fblas.scopy._cpointer) # y = x cdef saxpy_ptr saxpy=PyCObject_AsVoidPtr(fblas.saxpy._cpointer) # y += alpha * x cdef sdot_ptr sdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # float = dot(x, y) cdef dsdot_ptr dsdot=PyCObject_AsVoidPtr(fblas.sdot._cpointer) # double = dot(x, y) cdef snrm2_ptr snrm2=PyCObject_AsVoidPtr(fblas.snrm2._cpointer) # sqrt(x^2) cdef sscal_ptr sscal=PyCObject_AsVoidPtr(fblas.sscal._cpointer) # x = alpha * x -cdef fast_sentence_sg_hs_ptr fast_sentence_sg_hs -cdef fast_sentence_sg_neg_ptr fast_sentence_sg_neg -cdef fast_sentence_cbow_hs_ptr fast_sentence_cbow_hs -cdef fast_sentence_cbow_neg_ptr fast_sentence_cbow_neg DEF EXP_TABLE_SIZE = 1000 DEF MAX_EXP = 6 @@ -73,51 +46,39 @@ cdef REAL_t[EXP_TABLE_SIZE] EXP_TABLE cdef int ONE = 1 cdef REAL_t ONEF = 1.0 -cdef void fast_sentence0_sg_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil: - - cdef long long a, b - cdef long long row1 = word2_index * size, row2 - cdef REAL_t f, g +# function implementations swapped based on BLAS detected +ctypedef REAL_t (*our_dot_ptr) (const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil +ctypedef void (*our_saxpy_ptr) (const int *N, const float *alpha, 
const float *X, const int *incX, float *Y, const int *incY) nogil - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelen): - row2 = word_point[b] * size - f = dsdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) +cdef our_dot_ptr our_dot +cdef our_saxpy_ptr our_saxpy +# for when fblas.sdot returns a double +cdef REAL_t our_dot_double(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + return dsdot(N, X, incX, Y, incY) -cdef void fast_sentence1_sg_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, - REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil: - - cdef long long a, b - cdef long long row1 = word2_index * size, row2 - cdef REAL_t f, g +# for when fblas.sdot returns a float +cdef REAL_t our_dot_float(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + return sdot(N, X, incX, Y, incY) - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelen): - row2 = word_point[b] * size - f = sdot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) +# for when no blas available +cdef REAL_t our_dot_noblas(const int *N, const float *X, const int *incX, const float *Y, const int *incY) nogil: + # not a true full dot()-implementation: just enough for our cases + cdef int i + 
cdef REAL_t a + a = 0.0 + for i from 0 <= i < N[0] by 1: + a += X[i] * Y[i] + return a + +# for when no blas available +cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil: + cdef int i + for i from 0 <= i < N[0] by 1: + Y[i * (incY[0])] = (alpha[0]) * X[i * (incX[0])] + Y[i * (incY[0])] -cdef void fast_sentence2_sg_hs( +cdef void fast_sentence_sg_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil: @@ -126,26 +87,20 @@ cdef void fast_sentence2_sg_hs( cdef long long row1 = word2_index * size, row2 cdef REAL_t f, g - for a in range(size): - work[a] = 0.0 + memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelen): row2 = word_point[b] * size - f = 0.0 - for a in range(size): - f += syn0[row1 + a] * syn1[row2 + a] + f = our_dot(&size, &syn0[row1], &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha - for a in range(size): - work[a] += g * syn1[row2 + a] - for a in range(size): - syn1[row2 + a] += g * syn0[row1 + a] - for a in range(size): - syn0[row1 + a] += work[a] + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) -cdef unsigned long long fast_sentence0_sg_neg( +cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *table, unsigned long long table_len, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, @@ -172,145 +127,20 @@ cdef unsigned long long fast_sentence0_sg_neg( label = 0.0 row2 = target_index * size - f = dsdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + f = our_dot(&size, 
&syn0[row1], &ONE, &syn1neg[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) return next_random -cdef unsigned long long fast_sentence1_sg_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random) nogil: - - cdef long long a - cdef long long row1 = word2_index * size, row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, label - cdef np.uint32_t target_index - cdef int d - - memset(work, 0, size * cython.sizeof(REAL_t)) - - for d in range(negative+1): - - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = sdot(&size, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) - - saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) - - return next_random - -cdef unsigned long long fast_sentence2_sg_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, - REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, - const 
np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random) nogil: - - cdef long long a - cdef long long row1 = word2_index * size, row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, label - cdef np.uint32_t target_index - cdef int d - - for a in range(size): - work[a] = 0.0 - - for d in range(negative+1): - - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = 0.0 - for a in range(size): - f += syn0[row1 + a] * syn1neg[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - for a in range(size): - work[a] += g * syn1neg[row2 + a] - for a in range(size): - syn1neg[row2 + a] += g * syn0[row1 + a] - - for a in range(size): - syn0[row1 + a] += work[a] - - return next_random - -cdef void fast_sentence0_cbow_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean) nogil: - - cdef long long a, b - cdef long long row2 - cdef REAL_t f, g, count, inv_count - cdef int m - - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) - for b in range(codelens[i]): - row2 = word_point[b] * size - f = dsdot(&size, neu1, &ONE, &syn1[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue 
- f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) -cdef void fast_sentence1_cbow_hs( +cdef void fast_sentence_cbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, @@ -328,77 +158,30 @@ cdef void fast_sentence1_cbow_hs( continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) if cbow_mean and count > (0.5): inv_count = ONEF/count - sscal(&size, &inv_count , neu1, &ONE) + sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
memset(work, 0, size * cython.sizeof(REAL_t)) for b in range(codelens[i]): row2 = word_point[b] * size - f = sdot(&size, neu1, &ONE, &syn1[row2], &ONE) + f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (1 - word_code[b] - f) * alpha - saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) - saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - -cdef void fast_sentence2_cbow_hs( - const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean) nogil: + our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) - cdef long long a, b - cdef long long row2 - cdef REAL_t f, g, count - cdef int m - - for a in range(size): - neu1[a] = 0.0 - count = 0.0 for m in range(j, k): if m == i or codelens[m] == 0: continue else: - count += ONEF - for a in range(size): - neu1[a] += syn0[indexes[m] * size + a] - if cbow_mean and count > (0.5): - for a in range(size): - neu1[a] /= count - - for a in range(size): - work[a] = 0.0 - for b in range(codelens[i]): - row2 = word_point[b] * size - f = 0.0 - for a in range(size): - f += neu1[a] * syn1[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (1 - word_code[b] - f) * alpha - for a in range(size): - work[a] += g * syn1[row2 + a] - for a in range(size): - syn1[row2 + a] += g * neu1[a] + our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - for a in range(size): - syn0[indexes[m] * 
size + a] += work[a] -cdef unsigned long long fast_sentence0_cbow_neg( +cdef unsigned long long fast_sentence_cbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, @@ -420,10 +203,10 @@ cdef unsigned long long fast_sentence0_cbow_neg( continue else: count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) if cbow_mean and count > (0.5): inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) + sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) memset(work, 0, size * cython.sizeof(REAL_t)) @@ -439,143 +222,22 @@ cdef unsigned long long fast_sentence0_cbow_neg( label = 0.0 row2 = target_index * size - f = dsdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) + f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) if f <= -MAX_EXP or f >= MAX_EXP: continue f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) + our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) for m in range(j,k): if m == i or codelens[m] == 0: continue else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) return next_random -cdef unsigned long long fast_sentence1_cbow_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random) nogil: - - cdef long 
long a - cdef long long row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label - cdef np.uint32_t target_index, word_index - cdef int d, m - - word_index = indexes[i] - - memset(neu1, 0, size * cython.sizeof(REAL_t)) - count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): - inv_count = ONEF/count - sscal(&size, &inv_count, neu1, &ONE) - - memset(work, 0, size * cython.sizeof(REAL_t)) - - for d in range(negative+1): - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = sdot(&size, neu1, &ONE, &syn1neg[row2], &ONE) - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) - saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) - - for m in range(j,k): - if m == i or codelens[m] == 0: - continue - else: - saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) - - return next_random - -cdef unsigned long long fast_sentence2_cbow_neg( - const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], - REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, - const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random) nogil: - - cdef long long a - cdef long long row2 - cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label - cdef np.uint32_t target_index, word_index - cdef int d, m - - word_index = indexes[i] - - for a in range(size): - neu1[a] = 0.0 - 
count = 0.0 - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - count += ONEF - for a in range(size): - neu1[a] += syn0[indexes[m] * size + a] - if cbow_mean and count > (0.5): - for a in range(size): - neu1[a] /= count - - for a in range(size): - work[a] = 0.0 - - for d in range(negative+1): - if d == 0: - target_index = word_index - label = ONEF - else: - target_index = table[(next_random >> 16) % table_len] - next_random = (next_random * 25214903917ULL + 11) & modulo - if target_index == word_index: - continue - label = 0.0 - - row2 = target_index * size - f = 0.0 - for a in range(size): - f += neu1[a] * syn1neg[row2 + a] - if f <= -MAX_EXP or f >= MAX_EXP: - continue - f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] - g = (label - f) * alpha - for a in range(size): - work[a] += g * syn1neg[row2 + a] - for a in range(size): - syn1neg[row2 + a] += g * neu1[a] - - for m in range(j, k): - if m == i or codelens[m] == 0: - continue - else: - for a in range(size): - syn0[indexes[m] * size + a] += work[a] - - return next_random def train_sentence_sg(model, sentence, alpha, _work): cdef int hs = model.hs @@ -745,10 +407,8 @@ def init(): into table EXP_TABLE. 
""" - global fast_sentence_sg_hs - global fast_sentence_sg_neg - global fast_sentence_cbow_hs - global fast_sentence_cbow_neg + global our_dot + global our_saxpy cdef int i cdef float *x = [10.0] @@ -767,24 +427,18 @@ def init(): d_res = dsdot(&size, x, &ONE, y, &ONE) p_res = &d_res if (abs(d_res - expected) < 0.0001): - fast_sentence_sg_hs = fast_sentence0_sg_hs - fast_sentence_sg_neg = fast_sentence0_sg_neg - fast_sentence_cbow_hs = fast_sentence0_cbow_hs - fast_sentence_cbow_neg = fast_sentence0_cbow_neg + our_dot = our_dot_double + our_saxpy = saxpy return 0 # double elif (abs(p_res[0] - expected) < 0.0001): - fast_sentence_sg_hs = fast_sentence1_sg_hs - fast_sentence_sg_neg = fast_sentence1_sg_neg - fast_sentence_cbow_hs = fast_sentence1_cbow_hs - fast_sentence_cbow_neg = fast_sentence1_cbow_neg + our_dot = our_dot_float + our_saxpy = saxpy return 1 # float else: # neither => use cython loops, no BLAS # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here - fast_sentence_sg_hs = fast_sentence2_sg_hs - fast_sentence_sg_neg = fast_sentence2_sg_neg - fast_sentence_cbow_hs = fast_sentence2_cbow_hs - fast_sentence_cbow_neg = fast_sentence2_cbow_neg + our_dot = our_dot_noblas + our_saxpy = our_saxpy_noblas return 2 FAST_VERSION = init() # initialize the module From 1ed5e49e9624eb4e9e6ddf9b7d24032472928482 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 24 Jun 2015 04:48:26 -0700 Subject: [PATCH 47/49] for cbow-sum, divide error over all contributing vectors --- gensim/models/word2vec_inner.c | 850 +++++++++++++++++-------------- gensim/models/word2vec_inner.pyx | 16 +- 2 files changed, 477 insertions(+), 389 deletions(-) diff --git a/gensim/models/word2vec_inner.c b/gensim/models/word2vec_inner.c index 7c0390435d..13dec3bf05 100644 --- a/gensim/models/word2vec_inner.c +++ b/gensim/models/word2vec_inner.c @@ -1959,6 +1959,15 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h int 
__pyx_t_4; PY_LONG_LONG __pyx_t_5; + /* "trunk/gensim/models/word2vec_inner.pyx":151 + * cdef long long a, b + * cdef long long row2 + * cdef REAL_t f, g, count, inv_count = 1.0 # <<<<<<<<<<<<<< + * cdef int m + * + */ + __pyx_v_inv_count = 1.0; + /* "trunk/gensim/models/word2vec_inner.pyx":154 * cdef int m * @@ -2022,7 +2031,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): + * if count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); @@ -2030,7 +2039,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): + * if count > (0.5): * inv_count = ONEF/count */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); @@ -2041,43 +2050,56 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h /* "trunk/gensim/models/word2vec_inner.pyx":162 * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< + * if count > (0.5): # <<<<<<<<<<<<<< + * inv_count = ONEF/count + * if cbow_mean and count > (0.5): + */ + __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/word2vec_inner.pyx":163 + * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) + * if count > (0.5): + * inv_count = ONEF/count # 
<<<<<<<<<<<<<< + * if cbow_mean and count > (0.5): + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + */ + __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); + goto __pyx_L8; + } + __pyx_L8:; + + /* "trunk/gensim/models/word2vec_inner.pyx":164 + * if count > (0.5): * inv_count = ONEF/count + * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * */ __pyx_t_4 = (__pyx_v_cbow_mean != 0); if (__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L9_bool_binop_done; + goto __pyx_L10_bool_binop_done; } __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L9_bool_binop_done:; + __pyx_L10_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":163 - * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): - * inv_count = ONEF/count # <<<<<<<<<<<<<< - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) - * - */ - __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); - - /* "trunk/gensim/models/word2vec_inner.pyx":164 - * if cbow_mean and count > (0.5): + /* "trunk/gensim/models/word2vec_inner.pyx":165 * inv_count = ONEF/count + * if cbow_mean and count > (0.5): * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - goto __pyx_L8; + goto __pyx_L9; } - __pyx_L8:; + __pyx_L9:; - /* "trunk/gensim/models/word2vec_inner.pyx":166 + /* "trunk/gensim/models/word2vec_inner.pyx":167 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) * * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2086,7 +2108,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":167 + /* "trunk/gensim/models/word2vec_inner.pyx":168 * * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): # <<<<<<<<<<<<<< @@ -2097,7 +2119,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_1; __pyx_t_5+=1) { __pyx_v_b = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":168 + /* "trunk/gensim/models/word2vec_inner.pyx":169 * memset(work, 0, size * cython.sizeof(REAL_t)) * for b in range(codelens[i]): * row2 = word_point[b] * size # <<<<<<<<<<<<<< @@ -2106,7 +2128,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_row2 = ((__pyx_v_word_point[__pyx_v_b]) * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":169 + /* "trunk/gensim/models/word2vec_inner.pyx":170 * for b in range(codelens[i]): * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< @@ -2115,7 +2137,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, 
(&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":170 + /* "trunk/gensim/models/word2vec_inner.pyx":171 * row2 = word_point[b] * size * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2126,24 +2148,24 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L14_bool_binop_done; + goto __pyx_L15_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L14_bool_binop_done:; + __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":171 + /* "trunk/gensim/models/word2vec_inner.pyx":172 * f = our_dot(&size, neu1, &ONE, &syn1[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha */ - goto __pyx_L11_continue; + goto __pyx_L12_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":172 + /* "trunk/gensim/models/word2vec_inner.pyx":173 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2152,7 +2174,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_f = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":173 + /* "trunk/gensim/models/word2vec_inner.pyx":174 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha # <<<<<<<<<<<<<< @@ -2161,7 +2183,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_g = (((1 - (__pyx_v_word_code[__pyx_v_b])) - __pyx_v_f) * __pyx_v_alpha); - /* 
"trunk/gensim/models/word2vec_inner.pyx":174 + /* "trunk/gensim/models/word2vec_inner.pyx":175 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2170,20 +2192,42 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":175 + /* "trunk/gensim/models/word2vec_inner.pyx":176 * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< * - * for m in range(j, k): + * if not cbow_mean: # divide error over summed window vectors */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L11_continue:; + __pyx_L12_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":177 + /* "trunk/gensim/models/word2vec_inner.pyx":178 * our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/word2vec_inner.pyx":179 + * + * if not cbow_mean: # divide error over summed window vectors + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) 
# <<<<<<<<<<<<<< + * + * for m in range(j, k): + */ + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + goto __pyx_L17; + } + __pyx_L17:; + + /* "trunk/gensim/models/word2vec_inner.pyx":181 + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * * for m in range(j, k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue @@ -2192,7 +2236,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":178 + /* "trunk/gensim/models/word2vec_inner.pyx":182 * * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2203,25 +2247,25 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L19_bool_binop_done; + goto __pyx_L21_bool_binop_done; } __pyx_t_4 = (((__pyx_v_codelens[__pyx_v_m]) == 0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L19_bool_binop_done:; + __pyx_L21_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":179 + /* "trunk/gensim/models/word2vec_inner.pyx":183 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) */ - goto __pyx_L16_continue; + goto __pyx_L18_continue; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":181 + /* "trunk/gensim/models/word2vec_inner.pyx":185 * continue * else: * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< @@ -2230,7 +2274,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), 
(&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } - __pyx_L16_continue:; + __pyx_L18_continue:; } /* "trunk/gensim/models/word2vec_inner.pyx":143 @@ -2244,7 +2288,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h /* function exit code */ } -/* "trunk/gensim/models/word2vec_inner.pyx":184 +/* "trunk/gensim/models/word2vec_inner.pyx":188 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2271,16 +2315,25 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas int __pyx_t_4; long __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":192 + /* "trunk/gensim/models/word2vec_inner.pyx":196 * cdef long long a * cdef long long row2 * cdef unsigned long long modulo = 281474976710655ULL # <<<<<<<<<<<<<< - * cdef REAL_t f, g, count, inv_count, label + * cdef REAL_t f, g, count, inv_count = 1.0, label * cdef np.uint32_t target_index, word_index */ __pyx_v_modulo = 281474976710655ULL; /* "trunk/gensim/models/word2vec_inner.pyx":197 + * cdef long long row2 + * cdef unsigned long long modulo = 281474976710655ULL + * cdef REAL_t f, g, count, inv_count = 1.0, label # <<<<<<<<<<<<<< + * cdef np.uint32_t target_index, word_index + * cdef int d, m + */ + __pyx_v_inv_count = 1.0; + + /* "trunk/gensim/models/word2vec_inner.pyx":201 * cdef int d, m * * word_index = indexes[i] # <<<<<<<<<<<<<< @@ -2289,7 +2342,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_word_index = (__pyx_v_indexes[__pyx_v_i]); - /* "trunk/gensim/models/word2vec_inner.pyx":199 + /* "trunk/gensim/models/word2vec_inner.pyx":203 * word_index = indexes[i] * * memset(neu1, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2298,7 +2351,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ memset(__pyx_v_neu1, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":200 + /* "trunk/gensim/models/word2vec_inner.pyx":204 * * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 # <<<<<<<<<<<<<< @@ -2307,7 +2360,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_count = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); - /* "trunk/gensim/models/word2vec_inner.pyx":201 + /* "trunk/gensim/models/word2vec_inner.pyx":205 * memset(neu1, 0, size * cython.sizeof(REAL_t)) * count = 0.0 * for m in range(j, k): # <<<<<<<<<<<<<< @@ -2318,7 +2371,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":202 + /* "trunk/gensim/models/word2vec_inner.pyx":206 * count = 0.0 * for m in range(j, k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2336,7 +2389,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L6_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":203 + /* "trunk/gensim/models/word2vec_inner.pyx":207 * for m in range(j, k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2347,20 +2400,20 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":205 + /* "trunk/gensim/models/word2vec_inner.pyx":209 * continue * else: * count += ONEF # <<<<<<<<<<<<<< * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): + * if count > (0.5): */ __pyx_v_count = (__pyx_v_count + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF); - /* "trunk/gensim/models/word2vec_inner.pyx":206 + /* 
"trunk/gensim/models/word2vec_inner.pyx":210 * else: * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) # <<<<<<<<<<<<<< - * if cbow_mean and count > (0.5): + * if count > (0.5): * inv_count = ONEF/count */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); @@ -2368,46 +2421,51 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L3_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":207 + /* "trunk/gensim/models/word2vec_inner.pyx":211 * count += ONEF * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): # <<<<<<<<<<<<<< + * if count > (0.5): # <<<<<<<<<<<<<< * inv_count = ONEF/count - * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * if cbow_mean: */ - __pyx_t_4 = (__pyx_v_cbow_mean != 0); - if (__pyx_t_4) { - } else { - __pyx_t_3 = __pyx_t_4; - goto __pyx_L9_bool_binop_done; - } - __pyx_t_4 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); - __pyx_t_3 = __pyx_t_4; - __pyx_L9_bool_binop_done:; + __pyx_t_3 = ((__pyx_v_count > ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.5)) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":208 + /* "trunk/gensim/models/word2vec_inner.pyx":212 * our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - * if cbow_mean and count > (0.5): + * if count > (0.5): * inv_count = ONEF/count # <<<<<<<<<<<<<< + * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
- * */ __pyx_v_inv_count = (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF / __pyx_v_count); + goto __pyx_L8; + } + __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":209 - * if cbow_mean and count > (0.5): + /* "trunk/gensim/models/word2vec_inner.pyx":213 + * if count > (0.5): * inv_count = ONEF/count + * if cbow_mean: # <<<<<<<<<<<<<< + * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = (__pyx_v_cbow_mean != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/word2vec_inner.pyx":214 + * inv_count = ONEF/count + * if cbow_mean: * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< * * memset(work, 0, size * cython.sizeof(REAL_t)) */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - goto __pyx_L8; + goto __pyx_L9; } - __pyx_L8:; + __pyx_L9:; - /* "trunk/gensim/models/word2vec_inner.pyx":211 + /* "trunk/gensim/models/word2vec_inner.pyx":216 * sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) 
* * memset(work, 0, size * cython.sizeof(REAL_t)) # <<<<<<<<<<<<<< @@ -2416,7 +2474,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ memset(__pyx_v_work, 0, (__pyx_v_size * (sizeof(__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)))); - /* "trunk/gensim/models/word2vec_inner.pyx":213 + /* "trunk/gensim/models/word2vec_inner.pyx":218 * memset(work, 0, size * cython.sizeof(REAL_t)) * * for d in range(negative+1): # <<<<<<<<<<<<<< @@ -2427,7 +2485,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas for (__pyx_t_1 = 0; __pyx_t_1 < __pyx_t_5; __pyx_t_1+=1) { __pyx_v_d = __pyx_t_1; - /* "trunk/gensim/models/word2vec_inner.pyx":214 + /* "trunk/gensim/models/word2vec_inner.pyx":219 * * for d in range(negative+1): * if d == 0: # <<<<<<<<<<<<<< @@ -2437,7 +2495,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_t_3 = ((__pyx_v_d == 0) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":215 + /* "trunk/gensim/models/word2vec_inner.pyx":220 * for d in range(negative+1): * if d == 0: * target_index = word_index # <<<<<<<<<<<<<< @@ -2446,7 +2504,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_target_index = __pyx_v_word_index; - /* "trunk/gensim/models/word2vec_inner.pyx":216 + /* "trunk/gensim/models/word2vec_inner.pyx":221 * if d == 0: * target_index = word_index * label = ONEF # <<<<<<<<<<<<<< @@ -2454,11 +2512,11 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas * target_index = table[(next_random >> 16) % table_len] */ __pyx_v_label = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF; - goto __pyx_L13; + goto __pyx_L12; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":218 + /* "trunk/gensim/models/word2vec_inner.pyx":223 * label = ONEF * else: * target_index = table[(next_random >> 16) % table_len] # <<<<<<<<<<<<<< @@ -2467,7 +2525,7 @@ static 
unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_target_index = (__pyx_v_table[((__pyx_v_next_random >> 16) % __pyx_v_table_len)]); - /* "trunk/gensim/models/word2vec_inner.pyx":219 + /* "trunk/gensim/models/word2vec_inner.pyx":224 * else: * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo # <<<<<<<<<<<<<< @@ -2476,7 +2534,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_next_random = (((__pyx_v_next_random * ((unsigned PY_LONG_LONG)25214903917ULL)) + 11) & __pyx_v_modulo); - /* "trunk/gensim/models/word2vec_inner.pyx":220 + /* "trunk/gensim/models/word2vec_inner.pyx":225 * target_index = table[(next_random >> 16) % table_len] * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: # <<<<<<<<<<<<<< @@ -2486,17 +2544,17 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_t_3 = ((__pyx_v_target_index == __pyx_v_word_index) != 0); if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":221 + /* "trunk/gensim/models/word2vec_inner.pyx":226 * next_random = (next_random * 25214903917ULL + 11) & modulo * if target_index == word_index: * continue # <<<<<<<<<<<<<< * label = 0.0 * */ - goto __pyx_L11_continue; + goto __pyx_L10_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":222 + /* "trunk/gensim/models/word2vec_inner.pyx":227 * if target_index == word_index: * continue * label = 0.0 # <<<<<<<<<<<<<< @@ -2505,9 +2563,9 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_label = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)0.0); } - __pyx_L13:; + __pyx_L12:; - /* "trunk/gensim/models/word2vec_inner.pyx":224 + /* "trunk/gensim/models/word2vec_inner.pyx":229 * label = 0.0 * * row2 = target_index * size # <<<<<<<<<<<<<< @@ -2516,7 +2574,7 @@ static unsigned PY_LONG_LONG 
__pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_row2 = (__pyx_v_target_index * __pyx_v_size); - /* "trunk/gensim/models/word2vec_inner.pyx":225 + /* "trunk/gensim/models/word2vec_inner.pyx":230 * * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< @@ -2525,7 +2583,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_f = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot((&__pyx_v_size), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":226 + /* "trunk/gensim/models/word2vec_inner.pyx":231 * row2 = target_index * size * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: # <<<<<<<<<<<<<< @@ -2536,24 +2594,24 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas if (!__pyx_t_4) { } else { __pyx_t_3 = __pyx_t_4; - goto __pyx_L16_bool_binop_done; + goto __pyx_L15_bool_binop_done; } __pyx_t_4 = ((__pyx_v_f >= 6.0) != 0); __pyx_t_3 = __pyx_t_4; - __pyx_L16_bool_binop_done:; + __pyx_L15_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":227 + /* "trunk/gensim/models/word2vec_inner.pyx":232 * f = our_dot(&size, neu1, &ONE, &syn1neg[row2], &ONE) * if f <= -MAX_EXP or f >= MAX_EXP: * continue # <<<<<<<<<<<<<< * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha */ - goto __pyx_L11_continue; + goto __pyx_L10_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":228 + /* "trunk/gensim/models/word2vec_inner.pyx":233 * if f <= -MAX_EXP or f >= MAX_EXP: * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] # <<<<<<<<<<<<<< @@ -2562,7 +2620,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_f = 
(__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[((int)((__pyx_v_f + 6.0) * 83.0))]); - /* "trunk/gensim/models/word2vec_inner.pyx":229 + /* "trunk/gensim/models/word2vec_inner.pyx":234 * continue * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha # <<<<<<<<<<<<<< @@ -2571,7 +2629,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_g = ((__pyx_v_label - __pyx_v_f) * __pyx_v_alpha); - /* "trunk/gensim/models/word2vec_inner.pyx":230 + /* "trunk/gensim/models/word2vec_inner.pyx":235 * f = EXP_TABLE[((f + MAX_EXP) * (EXP_TABLE_SIZE / MAX_EXP / 2))] * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< @@ -2580,20 +2638,42 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":231 + /* "trunk/gensim/models/word2vec_inner.pyx":236 * g = (label - f) * alpha * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * - * for m in range(j,k): + * if not cbow_mean: # divide error over summed window vectors */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), __pyx_v_neu1, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - __pyx_L11_continue:; + __pyx_L10_continue:; } - /* "trunk/gensim/models/word2vec_inner.pyx":233 + /* "trunk/gensim/models/word2vec_inner.pyx":238 * our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) * + * if not cbow_mean: # divide error over summed window vectors # <<<<<<<<<<<<<< + 
* sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * + */ + __pyx_t_3 = ((!(__pyx_v_cbow_mean != 0)) != 0); + if (__pyx_t_3) { + + /* "trunk/gensim/models/word2vec_inner.pyx":239 + * + * if not cbow_mean: # divide error over summed window vectors + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) # <<<<<<<<<<<<<< + * + * for m in range(j,k): + */ + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_sscal((&__pyx_v_size), (&__pyx_v_inv_count), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + goto __pyx_L17; + } + __pyx_L17:; + + /* "trunk/gensim/models/word2vec_inner.pyx":241 + * sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + * * for m in range(j,k): # <<<<<<<<<<<<<< * if m == i or codelens[m] == 0: * continue @@ -2602,7 +2682,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas for (__pyx_t_2 = __pyx_v_j; __pyx_t_2 < __pyx_t_1; __pyx_t_2+=1) { __pyx_v_m = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":234 + /* "trunk/gensim/models/word2vec_inner.pyx":242 * * for m in range(j,k): * if m == i or codelens[m] == 0: # <<<<<<<<<<<<<< @@ -2620,7 +2700,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L21_bool_binop_done:; if (__pyx_t_3) { - /* "trunk/gensim/models/word2vec_inner.pyx":235 + /* "trunk/gensim/models/word2vec_inner.pyx":243 * for m in range(j,k): * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< @@ -2631,7 +2711,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":237 + /* "trunk/gensim/models/word2vec_inner.pyx":245 * continue * else: * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< @@ -2643,7 +2723,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_L18_continue:; } - /* 
"trunk/gensim/models/word2vec_inner.pyx":239 + /* "trunk/gensim/models/word2vec_inner.pyx":247 * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< @@ -2653,7 +2733,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas __pyx_r = __pyx_v_next_random; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":184 + /* "trunk/gensim/models/word2vec_inner.pyx":188 * * * cdef unsigned long long fast_sentence_cbow_neg( # <<<<<<<<<<<<<< @@ -2666,7 +2746,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":242 +/* "trunk/gensim/models/word2vec_inner.pyx":250 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< @@ -2710,21 +2790,21 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_1train_sentenc case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; 
__pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_sg") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 4) { goto __pyx_L5_argtuple_error; @@ -2741,7 +2821,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_1train_sentenc } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_sg", 1, 4, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.word2vec_inner.train_sentence_sg", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -2805,82 +2885,82 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_sg", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":243 + /* "trunk/gensim/models/word2vec_inner.pyx":251 * * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 243; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 251; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":244 + /* "trunk/gensim/models/word2vec_inner.pyx":252 * def train_sentence_sg(model, sentence, alpha, _work): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 244; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 252; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* 
"trunk/gensim/models/word2vec_inner.pyx":246 + /* "trunk/gensim/models/word2vec_inner.pyx":254 * cdef int negative = model.negative * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t _alpha = alpha */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 246; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":248 + /* "trunk/gensim/models/word2vec_inner.pyx":256 * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 248; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* 
"trunk/gensim/models/word2vec_inner.pyx":249 + /* "trunk/gensim/models/word2vec_inner.pyx":257 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 249; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":255 + /* "trunk/gensim/models/word2vec_inner.pyx":263 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":258 + /* "trunk/gensim/models/word2vec_inner.pyx":266 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -2889,7 +2969,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":271 + /* "trunk/gensim/models/word2vec_inner.pyx":279 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -2899,23 +2979,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":272 + /* "trunk/gensim/models/word2vec_inner.pyx":280 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 272; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":274 + /* "trunk/gensim/models/word2vec_inner.pyx":282 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -2925,106 +3005,106 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":275 + /* "trunk/gensim/models/word2vec_inner.pyx":283 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 275; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":276 + /* "trunk/gensim/models/word2vec_inner.pyx":284 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 
2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 276; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":277 + /* "trunk/gensim/models/word2vec_inner.pyx":285 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 277; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":278 + /* "trunk/gensim/models/word2vec_inner.pyx":286 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 
= 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":281 + /* "trunk/gensim/models/word2vec_inner.pyx":289 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if 
(!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":282 + /* "trunk/gensim/models/word2vec_inner.pyx":290 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 282; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -3033,7 +3113,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":284 + /* "trunk/gensim/models/word2vec_inner.pyx":292 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3044,19 +3124,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":285 + /* "trunk/gensim/models/word2vec_inner.pyx":293 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":286 + /* "trunk/gensim/models/word2vec_inner.pyx":294 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -3067,7 +3147,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":287 + /* "trunk/gensim/models/word2vec_inner.pyx":295 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3079,20 +3159,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":289 + /* "trunk/gensim/models/word2vec_inner.pyx":297 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":290 + /* "trunk/gensim/models/word2vec_inner.pyx":298 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -3102,49 +3182,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":291 + /* "trunk/gensim/models/word2vec_inner.pyx":299 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":292 + /* "trunk/gensim/models/word2vec_inner.pyx":300 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 292; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":293 + /* "trunk/gensim/models/word2vec_inner.pyx":301 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t 
*)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":295 + /* "trunk/gensim/models/word2vec_inner.pyx":303 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -3155,7 +3235,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":296 + /* "trunk/gensim/models/word2vec_inner.pyx":304 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3167,7 +3247,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":298 + /* "trunk/gensim/models/word2vec_inner.pyx":306 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3175,17 +3255,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if 
(unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -3199,7 +3279,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -3213,7 +3293,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, 
NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -3221,9 +3301,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -3231,16 +3311,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -3249,7 +3329,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -3260,17 +3340,17 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":299 + /* "trunk/gensim/models/word2vec_inner.pyx":307 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":298 + /* "trunk/gensim/models/word2vec_inner.pyx":306 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3280,7 +3360,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":302 + /* "trunk/gensim/models/word2vec_inner.pyx":310 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3294,7 +3374,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":303 + /* "trunk/gensim/models/word2vec_inner.pyx":311 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3305,7 +3385,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { 
__pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":304 + /* "trunk/gensim/models/word2vec_inner.pyx":312 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -3315,7 +3395,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":305 + /* "trunk/gensim/models/word2vec_inner.pyx":313 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -3325,7 +3405,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":306 + /* "trunk/gensim/models/word2vec_inner.pyx":314 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3334,7 +3414,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":307 + /* "trunk/gensim/models/word2vec_inner.pyx":315 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -3344,7 +3424,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_j < 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":308 + /* "trunk/gensim/models/word2vec_inner.pyx":316 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3356,7 +3436,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":309 + /* "trunk/gensim/models/word2vec_inner.pyx":317 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3365,7 +3445,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ 
__pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":310 + /* "trunk/gensim/models/word2vec_inner.pyx":318 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -3375,7 +3455,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":311 + /* "trunk/gensim/models/word2vec_inner.pyx":319 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -3387,7 +3467,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":312 + /* "trunk/gensim/models/word2vec_inner.pyx":320 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -3398,7 +3478,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/word2vec_inner.pyx":313 + /* "trunk/gensim/models/word2vec_inner.pyx":321 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -3416,7 +3496,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L22_bool_binop_done:; if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":314 + /* "trunk/gensim/models/word2vec_inner.pyx":322 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< @@ -3426,7 +3506,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence goto __pyx_L19_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":315 + /* "trunk/gensim/models/word2vec_inner.pyx":323 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< @@ -3436,7 
+3516,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":316 + /* "trunk/gensim/models/word2vec_inner.pyx":324 * continue * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) # <<<<<<<<<<<<<< @@ -3448,7 +3528,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L24:; - /* "trunk/gensim/models/word2vec_inner.pyx":317 + /* "trunk/gensim/models/word2vec_inner.pyx":325 * if hs: * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) * if negative: # <<<<<<<<<<<<<< @@ -3458,7 +3538,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":318 + /* "trunk/gensim/models/word2vec_inner.pyx":326 * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) * if negative: * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) # <<<<<<<<<<<<<< @@ -3475,7 +3555,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":302 + /* "trunk/gensim/models/word2vec_inner.pyx":310 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3493,7 +3573,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":320 + /* "trunk/gensim/models/word2vec_inner.pyx":328 * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) * * return result # <<<<<<<<<<<<<< @@ -3501,13 +3581,13 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 320; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":242 + /* "trunk/gensim/models/word2vec_inner.pyx":250 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< @@ -3533,7 +3613,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":323 +/* "trunk/gensim/models/word2vec_inner.pyx":331 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -3579,26 +3659,26 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if 
(likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3617,7 +3697,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; 
__Pyx_AddTraceback("trunk.gensim.models.word2vec_inner.train_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3681,95 +3761,95 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_cbow", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":324 + /* "trunk/gensim/models/word2vec_inner.pyx":332 * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 324; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":325 + /* "trunk/gensim/models/word2vec_inner.pyx":333 * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + 
__pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 325; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":326 + /* "trunk/gensim/models/word2vec_inner.pyx":334 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 326; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":328 + /* "trunk/gensim/models/word2vec_inner.pyx":336 * cdef int cbow_mean = 
model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< * cdef REAL_t *work * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":331 + /* "trunk/gensim/models/word2vec_inner.pyx":339 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":332 + /* "trunk/gensim/models/word2vec_inner.pyx":340 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # 
<<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":338 + /* "trunk/gensim/models/word2vec_inner.pyx":346 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":341 + /* "trunk/gensim/models/word2vec_inner.pyx":349 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -3778,7 +3858,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":354 + /* "trunk/gensim/models/word2vec_inner.pyx":362 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -3788,23 +3868,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":355 + /* "trunk/gensim/models/word2vec_inner.pyx":363 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 355; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":357 + /* 
"trunk/gensim/models/word2vec_inner.pyx":365 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3814,116 +3894,116 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":358 + /* "trunk/gensim/models/word2vec_inner.pyx":366 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 358; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":359 + /* "trunk/gensim/models/word2vec_inner.pyx":367 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 359; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":360 + /* "trunk/gensim/models/word2vec_inner.pyx":368 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 360; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* 
"trunk/gensim/models/word2vec_inner.pyx":361 + /* "trunk/gensim/models/word2vec_inner.pyx":369 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; 
goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":364 + /* "trunk/gensim/models/word2vec_inner.pyx":372 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 364; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":365 + /* "trunk/gensim/models/word2vec_inner.pyx":373 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "trunk/gensim/models/word2vec_inner.pyx":366 + /* "trunk/gensim/models/word2vec_inner.pyx":374 * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -3932,7 +4012,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":368 + /* "trunk/gensim/models/word2vec_inner.pyx":376 
* sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3943,19 +4023,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":369 + /* "trunk/gensim/models/word2vec_inner.pyx":377 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":370 + /* "trunk/gensim/models/word2vec_inner.pyx":378 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -3966,7 +4046,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":371 + /* "trunk/gensim/models/word2vec_inner.pyx":379 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3978,20 +4058,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":373 + /* "trunk/gensim/models/word2vec_inner.pyx":381 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":374 + /* "trunk/gensim/models/word2vec_inner.pyx":382 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -4001,49 +4081,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":375 + /* "trunk/gensim/models/word2vec_inner.pyx":383 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":376 + /* "trunk/gensim/models/word2vec_inner.pyx":384 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":377 + /* "trunk/gensim/models/word2vec_inner.pyx":385 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":379 + /* "trunk/gensim/models/word2vec_inner.pyx":387 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -4054,7 +4134,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":380 + /* "trunk/gensim/models/word2vec_inner.pyx":388 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -4066,7 +4146,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":382 + /* "trunk/gensim/models/word2vec_inner.pyx":390 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -4074,17 +4154,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if 
(unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -4098,7 +4178,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_10 = 1; } } - __pyx_t_16 = 
PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -4112,7 +4192,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -4120,9 +4200,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -4130,16 +4210,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 
390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -4148,7 +4228,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 382; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -4159,17 +4239,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":383 + /* "trunk/gensim/models/word2vec_inner.pyx":391 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":382 + /* "trunk/gensim/models/word2vec_inner.pyx":390 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -4179,7 +4259,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":386 
+ /* "trunk/gensim/models/word2vec_inner.pyx":394 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4193,7 +4273,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":387 + /* "trunk/gensim/models/word2vec_inner.pyx":395 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4204,7 +4284,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":388 + /* "trunk/gensim/models/word2vec_inner.pyx":396 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -4214,7 +4294,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":389 + /* "trunk/gensim/models/word2vec_inner.pyx":397 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -4224,7 +4304,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":390 + /* "trunk/gensim/models/word2vec_inner.pyx":398 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4233,7 +4313,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":391 + /* "trunk/gensim/models/word2vec_inner.pyx":399 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -4243,7 +4323,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_j < 
0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":392 + /* "trunk/gensim/models/word2vec_inner.pyx":400 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -4255,7 +4335,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":393 + /* "trunk/gensim/models/word2vec_inner.pyx":401 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4264,7 +4344,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":394 + /* "trunk/gensim/models/word2vec_inner.pyx":402 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -4274,7 +4354,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":395 + /* "trunk/gensim/models/word2vec_inner.pyx":403 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -4286,7 +4366,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":396 + /* "trunk/gensim/models/word2vec_inner.pyx":404 * if k > sentence_len: * k = sentence_len * if hs: # <<<<<<<<<<<<<< @@ -4296,7 +4376,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":397 + /* "trunk/gensim/models/word2vec_inner.pyx":405 * k = sentence_len * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) # <<<<<<<<<<<<<< @@ -4308,7 +4388,7 @@ static 
PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L19:; - /* "trunk/gensim/models/word2vec_inner.pyx":398 + /* "trunk/gensim/models/word2vec_inner.pyx":406 * if hs: * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) * if negative: # <<<<<<<<<<<<<< @@ -4318,7 +4398,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":399 + /* "trunk/gensim/models/word2vec_inner.pyx":407 * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) * if negative: * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) # <<<<<<<<<<<<<< @@ -4333,7 +4413,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* "trunk/gensim/models/word2vec_inner.pyx":386 + /* "trunk/gensim/models/word2vec_inner.pyx":394 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4351,7 +4431,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* "trunk/gensim/models/word2vec_inner.pyx":401 + /* "trunk/gensim/models/word2vec_inner.pyx":409 * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) * * return result # <<<<<<<<<<<<<< @@ -4359,13 +4439,13 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 401; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":323 + /* "trunk/gensim/models/word2vec_inner.pyx":331 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -4391,7 +4471,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":404 +/* "trunk/gensim/models/word2vec_inner.pyx":412 * * * def init(): # <<<<<<<<<<<<<< @@ -4430,7 +4510,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":414 + /* "trunk/gensim/models/word2vec_inner.pyx":422 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -4440,7 +4520,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/word2vec_inner.pyx":415 + /* "trunk/gensim/models/word2vec_inner.pyx":423 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] # <<<<<<<<<<<<<< @@ -4450,7 +4530,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":416 + /* "trunk/gensim/models/word2vec_inner.pyx":424 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -4459,7 +4539,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/word2vec_inner.pyx":417 + /* "trunk/gensim/models/word2vec_inner.pyx":425 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -4468,7 +4548,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_size = 1; - /* "trunk/gensim/models/word2vec_inner.pyx":422 + /* "trunk/gensim/models/word2vec_inner.pyx":430 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -4478,7 +4558,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":423 + /* "trunk/gensim/models/word2vec_inner.pyx":431 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -4487,7 +4567,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/word2vec_inner.pyx":424 + /* "trunk/gensim/models/word2vec_inner.pyx":432 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / (EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -4497,7 +4577,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/word2vec_inner.pyx":427 + /* "trunk/gensim/models/word2vec_inner.pyx":435 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -4506,7 +4586,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ 
__pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":428 + /* "trunk/gensim/models/word2vec_inner.pyx":436 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -4515,7 +4595,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/word2vec_inner.pyx":429 + /* "trunk/gensim/models/word2vec_inner.pyx":437 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4525,7 +4605,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":430 + /* "trunk/gensim/models/word2vec_inner.pyx":438 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -4534,7 +4614,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_double; - /* "trunk/gensim/models/word2vec_inner.pyx":431 + /* "trunk/gensim/models/word2vec_inner.pyx":439 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4543,7 +4623,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":432 + /* "trunk/gensim/models/word2vec_inner.pyx":440 * our_dot = our_dot_double * our_saxpy = saxpy 
* return 0 # double # <<<<<<<<<<<<<< @@ -4556,7 +4636,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":433 + /* "trunk/gensim/models/word2vec_inner.pyx":441 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4566,7 +4646,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":434 + /* "trunk/gensim/models/word2vec_inner.pyx":442 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -4575,7 +4655,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float; - /* "trunk/gensim/models/word2vec_inner.pyx":435 + /* "trunk/gensim/models/word2vec_inner.pyx":443 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4584,7 +4664,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":436 + /* "trunk/gensim/models/word2vec_inner.pyx":444 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -4598,7 +4678,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":440 + /* "trunk/gensim/models/word2vec_inner.pyx":448 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # 
<<<<<<<<<<<<<< @@ -4607,7 +4687,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":441 + /* "trunk/gensim/models/word2vec_inner.pyx":449 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -4616,7 +4696,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":442 + /* "trunk/gensim/models/word2vec_inner.pyx":450 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -4629,7 +4709,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":404 + /* "trunk/gensim/models/word2vec_inner.pyx":412 * * * def init(): # <<<<<<<<<<<<<< @@ -6746,7 +6826,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) 
{__pyx_filename = __pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -6758,31 +6838,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":278 + /* "trunk/gensim/models/word2vec_inner.pyx":286 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 278; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/word2vec_inner.pyx":361 + /* "trunk/gensim/models/word2vec_inner.pyx":369 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< 
* * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 361; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -6852,41 +6932,41 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_GOTREF(__pyx_tuple__10); __Pyx_GIVEREF(__pyx_tuple__10); - /* "trunk/gensim/models/word2vec_inner.pyx":242 + /* "trunk/gensim/models/word2vec_inner.pyx":250 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(28, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(28, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 28, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 242, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 28, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/word2vec_inner.pyx":323 + /* "trunk/gensim/models/word2vec_inner.pyx":331 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(31, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, 
__pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__13 = PyTuple_Pack(31, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 31, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_cbow, 323, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 31, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, 
__pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_cbow, 331, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/word2vec_inner.pyx":404 + /* "trunk/gensim/models/word2vec_inner.pyx":412 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 404, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 412, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7178,48 +7258,48 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) */ 
__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)1.0); - /* "trunk/gensim/models/word2vec_inner.pyx":242 + /* "trunk/gensim/models/word2vec_inner.pyx":250 * * * def train_sentence_sg(model, sentence, alpha, _work): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_1train_sentence_sg, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 242; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":323 + /* "trunk/gensim/models/word2vec_inner.pyx":331 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, 
__pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 323; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":404 + /* "trunk/gensim/models/word2vec_inner.pyx":412 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 404; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":444 + /* "trunk/gensim/models/word2vec_inner.pyx":452 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -7232,14 +7312,14 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 444; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "trunk/gensim/models/word2vec_inner.pyx":1 diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index c0a26f86c5..fe845bf571 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -148,7 +148,7 @@ cdef void fast_sentence_cbow_hs( cdef 
long long a, b cdef long long row2 - cdef REAL_t f, g, count, inv_count + cdef REAL_t f, g, count, inv_count = 1.0 cdef int m memset(neu1, 0, size * cython.sizeof(REAL_t)) @@ -159,8 +159,9 @@ cdef void fast_sentence_cbow_hs( else: count += ONEF our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): + if count > (0.5): inv_count = ONEF/count + if cbow_mean and count > (0.5): sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) memset(work, 0, size * cython.sizeof(REAL_t)) @@ -174,6 +175,9 @@ cdef void fast_sentence_cbow_hs( our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) our_saxpy(&size, &g, neu1, &ONE, &syn1[row2], &ONE) + if not cbow_mean: # divide error over summed window vectors + sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) + for m in range(j, k): if m == i or codelens[m] == 0: continue @@ -190,7 +194,7 @@ cdef unsigned long long fast_sentence_cbow_neg( cdef long long a cdef long long row2 cdef unsigned long long modulo = 281474976710655ULL - cdef REAL_t f, g, count, inv_count, label + cdef REAL_t f, g, count, inv_count = 1.0, label cdef np.uint32_t target_index, word_index cdef int d, m @@ -204,8 +208,9 @@ cdef unsigned long long fast_sentence_cbow_neg( else: count += ONEF our_saxpy(&size, &ONEF, &syn0[indexes[m] * size], &ONE, neu1, &ONE) - if cbow_mean and count > (0.5): + if count > (0.5): inv_count = ONEF/count + if cbow_mean: sscal(&size, &inv_count, neu1, &ONE) # (does this need BLAS-variants like saxpy?) memset(work, 0, size * cython.sizeof(REAL_t)) @@ -230,6 +235,9 @@ cdef unsigned long long fast_sentence_cbow_neg( our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) our_saxpy(&size, &g, neu1, &ONE, &syn1neg[row2], &ONE) + if not cbow_mean: # divide error over summed window vectors + sscal(&size, &inv_count, work, &ONE) # (does this need BLAS-variants like saxpy?) 
+ for m in range(j,k): if m == i or codelens[m] == 0: continue From 739fe31d28ffd70308188a4e7c8c1d3781b6f86c Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Wed, 24 Jun 2015 06:39:26 -0700 Subject: [PATCH 48/49] _lockf support in cython; test --- gensim/models/word2vec_inner.c | 650 ++++++++++++++++--------------- gensim/models/word2vec_inner.pyx | 26 +- gensim/test/test_word2vec.py | 19 + 3 files changed, 375 insertions(+), 320 deletions(-) diff --git a/gensim/models/word2vec_inner.c b/gensim/models/word2vec_inner.c index 13dec3bf05..682930f1d9 100644 --- a/gensim/models/word2vec_inner.c +++ b/gensim/models/word2vec_inner.c @@ -1151,10 +1151,10 @@ static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6ge static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float(int const *, float const *, int const *, float const *, int const *); /*proto*/ static __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas(int const *, float const *, int const *, float const *, int const *); /*proto*/ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int const *, float const *, float const *, int const *, float *, int const *); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG); /*proto*/ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int); /*proto*/ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const , 
__pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const , __pyx_t_5numpy_uint32_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *, __pyx_t_5numpy_uint8_t const *, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const , __pyx_t_5numpy_uint32_t *, unsigned PY_LONG_LONG, int *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int const , __pyx_t_5numpy_uint32_t const *, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const , __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *, int, int, int, int, unsigned PY_LONG_LONG, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *); /*proto*/ #define __Pyx_MODULE_NAME "trunk.gensim.models.word2vec_inner" int __pyx_module_is_main_trunk__gensim__models__word2vec_inner = 0; @@ -1240,6 +1240,8 @@ static char __pyx_k_cbow_mean[] = "cbow_mean"; static char __pyx_k_enumerate[] = "enumerate"; static char __pyx_k_table_len[] = "table_len"; static char __pyx_k_ValueError[] = 
"ValueError"; +static char __pyx_k_syn0_lockf[] = "syn0_lockf"; +static char __pyx_k_word_locks[] = "word_locks"; static char __pyx_k_layer1_size[] = "layer1_size"; static char __pyx_k_next_random[] = "next_random"; static char __pyx_k_FAST_VERSION[] = "FAST_VERSION"; @@ -1315,6 +1317,7 @@ static PyObject *__pyx_n_s_size; static PyObject *__pyx_n_s_snrm2; static PyObject *__pyx_n_s_sscal; static PyObject *__pyx_n_s_syn0; +static PyObject *__pyx_n_s_syn0_lockf; static PyObject *__pyx_n_s_syn1; static PyObject *__pyx_n_s_syn1neg; static PyObject *__pyx_n_s_table; @@ -1326,6 +1329,7 @@ static PyObject *__pyx_n_s_trunk_gensim_models_word2vec_inn; static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; static PyObject *__pyx_n_s_window; static PyObject *__pyx_n_s_word; +static PyObject *__pyx_n_s_word_locks; static PyObject *__pyx_n_s_work; static PyObject *__pyx_n_s_work_2; static PyObject *__pyx_n_s_x; @@ -1536,7 +1540,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas(int * REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work) { +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int const __pyx_v_codelen, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const 
__pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; @@ -1645,7 +1649,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs( * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) # <<<<<<<<<<<<<< * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); @@ -1653,7 +1657,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs( * g = (1 - word_code[b] - f) * alpha * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) # <<<<<<<<<<<<<< - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) * */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); @@ -1663,11 +1667,11 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs( /* "trunk/gensim/models/word2vec_inner.pyx":100 * our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - * our_saxpy(&size, 
&ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[__pyx_v_word2_index])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); /* "trunk/gensim/models/word2vec_inner.pyx":81 * @@ -1688,7 +1692,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs( * REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const 
__pyx_v_size, __pyx_t_5numpy_uint32_t const __pyx_v_word_index, __pyx_t_5numpy_uint32_t const __pyx_v_word2_index, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks) { PY_LONG_LONG __pyx_v_row1; PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; @@ -1899,7 +1903,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas * our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) # <<<<<<<<<<<<<< * - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_g), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn1neg[__pyx_v_row2])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); __pyx_L3_continue:; @@ -1908,14 +1912,14 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas /* "trunk/gensim/models/word2vec_inner.pyx":138 * our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1neg[row2], &ONE) * - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[__pyx_v_word2_index])), __pyx_v_work, 
(&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[__pyx_v_row1])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); /* "trunk/gensim/models/word2vec_inner.pyx":140 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + * our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -1945,7 +1949,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, */ -static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean) { +static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs(__pyx_t_5numpy_uint32_t const *__pyx_v_word_point, __pyx_t_5numpy_uint8_t const *__pyx_v_word_code, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t 
*__pyx_v_word_locks) { PY_LONG_LONG __pyx_v_b; PY_LONG_LONG __pyx_v_row2; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; @@ -2259,7 +2263,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) + * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) */ goto __pyx_L18_continue; } @@ -2268,11 +2272,11 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h /* "trunk/gensim/models/word2vec_inner.pyx":185 * continue * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) # <<<<<<<<<<<<<< * * */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } __pyx_L18_continue:; } @@ -2296,7 +2300,7 @@ static void __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_h * REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, */ -static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, 
__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random) { +static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(int const __pyx_v_negative, __pyx_t_5numpy_uint32_t *__pyx_v_table, unsigned PY_LONG_LONG __pyx_v_table_len, int *__pyx_v_codelens, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn1neg, int const __pyx_v_size, __pyx_t_5numpy_uint32_t const *__pyx_v_indexes, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t const __pyx_v_alpha, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work, int __pyx_v_i, int __pyx_v_j, int __pyx_v_k, int __pyx_v_cbow_mean, unsigned PY_LONG_LONG __pyx_v_next_random, __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks) { PY_LONG_LONG __pyx_v_row2; unsigned PY_LONG_LONG __pyx_v_modulo; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v_f; @@ -2705,7 +2709,7 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas * if m == i or codelens[m] == 0: * continue # <<<<<<<<<<<<<< * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) */ goto __pyx_L18_continue; } @@ -2714,17 +2718,17 @@ static unsigned PY_LONG_LONG __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fas /* 
"trunk/gensim/models/word2vec_inner.pyx":245 * continue * else: - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< + * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) # <<<<<<<<<<<<<< * * return next_random */ - __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONEF), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); + __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy((&__pyx_v_size), (&(__pyx_v_word_locks[(__pyx_v_indexes[__pyx_v_m])])), __pyx_v_work, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), (&(__pyx_v_syn0[((__pyx_v_indexes[__pyx_v_m]) * __pyx_v_size)])), (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); } __pyx_L18_continue:; } /* "trunk/gensim/models/word2vec_inner.pyx":247 - * our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + * our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) * * return next_random # <<<<<<<<<<<<<< * @@ -2838,6 +2842,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence int __pyx_v_hs; int __pyx_v_negative; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; int __pyx_v_size; @@ -2915,8 +2920,8 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * cdef int negative = model.negative * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work - * cdef REAL_t _alpha = alpha */ __pyx_t_1 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 254; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); @@ -2924,43 +2929,56 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":256 + /* "trunk/gensim/models/word2vec_inner.pyx":255 + * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< + * cdef REAL_t *work + * cdef REAL_t _alpha = alpha + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 255; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_word_locks = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/word2vec_inner.pyx":257 + * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 256; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 257; __pyx_clineno = 
__LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":257 + /* "trunk/gensim/models/word2vec_inner.pyx":258 * cdef REAL_t *work * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 257; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 258; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":263 + /* "trunk/gensim/models/word2vec_inner.pyx":264 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = 
__pyx_f[0]; __pyx_lineno = 263; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 264; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":266 + /* "trunk/gensim/models/word2vec_inner.pyx":267 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -2969,7 +2987,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":279 + /* "trunk/gensim/models/word2vec_inner.pyx":280 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -2979,23 +2997,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":280 + /* "trunk/gensim/models/word2vec_inner.pyx":281 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 280; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 281; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = 
((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":282 + /* "trunk/gensim/models/word2vec_inner.pyx":283 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3005,106 +3023,106 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":283 + /* "trunk/gensim/models/word2vec_inner.pyx":284 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 283; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":284 + /* "trunk/gensim/models/word2vec_inner.pyx":285 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = 
len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 284; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":285 + /* "trunk/gensim/models/word2vec_inner.pyx":286 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 285; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = 
PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":286 + /* "trunk/gensim/models/word2vec_inner.pyx":287 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__2, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":289 + /* "trunk/gensim/models/word2vec_inner.pyx":290 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 289; 
__pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":290 + /* "trunk/gensim/models/word2vec_inner.pyx":291 * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 290; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 291; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -3113,7 +3131,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":292 + /* "trunk/gensim/models/word2vec_inner.pyx":293 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3124,19 +3142,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":293 + /* "trunk/gensim/models/word2vec_inner.pyx":294 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if 
(unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 293; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 294; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":294 + /* "trunk/gensim/models/word2vec_inner.pyx":295 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -3147,7 +3165,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":295 + /* "trunk/gensim/models/word2vec_inner.pyx":296 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -3159,20 +3177,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":297 + /* "trunk/gensim/models/word2vec_inner.pyx":298 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 297; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if 
(unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 298; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":298 + /* "trunk/gensim/models/word2vec_inner.pyx":299 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -3182,49 +3200,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":299 + /* "trunk/gensim/models/word2vec_inner.pyx":300 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 299; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":300 + /* "trunk/gensim/models/word2vec_inner.pyx":301 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); 
if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 300; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":301 + /* "trunk/gensim/models/word2vec_inner.pyx":302 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 301; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 302; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
(__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":303 + /* "trunk/gensim/models/word2vec_inner.pyx":304 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -3235,7 +3253,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":304 + /* "trunk/gensim/models/word2vec_inner.pyx":305 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -3247,7 +3265,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":306 + /* "trunk/gensim/models/word2vec_inner.pyx":307 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3255,17 +3273,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = 
__Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -3279,7 +3297,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -3293,7 +3311,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; 
- __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -3301,9 +3319,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -3311,16 +3329,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); 
__pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -3329,7 +3347,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -3340,17 +3358,17 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":307 + /* "trunk/gensim/models/word2vec_inner.pyx":308 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 308; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":306 + /* "trunk/gensim/models/word2vec_inner.pyx":307 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -3360,7 +3378,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":310 + /* "trunk/gensim/models/word2vec_inner.pyx":311 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3374,7 +3392,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":311 + /* "trunk/gensim/models/word2vec_inner.pyx":312 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -3385,7 +3403,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { 
__pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":312 + /* "trunk/gensim/models/word2vec_inner.pyx":313 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -3395,7 +3413,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":313 + /* "trunk/gensim/models/word2vec_inner.pyx":314 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -3405,7 +3423,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":314 + /* "trunk/gensim/models/word2vec_inner.pyx":315 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -3414,7 +3432,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":315 + /* "trunk/gensim/models/word2vec_inner.pyx":316 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -3424,7 +3442,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_j < 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":316 + /* "trunk/gensim/models/word2vec_inner.pyx":317 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -3436,7 +3454,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":317 + /* "trunk/gensim/models/word2vec_inner.pyx":318 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -3445,7 +3463,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence */ 
__pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":318 + /* "trunk/gensim/models/word2vec_inner.pyx":319 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -3455,7 +3473,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":319 + /* "trunk/gensim/models/word2vec_inner.pyx":320 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< @@ -3467,7 +3485,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":320 + /* "trunk/gensim/models/word2vec_inner.pyx":321 * if k > sentence_len: * k = sentence_len * for j in range(j, k): # <<<<<<<<<<<<<< @@ -3478,7 +3496,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence for (__pyx_t_19 = __pyx_v_j; __pyx_t_19 < __pyx_t_18; __pyx_t_19+=1) { __pyx_v_j = __pyx_t_19; - /* "trunk/gensim/models/word2vec_inner.pyx":321 + /* "trunk/gensim/models/word2vec_inner.pyx":322 * k = sentence_len * for j in range(j, k): * if j == i or codelens[j] == 0: # <<<<<<<<<<<<<< @@ -3496,56 +3514,56 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence __pyx_L22_bool_binop_done:; if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":322 + /* "trunk/gensim/models/word2vec_inner.pyx":323 * for j in range(j, k): * if j == i or codelens[j] == 0: * continue # <<<<<<<<<<<<<< * if hs: - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) + * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) */ goto __pyx_L19_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":323 + /* 
"trunk/gensim/models/word2vec_inner.pyx":324 * if j == i or codelens[j] == 0: * continue * if hs: # <<<<<<<<<<<<<< - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) + * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: */ __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":324 + /* "trunk/gensim/models/word2vec_inner.pyx":325 * continue * if hs: - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) # <<<<<<<<<<<<<< + * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) # <<<<<<<<<<<<<< * if negative: - * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) + * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) */ - __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work); + __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), (__pyx_v_codelens[__pyx_v_i]), __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_word_locks); goto __pyx_L24; } __pyx_L24:; - /* "trunk/gensim/models/word2vec_inner.pyx":325 + /* "trunk/gensim/models/word2vec_inner.pyx":326 * if hs: - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) + * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: # 
<<<<<<<<<<<<<< - * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) + * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) * */ __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":326 - * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) + /* "trunk/gensim/models/word2vec_inner.pyx":327 + * fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) * if negative: - * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) # <<<<<<<<<<<<<< + * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) # <<<<<<<<<<<<<< * * return result */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_sg_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, (__pyx_v_indexes[__pyx_v_i]), (__pyx_v_indexes[__pyx_v_j]), __pyx_v__alpha, __pyx_v_work, __pyx_v_next_random, __pyx_v_word_locks); goto __pyx_L25; } __pyx_L25:; @@ -3555,7 +3573,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":310 + /* "trunk/gensim/models/word2vec_inner.pyx":311 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -3573,15 +3591,15 @@ 
static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence } } - /* "trunk/gensim/models/word2vec_inner.pyx":328 - * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) + /* "trunk/gensim/models/word2vec_inner.pyx":329 + * next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 328; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 329; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; @@ -3613,7 +3631,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_train_sentence return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":331 +/* "trunk/gensim/models/word2vec_inner.pyx":332 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -3659,26 +3677,26 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc case 1: if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_sentence)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 2: if (likely((values[2] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_alpha)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; 
__pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 2); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 3: if (likely((values[3] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_work)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 3); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } case 4: if (likely((values[4] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_neu1)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, 4); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L3_error;} + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "train_sentence_cbow") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} } } else if (PyTuple_GET_SIZE(__pyx_args) != 5) { goto __pyx_L5_argtuple_error; @@ -3697,7 +3715,7 @@ static PyObject *__pyx_pw_5trunk_6gensim_6models_14word2vec_inner_3train_sentenc } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto 
__pyx_L3_error;} + __Pyx_RaiseArgtupleInvalid("train_sentence_cbow", 1, 5, 5, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L3_error;} __pyx_L3_error:; __Pyx_AddTraceback("trunk.gensim.models.word2vec_inner.train_sentence_cbow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -3715,6 +3733,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc int __pyx_v_negative; int __pyx_v_cbow_mean; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_syn0; + __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_word_locks; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_work; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *__pyx_v_neu1; __pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t __pyx_v__alpha; @@ -3761,95 +3780,108 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc int __pyx_clineno = 0; __Pyx_RefNannySetupContext("train_sentence_cbow", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":332 + /* "trunk/gensim/models/word2vec_inner.pyx":333 * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs # <<<<<<<<<<<<<< * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_hs); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = 
__Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_hs = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":333 + /* "trunk/gensim/models/word2vec_inner.pyx":334 * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): * cdef int hs = model.hs * cdef int negative = model.negative # <<<<<<<<<<<<<< * cdef int cbow_mean = model.cbow_mean * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_negative); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 333; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_negative = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":334 + /* "trunk/gensim/models/word2vec_inner.pyx":335 * cdef int hs = model.hs * cdef int negative = model.negative * cdef int cbow_mean = model.cbow_mean # <<<<<<<<<<<<<< * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_cbow_mean); if 
(unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 334; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 335; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_cbow_mean = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":336 + /* "trunk/gensim/models/word2vec_inner.pyx":337 * cdef int cbow_mean = model.cbow_mean * * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) # <<<<<<<<<<<<<< + * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) * cdef REAL_t *work - * cdef REAL_t *neu1 */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 336; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 337; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn0 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":339 + /* 
"trunk/gensim/models/word2vec_inner.pyx":338 + * + * cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + * cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) # <<<<<<<<<<<<<< + * cdef REAL_t *work + * cdef REAL_t *neu1 + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn0_lockf); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __Pyx_GOTREF(__pyx_t_1); + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 338; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_v_word_locks = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "trunk/gensim/models/word2vec_inner.pyx":341 * cdef REAL_t *work * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha # <<<<<<<<<<<<<< * cdef int size = model.layer1_size * */ - __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 339; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_3 = __pyx_PyFloat_AsFloat(__pyx_v_alpha); if (unlikely((__pyx_t_3 == (npy_float32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 341; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v__alpha = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":340 + /* "trunk/gensim/models/word2vec_inner.pyx":342 * cdef REAL_t *neu1 * cdef REAL_t _alpha = alpha * cdef int size = model.layer1_size # <<<<<<<<<<<<<< * * cdef int codelens[MAX_SENTENCE_LEN] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, 
__pyx_n_s_layer1_size); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 340; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 342; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_size = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":346 + /* "trunk/gensim/models/word2vec_inner.pyx":348 * cdef np.uint32_t reduced_windows[MAX_SENTENCE_LEN] * cdef int sentence_len * cdef int window = model.window # <<<<<<<<<<<<<< * * cdef int i, j, k */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 346; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_PyInt_As_int(__pyx_t_1); if (unlikely((__pyx_t_2 == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 348; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_window = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":349 + /* "trunk/gensim/models/word2vec_inner.pyx":351 * * cdef int i, j, k * cdef long result = 0 # <<<<<<<<<<<<<< @@ -3858,7 +3890,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_result = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":362 + /* "trunk/gensim/models/word2vec_inner.pyx":364 * cdef unsigned long long next_random * * if hs: # <<<<<<<<<<<<<< @@ -3868,23 +3900,23 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_hs != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":363 + /* "trunk/gensim/models/word2vec_inner.pyx":365 * * if hs: * syn1 = (np.PyArray_DATA(model.syn1)) # <<<<<<<<<<<<<< * * if negative: */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 363; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 365; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; goto __pyx_L3; } __pyx_L3:; - /* "trunk/gensim/models/word2vec_inner.pyx":365 + /* "trunk/gensim/models/word2vec_inner.pyx":367 * syn1 = (np.PyArray_DATA(model.syn1)) * * if negative: # <<<<<<<<<<<<<< @@ -3894,116 +3926,116 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_4 = (__pyx_v_negative != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":366 + /* 
"trunk/gensim/models/word2vec_inner.pyx":368 * * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) # <<<<<<<<<<<<<< * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_syn1neg); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 366; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_syn1neg = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":367 + /* "trunk/gensim/models/word2vec_inner.pyx":369 * if negative: * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) # <<<<<<<<<<<<<< * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (!(likely(((__pyx_t_1) == Py_None) || 
likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 367; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_table = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_1))); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":368 + /* "trunk/gensim/models/word2vec_inner.pyx":370 * syn1neg = (np.PyArray_DATA(model.syn1neg)) * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) # <<<<<<<<<<<<<< * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_model, __pyx_n_s_table); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 368; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_t_1); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 370; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_table_len = __pyx_t_5; - /* "trunk/gensim/models/word2vec_inner.pyx":369 + /* "trunk/gensim/models/word2vec_inner.pyx":371 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the 
GIL */ - __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_random); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_randint); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyNumber_Multiply(__pyx_int_16777216, __pyx_t_6); 
if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_random); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_randint); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if 
(unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyNumber_Add(__pyx_t_1, __pyx_t_7); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_8 = __Pyx_PyInt_As_unsigned_PY_LONG_LONG(__pyx_t_6); if (unlikely((__pyx_t_8 == (unsigned PY_LONG_LONG)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_v_next_random = __pyx_t_8; goto __pyx_L4; } __pyx_L4:; - /* "trunk/gensim/models/word2vec_inner.pyx":372 + /* "trunk/gensim/models/word2vec_inner.pyx":374 * * # convert Python structures to primitive types, so we can release the GIL * work = np.PyArray_DATA(_work) # <<<<<<<<<<<<<< * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) */ - if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 372; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__work) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__work, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_work = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__work))); - /* "trunk/gensim/models/word2vec_inner.pyx":373 + /* "trunk/gensim/models/word2vec_inner.pyx":375 * # convert Python structures to primitive types, so we 
can release the GIL * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) # <<<<<<<<<<<<<< * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * */ - if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 373; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_v__neu1) == Py_None) || likely(__Pyx_TypeTest(__pyx_v__neu1, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 375; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_v_neu1 = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t *)PyArray_DATA(((PyArrayObject *)__pyx_v__neu1))); - /* "trunk/gensim/models/word2vec_inner.pyx":374 + /* "trunk/gensim/models/word2vec_inner.pyx":376 * work = np.PyArray_DATA(_work) * neu1 = np.PyArray_DATA(_neu1) * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) # <<<<<<<<<<<<<< * * for i in range(sentence_len): */ - __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 374; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_5 = PyObject_Length(__pyx_v_sentence); if (unlikely(__pyx_t_5 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 376; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_t_9 = 10000; if (((__pyx_t_5 < __pyx_t_9) != 0)) { __pyx_t_10 = __pyx_t_5; @@ -4012,7 +4044,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_v_sentence_len = ((int)__pyx_t_10); - /* "trunk/gensim/models/word2vec_inner.pyx":376 + /* "trunk/gensim/models/word2vec_inner.pyx":378 * sentence_len = min(MAX_SENTENCE_LEN, len(sentence)) * * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4023,19 +4055,19 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* 
"trunk/gensim/models/word2vec_inner.pyx":377 + /* "trunk/gensim/models/word2vec_inner.pyx":379 * * for i in range(sentence_len): * word = sentence[i] # <<<<<<<<<<<<<< * if word is None: * codelens[i] = 0 */ - __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 377; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; + __pyx_t_6 = __Pyx_GetItemInt(__pyx_v_sentence, __pyx_v_i, int, 1, __Pyx_PyInt_From_int, 0, 0, 0); if (unlikely(__pyx_t_6 == NULL)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 379; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_word, __pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":378 + /* "trunk/gensim/models/word2vec_inner.pyx":380 * for i in range(sentence_len): * word = sentence[i] * if word is None: # <<<<<<<<<<<<<< @@ -4046,7 +4078,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_t_4 != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":379 + /* "trunk/gensim/models/word2vec_inner.pyx":381 * word = sentence[i] * if word is None: * codelens[i] = 0 # <<<<<<<<<<<<<< @@ -4058,20 +4090,20 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":381 + /* "trunk/gensim/models/word2vec_inner.pyx":383 * codelens[i] = 0 * else: * indexes[i] = word.index # <<<<<<<<<<<<<< * if hs: * codelens[i] = len(word.code) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_index); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 381; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_t_6); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_indexes[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":382 + /* "trunk/gensim/models/word2vec_inner.pyx":384 * else: * indexes[i] = word.index * if hs: # <<<<<<<<<<<<<< @@ -4081,49 +4113,49 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":383 + /* "trunk/gensim/models/word2vec_inner.pyx":385 * indexes[i] = word.index * if hs: * codelens[i] = len(word.code) # <<<<<<<<<<<<<< * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 383; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = PyObject_Length(__pyx_t_6); if (unlikely(__pyx_t_10 == -1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; (__pyx_v_codelens[__pyx_v_i]) = ((int)__pyx_t_10); - /* 
"trunk/gensim/models/word2vec_inner.pyx":384 + /* "trunk/gensim/models/word2vec_inner.pyx":386 * if hs: * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) # <<<<<<<<<<<<<< * points[i] = np.PyArray_DATA(word.point) * else: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_code); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 386; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 384; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 386; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_codes[__pyx_v_i]) = ((__pyx_t_5numpy_uint8_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":385 + /* "trunk/gensim/models/word2vec_inner.pyx":387 * codelens[i] = len(word.code) * codes[i] = np.PyArray_DATA(word.code) * points[i] = np.PyArray_DATA(word.point) # <<<<<<<<<<<<<< * else: * codelens[i] = 1 */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_word, __pyx_n_s_point); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); - if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) 
{__pyx_filename = __pyx_f[0]; __pyx_lineno = 385; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (!(likely(((__pyx_t_6) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_6, __pyx_ptype_5numpy_ndarray))))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 387; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_points[__pyx_v_i]) = ((__pyx_t_5numpy_uint32_t *)PyArray_DATA(((PyArrayObject *)__pyx_t_6))); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; goto __pyx_L8; } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":387 + /* "trunk/gensim/models/word2vec_inner.pyx":389 * points[i] = np.PyArray_DATA(word.point) * else: * codelens[i] = 1 # <<<<<<<<<<<<<< @@ -4134,7 +4166,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L8:; - /* "trunk/gensim/models/word2vec_inner.pyx":388 + /* "trunk/gensim/models/word2vec_inner.pyx":390 * else: * codelens[i] = 1 * result += 1 # <<<<<<<<<<<<<< @@ -4146,7 +4178,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_L7:; } - /* "trunk/gensim/models/word2vec_inner.pyx":390 + /* "trunk/gensim/models/word2vec_inner.pyx":392 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -4154,17 +4186,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc * */ __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto 
__pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_t_7, __pyx_n_s_random); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_randint); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_window); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_14 = __Pyx_PyInt_From_int(__pyx_v_sentence_len); if (unlikely(!__pyx_t_14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_14); __pyx_t_15 = NULL; __pyx_t_10 = 0; @@ -4178,7 +4210,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_10 = 1; } } - __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_16 = PyTuple_New(3+__pyx_t_10); if (unlikely(!__pyx_t_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_16); if (__pyx_t_15) { PyTuple_SET_ITEM(__pyx_t_16, 0, __pyx_t_15); __Pyx_GIVEREF(__pyx_t_15); __pyx_t_15 = NULL; @@ -4192,7 +4224,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __Pyx_GIVEREF(__pyx_t_14); __pyx_t_1 = 0; __pyx_t_14 = 0; - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_7, __pyx_t_16, NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_16); __pyx_t_16 = 0; __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; @@ -4200,9 +4232,9 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_7 = __pyx_t_6; __Pyx_INCREF(__pyx_t_7); __pyx_t_10 = 0; __pyx_t_17 = NULL; } else { - __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_10 = -1; __pyx_t_7 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); - __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_17 = Py_TYPE(__pyx_t_7)->tp_iternext; if (unlikely(!__pyx_t_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { @@ -4210,16 +4242,16 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc if (likely(PyList_CheckExact(__pyx_t_7))) { if (__pyx_t_10 >= PyList_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON 
- __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } else { if (__pyx_t_10 >= PyTuple_GET_SIZE(__pyx_t_7)) break; #if CYTHON_COMPILING_IN_CPYTHON - __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_7, __pyx_t_10); __Pyx_INCREF(__pyx_t_6); __pyx_t_10++; if (unlikely(0 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_6 = PySequence_ITEM(__pyx_t_7, __pyx_t_10); __pyx_t_10++; if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} #endif } } else { @@ -4228,7 +4260,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(exc_type == PyExc_StopIteration || PyErr_GivenExceptionMatches(exc_type, 
PyExc_StopIteration))) PyErr_Clear(); - else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 390; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + else {__pyx_filename = __pyx_f[0]; __pyx_lineno = 392; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } break; } @@ -4239,17 +4271,17 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_v_i = __pyx_t_2; __pyx_t_2 = (__pyx_t_2 + 1); - /* "trunk/gensim/models/word2vec_inner.pyx":391 + /* "trunk/gensim/models/word2vec_inner.pyx":393 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): * reduced_windows[i] = item # <<<<<<<<<<<<<< * * # release GIL & train on the sentence */ - __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 391; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_13 = __Pyx_PyInt_As_npy_uint32(__pyx_v_item); if (unlikely((__pyx_t_13 == (npy_uint32)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 393; __pyx_clineno = __LINE__; goto __pyx_L1_error;} (__pyx_v_reduced_windows[__pyx_v_i]) = __pyx_t_13; - /* "trunk/gensim/models/word2vec_inner.pyx":390 + /* "trunk/gensim/models/word2vec_inner.pyx":392 * result += 1 * # single randint() call avoids a big thread-sync slowdown * for i, item in enumerate(np.random.randint(0, window, sentence_len)): # <<<<<<<<<<<<<< @@ -4259,7 +4291,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":394 + /* "trunk/gensim/models/word2vec_inner.pyx":396 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4273,7 +4305,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc #endif /*try:*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":395 + /* 
"trunk/gensim/models/word2vec_inner.pyx":397 * # release GIL & train on the sentence * with nogil: * for i in range(sentence_len): # <<<<<<<<<<<<<< @@ -4284,7 +4316,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc for (__pyx_t_11 = 0; __pyx_t_11 < __pyx_t_2; __pyx_t_11+=1) { __pyx_v_i = __pyx_t_11; - /* "trunk/gensim/models/word2vec_inner.pyx":396 + /* "trunk/gensim/models/word2vec_inner.pyx":398 * with nogil: * for i in range(sentence_len): * if codelens[i] == 0: # <<<<<<<<<<<<<< @@ -4294,7 +4326,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = (((__pyx_v_codelens[__pyx_v_i]) == 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":397 + /* "trunk/gensim/models/word2vec_inner.pyx":399 * for i in range(sentence_len): * if codelens[i] == 0: * continue # <<<<<<<<<<<<<< @@ -4304,7 +4336,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc goto __pyx_L14_continue; } - /* "trunk/gensim/models/word2vec_inner.pyx":398 + /* "trunk/gensim/models/word2vec_inner.pyx":400 * if codelens[i] == 0: * continue * j = i - window + reduced_windows[i] # <<<<<<<<<<<<<< @@ -4313,7 +4345,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_j = ((__pyx_v_i - __pyx_v_window) + (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":399 + /* "trunk/gensim/models/word2vec_inner.pyx":401 * continue * j = i - window + reduced_windows[i] * if j < 0: # <<<<<<<<<<<<<< @@ -4323,7 +4355,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_j < 0) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":400 + /* "trunk/gensim/models/word2vec_inner.pyx":402 * j = i - window + reduced_windows[i] * if j < 0: * j = 0 # <<<<<<<<<<<<<< @@ -4335,7 +4367,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } __pyx_L17:; - /* "trunk/gensim/models/word2vec_inner.pyx":401 + /* "trunk/gensim/models/word2vec_inner.pyx":403 * if j < 0: * j = 0 * k = i + window + 1 - reduced_windows[i] # <<<<<<<<<<<<<< @@ -4344,7 +4376,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc */ __pyx_v_k = (((__pyx_v_i + __pyx_v_window) + 1) - (__pyx_v_reduced_windows[__pyx_v_i])); - /* "trunk/gensim/models/word2vec_inner.pyx":402 + /* "trunk/gensim/models/word2vec_inner.pyx":404 * j = 0 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: # <<<<<<<<<<<<<< @@ -4354,58 +4386,58 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc __pyx_t_12 = ((__pyx_v_k > __pyx_v_sentence_len) != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":403 + /* "trunk/gensim/models/word2vec_inner.pyx":405 * k = i + window + 1 - reduced_windows[i] * if k > sentence_len: * k = sentence_len # <<<<<<<<<<<<<< * if hs: - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) */ __pyx_v_k = __pyx_v_sentence_len; goto __pyx_L18; } __pyx_L18:; - /* "trunk/gensim/models/word2vec_inner.pyx":404 + /* "trunk/gensim/models/word2vec_inner.pyx":406 * if k > sentence_len: * k = sentence_len * if hs: # <<<<<<<<<<<<<< - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: */ __pyx_t_12 = (__pyx_v_hs != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":405 + /* "trunk/gensim/models/word2vec_inner.pyx":407 * k = sentence_len * if hs: - * 
fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) # <<<<<<<<<<<<<< + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) # <<<<<<<<<<<<<< * if negative: - * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) + * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) */ - __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean); + __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_hs((__pyx_v_points[__pyx_v_i]), (__pyx_v_codes[__pyx_v_i]), __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_word_locks); goto __pyx_L19; } __pyx_L19:; - /* "trunk/gensim/models/word2vec_inner.pyx":406 + /* "trunk/gensim/models/word2vec_inner.pyx":408 * if hs: - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: # <<<<<<<<<<<<<< - * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) + * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, 
k, cbow_mean, next_random, word_locks) * */ __pyx_t_12 = (__pyx_v_negative != 0); if (__pyx_t_12) { - /* "trunk/gensim/models/word2vec_inner.pyx":407 - * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) + /* "trunk/gensim/models/word2vec_inner.pyx":409 + * fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) * if negative: - * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) # <<<<<<<<<<<<<< + * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) # <<<<<<<<<<<<<< * * return result */ - __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random); + __pyx_v_next_random = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_fast_sentence_cbow_neg(__pyx_v_negative, __pyx_v_table, __pyx_v_table_len, __pyx_v_codelens, __pyx_v_neu1, __pyx_v_syn0, __pyx_v_syn1neg, __pyx_v_size, __pyx_v_indexes, __pyx_v__alpha, __pyx_v_work, __pyx_v_i, __pyx_v_j, __pyx_v_k, __pyx_v_cbow_mean, __pyx_v_next_random, __pyx_v_word_locks); goto __pyx_L20; } __pyx_L20:; @@ -4413,7 +4445,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* "trunk/gensim/models/word2vec_inner.pyx":394 + /* "trunk/gensim/models/word2vec_inner.pyx":396 * * # release GIL & train on the sentence * with nogil: # <<<<<<<<<<<<<< @@ -4431,21 +4463,21 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc } } - /* 
"trunk/gensim/models/word2vec_inner.pyx":409 - * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) + /* "trunk/gensim/models/word2vec_inner.pyx":411 + * next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) * * return result # <<<<<<<<<<<<<< * * */ __Pyx_XDECREF(__pyx_r); - __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 409; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_7 = __Pyx_PyInt_From_long(__pyx_v_result); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 411; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_7); __pyx_r = __pyx_t_7; __pyx_t_7 = 0; goto __pyx_L0; - /* "trunk/gensim/models/word2vec_inner.pyx":331 + /* "trunk/gensim/models/word2vec_inner.pyx":332 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< @@ -4471,7 +4503,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_2train_sentenc return __pyx_r; } -/* "trunk/gensim/models/word2vec_inner.pyx":412 +/* "trunk/gensim/models/word2vec_inner.pyx":414 * * * def init(): # <<<<<<<<<<<<<< @@ -4510,7 +4542,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U int __pyx_t_4; __Pyx_RefNannySetupContext("init", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":422 + /* "trunk/gensim/models/word2vec_inner.pyx":424 * * cdef int i * cdef float *x = [10.0] # <<<<<<<<<<<<<< @@ -4520,7 +4552,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_1[0] = ((float)10.0); __pyx_v_x = __pyx_t_1; - /* "trunk/gensim/models/word2vec_inner.pyx":423 + /* "trunk/gensim/models/word2vec_inner.pyx":425 * cdef int i * cdef float *x = [10.0] * cdef float *y = [0.01] 
# <<<<<<<<<<<<<< @@ -4530,7 +4562,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_2[0] = ((float)0.01); __pyx_v_y = __pyx_t_2; - /* "trunk/gensim/models/word2vec_inner.pyx":424 + /* "trunk/gensim/models/word2vec_inner.pyx":426 * cdef float *x = [10.0] * cdef float *y = [0.01] * cdef float expected = 0.1 # <<<<<<<<<<<<<< @@ -4539,7 +4571,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_expected = ((float)0.1); - /* "trunk/gensim/models/word2vec_inner.pyx":425 + /* "trunk/gensim/models/word2vec_inner.pyx":427 * cdef float *y = [0.01] * cdef float expected = 0.1 * cdef int size = 1 # <<<<<<<<<<<<<< @@ -4548,7 +4580,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_size = 1; - /* "trunk/gensim/models/word2vec_inner.pyx":430 + /* "trunk/gensim/models/word2vec_inner.pyx":432 * * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): # <<<<<<<<<<<<<< @@ -4558,7 +4590,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U for (__pyx_t_3 = 0; __pyx_t_3 < 1000; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3; - /* "trunk/gensim/models/word2vec_inner.pyx":431 + /* "trunk/gensim/models/word2vec_inner.pyx":433 * # build the sigmoid table * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) # <<<<<<<<<<<<<< @@ -4567,7 +4599,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)exp(((((__pyx_v_i / ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)1000)) * 2.0) - 1.0) * 6.0))); - /* "trunk/gensim/models/word2vec_inner.pyx":432 + /* "trunk/gensim/models/word2vec_inner.pyx":434 * for i in range(EXP_TABLE_SIZE): * EXP_TABLE[i] = exp((i / EXP_TABLE_SIZE * 2 - 1) * MAX_EXP) * EXP_TABLE[i] = (EXP_TABLE[i] / 
(EXP_TABLE[i] + 1)) # <<<<<<<<<<<<<< @@ -4577,7 +4609,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U (__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) = ((__pyx_t_5trunk_6gensim_6models_14word2vec_inner_REAL_t)((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) / ((__pyx_v_5trunk_6gensim_6models_14word2vec_inner_EXP_TABLE[__pyx_v_i]) + 1.0))); } - /* "trunk/gensim/models/word2vec_inner.pyx":435 + /* "trunk/gensim/models/word2vec_inner.pyx":437 * * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) # <<<<<<<<<<<<<< @@ -4586,7 +4618,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_d_res = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_dsdot((&__pyx_v_size), __pyx_v_x, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE), __pyx_v_y, (&__pyx_v_5trunk_6gensim_6models_14word2vec_inner_ONE)); - /* "trunk/gensim/models/word2vec_inner.pyx":436 + /* "trunk/gensim/models/word2vec_inner.pyx":438 * # check whether sdot returns double or float * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res # <<<<<<<<<<<<<< @@ -4595,7 +4627,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_p_res = ((float *)(&__pyx_v_d_res)); - /* "trunk/gensim/models/word2vec_inner.pyx":437 + /* "trunk/gensim/models/word2vec_inner.pyx":439 * d_res = dsdot(&size, x, &ONE, y, &ONE) * p_res = &d_res * if (abs(d_res - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4605,7 +4637,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_4 = ((fabs((__pyx_v_d_res - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":438 + /* "trunk/gensim/models/word2vec_inner.pyx":440 * p_res = &d_res * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double # <<<<<<<<<<<<<< @@ -4614,7 +4646,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_double; - /* "trunk/gensim/models/word2vec_inner.pyx":439 + /* "trunk/gensim/models/word2vec_inner.pyx":441 * if (abs(d_res - expected) < 0.0001): * our_dot = our_dot_double * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4623,7 +4655,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":440 + /* "trunk/gensim/models/word2vec_inner.pyx":442 * our_dot = our_dot_double * our_saxpy = saxpy * return 0 # double # <<<<<<<<<<<<<< @@ -4636,7 +4668,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":441 + /* "trunk/gensim/models/word2vec_inner.pyx":443 * our_saxpy = saxpy * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): # <<<<<<<<<<<<<< @@ -4646,7 +4678,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U __pyx_t_4 = ((fabsf(((__pyx_v_p_res[0]) - __pyx_v_expected)) < 0.0001) != 0); if (__pyx_t_4) { - /* "trunk/gensim/models/word2vec_inner.pyx":442 + /* "trunk/gensim/models/word2vec_inner.pyx":444 * return 0 # double * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float # <<<<<<<<<<<<<< @@ -4655,7 +4687,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_float; - /* "trunk/gensim/models/word2vec_inner.pyx":443 + /* "trunk/gensim/models/word2vec_inner.pyx":445 * elif (abs(p_res[0] - expected) < 0.0001): * our_dot = our_dot_float * our_saxpy = saxpy # <<<<<<<<<<<<<< @@ -4664,7 +4696,7 @@ static PyObject 
*__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_v_5trunk_6gensim_6models_14word2vec_inner_saxpy; - /* "trunk/gensim/models/word2vec_inner.pyx":444 + /* "trunk/gensim/models/word2vec_inner.pyx":446 * our_dot = our_dot_float * our_saxpy = saxpy * return 1 # float # <<<<<<<<<<<<<< @@ -4678,7 +4710,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U } /*else*/ { - /* "trunk/gensim/models/word2vec_inner.pyx":448 + /* "trunk/gensim/models/word2vec_inner.pyx":450 * # neither => use cython loops, no BLAS * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas # <<<<<<<<<<<<<< @@ -4687,7 +4719,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_dot = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_dot_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":449 + /* "trunk/gensim/models/word2vec_inner.pyx":451 * # actually, the BLAS is so messed up we'll probably have segfaulted above and never even reach here * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas # <<<<<<<<<<<<<< @@ -4696,7 +4728,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U */ __pyx_v_5trunk_6gensim_6models_14word2vec_inner_our_saxpy = __pyx_f_5trunk_6gensim_6models_14word2vec_inner_our_saxpy_noblas; - /* "trunk/gensim/models/word2vec_inner.pyx":450 + /* "trunk/gensim/models/word2vec_inner.pyx":452 * our_dot = our_dot_noblas * our_saxpy = our_saxpy_noblas * return 2 # <<<<<<<<<<<<<< @@ -4709,7 +4741,7 @@ static PyObject *__pyx_pf_5trunk_6gensim_6models_14word2vec_inner_4init(CYTHON_U goto __pyx_L0; } - /* "trunk/gensim/models/word2vec_inner.pyx":412 + /* "trunk/gensim/models/word2vec_inner.pyx":414 * * * def init(): # <<<<<<<<<<<<<< @@ -6807,6 +6839,7 @@ static __Pyx_StringTabEntry 
__pyx_string_tab[] = { {&__pyx_n_s_snrm2, __pyx_k_snrm2, sizeof(__pyx_k_snrm2), 0, 0, 1, 1}, {&__pyx_n_s_sscal, __pyx_k_sscal, sizeof(__pyx_k_sscal), 0, 0, 1, 1}, {&__pyx_n_s_syn0, __pyx_k_syn0, sizeof(__pyx_k_syn0), 0, 0, 1, 1}, + {&__pyx_n_s_syn0_lockf, __pyx_k_syn0_lockf, sizeof(__pyx_k_syn0_lockf), 0, 0, 1, 1}, {&__pyx_n_s_syn1, __pyx_k_syn1, sizeof(__pyx_k_syn1), 0, 0, 1, 1}, {&__pyx_n_s_syn1neg, __pyx_k_syn1neg, sizeof(__pyx_k_syn1neg), 0, 0, 1, 1}, {&__pyx_n_s_table, __pyx_k_table, sizeof(__pyx_k_table), 0, 0, 1, 1}, @@ -6818,6 +6851,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, {&__pyx_n_s_window, __pyx_k_window, sizeof(__pyx_k_window), 0, 0, 1, 1}, {&__pyx_n_s_word, __pyx_k_word, sizeof(__pyx_k_word), 0, 0, 1, 1}, + {&__pyx_n_s_word_locks, __pyx_k_word_locks, sizeof(__pyx_k_word_locks), 0, 0, 1, 1}, {&__pyx_n_s_work, __pyx_k_work, sizeof(__pyx_k_work), 0, 0, 1, 1}, {&__pyx_n_s_work_2, __pyx_k_work_2, sizeof(__pyx_k_work_2), 0, 0, 1, 1}, {&__pyx_n_s_x, __pyx_k_x, sizeof(__pyx_k_x), 0, 0, 1, 1}, @@ -6826,7 +6860,7 @@ static __Pyx_StringTabEntry __pyx_string_tab[] = { }; static int __Pyx_InitCachedBuiltins(void) { __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 91; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 306; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_builtin_enumerate = __Pyx_GetBuiltinName(__pyx_n_s_enumerate); if (!__pyx_builtin_enumerate) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 307; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = 
__pyx_f[1]; __pyx_lineno = 218; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) {__pyx_filename = __pyx_f[1]; __pyx_lineno = 802; __pyx_clineno = __LINE__; goto __pyx_L1_error;} return 0; @@ -6838,31 +6872,31 @@ static int __Pyx_InitCachedConstants(void) { __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); - /* "trunk/gensim/models/word2vec_inner.pyx":286 + /* "trunk/gensim/models/word2vec_inner.pyx":287 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert Python structures to primitive types, so we can release the GIL */ - __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple_ = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple_)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple_); __Pyx_GIVEREF(__pyx_tuple_); - __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 286; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__2 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 287; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); - /* "trunk/gensim/models/word2vec_inner.pyx":369 + /* "trunk/gensim/models/word2vec_inner.pyx":371 * table = (np.PyArray_DATA(model.table)) * table_len = len(model.table) * next_random = (2**24) * np.random.randint(0, 2**24) + np.random.randint(0, 2**24) # <<<<<<<<<<<<<< * * # convert 
Python structures to primitive types, so we can release the GIL */ - __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__3 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__3); __Pyx_GIVEREF(__pyx_tuple__3); - __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 369; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__4 = PyTuple_Pack(2, __pyx_int_0, __pyx_int_16777216); if (unlikely(!__pyx_tuple__4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 371; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__4); __Pyx_GIVEREF(__pyx_tuple__4); @@ -6939,34 +6973,34 @@ static int __Pyx_InitCachedConstants(void) { * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__11 = PyTuple_Pack(28, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__11 = PyTuple_Pack(29, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_alpha_2, 
__pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__11)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__11); __Pyx_GIVEREF(__pyx_tuple__11); - __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 28, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(4, 0, 29, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_sg, 250, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* "trunk/gensim/models/word2vec_inner.pyx":331 + /* "trunk/gensim/models/word2vec_inner.pyx":332 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_tuple__13 = PyTuple_Pack(31, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, 
__pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__13 = PyTuple_Pack(32, __pyx_n_s_model, __pyx_n_s_sentence, __pyx_n_s_alpha, __pyx_n_s_work, __pyx_n_s_neu1, __pyx_n_s_hs, __pyx_n_s_negative, __pyx_n_s_cbow_mean, __pyx_n_s_syn0, __pyx_n_s_word_locks, __pyx_n_s_work_2, __pyx_n_s_neu1_2, __pyx_n_s_alpha_2, __pyx_n_s_size, __pyx_n_s_codelens, __pyx_n_s_indexes, __pyx_n_s_reduced_windows, __pyx_n_s_sentence_len, __pyx_n_s_window, __pyx_n_s_i, __pyx_n_s_j, __pyx_n_s_k, __pyx_n_s_result, __pyx_n_s_syn1, __pyx_n_s_points, __pyx_n_s_codes, __pyx_n_s_syn1neg, __pyx_n_s_table, __pyx_n_s_table_len, __pyx_n_s_next_random, __pyx_n_s_word, __pyx_n_s_item); if (unlikely(!__pyx_tuple__13)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__13); __Pyx_GIVEREF(__pyx_tuple__13); - __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 31, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_cbow, 331, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__14 = (PyObject*)__Pyx_PyCode_New(5, 0, 32, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__13, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_train_sentence_cbow, 332, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__14)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} - /* 
"trunk/gensim/models/word2vec_inner.pyx":412 + /* "trunk/gensim/models/word2vec_inner.pyx":414 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_tuple__15 = PyTuple_Pack(7, __pyx_n_s_i, __pyx_n_s_x, __pyx_n_s_y, __pyx_n_s_expected, __pyx_n_s_size, __pyx_n_s_d_res, __pyx_n_s_p_res); if (unlikely(!__pyx_tuple__15)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_tuple__15); __Pyx_GIVEREF(__pyx_tuple__15); - __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 412, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_codeobj__16 = (PyObject*)__Pyx_PyCode_New(0, 0, 7, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__15, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_Users_scratch_Documents_dev2015, __pyx_n_s_init, 414, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_RefNannyFinishContext(); return 0; __pyx_L1_error:; @@ -7270,36 +7304,36 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_sg, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 250; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* 
"trunk/gensim/models/word2vec_inner.pyx":331 + /* "trunk/gensim/models/word2vec_inner.pyx":332 * * * def train_sentence_cbow(model, sentence, alpha, _work, _neu1): # <<<<<<<<<<<<<< * cdef int hs = model.hs * cdef int negative = model.negative */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_3train_sentence_cbow, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 331; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_train_sentence_cbow, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 332; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":412 + /* "trunk/gensim/models/word2vec_inner.pyx":414 * * * def init(): # <<<<<<<<<<<<<< * """ * Precompute function `sigmoid(x) = 1 / (1 + exp(-x))`, for x values discretized */ - __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5trunk_6gensim_6models_14word2vec_inner_5init, NULL, __pyx_n_s_trunk_gensim_models_word2vec_inn); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 
__Pyx_GOTREF(__pyx_t_1); - if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 412; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_init, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 414; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "trunk/gensim/models/word2vec_inner.pyx":452 + /* "trunk/gensim/models/word2vec_inner.pyx":454 * return 2 * * FAST_VERSION = init() # initialize the module # <<<<<<<<<<<<<< */ - __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_2 = __Pyx_GetModuleGlobalName(__pyx_n_s_init); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_COMPILING_IN_CPYTHON && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -7312,14 +7346,14 @@ PyMODINIT_FUNC PyInit_word2vec_inner(void) } } if (__pyx_t_3) { - __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else { - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_t_2); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} } __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if 
(PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 452; __pyx_clineno = __LINE__; goto __pyx_L1_error;} + if (PyDict_SetItem(__pyx_d, __pyx_n_s_FAST_VERSION, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 454; __pyx_clineno = __LINE__; goto __pyx_L1_error;} __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; /* "trunk/gensim/models/word2vec_inner.pyx":1 diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index fe845bf571..107a66f744 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -81,7 +81,7 @@ cdef void our_saxpy_noblas(const int *N, const float *alpha, const float *X, con cdef void fast_sentence_sg_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, const int codelen, REAL_t *syn0, REAL_t *syn1, const int size, - const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work) nogil: + const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, REAL_t *word_locks) nogil: cdef long long a, b cdef long long row1 = word2_index * size, row2 @@ -97,14 +97,14 @@ cdef void fast_sentence_sg_hs( g = (1 - word_code[b] - f) * alpha our_saxpy(&size, &g, &syn1[row2], &ONE, work, &ONE) our_saxpy(&size, &g, &syn0[row1], &ONE, &syn1[row2], &ONE) - our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) cdef unsigned long long fast_sentence_sg_neg( const int negative, np.uint32_t *table, unsigned long long table_len, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t word_index, const np.uint32_t word2_index, const REAL_t alpha, REAL_t *work, - unsigned long long next_random) nogil: + unsigned long long next_random, REAL_t *word_locks) nogil: cdef long long a cdef long long row1 = word2_index * size, row2 @@ -135,7 +135,7 @@ cdef unsigned long long fast_sentence_sg_neg( our_saxpy(&size, &g, &syn1neg[row2], &ONE, work, &ONE) our_saxpy(&size, &g, 
&syn0[row1], &ONE, &syn1neg[row2], &ONE) - our_saxpy(&size, &ONEF, work, &ONE, &syn0[row1], &ONE) + our_saxpy(&size, &word_locks[word2_index], work, &ONE, &syn0[row1], &ONE) return next_random @@ -144,7 +144,7 @@ cdef void fast_sentence_cbow_hs( const np.uint32_t *word_point, const np.uint8_t *word_code, int codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean) nogil: + int i, int j, int k, int cbow_mean, REAL_t *word_locks) nogil: cdef long long a, b cdef long long row2 @@ -182,14 +182,14 @@ cdef void fast_sentence_cbow_hs( if m == i or codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m] * size], &ONE) + our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m] * size], &ONE) cdef unsigned long long fast_sentence_cbow_neg( const int negative, np.uint32_t *table, unsigned long long table_len, int codelens[MAX_SENTENCE_LEN], REAL_t *neu1, REAL_t *syn0, REAL_t *syn1neg, const int size, const np.uint32_t indexes[MAX_SENTENCE_LEN], const REAL_t alpha, REAL_t *work, - int i, int j, int k, int cbow_mean, unsigned long long next_random) nogil: + int i, int j, int k, int cbow_mean, unsigned long long next_random, REAL_t *word_locks) nogil: cdef long long a cdef long long row2 @@ -242,7 +242,7 @@ cdef unsigned long long fast_sentence_cbow_neg( if m == i or codelens[m] == 0: continue else: - our_saxpy(&size, &ONEF, work, &ONE, &syn0[indexes[m]*size], &ONE) + our_saxpy(&size, &word_locks[indexes[m]], work, &ONE, &syn0[indexes[m]*size], &ONE) return next_random @@ -252,6 +252,7 @@ def train_sentence_sg(model, sentence, alpha, _work): cdef int negative = model.negative cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) cdef REAL_t *work cdef REAL_t _alpha = alpha cdef int size = model.layer1_size @@ -321,9 +322,9 @@ def 
train_sentence_sg(model, sentence, alpha, _work): if j == i or codelens[j] == 0: continue if hs: - fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work) + fast_sentence_sg_hs(points[i], codes[i], codelens[i], syn0, syn1, size, indexes[j], _alpha, work, word_locks) if negative: - next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random) + next_random = fast_sentence_sg_neg(negative, table, table_len, syn0, syn1neg, size, indexes[i], indexes[j], _alpha, work, next_random, word_locks) return result @@ -334,6 +335,7 @@ def train_sentence_cbow(model, sentence, alpha, _work, _neu1): cdef int cbow_mean = model.cbow_mean cdef REAL_t *syn0 = (np.PyArray_DATA(model.syn0)) + cdef REAL_t *word_locks = (np.PyArray_DATA(model.syn0_lockf)) cdef REAL_t *work cdef REAL_t *neu1 cdef REAL_t _alpha = alpha @@ -402,9 +404,9 @@ def train_sentence_cbow(model, sentence, alpha, _work, _neu1): if k > sentence_len: k = sentence_len if hs: - fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean) + fast_sentence_cbow_hs(points[i], codes[i], codelens, neu1, syn0, syn1, size, indexes, _alpha, work, i, j, k, cbow_mean, word_locks) if negative: - next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random) + next_random = fast_sentence_cbow_neg(negative, table, table_len, codelens, neu1, syn0, syn1neg, size, indexes, _alpha, work, i, j, k, cbow_mean, next_random, word_locks) return result diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index de0c7fb36c..c955827dca 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -149,6 +149,25 @@ def testTraining(self): model2 = word2vec.Word2Vec(sentences, size=2, min_count=1) self.models_equal(model, model2) + + def 
testLocking(self): + """Test word2vec training doesn't change locked vectors.""" + corpus = LeeCorpus() + # build vocabulary, don't train yet + for sg in range(2): # test both cbow and sg + model = word2vec.Word2Vec(size=4, hs=1, negative=5, min_count=1, sg=sg, window=5) + model.build_vocab(corpus) + + # remember two vectors + locked0 = numpy.copy(model.syn0[0]) + unlocked1 = numpy.copy(model.syn0[1]) + # lock the vector in slot 0 against change + model.syn0_lockf[0] = 0.0 + + model.train(corpus) + self.assertFalse((unlocked1==model.syn0[1]).all()) # unlocked vector should vary + self.assertTrue((locked0==model.syn0[0]).all()) # locked vector should not vary + def testTrainingCbow(self): """Test CBOW word2vec training.""" # to test training, make the corpus larger by repeating its sentences over and over From 356c53a9dc7e9ed0395dbc3fa0f692faa835d4a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= Date: Sun, 28 Jun 2015 21:37:58 +0200 Subject: [PATCH 49/49] pep8 & python2 fixes to doc2vec notebook --- docs/notebooks/doc2vec-IMDB.ipynb | 517 +++++++++++++++--------------- 1 file changed, 267 insertions(+), 250 deletions(-) diff --git a/docs/notebooks/doc2vec-IMDB.ipynb b/docs/notebooks/doc2vec-IMDB.ipynb index 4f59ffb8de..0b0f721be1 100644 --- a/docs/notebooks/doc2vec-IMDB.ipynb +++ b/docs/notebooks/doc2vec-IMDB.ipynb @@ -1,7 +1,7 @@ { "metadata": { "name": "", - "signature": "sha256:fc80920786d62c0737c8530d8458e059e9c5f1a95cfefb7c34beafd875b34aa6" + "signature": "sha256:26971c428490c5b0358c2d98666355be414831a09bf6cf3c50b03d39bd186505" }, "nbformat": 3, "nbformat_minor": 0, @@ -16,6 +16,15 @@ "gensim doc2vec & IMDB sentiment dataset" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "TODO: section on introduction & motivation\n", + "\n", + "TODO: prerequisites + dependencies (statsmodels, patsy, ?)" + ] + }, { "cell_type": "heading", "level": 2, @@ -74,7 +83,15 @@ ], "language": "python", "metadata": {}, - "outputs": [], 
+ "outputs": [ + { + "output_type": "stream", + "stream": "stderr", + "text": [ + "rm: temp: No such file or directory\n" + ] + } + ], "prompt_number": 1 }, { @@ -100,15 +117,16 @@ "cell_type": "code", "collapsed": false, "input": [ + "import gensim\n", "from gensim.models.doc2vec import TaggedDocument\n", "from collections import namedtuple\n", "\n", - "SentimentDocument = namedtuple('SentimentDocument','words tags split sentiment')\n", + "SentimentDocument = namedtuple('SentimentDocument', 'words tags split sentiment')\n", "\n", "alldocs = [] # will hold all docs in original order\n", "with open('aclImdb/alldata-id.txt') as alldata:\n", " for line_no, line in enumerate(alldata):\n", - " tokens = line.split()\n", + " tokens = gensim.utils.to_unicode(line).split()\n", " words = tokens[1:]\n", " tags = [line_no] # `tags = [tokens[0]]` would also work at extra memory cost\n", " split = ['train','test','extra','extra'][line_no//25000] # 25k train, 25k test, 25k extra\n", @@ -173,11 +191,11 @@ "\n", "simple_models = [\n", " # PV-DM w/concatenation - window=5 (both sides) approximates paper's 10-word total window size\n", - " Doc2Vec(dm=1,dm_concat=1,size=100,window=5,negative=5,hs=0,min_count=2,workers=cores),\n", + " Doc2Vec(dm=1, dm_concat=1, size=100, window=5, negative=5, hs=0, min_count=2, workers=cores),\n", " # PV-DBOW \n", - " Doc2Vec(dm=0,size=100,negative=5,hs=0,min_count=2,workers=cores),\n", + " Doc2Vec(dm=0, size=100, negative=5, hs=0, min_count=2, workers=cores),\n", " # PV-DM w/average\n", - " Doc2Vec(dm=1,dm_mean=1,size=100,window=10,negative=5,hs=0,min_count=2,workers=cores),\n", + " Doc2Vec(dm=1, dm_mean=1, size=100, window=10, negative=5, hs=0, min_count=2, workers=cores),\n", "]\n", "\n", "# speed setup by sharing results of 1st model's vocabulary scan\n", @@ -196,8 +214,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "Doc2Vec(dm/c,d100,n5,w5,mc2,t4)\n", - "Doc2Vec(dbow,d100,n5,mc2,t4)" + "Doc2Vec(dm/c,d100,n5,w5,mc2,t8)\n", + 
"Doc2Vec(dbow,d100,n5,mc2,t8)" ] }, { @@ -205,7 +223,7 @@ "stream": "stdout", "text": [ "\n", - "Doc2Vec(dm/m,d100,n5,w10,mc2,t4)" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t8)" ] }, { @@ -290,15 +308,15 @@ " test_data = test_set\n", " if infer:\n", " if infer_subsample < 1.0:\n", - " test_data = sample(test_data, int(infer_subsample*len(test_data)))\n", - " test_regressors = [test_model.infer_vector(doc.words,steps=infer_steps,alpha=infer_alpha) for doc in test_data]\n", + " test_data = sample(test_data, int(infer_subsample * len(test_data)))\n", + " test_regressors = [test_model.infer_vector(doc.words, steps=infer_steps, alpha=infer_alpha) for doc in test_data]\n", " else:\n", " test_regressors = [test_model.docvecs[doc.tags[0]] for doc in test_docs]\n", " test_regressors = sm.add_constant(test_regressors)\n", " \n", " # predict & evaluate\n", " test_predictions = predictor.predict(test_regressors)\n", - " corrects = sum(np.rint(test_predictions)==[doc.sentiment for doc in test_data])\n", + " corrects = sum(np.rint(test_predictions) == [doc.sentiment for doc in test_data])\n", " errors = len(test_predictions) - corrects\n", " error_rate = float(errors) / len(test_predictions)\n", " return (error_rate, errors, len(test_predictions), predictor)\n" @@ -306,7 +324,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 6 + "prompt_number": 8 }, { "cell_type": "heading", @@ -339,7 +357,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 7 + "prompt_number": 9 }, { "cell_type": "code", @@ -359,7 +377,7 @@ " for name, train_model in models_by_name.items():\n", " # train\n", " duration = 'na'\n", - " train_model.alpha, train_model.min_alpha = (alpha, alpha)\n", + " train_model.alpha, train_model.min_alpha = alpha, alpha\n", " with elapsed_timer() as elapsed:\n", " train_model.train(doc_list)\n", " duration = '%.1f' % elapsed()\n", @@ -367,26 +385,26 @@ " # evaluate\n", " eval_duration = ''\n", " with elapsed_timer() as 
eval_elapsed:\n", - " (err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs)\n", + " err, err_count, test_count, predictor = error_rate_for_model(train_model, train_docs, test_docs)\n", " eval_duration = '%.1f' % eval_elapsed()\n", " best_indicator = ' '\n", " if err <= best_error[name]:\n", " best_error[name] = err\n", " best_indicator = '*' \n", - " print(\"%s%f : %i passes : %s %ss %ss\"%(best_indicator,err,epoch+1,name, duration, eval_duration))\n", + " print(\"%s%f : %i passes : %s %ss %ss\" % (best_indicator, err, epoch + 1, name, duration, eval_duration))\n", "\n", - " if epoch == 0 or (epoch % 5) == 0:\n", + " if (epoch % 5) == 0:\n", " eval_duration = ''\n", " with elapsed_timer() as eval_elapsed:\n", - " (infer_err, err_count, test_count, predictor) = error_rate_for_model(train_model, train_docs, test_docs, infer=True)\n", + " infer_err, err_count, test_count, predictor = error_rate_for_model(train_model, train_docs, test_docs, infer=True)\n", " eval_duration = '%.1f' % eval_elapsed()\n", " best_indicator = ' '\n", - " if infer_err < best_error[name+'_inferred']:\n", - " best_error[name+'_inferred'] = infer_err\n", + " if infer_err < best_error[name + '_inferred']:\n", + " best_error[name + '_inferred'] = infer_err\n", " best_indicator = '*'\n", - " print(\"%s%f : %i passes : %s %ss %ss\"%(best_indicator,infer_err,epoch+1,name+'_inferred', duration, eval_duration))\n", + " print(\"%s%f : %i passes : %s %ss %ss\" % (best_indicator, infer_err, epoch + 1, name + '_inferred', duration, eval_duration))\n", "\n", - " print('completed pass %i at alpha %f'%(epoch+1,alpha))\n", + " print('completed pass %i at alpha %f' % (epoch + 1, alpha))\n", " alpha -= alpha_delta\n", " \n", "print(\"END %s\" % str(datetime.datetime.now()))" @@ -398,8 +416,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "START 2015-06-15 23:42:20.225229\n", - "*0.412640 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 58.4s 1.4s" + "START 
2015-06-28 20:34:29.500839\n", + "*0.417080 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 84.5s 1.0s" ] }, { @@ -407,7 +425,7 @@ "stream": "stdout", "text": [ "\n", - "*0.401200 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 58.4s 10.5s" + "*0.363200 : 1 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8)_inferred 84.5s 14.9s" ] }, { @@ -415,7 +433,7 @@ "stream": "stdout", "text": [ "\n", - "*0.218280 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.8s" + "*0.219520 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.0s 0.6s" ] }, { @@ -423,7 +441,7 @@ "stream": "stdout", "text": [ "\n", - "*0.195600 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.0s 5.1s" + "*0.184000 : 1 passes : Doc2Vec(dbow,d100,n5,mc2,t8)_inferred 19.0s 4.6s" ] }, { @@ -431,7 +449,7 @@ "stream": "stdout", "text": [ "\n", - "*0.273280 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.5s 0.8s" + "*0.277080 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.0s 0.6s" ] }, { @@ -439,7 +457,7 @@ "stream": "stdout", "text": [ "\n", - "*0.214000 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.5s 6.2s" + "*0.230800 : 1 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8)_inferred 35.0s 6.4s" ] }, { @@ -447,7 +465,7 @@ "stream": "stdout", "text": [ "\n", - "*0.204680 : 1 passes : dbow+dmm 0.0s 2.3s" + "*0.207840 : 1 passes : dbow+dmm 0.0s 1.5s" ] }, { @@ -455,7 +473,7 @@ "stream": "stdout", "text": [ "\n", - "*0.182400 : 1 passes : dbow+dmm_inferred 0.0s 11.6s" + "*0.185200 : 1 passes : dbow+dmm_inferred 0.0s 11.2s" ] }, { @@ -463,7 +481,7 @@ "stream": "stdout", "text": [ "\n", - "*0.216240 : 1 passes : dbow+dmc 0.0s 1.5s" + "*0.220720 : 1 passes : dbow+dmc 0.0s 1.1s" ] }, { @@ -471,7 +489,7 @@ "stream": "stdout", "text": [ "\n", - "*0.222000 : 1 passes : dbow+dmc_inferred 0.0s 16.2s" + "*0.189200 : 1 passes : dbow+dmc_inferred 0.0s 19.3s" ] }, { @@ -480,7 +498,7 @@ "text": [ "\n", "completed pass 1 at alpha 0.025000\n", - "*0.358040 : 2 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 57.0s 0.8s" + "*0.357120 : 2 
passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 73.1s 0.6s" ] }, { @@ -488,7 +506,7 @@ "stream": "stdout", "text": [ "\n", - "*0.140320 : 2 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.7s" + "*0.144360 : 2 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.8s 0.6s" ] }, { @@ -496,7 +514,7 @@ "stream": "stdout", "text": [ "\n", - "*0.223920 : 2 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.0s 1.4s" + "*0.225640 : 2 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 36.2s 1.0s" ] }, { @@ -504,7 +522,7 @@ "stream": "stdout", "text": [ "\n", - "*0.136520 : 2 passes : dbow+dmm 0.0s 1.6s" + "*0.141160 : 2 passes : dbow+dmm 0.0s 1.1s" ] }, { @@ -512,7 +530,7 @@ "stream": "stdout", "text": [ "\n", - "*0.141120 : 2 passes : dbow+dmc 0.0s 1.7s" + "*0.144800 : 2 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -521,7 +539,7 @@ "text": [ "\n", "completed pass 2 at alpha 0.023800\n", - "*0.325440 : 3 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 62.6s 0.7s" + "*0.326840 : 3 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 73.6s 0.6s" ] }, { @@ -529,7 +547,7 @@ "stream": "stdout", "text": [ "\n", - "*0.124000 : 3 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.8s" + "*0.125880 : 3 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 20.1s 0.7s" ] }, { @@ -537,7 +555,7 @@ "stream": "stdout", "text": [ "\n", - "*0.198680 : 3 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.8s" + "*0.202680 : 3 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 36.0s 0.6s" ] }, { @@ -545,7 +563,7 @@ "stream": "stdout", "text": [ "\n", - "*0.121760 : 3 passes : dbow+dmm 0.0s 2.1s" + "*0.123280 : 3 passes : dbow+dmm 0.0s 1.6s" ] }, { @@ -553,7 +571,7 @@ "stream": "stdout", "text": [ "\n", - "*0.125120 : 3 passes : dbow+dmc 0.0s 1.5s" + "*0.126040 : 3 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -562,7 +580,7 @@ "text": [ "\n", "completed pass 3 at alpha 0.022600\n", - "*0.300600 : 4 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.0s 0.7s" + "*0.302360 : 4 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 72.6s 0.6s" ] }, { @@ -570,7 +588,7 @@ "stream": "stdout", "text": [ "\n", - "*0.115760 
: 4 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + "*0.113640 : 4 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.9s 0.7s" ] }, { @@ -578,7 +596,7 @@ "stream": "stdout", "text": [ "\n", - "*0.188680 : 4 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.6s 0.7s" + "*0.189880 : 4 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.8s 0.6s" ] }, { @@ -586,7 +604,7 @@ "stream": "stdout", "text": [ "\n", - "*0.114760 : 4 passes : dbow+dmm 0.0s 1.6s" + "*0.114200 : 4 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -594,7 +612,7 @@ "stream": "stdout", "text": [ "\n", - "*0.115440 : 4 passes : dbow+dmc 0.0s 2.1s" + "*0.115640 : 4 passes : dbow+dmc 0.0s 1.6s" ] }, { @@ -603,7 +621,7 @@ "text": [ "\n", "completed pass 4 at alpha 0.021400\n", - "*0.281360 : 5 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 55.6s 0.8s" + "*0.281480 : 5 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 72.7s 0.7s" ] }, { @@ -611,7 +629,7 @@ "stream": "stdout", "text": [ "\n", - "*0.112000 : 5 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.4s 0.9s" + "*0.109720 : 5 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 21.5s 0.7s" ] }, { @@ -619,7 +637,7 @@ "stream": "stdout", "text": [ "\n", - "*0.182360 : 5 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.5s 0.7s" + "*0.181360 : 5 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 37.8s 0.7s" ] }, { @@ -627,7 +645,7 @@ "stream": "stdout", "text": [ "\n", - "*0.111800 : 5 passes : dbow+dmm 0.0s 1.4s" + "*0.109760 : 5 passes : dbow+dmm 0.0s 1.3s" ] }, { @@ -635,7 +653,7 @@ "stream": "stdout", "text": [ "\n", - "*0.111560 : 5 passes : dbow+dmc 0.0s 1.5s" + "*0.110400 : 5 passes : dbow+dmc 0.0s 1.6s" ] }, { @@ -644,7 +662,7 @@ "text": [ "\n", "completed pass 5 at alpha 0.020200\n", - "*0.266200 : 6 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.7s 0.9s" + "*0.264640 : 6 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 72.0s 0.7s" ] }, { @@ -652,7 +670,7 @@ "stream": "stdout", "text": [ "\n", - "*0.272000 : 6 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 54.7s 11.4s" + "*0.292000 : 6 passes : 
Doc2Vec(dm/c,d100,n5,w5,mc2,t8)_inferred 72.0s 13.3s" ] }, { @@ -660,7 +678,7 @@ "stream": "stdout", "text": [ "\n", - "*0.110560 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.1s 0.8s" + "*0.107440 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 21.6s 0.7s" ] }, { @@ -668,7 +686,7 @@ "stream": "stdout", "text": [ "\n", - "*0.112800 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 29.1s 5.3s" + "*0.116000 : 6 passes : Doc2Vec(dbow,d100,n5,mc2,t8)_inferred 21.6s 4.7s" ] }, { @@ -676,7 +694,7 @@ "stream": "stdout", "text": [ "\n", - "*0.178520 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 33.7s 0.8s" + "*0.176040 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 37.4s 1.1s" ] }, { @@ -684,7 +702,7 @@ "stream": "stdout", "text": [ "\n", - "*0.199200 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 33.7s 6.3s" + "*0.213600 : 6 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8)_inferred 37.4s 6.4s" ] }, { @@ -692,7 +710,7 @@ "stream": "stdout", "text": [ "\n", - "*0.109040 : 6 passes : dbow+dmm 0.0s 1.5s" + "*0.107000 : 6 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -700,7 +718,7 @@ "stream": "stdout", "text": [ "\n", - "*0.118800 : 6 passes : dbow+dmm_inferred 0.0s 12.9s" + "*0.108000 : 6 passes : dbow+dmm_inferred 0.0s 11.2s" ] }, { @@ -708,7 +726,7 @@ "stream": "stdout", "text": [ "\n", - "*0.110400 : 6 passes : dbow+dmc 0.0s 1.6s" + "*0.107880 : 6 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -716,7 +734,7 @@ "stream": "stdout", "text": [ "\n", - "*0.118400 : 6 passes : dbow+dmc_inferred 0.0s 16.4s" + "*0.124400 : 6 passes : dbow+dmc_inferred 0.0s 18.3s" ] }, { @@ -725,7 +743,7 @@ "text": [ "\n", "completed pass 6 at alpha 0.019000\n", - "*0.254600 : 7 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.0s 0.8s" + "*0.254200 : 7 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 65.7s 1.1s" ] }, { @@ -733,7 +751,7 @@ "stream": "stdout", "text": [ "\n", - "*0.107920 : 7 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.5s 0.8s" + "*0.106720 : 7 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.5s 0.7s" ] }, { @@ -741,7 +759,7 
@@ "stream": "stdout", "text": [ "\n", - "*0.175560 : 7 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.0s 0.7s" + "*0.172880 : 7 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.6s 0.7s" ] }, { @@ -749,7 +767,7 @@ "stream": "stdout", "text": [ "\n", - "*0.107880 : 7 passes : dbow+dmm 0.0s 2.4s" + "*0.106080 : 7 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -757,7 +775,7 @@ "stream": "stdout", "text": [ "\n", - "*0.107760 : 7 passes : dbow+dmc 0.0s 1.6s" + "*0.106320 : 7 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -766,7 +784,7 @@ "text": [ "\n", "completed pass 7 at alpha 0.017800\n", - "*0.246160 : 8 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.5s 0.9s" + "*0.245880 : 8 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 68.6s 0.7s" ] }, { @@ -774,7 +792,7 @@ "stream": "stdout", "text": [ "\n", - "*0.106640 : 8 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + "*0.104920 : 8 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 20.0s 1.0s" ] }, { @@ -782,7 +800,7 @@ "stream": "stdout", "text": [ "\n", - "*0.173720 : 8 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" + "*0.171000 : 8 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.4s 0.7s" ] }, { @@ -790,7 +808,7 @@ "stream": "stdout", "text": [ "\n", - "*0.106640 : 8 passes : dbow+dmm 0.0s 2.4s" + "*0.104760 : 8 passes : dbow+dmm 0.0s 1.3s" ] }, { @@ -798,7 +816,7 @@ "stream": "stdout", "text": [ "\n", - "*0.106320 : 8 passes : dbow+dmc 0.0s 1.7s" + "*0.105600 : 8 passes : dbow+dmc 0.0s 1.3s" ] }, { @@ -807,7 +825,7 @@ "text": [ "\n", "completed pass 8 at alpha 0.016600\n", - "*0.239160 : 9 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 55.4s 0.8s" + "*0.238400 : 9 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 66.1s 0.6s" ] }, { @@ -815,7 +833,7 @@ "stream": "stdout", "text": [ "\n", - "*0.104120 : 9 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.8s 0.8s" + "*0.104520 : 9 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 21.2s 1.1s" ] }, { @@ -823,7 +841,7 @@ "stream": "stdout", "text": [ "\n", - "*0.170400 : 9 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" + "*0.167600 : 9 
passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 37.5s 0.7s" ] }, { @@ -831,7 +849,7 @@ "stream": "stdout", "text": [ "\n", - "*0.103840 : 9 passes : dbow+dmm 0.0s 1.5s" + "*0.103680 : 9 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -839,7 +857,7 @@ "stream": "stdout", "text": [ "\n", - "*0.104920 : 9 passes : dbow+dmc 0.0s 1.5s" + "*0.103480 : 9 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -848,7 +866,7 @@ "text": [ "\n", "completed pass 9 at alpha 0.015400\n", - "*0.233320 : 10 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.4s 1.3s" + "*0.232160 : 10 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 69.0s 0.7s" ] }, { @@ -856,7 +874,7 @@ "stream": "stdout", "text": [ "\n", - "*0.104120 : 10 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + "*0.103680 : 10 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 21.8s 0.7s" ] }, { @@ -864,7 +882,7 @@ "stream": "stdout", "text": [ "\n", - "*0.170000 : 10 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.7s" + "*0.166000 : 10 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.4s 1.1s" ] }, { @@ -872,7 +890,7 @@ "stream": "stdout", "text": [ "\n", - " 0.104080 : 10 passes : dbow+dmm 0.0s 1.5s" + "*0.101920 : 10 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -880,7 +898,7 @@ "stream": "stdout", "text": [ "\n", - "*0.104600 : 10 passes : dbow+dmc 0.0s 1.5s" + " 0.103560 : 10 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -889,7 +907,7 @@ "text": [ "\n", "completed pass 10 at alpha 0.014200\n", - "*0.228680 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 52.8s 1.4s" + "*0.227760 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 66.4s 0.7s" ] }, { @@ -897,7 +915,7 @@ "stream": "stdout", "text": [ "\n", - "*0.222400 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 52.8s 10.3s" + "*0.242400 : 11 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8)_inferred 66.4s 13.0s" ] }, { @@ -905,7 +923,7 @@ "stream": "stdout", "text": [ "\n", - "*0.103280 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.9s" + "*0.102160 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.7s 0.6s" ] }, { @@ -913,7 +931,7 @@ "stream": 
"stdout", "text": [ "\n", - "*0.106000 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.1s 5.3s" + "*0.113200 : 11 passes : Doc2Vec(dbow,d100,n5,mc2,t8)_inferred 19.7s 5.0s" ] }, { @@ -921,7 +939,7 @@ "stream": "stdout", "text": [ "\n", - "*0.167280 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.7s" + "*0.163480 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.4s 0.6s" ] }, { @@ -929,7 +947,7 @@ "stream": "stdout", "text": [ "\n", - " 0.206800 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.3s 6.5s" + "*0.208800 : 11 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8)_inferred 35.4s 6.2s" ] }, { @@ -937,7 +955,7 @@ "stream": "stdout", "text": [ "\n", - "*0.101800 : 11 passes : dbow+dmm 0.0s 2.2s" + "*0.101560 : 11 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -945,7 +963,7 @@ "stream": "stdout", "text": [ "\n", - "*0.115600 : 11 passes : dbow+dmm_inferred 0.0s 12.1s" + "*0.102000 : 11 passes : dbow+dmm_inferred 0.0s 11.4s" ] }, { @@ -953,7 +971,7 @@ "stream": "stdout", "text": [ "\n", - "*0.102920 : 11 passes : dbow+dmc 0.0s 1.4s" + "*0.101920 : 11 passes : dbow+dmc 0.0s 1.6s" ] }, { @@ -961,7 +979,7 @@ "stream": "stdout", "text": [ "\n", - "*0.100400 : 11 passes : dbow+dmc_inferred 0.0s 15.9s" + "*0.109600 : 11 passes : dbow+dmc_inferred 0.0s 17.4s" ] }, { @@ -970,7 +988,7 @@ "text": [ "\n", "completed pass 11 at alpha 0.013000\n", - "*0.225600 : 12 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.3s 0.8s" + "*0.225960 : 12 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 61.8s 0.7s" ] }, { @@ -978,7 +996,7 @@ "stream": "stdout", "text": [ "\n", - " 0.104040 : 12 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + "*0.101720 : 12 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 20.2s 0.7s" ] }, { @@ -986,7 +1004,7 @@ "stream": "stdout", "text": [ "\n", - "*0.165160 : 12 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.9s 0.8s" + "*0.163000 : 12 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.5s 0.7s" ] }, { @@ -994,7 +1012,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102720 : 12 
passes : dbow+dmm 0.0s 2.0s" + "*0.100840 : 12 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -1002,7 +1020,7 @@ "stream": "stdout", "text": [ "\n", - " 0.103360 : 12 passes : dbow+dmc 0.0s 1.5s" + "*0.101920 : 12 passes : dbow+dmc 0.0s 1.7s" ] }, { @@ -1011,7 +1029,7 @@ "text": [ "\n", "completed pass 12 at alpha 0.011800\n", - "*0.223720 : 13 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.6s 0.8s" + "*0.222360 : 13 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 65.2s 0.7s" ] }, { @@ -1019,7 +1037,7 @@ "stream": "stdout", "text": [ "\n", - " 0.103520 : 13 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.4s 0.8s" + " 0.103120 : 13 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 20.0s 0.7s" ] }, { @@ -1027,7 +1045,7 @@ "stream": "stdout", "text": [ "\n", - " 0.165320 : 13 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.0s 0.8s" + "*0.161960 : 13 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.2s 0.6s" ] }, { @@ -1035,7 +1053,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102080 : 13 passes : dbow+dmm 0.0s 2.1s" + " 0.101640 : 13 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -1043,7 +1061,7 @@ "stream": "stdout", "text": [ "\n", - "*0.102480 : 13 passes : dbow+dmc 0.0s 1.5s" + " 0.102600 : 13 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -1052,7 +1070,7 @@ "text": [ "\n", "completed pass 13 at alpha 0.010600\n", - "*0.221680 : 14 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 54.8s 0.8s" + "*0.220960 : 14 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 65.3s 1.1s" ] }, { @@ -1060,7 +1078,7 @@ "stream": "stdout", "text": [ "\n", - "*0.102440 : 14 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.5s 0.9s" + " 0.102920 : 14 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.9s 0.7s" ] }, { @@ -1068,7 +1086,7 @@ "stream": "stdout", "text": [ "\n", - "*0.164480 : 14 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 40.0s 0.8s" + "*0.160160 : 14 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 36.0s 0.7s" ] }, { @@ -1076,7 +1094,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102120 : 14 passes : dbow+dmm 0.0s 1.5s" + " 0.101720 : 14 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ 
-1084,7 +1102,7 @@ "stream": "stdout", "text": [ "\n", - " 0.103640 : 14 passes : dbow+dmc 0.0s 1.5s" + " 0.102560 : 14 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -1093,7 +1111,7 @@ "text": [ "\n", "completed pass 14 at alpha 0.009400\n", - "*0.220560 : 15 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 52.6s 1.4s" + "*0.219400 : 15 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 64.0s 1.0s" ] }, { @@ -1101,7 +1119,7 @@ "stream": "stdout", "text": [ "\n", - "*0.102040 : 15 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.6s 0.8s" + "*0.101440 : 15 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.5s 0.7s" ] }, { @@ -1109,7 +1127,7 @@ "stream": "stdout", "text": [ "\n", - "*0.163160 : 15 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 37.6s 0.8s" + " 0.160640 : 15 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 38.6s 0.7s" ] }, { @@ -1117,7 +1135,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102160 : 15 passes : dbow+dmm 0.0s 1.7s" + "*0.100160 : 15 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -1125,7 +1143,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102880 : 15 passes : dbow+dmc 0.0s 1.5s" + "*0.101880 : 15 passes : dbow+dmc 0.0s 1.3s" ] }, { @@ -1134,7 +1152,7 @@ "text": [ "\n", "completed pass 15 at alpha 0.008200\n", - "*0.218400 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 56.1s 0.7s" + "*0.216880 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 64.1s 1.1s" ] }, { @@ -1142,7 +1160,7 @@ "stream": "stdout", "text": [ "\n", - " 0.233600 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred 56.1s 11.0s" + "*0.232400 : 16 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8)_inferred 64.1s 12.8s" ] }, { @@ -1150,7 +1168,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102840 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.0s 0.9s" + " 0.101760 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.1s 0.7s" ] }, { @@ -1158,7 +1176,7 @@ "stream": "stdout", "text": [ "\n", - " 0.106000 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t4)_inferred 28.0s 5.6s" + "*0.111600 : 16 passes : Doc2Vec(dbow,d100,n5,mc2,t8)_inferred 19.1s 4.7s" ] }, { @@ -1166,7 
+1184,7 @@ "stream": "stdout", "text": [ "\n", - "*0.161920 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.3s 0.8s" + "*0.159800 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 34.9s 0.6s" ] }, { @@ -1174,7 +1192,7 @@ "stream": "stdout", "text": [ "\n", - "*0.190800 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred 34.3s 6.3s" + "*0.184000 : 16 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8)_inferred 34.9s 6.5s" ] }, { @@ -1182,7 +1200,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102280 : 16 passes : dbow+dmm 0.0s 1.8s" + " 0.100640 : 16 passes : dbow+dmm 0.0s 1.6s" ] }, { @@ -1190,7 +1208,7 @@ "stream": "stdout", "text": [ "\n", - "*0.109200 : 16 passes : dbow+dmm_inferred 0.0s 12.0s" + "*0.094800 : 16 passes : dbow+dmm_inferred 0.0s 11.7s" ] }, { @@ -1198,7 +1216,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102840 : 16 passes : dbow+dmc 0.0s 2.2s" + "*0.101320 : 16 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -1206,7 +1224,7 @@ "stream": "stdout", "text": [ "\n", - " 0.114800 : 16 passes : dbow+dmc_inferred 0.0s 15.8s" + " 0.109600 : 16 passes : dbow+dmc_inferred 0.0s 17.5s" ] }, { @@ -1215,7 +1233,7 @@ "text": [ "\n", "completed pass 16 at alpha 0.007000\n", - " 0.219000 : 17 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.9s 0.8s" + " 0.217160 : 17 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 58.6s 0.6s" ] }, { @@ -1223,7 +1241,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102960 : 17 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.9s 0.8s" + " 0.101760 : 17 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.5s 0.7s" ] }, { @@ -1231,7 +1249,7 @@ "stream": "stdout", "text": [ "\n", - "*0.161480 : 17 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.8s 0.8s" + "*0.159640 : 17 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 37.0s 1.1s" ] }, { @@ -1239,7 +1257,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102120 : 17 passes : dbow+dmm 0.0s 1.7s" + " 0.100760 : 17 passes : dbow+dmm 0.0s 1.3s" ] }, { @@ -1247,7 +1265,7 @@ "stream": "stdout", "text": [ "\n", - "*0.102040 : 17 passes : dbow+dmc 0.0s 
2.1s" + " 0.101480 : 17 passes : dbow+dmc 0.0s 1.3s" ] }, { @@ -1256,7 +1274,7 @@ "text": [ "\n", "completed pass 17 at alpha 0.005800\n", - " 0.219600 : 18 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 53.1s 0.7s" + "*0.216080 : 18 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 60.7s 0.6s" ] }, { @@ -1264,7 +1282,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102400 : 18 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 29.0s 0.8s" + " 0.101520 : 18 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.6s 0.6s" ] }, { @@ -1272,7 +1290,7 @@ "stream": "stdout", "text": [ "\n", - " 0.161680 : 18 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 35.8s 0.7s" + "*0.158760 : 18 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 34.9s 1.0s" ] }, { @@ -1280,7 +1298,7 @@ "stream": "stdout", "text": [ "\n", - "*0.101680 : 18 passes : dbow+dmm 0.0s 1.5s" + " 0.100800 : 18 passes : dbow+dmm 0.0s 1.2s" ] }, { @@ -1288,7 +1306,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102120 : 18 passes : dbow+dmc 0.0s 1.5s" + " 0.101760 : 18 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -1297,7 +1315,7 @@ "text": [ "\n", "completed pass 18 at alpha 0.004600\n", - " 0.218920 : 19 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.5s 1.3s" + "*0.215560 : 19 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 62.6s 0.7s" ] }, { @@ -1305,7 +1323,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102320 : 19 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 28.1s 0.8s" + "*0.101000 : 19 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 20.6s 0.7s" ] }, { @@ -1313,7 +1331,7 @@ "stream": "stdout", "text": [ "\n", - " 0.161600 : 19 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.2s 0.7s" + " 0.159080 : 19 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 35.9s 0.7s" ] }, { @@ -1321,7 +1339,7 @@ "stream": "stdout", "text": [ "\n", - "*0.101640 : 19 passes : dbow+dmm 0.0s 1.5s" + "*0.099920 : 19 passes : dbow+dmm 0.0s 1.7s" ] }, { @@ -1329,7 +1347,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102160 : 19 passes : dbow+dmc 0.0s 1.6s" + " 0.102280 : 19 passes : dbow+dmc 0.0s 1.2s" ] }, { @@ -1338,7 +1356,7 @@ 
"text": [ "\n", "completed pass 19 at alpha 0.003400\n", - "*0.218200 : 20 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t4) 51.2s 0.8s" + "*0.215160 : 20 passes : Doc2Vec(dm/c,d100,n5,w5,mc2,t8) 58.3s 0.6s" ] }, { @@ -1346,7 +1364,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102560 : 20 passes : Doc2Vec(dbow,d100,n5,mc2,t4) 27.9s 1.4s" + " 0.101360 : 20 passes : Doc2Vec(dbow,d100,n5,mc2,t8) 19.5s 0.7s" ] }, { @@ -1354,7 +1372,7 @@ "stream": "stdout", "text": [ "\n", - "*0.161360 : 20 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t4) 34.6s 0.7s" + " 0.158920 : 20 passes : Doc2Vec(dm/m,d100,n5,w10,mc2,t8) 33.6s 0.6s" ] }, { @@ -1362,7 +1380,7 @@ "stream": "stdout", "text": [ "\n", - "*0.101360 : 20 passes : dbow+dmm 0.0s 1.5s" + " 0.100480 : 20 passes : dbow+dmm 0.0s 1.5s" ] }, { @@ -1370,7 +1388,7 @@ "stream": "stdout", "text": [ "\n", - " 0.102560 : 20 passes : dbow+dmc 0.0s 1.5s" + " 0.102160 : 20 passes : dbow+dmc 0.0s 1.1s" ] }, { @@ -1379,11 +1397,11 @@ "text": [ "\n", "completed pass 20 at alpha 0.002200\n", - "END 2015-06-16 00:27:00.604456\n" + "END 2015-06-28 21:20:48.994706\n" ] } ], - "prompt_number": 8 + "prompt_number": 10 }, { "cell_type": "heading", @@ -1398,10 +1416,8 @@ "collapsed": true, "input": [ "# print best error rates achieved\n", - "errs = [(rate,name) for name, rate in best_error.items()]\n", - "errs.sort(key=lambda pair: pair[0])\n", - "for err in errs:\n", - " print(\"%f %s\"%(err[0],err[1]))" + "for rate, name in sorted((rate, name) for name, rate in best_error.items()):\n", + " print(\"%f %s\" % (rate, name))" ], "language": "python", "metadata": {}, @@ -1410,20 +1426,20 @@ "output_type": "stream", "stream": "stdout", "text": [ - "0.100400 dbow+dmc_inferred\n", - "0.101360 dbow+dmm\n", - "0.102040 Doc2Vec(dbow,d100,n5,mc2,t4)\n", - "0.102040 dbow+dmc\n", - "0.106000 Doc2Vec(dbow,d100,n5,mc2,t4)_inferred\n", - "0.109200 dbow+dmm_inferred\n", - "0.161360 Doc2Vec(dm/m,d100,n5,w10,mc2,t4)\n", - "0.190800 Doc2Vec(dm/m,d100,n5,w10,mc2,t4)_inferred\n", - 
"0.218200 Doc2Vec(dm/c,d100,n5,w5,mc2,t4)\n", - "0.222400 Doc2Vec(dm/c,d100,n5,w5,mc2,t4)_inferred\n" + "0.094800 dbow+dmm_inferred\n", + "0.099920 dbow+dmm\n", + "0.101000 Doc2Vec(dbow,d100,n5,mc2,t8)\n", + "0.101320 dbow+dmc\n", + "0.109600 dbow+dmc_inferred\n", + "0.111600 Doc2Vec(dbow,d100,n5,mc2,t8)_inferred\n", + "0.158760 Doc2Vec(dm/m,d100,n5,w10,mc2,t8)\n", + "0.184000 Doc2Vec(dm/m,d100,n5,w10,mc2,t8)_inferred\n", + "0.215160 Doc2Vec(dm/c,d100,n5,w5,mc2,t8)\n", + "0.232400 Doc2Vec(dm/c,d100,n5,w5,mc2,t8)_inferred\n" ] } ], - "prompt_number": 9 + "prompt_number": 12 }, { "cell_type": "markdown", @@ -1452,11 +1468,11 @@ "cell_type": "code", "collapsed": false, "input": [ - "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", + "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc; re-run cell for more examples\n", "print('for doc %d...' % doc_id)\n", "for model in simple_models:\n", " inferred_docvec = model.infer_vector(alldocs[doc_id].words)\n", - " print('%s:\\n %s' % (model, model.docvecs.most_similar([inferred_docvec],topn=3)))" + " print('%s:\\n %s' % (model, model.docvecs.most_similar([inferred_docvec], topn=3)))" ], "language": "python", "metadata": {}, @@ -1465,9 +1481,9 @@ "output_type": "stream", "stream": "stdout", "text": [ - "for doc 10937...\n", - "Doc2Vec(dm/c,d100,n5,w5,mc2,t4):\n", - " [(10937, 0.6842625141143799), (7308, 0.42190566658973694), (10839, 0.4074726700782776)]" + "for doc 25430...\n", + "Doc2Vec(dm/c,d100,n5,w5,mc2,t8):\n", + " [(25430, 0.6583491563796997), (27314, 0.4142411947250366), (16479, 0.40846431255340576)]" ] }, { @@ -1475,8 +1491,8 @@ "stream": "stdout", "text": [ "\n", - "Doc2Vec(dbow,d100,n5,mc2,t4):\n", - " [(10937, 0.9522888660430908), (12203, 0.5845203399658203), (35262, 0.575614869594574)]" + "Doc2Vec(dbow,d100,n5,mc2,t8):\n", + " [(25430, 0.9325973987579346), (49281, 0.5766637921333313), (79679, 0.5634804964065552)]" ] }, { @@ 
-1484,8 +1500,8 @@ "stream": "stdout", "text": [ "\n", - "Doc2Vec(dm/m,d100,n5,w10,mc2,t4):\n", - " [(10937, 0.8651494979858398), (11717, 0.8156246542930603), (58074, 0.8120745420455933)]" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t8):\n", + " [(25430, 0.7970066666603088), (97818, 0.6925815343856812), (230, 0.690807580947876)]" ] }, { @@ -1496,7 +1512,7 @@ ] } ], - "prompt_number": 11 + "prompt_number": 13 }, { "cell_type": "markdown", @@ -1518,12 +1534,13 @@ "collapsed": false, "input": [ "import random\n", + "\n", "doc_id = np.random.randint(simple_models[0].docvecs.count) # pick random doc, re-run cell for more examples\n", "model = random.choice(simple_models) # and a random model\n", "sims = model.docvecs.most_similar(doc_id, topn=model.docvecs.count) # get *all* similar documents\n", "print('TARGET (%d): \u00ab%s\u00bb\\n' % (doc_id, ' '.join(alldocs[doc_id].words)))\n", "print('SIMILAR/DISSIMILAR DOCS PER MODEL %s:\\n' % model)\n", - "for label, index in [('MOST',0), ('MEDIAN',len(sims)//2), ('LEAST',len(sims)-1)]:\n", + "for label, index in [('MOST', 0), ('MEDIAN', len(sims)//2), ('LEAST', len(sims) - 1)]:\n", " print('%s %s: \u00ab%s\u00bb\\n' % (label, sims[index], ' '.join(alldocs[sims[index][0]].words)))\n" ], "language": "python", @@ -1533,20 +1550,20 @@ "output_type": "stream", "stream": "stdout", "text": [ - "TARGET (90609): \u00absomehow in line with \" calendar girls \" and \" mrs henderson presents \" as it deals with the sex life of elderly ladies , \" irina palm \" is the story of the slightly-more-than-middle-aged maggie who has to raise a large sum of money in order to save her grandchild from dying , takes a job as a w*nker in a sex club ( minimal physical touch , no nude scenes , all done in the best taste ) ( . . . imaginable under the circumstances , that is ) - and finds that she has a rare talent for just that sort of work . i liked it . 
the story is given every conceivable , foreseeable twist and turn - a romance with the sex bar proprietor who just had to sample her talent on the sly ; her friends who are dying to be let in on the particulars , but still are too prudish not to turn their backs on her ; her son finding out and flying into a rage , and the reconciliation with her hostile daughter-in-law when she learns about maggie's sacrifice - all predictable , but still : i liked it . perhaps because everybody in the film puts out great performance . miki manoljovic is very good as the sex bar owner who falls in love with his unlikely ace employee , kevin bishop is frighteningly good as the loving , mild-mannered son who cannot really see his way through to understand his mother ( which son could , given her line of work ? ) , and marianne faithful , that rarely seen blast from the past ( my past , at least ) is certainly a far cry from her ophelia in 1969 ( yes , i do know that she's been doing bits and bobs in between , but somehow i've missed them ) . marianne faithful's slow , slightly hazy style is recognizable still , and i'd say she carries this film through in a very touching way - no pun intended .\u00bb\n", + "TARGET (72927): \u00abthis is one of the best films of this year . for a year that was fueled by controversy and crap , it was nice to finally see a film that had a true heart to it . from the opening scene to the end , i was so moved by the love that will smith has for his son . basically , if you see this movie and walk out of it feeling nothing , there is something that is very wrong with you . loved this movie , it's the perfect movie to end the year with . the best part was after the movie , my friends and i all got up and realized that this movie had actually made the four of us tear up ! it's an amazing film and if will smith doesn't get at least an oscar nom , then the oscars will just suck . in fact will smith should actually just win an oscar for this role . ! ! ! 
i loved this movie ! ! ! ! everybody needs to see especially the people in this world that take everything for granted , watch this movie , it will change you !\u00bb\n", "\n", - "SIMILAR/DISSIMILAR DOCS PER MODEL Doc2Vec(dm/m,d100,n5,w10,mc2,t4):\n", + "SIMILAR/DISSIMILAR DOCS PER MODEL Doc2Vec(dm/m,d100,n5,w10,mc2,t8):\n", "\n", - "MOST (36095, 0.6738423109054565): \u00abwalter matthau is wonderful as the \" philandering \" dentist dr . julian winston whose frequent fibs to girlfriend goldie provide textbook proof of the dangers of lying . goldie hawn's touching kook toni simmons certainly deserved to win her oscar . ingrid bergman's work as the stiff-as-starch nurse stephanie is also touching to watch as she comes out of her shell , slowly and nervously . this is a great movie to watch in the springtime , or any time for that matter . it's very underrated ; i never heard about it until i found it in the video store , and what a find !\u00bb\n", + "MOST (2046, 0.7372332215309143): \u00abi thought this movie would be dumb , but i really liked it . people i know hate it because spirit was the only horse that talked . well , so what ? the songs were good , and the horses didn't need to talk to seem human . i wouldn't care to own the movie , and i would love to see it again . 8/10\u00bb\n", "\n", - "MEDIAN (84045, 0.33689069747924805): \u00abembarrassingly bad , low-budget italian-made war movie set in holland in the dying days of wwii . a tedious , plodding storyline concerning a plot to steal some diamonds from a german hq , awful acting and dreadful editing make this movie a prize turkey from the opening scene right through to the cringeworthy oh-so-60s `romantic' ending which will have you reaching for the puke bucket - that is if you haven't already reached for the `off' button long before . the worst performances come from john ireland as captain o'connor and the blonde female lead , whose name escapes me . 
she plays the `love interest' to our rugged leading man . perhaps it wasn't entirely her fault , as back then female romantic leads , especially in action movies , were often written as weak , wishy-washy , sobbing , super-sensitive emotional jellyfish . this one is no exception , and the second , supporting female character is just as bad . simply nauseating to watch . even the action scenes ( which are few and far between , except towards the end ) are boring and predictable . most ludicrously , in the climatic battle , we have rugged leading man and his two mates holed up in some sandbagged bunker and effortlessly gunning down endless attacking germans right , left and centre . the germans of course are all terrible shots and even seem to be eager cannon-fodder , as they make little or no effort to take cover , dying spectacularly in droves amid much flailing of arms and comic-book shouts of `aaaaargh' . this is glorification of war at its very worst . then suddenly - right in the middle of the battle - the resistance guy pulls up completely unmolested in a stolen german jeep and trots effortlessly along a convenient trench - which leads directly into the bunker and which somehow hundreds of germans approaching from all sides have failed to spot - and calmly joins our heroes inside the bunker . another scene of crass stupidity that really must be seen to be believed has captain o'connor flying over the german lines in a reconnaissance plane which , with the help of some clumsily-inserted old newsreel footage , is suddenly and miraculously transformed into a heavy bomber disgorging its massive payload from wide-open bomb bays and pulverising the germans beneath , before once more instantaneously reverting to being a small reconnaissance plane again . the concept of an ongoing truce between the resistance fighters and the occupying german army also seems ludicrous to me , yet this is a central theme of the movie . 
the english title of the film was obviously inspired by `the dirty dozen' ( which was made around the same time ) but it doesn't deserve to be mentioned in the same breath . the original italian title of this film ( dalle ardenne all' inferno ) , the sleeve notes for the english language video release are also grossly misleading . this film has absolutely nothing to do with the battle of the ardennes . the ardennes isn't even in holland - it's a part of belgium - which indicates that the film-makers' knowledge of world war ii events and geography was just as limited as their ability to make even a half-decent film . don't waste two hours of your valuable time on this rubbish . one of the worst movies i've had the misfortune to sit through - and i've sat through some garbage ! !\u00bb\n", + "MEDIAN (6999, 0.4129640758037567): \u00abokay , the recent history of star trek has not been good . the next generation faded in its last few seasons , ds9 boldly stayed where no one had stayed before , and voyager started very bad and never really lived up to its promise . so , when they announced a new star trek series , i did not have high expectations . and , the first episode , broken bow , did have some problems . but , overall it was solid trek material and a good romp . i'll get the nits out of the way first . the opening theme is dull and i don't look forward to sitting through it regularly , but that's what remotes are for . what was really bad was the completely gratuitous lotion rubbing scene that just about drove my wife out of the room . they need to cut that nonsense out . but , the plot was strong and moved along well . the characters , though still new , seem to be well rounded and not always what you would expect . the vulcans are clearly being presented very differently than before , with a slightly ominous theme . 
i particularly liked the linguist , who is the first star trek character to not be able to stand proud in the face of death , but rather has to deal with her phobias and fears . they seemed to stay true to trek lore , something that has been a significant problem in past series , though they have plenty of time to bring us things like shooting through shields , the instant invention of technology that can fix anything , and the inevitable plethora of time-travel stories . anyone want to start a pool on how long before the borg show up ? all in all , the series has enormous potential . they are seeing the universe with fresh eyes . we have the chance to learn how things got the way they were in the later series . how did the klingons go from just insulting to war ? how did we meet the romulans ? how did the federation form and just who put earth in charge . why is the prime directive so important ? if they address these things rather than spitting out time travel episodes , this will be an interesting series . my favorite line : zephram cochran saying \" where no man has gone before \" ( not \" no one \" )\u00bb\n", "\n", - "LEAST (44173, -0.15633562207221985): \u00abfrom what i understand , fox was embarrassed they released a pg-13 alien/predator movie not so long ago . it was not well received by any means . not exactly sure where to go next , seeing as they thought anderson was the best director for the franchise and they had produced a true sci-fi gem , fox turned to it's small , but knowledgeable group of monkeys for answers . these monkeys were by no means veterans of writing sci-fi flicks , but had seen burton's planet of the apes remake and house of the dead . their first task : hire actors . fox gave them a reasonable budget but the monkeys wanted to save the money . they hired fifteen tv actors shortly after . now , the script . the monkeys wanted to save more of the budget so they wrote the movie themselves . 
leaving out important aspects of the two franchises was the easy part . thinking of great new lines for the general audience to remember years down the line - that was more difficult . they butted heads awhile and came up with a truly award-winning screenplay equipped with clich\u00e9 characters , idiotic decisions an gaping plot holes . fox was pleased thus far with the results but wanted to see what was to become of the centerpieces to the film - the aliens and predators . the monkeys again wanted to save money in the budget so they decided to trash the great robotics used in the otherwise terrible avp original and go with the man-in-the-suit alien seen in the old films . the actors playing the aliens had trouble fitting into the suits as they weren't properly sized by the monkeys so they jiggled their plastic heads throughout the film with honor . as for the predators , the monkeys decided one predator was enough this time around ( again , saving budget ) to fight the hordes of aliens that seemingly come out of nowhere . but what about the effects , you ask ? come on now , people . they may be monkeys but they clearly knew cgi would play a key role in the film . without diving into the budget , the monkeys used a standard final cut program and cut and pasted some very nice fire and spark effects throughout . putting red and green filters over the camera lens provided some excellent predator visions . the setting was something the monkeys thought long and hard about . if this was to be on earth , in colorado of all places , they needed to make it realistic . this was where they admitted they might have made a mistake . see , the monkeys didn't have proper training in this department so they thought turning the lights off in the city and having the movie play out in the dead of night and in the rain was the right thing to do . they simply forgot people like to see the creatures instead of looking at shadows and rain the whole film . 
to add insult to injury , the monkeys accidentally filmed all the fight scenes incredibly close so no one could see what was fighting or who it was . but again , rookie mistake . the rating . fox told the monkeys to make the movie r-rated . that was easy . without showing how many of the injuries or deaths actually happened , the monkeys made a habit of showing the carnage after the fact . it was simple : the viewers got the gore they desired and the monkeys didn't have to film the majority of action shots involving that violence . some of the actors originally had questions concerning the screenplay . why does a blue liquid the predator has endless amounts of magically disintegrate whatever he wants it to and nothing more than that ? why is an ex-convict driving around in a police car the entire movie ? why did the monkeys forget to show a full body shot of the aliens ? why does a clock play a more memorable role than any of the main characters ? the list of questions just kept growing but the monkeys ignored them and finished their masterpiece . fox was thrilled with their work . so thrilled that they opened the movie nationwide on christmas day and even spent a few bucks advertising it the week before it came out . the monkeys had successfully made another installment in these cherished franchises . but some ask , what ever happened to the budget the monkeys forgot to use ? they put it towards their next film : aliens vs . predator vs . hulk hogan . they knew the general public would be upset with the title but they have since released this statement : \" to the people- do not worry about our upcoming film . it will be rated r and will have violence . \" and everyone lived happily ever after . the end .\u00bb\n", + "LEAST (16617, 0.015464222989976406): \u00abi saw this movie during a tolkien-themed interim class during my sophomore year of college . 
i was seated unfortunately close to the screen and my professor chose me to serve as a whipping boy- everyone else was laughing , but they weren't within constant eyesight . let's get it out of the way : the peter jackson 'lord of the rings' films do owe something to the bakshi film . in jackson's version of the fellowship of the ring , for instance , the scene in which the black riders assault the empty inn beds is almost a complete carbon copy of the scene in bakshi's film , shot by shot . you could call this plagiarism or homage , depending on your agenda . i'm sure the similarities don't stop there . i'm not going to do any research to find out what they are , because that would imply i have some mote of respect for this film . i'm sure others have outlined the similarities- look around . this movie is a complete train wreck in every sense of the metaphor , and many , many people died in the accident . i've decided to list what i can remember in a more or less chronological fashion- if i've left out anything else that offended me it's because i'm completely overwhelmed , confronted with a wealth of failure ( and , at high points , mediocrity ) . *due to heavy use of rotoscoping , gandalf is no longer a gentle , wise wizard but a wildly flailing prophet of doom ( whose hat inexplicably changes color once or twice during the course of the film ) . *saruman the white is sometimes referred to as 'aruman' during the film , without explanation . he wears purple and red for some mysterious reason . *sam is flat out hideous . the portrayal of his friendship with frodo is strangely childlike and unsatisfying . yes , hobbits are small like children , but they are not children . *merry and pippin are never introduced--they simply appear during a scene change with a one-sentence explanation . the film is filled with sloppy editing like this . *frodo , sam , pippin and merry are singing merrily as they skip through along the road . 
one of the hobbits procures a lute at least twice as large as he is from behind his back--which was not visible before--and begins strumming in typical fantasy bard fashion as they all break into \" la-la-la \" s . awful . *aragorn , apparently , is a native american dressed in an extremely stereotypical fantasy tunic ( no pants ) , complete with huge , square pilgrim belt buckle . he is arguably the worst swordsman in the entire movie--oftentimes he gets one wobbly swing in before being knocked flat on his ass . *the black riders appear more like lepers than menacing instruments of evil . they limp everywhere they go at a painfully slow pace . this is disturbing to be sure , but not frightening . *the scene before the black riders attempt to cross the ford of bruinen ( in which they stare at frodo , who is on the other side on horseback ) goes on forever , during which time the riders rear their horses in a vaguely threatening manner and . . . do nothing else . the scene was probably intended to illustrate frodo's hallucinatory decline as he succumbs to his wound . it turns out to be more plodding than anything else . *gimli the dwarf is just as tall as legolas the elf . he's a dwarf . there is simply no excuse for that . he also looks like a bastardized david the gnome . it's a crude but accurate description . *boromir appears to have pilfered elmer fudd's golden viking armor from that bugs bunny opera episode . he looks ridiculous . *despite the similarity to tolkien's illustration , the balrog is howl inducing and the least-threatening villain in the entire film . it looks like someone wearing pink bedroom slippers , and it's barely taller than gandalf . \" purists \" may prefer this balrog , but i'll take jackson's version any day . *the battle scenes are awkward and embarrassing . almost none of the characters display any level of competency with their armaments . 
i'm not asking for action-packed scenes like those in jackson's film , but they are supposed to be fighting . *treebeard makes a very short appearance , and i was sorry he bothered to show up at all . watch the film , you'll see what i mean . alright , now for the good parts of the film . *some of the voice acting is pretty good . it isn't that aragorn sounds bad , he just looks kind of like the jolly green giant . *galadriel is somewhat interesting in this portrayal ; like tom bombadil , she seems immune to the ring's powers of temptation , and her voice actress isn't horrible either . *boromir's death isn't as heart wrenching as in jackson's portrayal of the same scene , but it's still appropriately dramatic ( and more true to his death in the book , though i don't believe jackson made a mistake shooting it the way he did ) . *as my professor pointed out ( between whispered threats ) , the orcs ( mainly at helm's deep , if i'm correct ) resemble the war-ravaged corpses of soldiers , a political statement that works pretty well if you realize what's being attempted . *while this isn't really a positive point about the film , bakshi can't be blamed for the majority of the failures in this movie , or so i've been told--the project was on a tight budget , and late in its production he lost creative control to some of the higher-ups ( who i'm sure hadn't read the books ) . let me be clear : i respect bakshi for even attempting something of this magnitude . i simply have a hard time believing he was happy with the final product . overall , i cannot in any way recommend this blasphemous adaptation of tolkien's classic trilogy even for laughs , unless you've already read the books and have your own visualizations of the characters , places and events . 
i'm sure somebody , somewhere , will pick a copy of this up in confusion ; if you do , keep an open mind and glean what good you can from it .\u00bb\n", "\n" ] } ], - "prompt_number": 13 + "prompt_number": 14 }, { "cell_type": "markdown", @@ -1572,7 +1589,7 @@ "language": "python", "metadata": {}, "outputs": [], - "prompt_number": 14 + "prompt_number": 15 }, { "cell_type": "code", @@ -1586,8 +1603,8 @@ " if word_models[0].vocab[word].count > 10:\n", " break\n", "# or just pick a word from the relevant domain:\n", - "# word = 'plot'\n", - "similars_per_model = [str(model.most_similar(word,topn=20)).replace('), ','),
\\n') for model in word_models]\n", + "word = 'comedy/drama'\n", + "similars_per_model = [str(model.most_similar(word, topn=20)).replace('), ','),
\\n') for model in word_models]\n", "similar_table = (\"
\" +\n", " \"\".join([str(model) for model in word_models]) + \n", " \"
\" +\n", @@ -1608,74 +1625,74 @@ }, { "html": [ - "
Doc2Vec(dm/c,d100,n5,w5,mc2,t4)Doc2Vec(dbow,d100,n5,mc2,t4)Doc2Vec(dm/m,d100,n5,w10,mc2,t4)
[('comedy', 0.7096928358078003),
\n", - "('drama', 0.6825233101844788),
\n", - "('dramedy', 0.6664647459983826),
\n", - "('thriller', 0.6615678071975708),
\n", - "('horror/comedy', 0.6410363912582397),
\n", - "('adventure', 0.6175029277801514),
\n", - "('chiller', 0.5992485284805298),
\n", - "('melodrama', 0.5929774045944214),
\n", - "('romance', 0.576662540435791),
\n", - "('romp', 0.5749073028564453),
\n", - "('science-fiction', 0.5690299868583679),
\n", - "('farce', 0.5652514100074768),
\n", - "('weeper', 0.5628592371940613),
\n", - "('drama/comedy', 0.5627389550209045),
\n", - "('whodunit', 0.5624251961708069),
\n", - "('sci-fi', 0.5603950023651123),
\n", - "('mockumentary', 0.5558925271034241),
\n", - "('biopic', 0.5510786771774292),
\n", - "('sitcom', 0.5482240915298462),
\n", - "('road-movie', 0.5472671985626221)]
[('adrenaline-pumping', 0.45663613080978394),
\n", - "('kipling', 0.4251996576786041),
\n", - "('appears', 0.4016043245792389),
\n", - "('five-second', 0.3902825713157654),
\n", - "(\"fmv's\", 0.3895858824253082),
\n", - "('aardvarks', 0.3821781873703003),
\n", - "('promulgated', 0.3801535367965698),
\n", - "('inert', 0.37795290350914),
\n", - "('floorboards', 0.37393927574157715),
\n", - "(\"skeletor's\", 0.37129074335098267),
\n", - "('generate', 0.36898282170295715),
\n", - "('open-ended', 0.36304017901420593),
\n", - "('`i', 0.36297476291656494),
\n", - "('inching', 0.3623065948486328),
\n", - "('digestive', 0.36074209213256836),
\n", - "('yoji', 0.36062514781951904),
\n", - "('bergman', 0.36027780175209045),
\n", - "(\"hodder's\", 0.35910874605178833),
\n", - "('40-something', 0.355363667011261),
\n", - "('bushido', 0.35415762662887573)]
[('comedy-drama', 0.6464394330978394),
\n", - "('thriller', 0.6074070930480957),
\n", - "('comedy', 0.597672700881958),
\n", - "('dramedy', 0.5864953994750977),
\n", - "('action-drama', 0.5831291079521179),
\n", - "('actioner', 0.5727459192276001),
\n", - "('potboiler', 0.5611938238143921),
\n", - "('road-movie', 0.5596767663955688),
\n", - "('weeper', 0.5421388149261475),
\n", - "('romcom', 0.5401057004928589),
\n", - "('chiller', 0.5400895476341248),
\n", - "('drama', 0.5357507467269897),
\n", - "('flick', 0.5278905630111694),
\n", - "('action/thriller', 0.5270429253578186),
\n", - "('diversion', 0.5243188738822937),
\n", - "('action-comedy', 0.5170137882232666),
\n", - "('confection', 0.5149624347686768),
\n", - "('telemovie', 0.5138946771621704),
\n", - "('yarn', 0.5052254796028137),
\n", - "('farce', 0.5036333799362183)]
" + "
Doc2Vec(dm/c,d100,n5,w5,mc2,t8)Doc2Vec(dbow,d100,n5,mc2,t8)Doc2Vec(dm/m,d100,n5,w10,mc2,t8)
[('comedy', 0.7255545258522034),
\n", + "('thriller', 0.6946465969085693),
\n", + "('drama', 0.6763534545898438),
\n", + "('romance', 0.6251884698867798),
\n", + "('dramedy', 0.6217159032821655),
\n", + "('melodrama', 0.6156137585639954),
\n", + "('adventure', 0.6091135740280151),
\n", + "('farce', 0.6034293174743652),
\n", + "('chiller', 0.5948368906974792),
\n", + "('romantic-comedy', 0.5876704454421997),
\n", + "('fantasy', 0.5863304138183594),
\n", + "('mystery/comedy', 0.577541708946228),
\n", + "('whodunit', 0.572147011756897),
\n", + "('biopic', 0.5679721832275391),
\n", + "('thriller/drama', 0.5630226731300354),
\n", + "('sitcom', 0.5574496984481812),
\n", + "('slash-fest', 0.5573585033416748),
\n", + "('mystery', 0.5542301535606384),
\n", + "('potboiler', 0.5519827604293823),
\n", + "('mockumentary', 0.5490710139274597)]
[('1000%', 0.42290645837783813),
\n", + "(\"gymnast's\", 0.4180164337158203),
\n", + "('hollywoodland', 0.3898555636405945),
\n", + "('cultures', 0.3857914209365845),
\n", + "('hooda', 0.3851744532585144),
\n", + "('cites', 0.38047513365745544),
\n", + "(\"78's\", 0.3792475461959839),
\n", + "(\"dormael's\", 0.3775535225868225),
\n", + "('jokester', 0.3725704252719879),
\n", + "('impelled', 0.36853262782096863),
\n", + "('lia', 0.3684236407279968),
\n", + "('snivelling', 0.3683513104915619),
\n", + "('astral', 0.36715900897979736),
\n", + "('euro-exploitation', 0.35853487253189087),
\n", + "(\"serra's\", 0.3578598201274872),
\n", + "('down-on-their-luck', 0.3576606214046478),
\n", + "('rowles', 0.3567575514316559),
\n", + "('romantica', 0.3549702763557434),
\n", + "('bonham-carter', 0.354231059551239),
\n", + "('1877', 0.3541453182697296)]
[('comedy-drama', 0.6274900436401367),
\n", + "('comedy', 0.5986765623092651),
\n", + "('thriller', 0.5765297412872314),
\n", + "('road-movie', 0.5615973472595215),
\n", + "('dramedy', 0.5580120086669922),
\n", + "('time-killer', 0.5497636795043945),
\n", + "('potboiler', 0.5456510782241821),
\n", + "('comedy/', 0.5439876317977905),
\n", + "('actioner', 0.5423712134361267),
\n", + "('diversion', 0.541743278503418),
\n", + "('romcom', 0.5402226448059082),
\n", + "('rom-com', 0.5358527302742004),
\n", + "('drama', 0.5320745706558228),
\n", + "('chiller', 0.5229591727256775),
\n", + "('romp', 0.5228806734085083),
\n", + "('horror/comedy', 0.5219299793243408),
\n", + "('weeper', 0.5195824503898621),
\n", + "('mockumentary', 0.5149033069610596),
\n", + "('camp-fest', 0.5122634768486023),
\n", + "('mystery/comedy', 0.5020694732666016)]
" ], "metadata": {}, "output_type": "pyout", - "prompt_number": 16, + "prompt_number": 17, "text": [ - "" + "" ] } ], - "prompt_number": 16 + "prompt_number": 17 }, { "cell_type": "markdown", @@ -1714,8 +1731,8 @@ "output_type": "stream", "stream": "stdout", "text": [ - "Doc2Vec(dm/c,d100,n5,w5,mc2,t4): 28.13% correct (5650 of 20086)\n", - "Doc2Vec(dbow,d100,n5,mc2,t4): 0.01% correct (2 of 20086)" + "Doc2Vec(dm/c,d100,n5,w5,mc2,t8): 28.70% correct (5746 of 20024)\n", + "Doc2Vec(dbow,d100,n5,mc2,t8): 0.01% correct (2 of 20024)" ] }, { @@ -1723,7 +1740,7 @@ "stream": "stdout", "text": [ "\n", - "Doc2Vec(dm/m,d100,n5,w10,mc2,t4): 27.49% correct (5522 of 20086)" + "Doc2Vec(dm/m,d100,n5,w10,mc2,t8): 27.24% correct (5454 of 20024)" ] }, { @@ -1734,13 +1751,13 @@ ] } ], - "prompt_number": 18 + "prompt_number": 26 }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Even though this is a tiny, domain-specific dataset, it shows some meagher capability on the general word analogies \u2013 at least for the DM/concat and DM/mean models which actually train word vectors. (The untrained random-initialized words of the DBOW model of course fail miserably.)" + "Even though this is a tiny, domain-specific dataset, it shows some meager capability on the general word analogies \u2013 at least for the DM/concat and DM/mean models which actually train word vectors. (The untrained random-initialized words of the DBOW model of course fail miserably.)" ] }, { @@ -1773,7 +1790,7 @@ "collapsed": false, "input": [ "from gensim.models import Word2Vec\n", - "w2v_g100b = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz',binary=True)\n", + "w2v_g100b = Word2Vec.load_word2vec_format('GoogleNews-vectors-negative300.bin.gz', binary=True)\n", "w2v_g100b.compact_name = 'w2v_g100b'\n", "word_models.append(w2v_g100b)" ], @@ -1823,4 +1840,4 @@ "metadata": {} } ] -} +} \ No newline at end of file