diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 213a93cc8b..f859e261a2 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -422,7 +422,7 @@ def init_sims(self, replace=False): self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL) np_divide(self.doctag_syn0, sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis], self.doctag_syn0norm) - def most_similar(self, positive=[], negative=[], topn=10, clip_start=0, clip_end=None, indexer=None): + def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip_end=None, indexer=None): """ Find the top-N most similar docvecs known from training. Positive docs contribute positively towards the similarity, negative docs negatively. @@ -436,6 +436,11 @@ def most_similar(self, positive=[], negative=[], topn=10, clip_start=0, clip_end range of the underlying doctag_syn0norm vectors. (This may be useful if the ordering there was chosen to be significant, such as more popular tag IDs in lower indexes.) """ + if positive is None: + positive = [] + if negative is None: + negative = [] + self.init_sims() clip_end = clip_end or len(self.doctag_syn0norm) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 4957dc83f3..3568f43ab5 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -287,7 +287,7 @@ def word_vec(self, word, use_norm=False): else: raise KeyError("word '%s' not in vocabulary" % word) - def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None): + def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): """ Find the top-N most similar words. Positive words contribute positively towards the similarity, negative words negatively. @@ -310,6 +310,11 @@ def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, i [('queen', 0.50882536), ...] """ + if positive is None: + positive = [] + if negative is None: + negative = [] + self.init_sims() if isinstance(positive, string_types) and not negative: @@ -442,7 +447,7 @@ def nbow(document): # Compute WMD. return emd(d1, d2, distance_matrix) - def most_similar_cosmul(self, positive=[], negative=[], topn=10): + def most_similar_cosmul(self, positive=None, negative=None, topn=10): """ Find the top-N most similar words, using the multiplicative combination objective proposed by Omer Levy and Yoav Goldberg in [4]_. Positive words still contribute @@ -464,6 +469,11 @@ def most_similar_cosmul(self, positive=[], negative=[], topn=10): .. [4] Omer Levy and Yoav Goldberg. Linguistic Regularities in Sparse and Explicit Word Representations, 2014. """ + if positive is None: + positive = [] + if negative is None: + negative = [] + self.init_sims() if isinstance(positive, string_types) and not negative: diff --git a/gensim/models/lda_dispatcher.py b/gensim/models/lda_dispatcher.py index 484e497b5f..ea54a9c18b 100755 --- a/gensim/models/lda_dispatcher.py +++ b/gensim/models/lda_dispatcher.py @@ -56,14 +56,14 @@ class Dispatcher(object): There should never be more than one dispatcher running at any one time. """ - def __init__(self, maxsize=MAX_JOBS_QUEUE, ns_conf={}): + def __init__(self, maxsize=MAX_JOBS_QUEUE, ns_conf=None): """ Note that the constructor does not fully initialize the dispatcher; use the `initialize()` function to populate it with workers etc. """ self.maxsize = maxsize - self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later) - self.ns_conf = ns_conf + self.callback = None # a pyro proxy to this object (unknown at init time, but will be set later) + self.ns_conf = ns_conf if ns_conf is not None else {} @Pyro4.expose def initialize(self, **model_params): diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index f8c3dfc63f..4dc1a024b8 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -195,7 +195,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, distributed=False, chunksize=2000, passes=1, update_every=1, alpha='symmetric', eta=None, decay=0.5, offset=1.0, eval_every=10, iterations=50, gamma_threshold=0.001, minimum_probability=0.01, - random_state=None, ns_conf={}, minimum_phi_value=0.01, + random_state=None, ns_conf=None, minimum_phi_value=0.01, per_word_topics=False, callbacks=None): """ If given, start training from the iterable `corpus` straight away. If not given, @@ -316,6 +316,9 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, # set up distributed version try: import Pyro4 + if ns_conf is None: + ns_conf = {} + with utils.getNS(**ns_conf) as ns: from gensim.models.lda_dispatcher import LDA_DISPATCHER_PREFIX self.dispatcher = Pyro4.Proxy(ns.list(prefix=LDA_DISPATCHER_PREFIX)[LDA_DISPATCHER_PREFIX]) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index b5b60361b1..a37de25158 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1231,7 +1231,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut self.syn0_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0 stops further changes logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.wv.syn0.shape, fname)) - def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None): + def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): """ Deprecated. Use self.wv.most_similar() instead. Refer to the documentation for `gensim.models.KeyedVectors.most_similar` @@ -1245,7 +1245,7 @@ def wmdistance(self, document1, document2): """ return self.wv.wmdistance(document1, document2) - def most_similar_cosmul(self, positive=[], negative=[], topn=10): + def most_similar_cosmul(self, positive=None, negative=None, topn=10): """ Deprecated. Use self.wv.most_similar_cosmul() instead. Refer to the documentation for `gensim.models.KeyedVectors.most_similar_cosmul` diff --git a/gensim/summarization/graph.py b/gensim/summarization/graph.py index bfed410b5e..8424873e35 100644 --- a/gensim/summarization/graph.py +++ b/gensim/summarization/graph.py @@ -77,7 +77,7 @@ def add_node(self, node, attrs=None): pass @abstractmethod - def add_edge(self, edge, wt=1, label='', attrs=[]): + def add_edge(self, edge, wt=1, label='', attrs=None): """ Add an edge to the graph connecting two nodes. @@ -172,7 +172,9 @@ def neighbors(self, node): def has_node(self, node): return node in self.node_neighbors - def add_edge(self, edge, wt=1, label='', attrs=[]): + def add_edge(self, edge, wt=1, label='', attrs=None): + if attrs is None: + attrs = [] u, v = edge if v not in self.node_neighbors[u] and u not in self.node_neighbors[v]: self.node_neighbors[u].append(v) diff --git a/gensim/utils.py b/gensim/utils.py index 387a9a4193..cb7a204511 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1011,15 +1011,18 @@ def getNS(host=None, port=None, broadcast=True, hmac_key=None): raise RuntimeError("Pyro name server not found") -def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf={}): +def pyro_daemon(name, obj, random_suffix=False, ip=None, port=None, ns_conf=None): """ Register object with name server (starting the name server if not running yet) and block until the daemon is terminated. The object is registered under `name`, or `name`+ some random suffix if `random_suffix` is set. """ + if ns_conf is None: + ns_conf = {} if random_suffix: name += '.' + hex(random.randint(0, 0xffffff))[2:] + import Pyro4 with getNS(**ns_conf) as ns: with Pyro4.Daemon(ip or get_my_ip(), port or 0) as daemon: