-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix docstrings forgensim.models.hdpmodel
, gensim.models.lda_worker
& gensim.models.lda_dispatcher
(#1667)
#1912
Changes from 1 commit
0ffd13c
daab82d
592d32a
b3fd8c8
e833ede
4f58ac8
dc86dad
430f772
725a102
7bae16e
b1458a8
ce47b1d
9b68b16
72d5f0f
66d7b18
2e8249b
0256d7c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,13 +4,13 @@ | |
# Copyright (C) 2011 Radim Rehurek <[email protected]> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
""" | ||
Worker ("slave") process used in computing distributed LDA. Run this script \ | ||
on every node in your cluster. If you wish, you may even run it multiple times \ | ||
on a single machine, to make better use of multiple cores (just beware that \ | ||
memory footprint increases accordingly). | ||
"""Worker ("slave") process used in computing distributed LDA. | ||
|
||
Run this script on every node in your cluster. If you wish, you may even | ||
run it multiple times on a single machine, to make better use of multiple | ||
cores (just beware that memory footprint increases accordingly). | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please look at #1892, this is really good way how to document distributed stuff (instruction of running, showing arguments of script in automatic way, etc) |
||
Example: python -m gensim.models.lda_worker | ||
|
||
""" | ||
|
||
|
||
|
@@ -40,11 +40,35 @@ | |
|
||
|
||
class Worker(object): | ||
"""Used as a Pyro class with exposed methods. | ||
|
||
Exposes every non-private method and property of the class automatically | ||
to be available for remote access. | ||
|
||
Attributes | ||
---------- | ||
model : :obj: of :class:`~gensim.models.ldamodel.LdaModel` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no need to write |
||
|
||
""" | ||
|
||
def __init__(self): | ||
"""Partly initializes the model.""" | ||
self.model = None | ||
|
||
@Pyro4.expose | ||
def initialize(self, myid, dispatcher, **model_params): | ||
"""Fully initializes the worker. | ||
|
||
Parameters | ||
---------- | ||
myid : int | ||
An ID number used to identify this worker in the dispatcher object. | ||
dispatcher : :class:`~gensim.models.lda_dispatcher.Dispatcher` | ||
The dispatcher responsible for scheduling this worker. | ||
**model_params | ||
Keyword parameters to initialize the inner LDA model, see :class:`~gensim.models.ldamodel.LdaModel`. | ||
|
||
""" | ||
self.lock_update = threading.Lock() | ||
self.jobsdone = 0 # how many jobs has this worker completed? | ||
# id of this worker in the dispatcher; just a convenience var for easy access/logging TODO remove? | ||
|
@@ -59,6 +83,12 @@ def initialize(self, myid, dispatcher, **model_params): | |
def requestjob(self): | ||
""" | ||
Request jobs from the dispatcher, in a perpetual loop until `getstate()` is called. | ||
|
||
Raises | ||
------ | ||
RuntimeError | ||
Worker has to be initialised before receiving jobs. | ||
|
||
""" | ||
if self.model is None: | ||
raise RuntimeError("worker must be initialized before receiving jobs") | ||
|
@@ -79,6 +109,14 @@ def requestjob(self): | |
|
||
@utils.synchronous('lock_update') | ||
def processjob(self, job): | ||
"""Incrementally processes the job and potentially logs progress. | ||
|
||
Parameters | ||
---------- | ||
job : {iterable of list of (int, float), scipy.sparse.csc} | ||
Stream of document vectors or sparse matrix of shape (`num_terms`, `num_documents`). | ||
|
||
""" | ||
logger.debug("starting to process job #%i", self.jobsdone) | ||
self.model.do_estep(job) | ||
self.jobsdone += 1 | ||
|
@@ -89,11 +127,20 @@ def processjob(self, job): | |
|
||
@Pyro4.expose | ||
def ping(self): | ||
"""Test the connectivity with Worker.""" | ||
return True | ||
|
||
@Pyro4.expose | ||
@utils.synchronous('lock_update') | ||
def getstate(self): | ||
"""Log and get the LDA model's current state. | ||
|
||
Returns | ||
------- | ||
result : :obj: of `~gensim.models.ldamodel.LdaState` | ||
The current state. | ||
|
||
""" | ||
logger.info("worker #%i returning its state after %s jobs", self.myid, self.jobsdone) | ||
result = self.model.state | ||
assert isinstance(result, ldamodel.LdaState) | ||
|
@@ -104,6 +151,14 @@ def getstate(self): | |
@Pyro4.expose | ||
@utils.synchronous('lock_update') | ||
def reset(self, state): | ||
"""Reset the worker by setting sufficient stats to 0. | ||
|
||
Parameters | ||
---------- | ||
state : :obj: of :class:`~gensim.models.ldamodel.LdaState` | ||
Encapsulates information for distributed computation of LdaModel objects. | ||
|
||
""" | ||
assert state is not None | ||
logger.info("resetting worker #%i", self.myid) | ||
self.model.state = state | ||
|
@@ -113,11 +168,13 @@ def reset(self, state): | |
|
||
@Pyro4.oneway | ||
def exit(self): | ||
"""Terminate the worker.""" | ||
logger.info("terminating worker #%i", self.myid) | ||
os._exit(0) | ||
|
||
|
||
def main(): | ||
"""Set up argument parser,logger and launches pyro daemon.""" | ||
parser = argparse.ArgumentParser(description=__doc__) | ||
parser.add_argument("--host", help="Nameserver hostname (default: %(default)s)", default=None) | ||
parser.add_argument("--port", help="Nameserver port (default: %(default)s)", default=None, type=int) | ||
|
@@ -146,4 +203,4 @@ def main(): | |
|
||
|
||
if __name__ == '__main__': | ||
main() | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
First of all, please fix PEP8 problems (almost lead spaces), look at travis log https://travis-ci.org/RaRe-Technologies/gensim/jobs/342495787#L511
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. Also, should I add a section for module level attributes such as HUGE_TIMEOUT ,MAX_JOBS_QUEUE,etc in lda_dispatcher.py ?