Skip to content

Commit

Permalink
Add new '-j' option for running checks in sub-processes.
Browse files Browse the repository at this point in the history
Patch by Michal Nowikowski.
  • Loading branch information
PCManticore committed Oct 15, 2014
1 parent 4ea910d commit c03fefd
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 23 deletions.
2 changes: 2 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ ChangeLog for Pylint
wrong-spelling-in-comment, wrong-spelling-in-docstring.
New options: spelling-dict, spelling-ignore-words.

* Add new '-j' option for running checks in sub-processes.

* Added new checks for line endings if they are mixed (LF vs CRLF)
or if they are not as expected. New messages: mixed-line-endings,
unexpected-line-ending-format. New option: expected-line-ending-format.
Expand Down
2 changes: 1 addition & 1 deletion doc/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest all

help:
@echo "Please use \`make <target>' where <target> is one of"
Expand Down
27 changes: 24 additions & 3 deletions doc/run.rst
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ expression in special cases). For a full list of options, use ``--help``

Specifying all the options suitable for your setup and coding
standards can be tedious, so it is possible to use a configuration file to
specify the default values. You can specify a configuration file on the
specify the default values. You can specify a configuration file on the
command line using the ``--rcfile`` option. Otherwise, Pylint searches for a
configuration file in the following order and uses the first one it finds:

Expand All @@ -117,7 +117,7 @@ includes:
* Options appearing before ``--generate-rcfile`` on the Pylint command line

Of course you can also start with the default values and hand tune the
configuration.
configuration.

Other useful global options include:

Expand All @@ -128,5 +128,26 @@ Other useful global options include:
--output-format=<format> Select output format (text, html, custom).
--msg-template=<template> Modify text output message template.
--list-msgs Generate pylint's messages.
--full-documentation Generate pylint's full documentation, in reST
--full-documentation Generate pylint's full documentation, in reST
format.

Parallel execution
------------------

It is possible to speed up the execution of Pylint. If the machine running Pylint has more than one CPU,
then the files to be checked can be spread across Pylint sub-processes running on all processors.
This functionality is exposed via the ``-j`` command line parameter. It takes the number of sub-processes
that should be spawned. If the provided number is 0, then the number of CPUs will be used.
The default number is 1.

Example::

pylint -j 4 mymodule1.py mymodule2.py mymodule3.py mymodule4.py

This will spawn 4 parallel Pylint sub-processes. Each provided module will be checked in parallel.
Problems discovered by checkers are not displayed immediately. They are shown just after
a module has been completely checked.

There are some limitations in running checks in parallel in the current implementation.
It is not possible to use custom plugins (i.e. the ``--load-plugins`` option).
It is also not possible to use an initialization hook (i.e. the ``--init-hook`` option).
188 changes: 181 additions & 7 deletions lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,13 @@
from operator import attrgetter
from warnings import warn

from logilab.common.configuration import UnsupportedAction, OptionsManagerMixIn
try:
import multiprocessing
except ImportError:
multiprocessing = None

from logilab.common.configuration import (
UnsupportedAction, OptionsManagerMixIn, format_option_value)
from logilab.common.optik_ext import check_csv
from logilab.common.interface import implements
from logilab.common.textutils import splitstrip, unquote
Expand All @@ -51,12 +57,12 @@
MSG_TYPES, OPTION_RGX,
PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn, ReportsHandlerMixIn,
MessagesStore, FileState, EmptyReport,
expand_modules, tokenize_module)
expand_modules, tokenize_module, Message)
from pylint.interfaces import IRawChecker, ITokenChecker, IAstroidChecker, CONFIDENCE_LEVELS
from pylint.checkers import (BaseTokenChecker,
table_lines_from_stats,
initialize as checkers_initialize)
from pylint.reporters import initialize as reporters_initialize
from pylint.reporters import initialize as reporters_initialize, CollectingReporter
from pylint import config

from pylint.__pkginfo__ import version
Expand Down Expand Up @@ -155,6 +161,55 @@ def _warn_deprecated(option, optname, *args):
'type': opt_type, 'action': 'callback', 'callback': _warn_deprecated}


if multiprocessing is not None:
    class ChildLinter(multiprocessing.Process): # pylint: disable=no-member
        """Worker process that lints the modules handed to it through a queue.

        The process is created with
        ``args=(tasks_queue, results_queue, config)``.  It takes tasks
        (one-element lists holding a file or module name) from ``tasks_queue``
        until it reads the ``'STOP'`` sentinel.  For every task exactly one
        item is put on ``results_queue``: the tuple returned by
        ``_run_linter`` on success, or an empty dict when checking the module
        or sending its result failed.
        """

        def run(self):
            """Process tasks until the ``'STOP'`` sentinel is received."""
            tasks_queue, results_queue, config = self._args # pylint: disable=no-member

            for task in iter(tasks_queue.get, 'STOP'):
                file_or_module = task[0]
                try:
                    result = self._run_linter(config, file_or_module)
                except Exception as ex:
                    # Without this the worker would die silently and the
                    # parent would wait forever for a result that never comes.
                    # An empty dict is the same failure marker that is used
                    # below when a result cannot be sent.
                    print("internal error while checking module %s"
                          % file_or_module, file=sys.stderr)
                    print(ex, file=sys.stderr)
                    result = {}
                try:
                    results_queue.put(result)
                except Exception as ex:
                    print("internal error with sending report for module %s"
                          % file_or_module, file=sys.stderr)
                    print(ex, file=sys.stderr)
                    results_queue.put({})

        def _run_linter(self, config, file_or_module):
            """Check one file or module with a fresh linter.

            ``config`` maps option names to values prepared by the parent
            process.  Returns the tuple
            ``(file_or_module, base_name, current_name, msgs, stats,
            msg_status)`` where ``msgs`` holds the ``get_init_args()`` value
            of every collected message.
            """
            linter = PyLinter()

            # Register standard checkers.
            linter.load_default_plugins()
            # Load command line plugins.
            # TODO linter.load_plugin_modules(self._plugins)
            # i.e. Run options are not available here as they are patches to Pylinter options.
            # To fix it Run options should be moved to Pylinter class to make them
            # available here.

            linter.disable('pointless-except')
            linter.disable('suppressed-message')
            linter.disable('useless-suppression')

            # Copy config with skipping command-line specific options.
            linter_config = {}
            filter_options = {"symbols", "include-ids"}
            for opt_providers in six.itervalues(linter._all_options):
                for optname, _ in opt_providers.options:
                    if optname not in filter_options:
                        linter_config[optname] = config[optname]
            linter_config['jobs'] = 1 # Child does not parallelize any further.
            linter.load_configuration(**linter_config)

            linter.set_reporter(CollectingReporter())

            # Run the checks.
            linter.check(file_or_module)

            msgs = [m.get_init_args() for m in linter.reporter.messages]
            return (file_or_module, linter.file_state.base_name, linter.current_name,
                    msgs, linter.stats, linter.msg_status)


class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
BaseTokenChecker):
"""lint Python modules using external checkers.
Expand Down Expand Up @@ -286,7 +341,14 @@ def make_options():

('include-ids', _deprecated_option('i', 'yn')),
('symbols', _deprecated_option('s', 'yn')),
)

('jobs',
{'type' : 'int', 'metavar': '<n-processes>',
'short': 'j',
'default': 1,
'help' : '''Use multiple processes to speed up PyLint.''',
}), # jobs
)

option_groups = (
('Messages control', 'Options controling analysis messages'),
Expand Down Expand Up @@ -539,7 +601,7 @@ def prepare_checkers(self):
messages = set(msg for msg in checker.msgs
if msg[0] != 'F' and self.is_message_enabled(msg))
if (messages or
any(self.report_is_enabled(r[0]) for r in checker.reports)):
any(self.report_is_enabled(r[0]) for r in checker.reports)):
neededcheckers.append(checker)
# Sort checkers by priority
neededcheckers = sorted(neededcheckers, key=attrgetter('priority'),
Expand Down Expand Up @@ -574,6 +636,104 @@ def check(self, files_or_modules):

if not isinstance(files_or_modules, (list, tuple)):
files_or_modules = (files_or_modules,)

if self.config.jobs == 1:
self._do_check(files_or_modules)
else:
self._parallel_check(files_or_modules)

def _parallel_check(self, files_or_modules):
    """Spawn a defined number of subprocesses.

    Starts ``self.config.jobs`` ``ChildLinter`` processes, sends them every
    entry of ``files_or_modules`` (a list or tuple) through a task queue and
    collects one result per entry.  Collected messages are replayed on this
    linter's reporter, and the per-module stats are merged into
    ``self.stats`` and handed to every other checker.

    If a result cannot be received or unpacked, the children are stopped
    and the process exits with status 32.
    """

    manager = multiprocessing.Manager() # pylint: disable=no-member
    tasks_queue = manager.Queue() # pylint: disable=no-member
    results_queue = manager.Queue() # pylint: disable=no-member

    # Prepare configuration for child linters.
    config = {}
    for opt_providers in six.itervalues(self._all_options):
        for optname, optdict, val in opt_providers.options_and_values():
            config[optname] = format_option_value(optdict, val)

    # Reset stats.
    self.open()

    # Spawn child linters.
    childs = []
    for _ in range(self.config.jobs):
        cl = ChildLinter(args=(tasks_queue, results_queue, config))
        cl.start() # pylint: disable=no-member
        childs.append(cl)

    # send files to child linters
    for files_or_module in files_or_modules:
        tasks_queue.put([files_or_module])

    # collect results from child linters
    failed = False
    all_stats = []
    # Keep the current name unless the last entry's result is seen, so the
    # assignment at the end is always defined (e.g. for an empty input).
    last_module = getattr(self, 'current_name', None)
    for _ in files_or_modules:
        try:
            (
                file_or_module,
                self.file_state.base_name,
                module,
                messages,
                stats,
                msg_status
            ) = results_queue.get()
        except Exception as ex:
            # A child reports failure by sending an empty dict, which cannot
            # be unpacked into the six expected fields.
            print("internal error while receiving results from child linter",
                  file=sys.stderr)
            print(ex, file=sys.stderr)
            failed = True
            break

        # Results arrive in completion order, not submission order.
        if file_or_module == files_or_modules[-1]:
            last_module = module

        for msg in messages:
            msg = Message(*msg)
            self.set_current_module(module)
            self.reporter.handle_message(msg)

        all_stats.append(stats)
        self.msg_status |= msg_status

    # Stop child linters and wait for their completion.
    for _ in range(self.config.jobs):
        tasks_queue.put('STOP')
    for cl in childs:
        cl.join()

    if failed:
        print("Error occured, stopping the linter.", file=sys.stderr)
        sys.exit(32)

    all_stats.append(self.stats)
    all_stats = self._merge_stats(all_stats)
    self.stats = all_stats
    self.current_name = last_module

    # Insert stats data to local checkers.
    for checker in self.get_checkers():
        if checker is not self:
            checker.stats = self.stats

def _merge_stats(self, stats):
merged = {}
for stat in stats:
for key, item in six.iteritems(stat):
if key not in merged:
merged[key] = item
else:
if isinstance(item, dict):
merged[key].update(item)
else:
merged[key] = merged[key] + item
return merged

def _do_check(self, files_or_modules):
walker = PyLintASTWalker(self)
checkers = self.prepare_checkers()
tokencheckers = [c for c in checkers if implements(c, ITokenChecker)
Expand Down Expand Up @@ -610,7 +770,6 @@ def check(self, files_or_modules):
for msgid, line, args in self.file_state.iter_spurious_suppression_messages(self.msgs_store):
self.add_message(msgid, line, None, args)
# notify global end
self.set_current_module('')
self.stats['statement'] = walker.nbstatements
checkers.reverse()
for checker in checkers:
Expand Down Expand Up @@ -695,7 +854,7 @@ def open(self):
for msg_cat in six.itervalues(MSG_TYPES):
self.stats[msg_cat] = 0

def close(self):
def generate_reports(self):
"""close the whole package /module, it's time to make reports !
if persistent run, pickle results for later comparison
Expand Down Expand Up @@ -1009,6 +1168,20 @@ def __init__(self, args, reporter=None, exit=True):
if not args:
print(linter.help())
sys.exit(32)

if linter.config.jobs < 0:
print("Jobs number (%d) should be greater than 0"
% linter.config.jobs, file=sys.stderr)
sys.exit(32)
if linter.config.jobs > 1 or linter.config.jobs == 0:
if multiprocessing is None:
print("Multiprocessing library is missing, "
"fallback to single process", file=sys.stderr)
linter.set_option("jobs", 1)
else:
if linter.config.jobs == 0:
linter.config.jobs = multiprocessing.cpu_count()

# insert current working directory to the python path to have a correct
# behaviour
linter.prepare_import_path(args)
Expand All @@ -1023,6 +1196,7 @@ def __init__(self, args, reporter=None, exit=True):
data.print_stats(30)
else:
linter.check(args)
linter.generate_reports()
linter.cleanup_import_path()
if exit:
sys.exit(self.linter.msg_status)
Expand Down
15 changes: 14 additions & 1 deletion reporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def handle_message(self, msg):
Invokes the legacy add_message API by default."""
self.add_message(
msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column),
msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column),
msg.msg)

def add_message(self, msg_id, location, msg):
Expand Down Expand Up @@ -115,6 +115,19 @@ def on_close(self, stats, previous_stats):
pass


class CollectingReporter(BaseReporter):
    """Reporter that stores the messages it receives.

    Every message passed to ``handle_message`` is kept, in arrival order,
    in the ``messages`` list.
    """

    name = 'collector'

    def __init__(self):
        BaseReporter.__init__(self)
        # Messages received so far, oldest first.
        self.messages = list()

    def handle_message(self, msg):
        """Record ``msg`` at the end of ``self.messages``."""
        self.messages += [msg]


def initialize(linter):
"""initialize linter with reporters in this package """
utils.register_plugins(linter, __path__[0])
1 change: 1 addition & 0 deletions test/test_import_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def test_checker_dep_graphs(self):
l.global_set_option('ignore', ('func_unknown_encoding.py',))
try:
l.check('input')
l.generate_reports()
self.assertTrue(exists('import.dot'))
self.assertTrue(exists('ext_import.dot'))
self.assertTrue(exists('int_import.dot'))
Expand Down
9 changes: 8 additions & 1 deletion test/test_self.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,8 @@ def test_all(self):
HTMLReporter(StringIO()),
ColorizedTextReporter(StringIO())
]
self._runtest(['pylint.lint'], reporter=MultiReporter(reporters))
self._runtest(['pylint/test/functional/arguments.py'],
reporter=MultiReporter(reporters), code=1)

def test_no_ext_file(self):
self._runtest([join(HERE, 'input', 'noext')], code=0)
Expand Down Expand Up @@ -127,6 +128,12 @@ def test_no_out_encoding(self):
self._runtest([join(HERE, 'regrtest_data/no_stdout_encoding.py')],
out=strio)

# Run the linter with two jobs ('-j 2') on two functional-test input files.
# code=1 is presumably the expected exit status -- confirm against _runtest.
# NOTE(review): the skip condition is Windows *and* Python 2, while the reason
# text only mentions Python 2.X.
@unittest.skipIf(sys.platform.startswith("win") and sys.version_info[0] == 2,
"This test does not work on Python 2.X due to a bug in "
"multiprocessing.")
def test_parallel_execution(self):
self._runtest(['-j 2', 'pylint/test/functional/arguments.py',
'pylint/test/functional/bad_continuation.py'], code=1)

if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit c03fefd

Please sign in to comment.