# dsession.py
from __future__ import annotations
from collections.abc import Sequence
from enum import auto
from enum import Enum
from queue import Empty
from queue import Queue
import sys
from typing import Any
import warnings
import execnet
import pytest
from xdist.remote import Producer
from xdist.remote import WorkerInfo
from xdist.scheduler import EachScheduling
from xdist.scheduler import LoadFileScheduling
from xdist.scheduler import LoadGroupScheduling
from xdist.scheduler import LoadScheduling
from xdist.scheduler import LoadScopeScheduling
from xdist.scheduler import Scheduling
from xdist.scheduler import WorkStealingScheduling
from xdist.workermanage import NodeManager
from xdist.workermanage import WorkerController
class Interrupted(KeyboardInterrupt):
"""signals an immediate interruption."""
class DSession:
"""A pytest plugin which runs a distributed test session.
At the beginning of the test session this creates a NodeManager
instance which creates and starts all nodes. Nodes then emit
events processed in the pytest_runtestloop hook using the worker_*
methods.
Once a node is started it will automatically start running the
pytest mainloop with some custom hooks. This means a node
automatically starts collecting tests. Once tests are collected
it will wait for instructions.
"""
shouldstop: bool | str
def __init__(self, config: pytest.Config) -> None:
self.config = config
self.log = Producer("dsession", enabled=config.option.debug)
self.nodemanager: NodeManager | None = None
self.sched: Scheduling | None = None
self.shuttingdown = False
self.countfailures = 0
self.maxfail: int = config.getvalue("maxfail")
self.queue: Queue[tuple[str, dict[str, Any]]] = Queue()
self._session: pytest.Session | None = None
self._failed_collection_errors: dict[object, bool] = {}
self._active_nodes: set[WorkerController] = set()
self._failed_nodes_count = 0
self._max_worker_restart = get_default_max_worker_restart(self.config)
# summary message to print at the end of the session
self._summary_report: str | None = None
self.terminal = config.pluginmanager.getplugin("terminalreporter")
if self.terminal:
self.trdist = TerminalDistReporter(config)
config.pluginmanager.register(self.trdist, "terminaldistreporter")
@property
def session_finished(self) -> bool:
"""Return True if the distributed session has finished.
This means all nodes have executed all test items. This is
used by pytest_runtestloop to break out of its loop.
"""
return bool(self.shuttingdown and not self._active_nodes)
def report_line(self, line: str) -> None:
if self.terminal and self.config.option.verbose >= 0:
self.terminal.write_line(line)
@pytest.hookimpl(trylast=True)
def pytest_sessionstart(self, session: pytest.Session) -> None:
"""Creates and starts the nodes.
The nodes are set up to put their events onto self.queue. As
soon as nodes start they will emit the worker_workerready event.
"""
self.nodemanager = NodeManager(self.config)
nodes = self.nodemanager.setup_nodes(putevent=self.queue.put)
self._active_nodes.update(nodes)
self._session = session
@pytest.hookimpl
def pytest_sessionfinish(self) -> None:
"""Shutdown all nodes."""
nm = getattr(self, "nodemanager", None) # if not fully initialized
if nm is not None:
nm.teardown_nodes()
self._session = None
@pytest.hookimpl
def pytest_collection(self) -> bool:
# prohibit collection of test items in controller process
return True
@pytest.hookimpl(trylast=True)
def pytest_xdist_make_scheduler(
self,
config: pytest.Config,
log: Producer,
) -> Scheduling | None:
dist = config.getvalue("dist")
if dist == "each":
return EachScheduling(config, log)
if dist == "load":
return LoadScheduling(config, log)
if dist == "loadscope":
return LoadScopeScheduling(config, log)
if dist == "loadfile":
return LoadFileScheduling(config, log)
if dist == "loadgroup":
return LoadGroupScheduling(config, log)
if dist == "worksteal":
return WorkStealingScheduling(config, log)
return None
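# Illustrative sketch, not part of this module: because the hook above is
# registered with trylast=True, a plugin or conftest.py can provide its own
# pytest_xdist_make_scheduler implementation and take precedence. Assuming a
# hypothetical CustomScheduling class implementing the Scheduling protocol:
#
#     # conftest.py (hypothetical)
#     def pytest_xdist_make_scheduler(config, log):
#         if config.getvalue("dist") == "load":
#             return CustomScheduling(config, log)  # overrides LoadScheduling
#         return None  # defer to the default mapping above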
@pytest.hookimpl
def pytest_runtestloop(self) -> bool:
self.sched = self.config.hook.pytest_xdist_make_scheduler(
config=self.config, log=self.log
)
assert self.sched is not None
self.shouldstop = False
pending_exception = None
while not self.session_finished:
self.loop_once()
if self.shouldstop:
self.triggershutdown()
pending_exception = Interrupted(str(self.shouldstop))
if pending_exception:
raise pending_exception
return True
def loop_once(self) -> None:
"""Process one callback from one of the workers."""
while 1:
if not self._active_nodes:
# If everything has died stop looping
self.triggershutdown()
raise RuntimeError("Unexpectedly no active workers available")
try:
eventcall = self.queue.get(timeout=2.0)
break
except Empty:
continue
callname, kwargs = eventcall
assert callname, kwargs
method = "worker_" + callname
call = getattr(self, method)
self.log("calling method", method, kwargs)
call(**kwargs)
assert self.sched is not None
if self.sched.tests_finished:
self.triggershutdown()
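# Illustrative sketch, not part of this module: events arrive on self.queue as
# (callname, kwargs) tuples and loop_once dispatches them by name to the
# matching "worker_" method. For example (hypothetical payload):
#
#     ("workerready", {"node": node, "workerinfo": workerinfo})
#
# is routed to self.worker_workerready(node=node, workerinfo=workerinfo).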
#
# callbacks for processing events from workers
#
def worker_workerready(
self,
node: WorkerController,
workerinfo: WorkerInfo,
) -> None:
"""Emitted when a node first starts up.
This adds the node to the scheduler; nodes continue with
collection without any further input.
"""
node.workerinfo = workerinfo
node.workerinfo["id"] = node.gateway.id
node.workerinfo["spec"] = node.gateway.spec
self.config.hook.pytest_testnodeready(node=node)
if self.shuttingdown:
node.shutdown()
else:
assert self.sched is not None
self.sched.add_node(node)
def worker_workerfinished(self, node: WorkerController) -> None:
"""Emitted when node executes its pytest_sessionfinish hook.
Removes the node from the scheduler.
The node might not be in the scheduler if it had not emitted
workerready before shutdown was triggered.
"""
self.config.hook.pytest_testnodedown(node=node, error=None)
if node.workeroutput["exitstatus"] == 2: # keyboard-interrupt
self.shouldstop = f"{node} received keyboard-interrupt"
self.worker_errordown(node, "keyboard-interrupt")
return
shouldfail = node.workeroutput["shouldfail"]
shouldstop = node.workeroutput["shouldstop"]
for shouldx in [shouldfail, shouldstop]:
if shouldx:
if not self.shouldstop:
self.shouldstop = shouldx
break
else:
assert self.sched is not None
if node in self.sched.nodes:
crashitem = self.sched.remove_node(node)
assert not crashitem, (crashitem, node)
self._active_nodes.remove(node)
def worker_internal_error(
self, node: WorkerController, formatted_error: str
) -> None:
"""
pytest_internalerror() was called on the worker.
pytest_internalerror() arguments are an excinfo and an excrepr, which can't
be serialized, so we go with a poor man's solution of raising an exception
here ourselves using the formatted message.
"""
self._active_nodes.remove(node)
try:
assert False, formatted_error
except AssertionError:
excinfo = pytest.ExceptionInfo.from_current()
excrepr = excinfo.getrepr()
self.config.hook.pytest_internalerror(excrepr=excrepr, excinfo=excinfo)
def worker_errordown(self, node: WorkerController, error: object | None) -> None:
"""Emitted by the WorkerController when a node dies."""
self.config.hook.pytest_testnodedown(node=node, error=error)
assert self.sched is not None
try:
crashitem = self.sched.remove_node(node)
except KeyError:
pass
else:
if crashitem:
self.handle_crashitem(crashitem, node)
self._failed_nodes_count += 1
maximum_reached = (
self._max_worker_restart is not None
and self._failed_nodes_count > self._max_worker_restart
)
if maximum_reached:
if self._max_worker_restart == 0:
msg = f"worker {node.gateway.id} crashed and worker restarting disabled"
else:
msg = f"maximum crashed workers reached: {self._max_worker_restart}"
self._summary_report = msg
self.report_line("\n" + msg)
self.triggershutdown()
else:
self.report_line("\nreplacing crashed worker %s" % node.gateway.id)
self.shuttingdown = False
self._clone_node(node)
self._active_nodes.remove(node)
@pytest.hookimpl
def pytest_terminal_summary(self, terminalreporter: Any) -> None:
if self.config.option.verbose >= 0 and self._summary_report:
terminalreporter.write_sep("=", f"xdist: {self._summary_report}")
def worker_collectionfinish(
self, node: WorkerController, ids: Sequence[str]
) -> None:
"""Worker has finished test collection.
This adds the collection for this node to the scheduler. If
the scheduler indicates collection is finished (i.e. all
initial nodes have submitted their collections), then it tells the
scheduler to schedule the collected items. When scheduling is
initiated for the first time, this logs which scheduler is in use.
"""
if self.shuttingdown:
return
self.config.hook.pytest_xdist_node_collection_finished(node=node, ids=ids)
# tell the session which items were effectively collected, otherwise
# the controller node will finish the session with EXIT_NOTESTSCOLLECTED
assert self._session is not None
self._session.testscollected = len(ids)
assert self.sched is not None
self.sched.add_node_collection(node, ids)
if self.terminal:
self.trdist.setstatus(
node.gateway.spec, WorkerStatus.CollectionDone, tests_collected=len(ids)
)
if self.sched.collection_is_completed:
if self.terminal and not self.sched.has_pending:
self.trdist.ensure_show_status()
self.terminal.write_line("")
if self.config.option.verbose > 0:
self.terminal.write_line(
f"scheduling tests via {self.sched.__class__.__name__}"
)
self.sched.schedule()
def worker_logstart(
self,
node: WorkerController,
nodeid: str,
location: tuple[str, int | None, str],
) -> None:
"""Emitted when a node calls the pytest_runtest_logstart hook."""
self.config.hook.pytest_runtest_logstart(nodeid=nodeid, location=location)
def worker_logfinish(
self,
node: WorkerController,
nodeid: str,
location: tuple[str, int | None, str],
) -> None:
"""Emitted when a node calls the pytest_runtest_logfinish hook."""
self.config.hook.pytest_runtest_logfinish(nodeid=nodeid, location=location)
def worker_testreport(self, node: WorkerController, rep: pytest.TestReport) -> None:
"""Emitted when a node calls the pytest_runtest_logreport hook."""
rep.node = node # type: ignore[attr-defined]
self.config.hook.pytest_runtest_logreport(report=rep)
self._handlefailures(rep)
def worker_runtest_protocol_complete(
self, node: WorkerController, item_index: int, duration: float
) -> None:
"""
Emitted when a node fires the 'runtest_protocol_complete' event,
signalling that a test has completed the runtestprotocol and should be
removed from the pending list in the scheduler.
"""
assert self.sched is not None
self.sched.mark_test_complete(node, item_index, duration)
def worker_unscheduled(
self, node: WorkerController, indices: Sequence[int]
) -> None:
"""
Emitted when a node fires the 'unscheduled' event, signalling that
some tests have been removed from the worker's queue and should be
sent to a worker again.
This should happen only in response to the 'steal' command, so schedulers
that do not use the 'steal' command don't have to implement it.
"""
assert self.sched is not None
self.sched.remove_pending_tests_from_node(node, indices)
def worker_collectreport(
self,
node: WorkerController,
rep: pytest.CollectReport | pytest.TestReport,
) -> None:
"""Emitted when a node calls the pytest_collectreport hook.
Because we only need the report when there's a failure/skip, as an optimization
we only expect to receive failed/skipped reports from workers (#330).
"""
assert not rep.passed
self._failed_worker_collectreport(node, rep)
def worker_warning_recorded(
self,
warning_message: warnings.WarningMessage,
when: str,
nodeid: str,
location: tuple[str, int, str] | None,
) -> None:
"""Emitted when a node calls the pytest_warning_recorded hook."""
kwargs = dict(
warning_message=warning_message, when=when, nodeid=nodeid, location=location
)
self.config.hook.pytest_warning_recorded.call_historic(kwargs=kwargs)
def _clone_node(self, node: WorkerController) -> WorkerController:
"""Return new node based on an existing one.
This is normally used when a node dies: it copies the spec
of the existing node and creates a new one with a new id. The
new node will have been set up so it will start calling the
"worker_*" hooks and do work soon.
"""
spec = node.gateway.spec
spec.id = None
assert self.nodemanager is not None
self.nodemanager.group.allocate_id(spec)
clone = self.nodemanager.setup_node(spec, self.queue.put)
self._active_nodes.add(clone)
return clone
def _failed_worker_collectreport(
self,
node: WorkerController,
rep: pytest.CollectReport | pytest.TestReport,
) -> None:
# Check we haven't already seen this report (from
# another worker).
if rep.longrepr not in self._failed_collection_errors:
self._failed_collection_errors[rep.longrepr] = True
self.config.hook.pytest_collectreport(report=rep)
self._handlefailures(rep)
def _handlefailures(
self,
rep: pytest.CollectReport | pytest.TestReport,
) -> None:
if rep.failed:
self.countfailures += 1
if (
self.maxfail
and self.countfailures >= self.maxfail
and not self.shouldstop
):
self.shouldstop = f"stopping after {self.countfailures} failures"
def triggershutdown(self) -> None:
if not self.shuttingdown:
self.log("triggering shutdown")
self.shuttingdown = True
assert self.sched is not None
for node in self.sched.nodes:
node.shutdown()
def handle_crashitem(self, nodeid: str, worker: WorkerController) -> None:
# XXX get more reporting info by recording pytest_runtest_logstart?
# XXX count no of failures and retry N times
fspath = nodeid.split("::")[0]
msg = f"worker {worker.gateway.id!r} crashed while running {nodeid!r}"
rep = pytest.TestReport(
nodeid=nodeid,
location=(fspath, None, fspath),
keywords={},
outcome="failed",
longrepr=msg,
when="???", # type: ignore[arg-type]
)
rep.node = worker # type: ignore[attr-defined]
self.config.hook.pytest_handlecrashitem(
crashitem=nodeid,
report=rep,
sched=self.sched,
)
self.config.hook.pytest_runtest_logreport(report=rep)
class WorkerStatus(Enum):
"""Status of each worker during creation/collection."""
# Worker spec has just been created.
Created = auto()
# Worker has been initialized.
Initialized = auto()
# Worker is now ready for collection.
ReadyForCollection = auto()
# Worker has finished collection.
CollectionDone = auto()
class TerminalDistReporter:
def __init__(self, config: pytest.Config) -> None:
self.config = config
self.tr = config.pluginmanager.getplugin("terminalreporter")
self._status: dict[object, tuple[WorkerStatus, int]] = {}
self._lastlen = 0
self._isatty = getattr(self.tr, "isatty", self.tr.hasmarkup)
def write_line(self, msg: str) -> None:
self.tr.write_line(msg)
def ensure_show_status(self) -> None:
if not self._isatty:
self.write_line(self.getstatus())
def setstatus(
self,
spec: execnet.XSpec,
status: WorkerStatus,
*,
tests_collected: int,
show: bool = True,
) -> None:
self._status[spec.id] = (status, tests_collected)
if show and self._isatty:
self.rewrite(self.getstatus())
def getstatus(self) -> str:
if self.config.option.verbose >= 0:
line = get_workers_status_line(list(self._status.values()))
if line:
return line
return "bringing up nodes..."
def rewrite(self, line: str, newline: bool = False) -> None:
pline = line + " " * max(self._lastlen - len(line), 0)
if newline:
self._lastlen = 0
pline += "\n"
else:
self._lastlen = len(line)
self.tr.rewrite(pline, bold=True)
@pytest.hookimpl
def pytest_xdist_setupnodes(self, specs: Sequence[execnet.XSpec]) -> None:
self._specs = specs
for spec in specs:
self.setstatus(spec, WorkerStatus.Created, tests_collected=0, show=False)
self.setstatus(spec, WorkerStatus.Created, tests_collected=0, show=True)
self.ensure_show_status()
@pytest.hookimpl
def pytest_xdist_newgateway(self, gateway: execnet.Gateway) -> None:
if self.config.option.verbose > 0:
rinfo = gateway._rinfo()
different_interpreter = rinfo.executable != sys.executable
if different_interpreter:
version = "{}.{}.{}".format(*rinfo.version_info[:3])
self.rewrite(
f"[{gateway.id}] {rinfo.platform} Python {version} cwd: {rinfo.cwd}",
newline=True,
)
self.setstatus(gateway.spec, WorkerStatus.Initialized, tests_collected=0)
@pytest.hookimpl
def pytest_testnodeready(self, node: WorkerController) -> None:
if self.config.option.verbose > 0:
d = node.workerinfo
different_interpreter = d.get("executable") != sys.executable
if different_interpreter:
version = d["version"].replace("\n", " -- ")
self.rewrite(f"[{d['id']}] Python {version}", newline=True)
self.setstatus(
node.gateway.spec, WorkerStatus.ReadyForCollection, tests_collected=0
)
@pytest.hookimpl
def pytest_testnodedown(self, node: WorkerController, error: object) -> None:
if not error:
return
self.write_line(f"[{node.gateway.id}] node down: {error}")
def get_default_max_worker_restart(config: pytest.Config) -> int | None:
"""Gets the default value of --max-worker-restart option if it is not provided.
Use a reasonable default to avoid workers from restarting endlessly due to crashing collections (#226).
"""
result_str: str | None = config.option.maxworkerrestart
if result_str is not None:
result = int(result_str)
elif config.option.numprocesses:
# if --max-worker-restart was not provided, use a reasonable default (#226)
result = config.option.numprocesses * 4
else:
result = None
return result
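# Illustrative sketch, not part of this module: with "-n 4" and no explicit
# --max-worker-restart, the computed default is 4 * 4 == 16 restarts; an
# explicit "--max-worker-restart 0" disables restarting crashed workers
# entirely (see worker_errordown above). Hypothetical values:
#
#     # config.option.numprocesses == 4, config.option.maxworkerrestart is None
#     get_default_max_worker_restart(config)  # -> 16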
def get_workers_status_line(
status_and_items: Sequence[tuple[WorkerStatus, int]],
) -> str:
"""
Return the line to display during worker setup/collection based on the
status of the workers and number of tests collected for each.
"""
statuses = [s for s, c in status_and_items]
total_workers = len(statuses)
workers_noun = "worker" if total_workers == 1 else "workers"
if status_and_items and all(s == WorkerStatus.CollectionDone for s in statuses):
# All workers collect the same number of items, so we grab
# the total number of items from the first worker.
first = status_and_items[0]
status, tests_collected = first
tests_noun = "item" if tests_collected == 1 else "items"
return f"{total_workers} {workers_noun} [{tests_collected} {tests_noun}]"
if WorkerStatus.CollectionDone in statuses:
done = sum(1 for s, c in status_and_items if c > 0)
return f"collecting: {done}/{total_workers} {workers_noun}"
if WorkerStatus.ReadyForCollection in statuses:
ready = statuses.count(WorkerStatus.ReadyForCollection)
return f"ready: {ready}/{total_workers} {workers_noun}"
if WorkerStatus.Initialized in statuses:
initialized = statuses.count(WorkerStatus.Initialized)
return f"initialized: {initialized}/{total_workers} {workers_noun}"
if WorkerStatus.Created in statuses:
created = statuses.count(WorkerStatus.Created)
return f"created: {created}/{total_workers} {workers_noun}"
return ""