-
Notifications
You must be signed in to change notification settings - Fork 240
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Turn on more CWL tests #3628
Turn on more CWL tests #3628
Changes from 45 commits
d74e71b
9fe5dee
7285373
e273be9
00d2d75
1041d02
f37ac53
ffec068
e89d056
5f3d7b0
9300d65
9ede711
53b2587
671ee57
14756e0
bc9d677
0be75a0
c04ad88
6606f8b
3a1235a
357aad1
73e972d
212aa60
126d9c1
32c61f4
143d712
75904fd
f3a148d
b6eb6c1
170ccfc
536fd3a
856a67d
beb9753
216be15
a1fd2bd
d0102e5
b8f1c00
9c68963
c0dd699
a94e8de
805352c
30bbaf6
9e71644
63e2cc9
8a1cd8f
21d1728
3c21664
fe4b221
9bbd57e
23140ba
215b919
ca7b35e
f173ae4
29e3c1b
f740116
fc47153
af5cd74
0339197
937f461
9155270
4dec732
f6ab2b3
6013aa9
6313d2d
37711e6
5cd5282
6af2d1c
1feae24
21ebb0f
25918b2
11ab044
052cdd3
2f867c0
513797b
cf8ba34
ff2967c
92834e3
00e70c1
1bc8f7c
00f87a4
e15bb24
1e0eeb9
0c6e493
f933c49
a8c4e35
2cd58c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -472,6 +472,10 @@ def makeString(x): | |
# the logging has been captured to be reported on the leader. | ||
self.logJobStoreFileID = None | ||
|
||
# Every time we update a job description in place in the job store, we | ||
# increment this. | ||
self._generation = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll rename this. |
||
|
||
def serviceHostIDsInBatches(self): | ||
""" | ||
Get an iterator over all batches of service host job IDs that can be | ||
|
@@ -635,6 +639,8 @@ def replace(self, other): | |
self.filesToDelete += other.filesToDelete | ||
self.jobsToDelete += other.jobsToDelete | ||
|
||
self._generation = other._generation | ||
|
||
def addChild(self, childID): | ||
""" | ||
Make the job with the given ID a child of the described job. | ||
|
@@ -814,6 +820,8 @@ def __str__(self): | |
if self.jobStoreID is not None: | ||
printedName += ' ' + str(self.jobStoreID) | ||
|
||
printedName += ' v' + str(self._generation) | ||
|
||
return printedName | ||
|
||
# Not usable as a key (not hashable) and doesn't have any value-equality. | ||
|
@@ -823,6 +831,16 @@ def __str__(self): | |
def __repr__(self): | ||
return '%s( **%r )' % (self.__class__.__name__, self.__dict__) | ||
|
||
def pre_update_hook(self) -> None: | ||
""" | ||
Called by the job store before pickling and saving a created or updated | ||
version of a job. | ||
""" | ||
|
||
self._generation += 1 | ||
logger.debug("New generation: %s", self) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's go with "New job version". |
||
|
||
|
||
|
||
class ServiceJobDescription(JobDescription): | ||
""" | ||
|
@@ -1448,7 +1466,8 @@ def registerPromise(self, path): | |
raise JobPromiseConstraintError(self) | ||
# TODO: can we guarantee self.jobStoreID is populated and so pass that here? | ||
with self._promiseJobStore.writeFileStream() as (fileHandle, jobStoreFileID): | ||
promise = UnfulfilledPromiseSentinel(str(self.description), False) | ||
promise = UnfulfilledPromiseSentinel(str(self.description), jobStoreFileID, False) | ||
logger.debug('Issuing promise %s for result of %s', jobStoreFileID, self.description) | ||
pickle.dump(promise, fileHandle, pickle.HIGHEST_PROTOCOL) | ||
self._rvs[path].append(jobStoreFileID) | ||
return self._promiseJobStore.config.jobStore, jobStoreFileID | ||
|
@@ -1874,7 +1893,7 @@ def _fulfillPromises(self, returnValues, jobStore): | |
# either case, we just pass it on. | ||
promisedValue = returnValues | ||
else: | ||
# If there is an path ... | ||
# If there is a path ... | ||
if isinstance(returnValues, Promise): | ||
# ... and the value itself is a Promise, we need to created a new, narrower | ||
# promise and pass it on. | ||
|
@@ -1888,8 +1907,11 @@ def _fulfillPromises(self, returnValues, jobStore): | |
# File may be gone if the job is a service being re-run and the accessing job is | ||
# already complete. | ||
if jobStore.fileExists(promiseFileStoreID): | ||
logger.debug("Resolve promise %s from %s with a %s", promiseFileStoreID, self, type(promisedValue)) | ||
with jobStore.updateFileStream(promiseFileStoreID) as fileHandle: | ||
pickle.dump(promisedValue, fileHandle, pickle.HIGHEST_PROTOCOL) | ||
else: | ||
logger.debug("Do not resolve promise %s from %s because it is no longer needed", promiseFileStoreID, self) | ||
|
||
# Functions associated with Job.checkJobGraphAcyclic to establish that the job graph does not | ||
# contain any cycles of dependencies: | ||
|
@@ -2381,7 +2403,6 @@ def _jobName(self): | |
""" | ||
return self._description.displayName | ||
|
||
|
||
class JobException(Exception): | ||
""" | ||
General job exception. | ||
|
@@ -2577,7 +2598,7 @@ class EncapsulatedJob(Job): | |
predecessors automatically. Care should be exercised to ensure the encapsulated job has the | ||
proper set of predecessors. | ||
|
||
The return value of an encapsulatd job (as accessed by the :func:`toil.job.Job.rv` function) | ||
The return value of an encapsulated job (as accessed by the :func:`toil.job.Job.rv` function) | ||
is the return value of the root job, e.g. A().encapsulate().rv() and A().rv() will resolve to | ||
the same value after A or A.encapsulate() has been run. | ||
""" | ||
|
@@ -2930,17 +2951,19 @@ def convertPromises(kwargs): | |
class UnfulfilledPromiseSentinel: | ||
"""This should be overwritten by a proper promised value. Throws an | ||
exception when unpickled.""" | ||
def __init__(self, fulfillingJobName, unpickled): | ||
def __init__(self, fulfillingJobName, file_id: str, unpickled): | ||
self.fulfillingJobName = fulfillingJobName | ||
self.file_id = file_id | ||
|
||
@staticmethod | ||
def __setstate__(stateDict): | ||
"""Only called when unpickling. This won't be unpickled unless the | ||
promise wasn't resolved, so we throw an exception.""" | ||
jobName = stateDict['fulfillingJobName'] | ||
raise RuntimeError("This job was passed a promise that wasn't yet resolved when it " | ||
"ran. The job {jobName} that fulfills this promise hasn't yet " | ||
"finished. This means that there aren't enough constraints to " | ||
"ensure the current job always runs after {jobName}. Consider adding a " | ||
"follow-on indirection between this job and its parent, or adding " | ||
"this job as a child/follow-on of {jobName}.".format(jobName=jobName)) | ||
file_id = stateDict['file_id'] | ||
raise RuntimeError(f"This job was passed promise {file_id} that wasn't yet resolved when it " | ||
f"ran. The job {jobName} that fulfills this promise hasn't yet " | ||
f"finished. This means that there aren't enough constraints to " | ||
f"ensure the current job always runs after {jobName}. Consider adding a " | ||
f"follow-on indirection between this job and its parent, or adding " | ||
f"this job as a child/follow-on of {jobName}.") |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,9 +22,11 @@ | |
import shutil | ||
import signal | ||
import socket | ||
import stat | ||
import sys | ||
import time | ||
import traceback | ||
from typing import Callable, Any | ||
from contextlib import contextmanager | ||
|
||
from toil import logProcessContext | ||
|
@@ -39,10 +41,14 @@ | |
from toil.statsAndLogging import configure_root_logger, set_log_level | ||
|
||
try: | ||
from toil.cwl.cwltoil import CWL_INTERNAL_JOBS | ||
from toil.cwl.cwltoil import (CWL_INTERNAL_JOBS, | ||
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE, | ||
CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION) | ||
except ImportError: | ||
# CWL extra not installed | ||
CWL_INTERNAL_JOBS = () | ||
CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE = None | ||
CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION = type(None) | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
@@ -275,6 +281,7 @@ def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile= | |
########################################## | ||
|
||
jobAttemptFailed = False | ||
failure_exit_code = 1 | ||
statsDict = MagicExpando() | ||
statsDict.jobs = [] | ||
statsDict.workers.logsToMaster = [] | ||
|
@@ -492,9 +499,13 @@ def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile= | |
########################################## | ||
#Trapping where worker goes wrong | ||
########################################## | ||
except: #Case that something goes wrong in worker | ||
except Exception as e: #Case that something goes wrong in worker | ||
traceback.print_exc() | ||
logger.error("Exiting the worker because of a failed job on host %s", socket.gethostname()) | ||
if isinstance(e, CWL_UNSUPPORTED_REQUIREMENT_EXCEPTION): | ||
# We need to inform the leader that this is a CWL workflow problem | ||
# and it needs to inform its caller. | ||
failure_exit_code = CWL_UNSUPPORTED_REQUIREMENT_EXIT_CODE | ||
AbstractFileStore._terminateEvent.set() | ||
|
||
########################################## | ||
|
@@ -587,7 +598,17 @@ def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile= | |
#Remove the temp dir | ||
cleanUp = config.cleanWorkDir | ||
if cleanUp == 'always' or (cleanUp == 'onSuccess' and not jobAttemptFailed) or (cleanUp == 'onError' and jobAttemptFailed): | ||
shutil.rmtree(localWorkerTempDir) | ||
def make_parent_writable(func: Callable[[str], Any], path: str, _) -> None: | ||
""" | ||
When encountering an error removing a file or directory, make sure | ||
the parent directory is writable. | ||
|
||
cwltool likes to lock down directory permissions, and doesn't clean | ||
up after itself. | ||
""" | ||
# Just chmod it for rwx for user. This can't work anyway if it isn't ours. | ||
os.chmod(os.path.dirname(path), stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR) | ||
shutil.rmtree(localWorkerTempDir, onerror=make_parent_writable) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, this is so useful, I didn't know |
||
|
||
#This must happen after the log file is done with, else there is no place to put the log | ||
if (not jobAttemptFailed) and jobDesc.command == None and next(jobDesc.successorsAndServiceHosts(), None) is None: | ||
|
@@ -597,7 +618,7 @@ def workerScript(jobStore, config, jobName, jobStoreID, redirectOutputToLogFile= | |
jobStore.delete(jobDesc.jobStoreID) | ||
|
||
if jobAttemptFailed: | ||
return 1 | ||
return failure_exit_code | ||
else: | ||
return 0 | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this only for debugging? The purpose of this isn't clear to me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added this for debugging because I thought we might have been forgetting to save an update or running the wrong version of a job somewhere; it turned out that wasn't the case and we were giving the same version of the job to two batch systems, but I left this in because I want to use it later when poking around the leader looking for race conditions with its weird job update system.