diff --git a/sisyphus/global_settings.py b/sisyphus/global_settings.py index 5709c49..1bf2e25 100644 --- a/sisyphus/global_settings.py +++ b/sisyphus/global_settings.py @@ -130,8 +130,10 @@ def file_caching(path): JOB_CLEANER_INTERVAL = 60 #: How many threads should be cleaning in parallel JOB_CLEANER_WORKER = 5 -#: If the job internal work directory should be keeped re deleted during clean up +#: If the job internal work directory should be kept or deleted during clean up JOB_CLEANUP_KEEP_WORK = False +#: If the job internal input directory with symlinks to input jobs should be kept or deleted during clean up +JOB_CLEANUP_KEEP_INPUT = True #: Default value for job used by tk.cleaner to determine if a job should be removed or not JOB_DEFAULT_KEEP_VALUE = 50 #: How many threads should update the graph in parallel, useful if the filesystem has a high latency diff --git a/sisyphus/job.py b/sisyphus/job.py index d016d6e..5243bdb 100644 --- a/sisyphus/job.py +++ b/sisyphus/job.py @@ -516,11 +516,10 @@ def _sis_cleanup(self): try: if not gs.JOB_CLEANUP_KEEP_WORK: shutil.rmtree(os.path.abspath(self._sis_path(gs.JOB_WORK_DIR))) - files = [ - i - for i in os.listdir(self._sis_path()) - if i not in (gs.JOB_OUTPUT, gs.JOB_INFO, gs.JOB_WORK_DIR) - ] + files_keep = [gs.JOB_OUTPUT, gs.JOB_INFO, gs.JOB_WORK_DIR] + if gs.JOB_CLEANUP_KEEP_INPUT: + files_keep.append(gs.JOB_INPUT) + files = [i for i in os.listdir(self._sis_path()) if i not in files_keep] subprocess.check_call( ["tar", "-czf", gs.JOB_FINISHED_ARCHIVE] + files, cwd=os.path.abspath(self._sis_path()) )