Skip to content

Commit

Permalink
Merge pull request DataBiosphere#1392 from WEHI-ResearchComputing/plu…
Browse files Browse the repository at this point in the history
…gin-batch-systems

Plugin batch systems
  • Loading branch information
ejacox authored Jul 5, 2017
2 parents 8b2c8be + b04c8a5 commit 7b1f22e
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 81 deletions.
12 changes: 12 additions & 0 deletions src/toil/batchSystems/abstractBatchSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,18 @@ def getRescueBatchJobFrequency(cls):
raise NotImplementedError()


@classmethod
def setOptions(cls, setOption):
    """
    Process command line or configuration options relevant to this batch system.

    This implementation does nothing.

    :param setOption: A function with signature
           setOption(varName, parsingFn=None, checkFn=None, default=None)
           used to update run configuration
    """


class BatchSystemSupport(AbstractBatchSystem):
"""
Partial implementation of AbstractBatchSystem, support methods.
Expand Down
11 changes: 8 additions & 3 deletions src/toil/batchSystems/mesos/batchSystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(self, nodeAddress, slaveId, nodeInfo, lastSeen):
self.nodeInfo = nodeInfo
self.lastSeen = lastSeen

def __init__(self, config, maxCores, maxMemory, maxDisk, masterAddress):
def __init__(self, config, maxCores, maxMemory, maxDisk):
super(MesosBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk)

# The hot-deployed resource representing the user script. Will be passed along in every
Expand All @@ -90,7 +90,7 @@ def __init__(self, config, maxCores, maxMemory, maxDisk, masterAddress):
self.jobQueues = JobQueue()

# Address of the Mesos master in the form host:port where host can be an IP or a hostname
self.masterAddress = masterAddress
self.mesosMasterAddress = config.mesosMasterAddress

# Written to when Mesos kills tasks, as directed by Toil
self.killedJobIds = set()
Expand Down Expand Up @@ -272,7 +272,7 @@ def _startDriver(self):
framework.principal = framework.name
self.driver = mesos.native.MesosSchedulerDriver(self,
framework,
self._resolveAddress(self.masterAddress),
self._resolveAddress(self.mesosMasterAddress),
True) # enable implicit acknowledgements
assert self.driver.start() == mesos_pb2.DRIVER_RUNNING

Expand Down Expand Up @@ -615,6 +615,11 @@ def executorLost(self, driver, executorId, slaveId, status):
log.warning("Executor '%s' lost.", executorId)


@classmethod
def setOptions(cls, setOption):
    """
    Register the Mesos master address option with the run configuration.

    :param setOption: function used to update the run configuration. It is called here
           with the option name, no parsing function, no check function and the
           default value 'localhost:5050'.
    """
    setOption("mesosMasterAddress", None, None, 'localhost:5050')


def toMiB(n):
    """
    Divide n by 1024 twice (presumably converting a byte count to MiB, going by the name).

    The plain ``/`` operator is used, so the result type follows the interpreter's
    division rules for the type of n.

    :param n: the number to scale down
    :return: n / 1024 / 1024
    """
    kib = n / 1024
    return kib / 1024

Expand Down
88 changes: 88 additions & 0 deletions src/toil/batchSystems/options.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2015-2016 Regents of the University of California
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from registry import batchSystemFactoryFor, defaultBatchSystem, uniqueNames


def _parasolOptions(addOptionFn):
    """
    Define the command line options of the Parasol batch system.

    Both options are registered with default=None; the defaults quoted in the help
    texts are the values the configuration falls back to.

    :param addOptionFn: function that registers one option; called with the option
           string followed by ``dest``, ``default`` and ``help`` keyword arguments
    """
    addOptionFn("--parasolCommand", dest="parasolCommand", default=None,
                help="The name or path of the parasol program. Will be looked up on PATH "
                     "unless it starts with a slash. default=%s" % 'parasol')
    # The advertised default must match the 10000 used when the option is actually set.
    addOptionFn("--parasolMaxBatches", dest="parasolMaxBatches", default=None,
                help="Maximum number of job batches the Parasol batch is allowed to create. One "
                     "batch is created for jobs with a unique set of resource requirements. "
                     "default=%i" % 10000)

def _singleMachineOptions(addOptionFn):
    """
    Define the command line option of the single machine batch system.

    :param addOptionFn: function that registers one option; called with the option
           string followed by ``dest``, ``default`` and ``help`` keyword arguments
    """
    scaleHelp = ("A scaling factor to change the value of all submitted tasks's submitted cores. "
                 "Used in singleMachine batch system. default=%s" % 1)
    addOptionFn("--scale", dest="scale", default=None, help=scaleHelp)

def _mesosOptions(addOptionFn):
    """
    Define the command line option of the Mesos batch system.

    :param addOptionFn: function that registers one option; called with the option
           string followed by ``dest``, ``default`` and ``help`` keyword arguments
    """
    masterHelp = ("The host and port of the Mesos master separated by colon. default=%s"
                  % 'localhost:5050')
    addOptionFn("--mesosMaster", dest="mesosMasterAddress", default=None, help=masterHelp)

# Option-definition functions of the built-in batch systems that have options
_OPTIONS = [_parasolOptions, _singleMachineOptions, _mesosOptions]

# Working list of option definitions; a copy, so that appending to it leaves
# _OPTIONS untouched
_options = _OPTIONS[:]

def addOptionsDefinition(optionsDefinition):
    """
    Append an option-definition function to the module-level list of definitions.

    :param optionsDefinition: the definition to add to the end of ``_options``
    """
    _options.extend([optionsDefinition])


def setOptions(config, setOption):
    """
    Let the batch system named in the configuration set its own options.

    The factory registered under ``config.batchSystem`` is looked up and called, and
    ``setOption`` is handed to the ``setOptions`` method of whatever it returns.

    :param config: object whose ``batchSystem`` attribute is a registered batch system name
    :param setOption: function passed through unchanged to the batch system's setOptions()
    """
    createBatchSystem = batchSystemFactoryFor(config.batchSystem)
    selected = createBatchSystem()
    selected.setOptions(setOption)

def addOptions(addOptionFn):
    """
    Define the generic batch system options, then the options of every registered
    option definition.

    :param addOptionFn: function that registers one option; called with the option
           string followed by keyword arguments such as ``dest``, ``default`` and ``help``
    """
    hotDeploymentHelp = ("Should hot-deployment of the user script be deactivated? If True, the user "
                         "script/package should be present at the same location on all workers. "
                         "default=false")

    addOptionFn("--batchSystem", dest="batchSystem", default=defaultBatchSystem(),
                help=("The type of batch system to run the job(s) with, currently can be one "
                      "of %s'. default=%s" % (', '.join(uniqueNames()), defaultBatchSystem())))
    addOptionFn("--disableHotDeployment", dest="disableHotDeployment", action='store_true',
                default=None, help=hotDeploymentHelp)

    # Each definition adds its own batch-system-specific options
    for definition in _options:
        definition(addOptionFn)

def setDefaultOptions(config):
    """
    Set default options for builtin batch systems. This is required if a Config
    object is not constructed from an Options object.

    :param config: object that receives the default values as attributes
    """
    # Built on every call so that each config gets its own ``environment`` dict
    defaults = (
        ("batchSystem", "singleMachine"),
        ("disableHotDeployment", False),
        ("environment", {}),
        # single machine
        ("scale", 1),
        # mesos
        ("mesosMasterAddress", 'localhost:5050'),
        # parasol
        ("parasolCommand", 'parasol'),
        ("parasolMaxBatches", 10000),
    )
    for attribute, value in defaults:
        setattr(config, attribute, value)
8 changes: 8 additions & 0 deletions src/toil/batchSystems/parasol.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,3 +370,11 @@ def shutdown(self):
for results in self.resultsFiles.values():
os.remove(results)
os.rmdir(self.parasolResultsDir)


@classmethod
def setOptions(cls, setOption):
    """
    Register the Parasol options with the run configuration.

    :param setOption: function used to update the run configuration; called once per
           option with the option name, a parsing function, a check function and a default
    """
    # Imported here rather than at module level, as in the original code
    from toil.common import iC
    setOption("parasolCommand", None, None, 'parasol')
    # NOTE(review): iC(1) presumably builds a check on the integer value - confirm
    # against toil.common
    batchCountCheck = iC(1)
    setOption("parasolMaxBatches", int, batchCountCheck, 10000)

80 changes: 80 additions & 0 deletions src/toil/batchSystems/registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Copyright (C) 2015-2016 Regents of the University of California
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

def _gridengineBatchSystemFactory():
    """
    Return the GridengineBatchSystem class.

    The import is done inside the function, so toil.batchSystems.gridengine is only
    imported when the factory is called.
    """
    from toil.batchSystems.gridengine import GridengineBatchSystem as batchSystemClass
    return batchSystemClass

def _parasolBatchSystemFactory():
    """
    Return the ParasolBatchSystem class.

    The import is done inside the function, so toil.batchSystems.parasol is only
    imported when the factory is called.
    """
    from toil.batchSystems.parasol import ParasolBatchSystem as batchSystemClass
    return batchSystemClass

def _lsfBatchSystemFactory():
    """
    Return the LSFBatchSystem class.

    The import is done inside the function, so toil.batchSystems.lsf is only
    imported when the factory is called.
    """
    from toil.batchSystems.lsf import LSFBatchSystem as batchSystemClass
    return batchSystemClass

def _singleMachineBatchSystemFactory():
    """
    Return the SingleMachineBatchSystem class.

    The import is done inside the function, so toil.batchSystems.singleMachine is only
    imported when the factory is called.
    """
    from toil.batchSystems.singleMachine import SingleMachineBatchSystem as batchSystemClass
    return batchSystemClass

def _mesosBatchSystemFactory():
    """
    Return the MesosBatchSystem class.

    The import is done inside the function, so toil.batchSystems.mesos.batchSystem is
    only imported when the factory is called.
    """
    from toil.batchSystems.mesos.batchSystem import MesosBatchSystem as batchSystemClass
    return batchSystemClass

def _slurmBatchSystemFactory():
    """
    Return the SlurmBatchSystem class.

    The import is done inside the function, so toil.batchSystems.slurm is only
    imported when the factory is called.
    """
    from toil.batchSystems.slurm import SlurmBatchSystem as batchSystemClass
    return batchSystemClass


# Built-in batch system names mapped to their factories. Most batch systems are
# reachable under two spellings that share one factory.
_DEFAULT_REGISTRY = {
    'parasol': _parasolBatchSystemFactory,
    'singleMachine': _singleMachineBatchSystemFactory,
    'single_machine': _singleMachineBatchSystemFactory,
    'gridEngine': _gridengineBatchSystemFactory,
    'gridengine': _gridengineBatchSystemFactory,
    'lsf': _lsfBatchSystemFactory,
    'LSF': _lsfBatchSystemFactory,
    'mesos': _mesosBatchSystemFactory,
    'Mesos': _mesosBatchSystemFactory,
    'slurm': _slurmBatchSystemFactory,
    'Slurm': _slurmBatchSystemFactory,
}

# One name per built-in batch system
_UNIQUE_NAME = {'parasol', 'singleMachine', 'gridEngine', 'LSF', 'Mesos', 'Slurm'}

# Mutable working copies; the built-in tables above stay unmodified when new
# entries are added to these
_batchSystemRegistry = dict(_DEFAULT_REGISTRY)
_batchSystemNames = _UNIQUE_NAME.copy()

def addBatchSystemFactory(key, batchSystemFactory):
    """
    Register a batch system factory under the given name.

    :param key: name to register; it is added to the set of names and used as the
           registry key, replacing any factory already stored under it
    :param batchSystemFactory: the factory to store under ``key``
    """
    _batchSystemRegistry[key] = batchSystemFactory
    _batchSystemNames.add(key)

def batchSystemFactoryFor(batchSystem):
    """
    Look up the factory registered under a batch system name.

    :param batchSystem: the name to look up in the registry
    :return: the factory stored under that name
    :raise KeyError: if nothing is registered under that name
    """
    factory = _batchSystemRegistry[batchSystem]
    return factory

def defaultBatchSystem():
    """
    Return the name of the default batch system.

    :return: the string 'singleMachine'
    """
    defaultName = 'singleMachine'
    return defaultName

def uniqueNames():
    """
    Return the registered batch system names as a new list.

    :return: list built from the module-level set of names; the order is whatever
             iterating that set yields
    """
    return [name for name in _batchSystemNames]

def batchSystems():
    """
    Return the distinct values stored in the batch system registry.

    Values registered under several names are collapsed by going through a set, so
    each one appears once. The order of the result is unspecified.

    :return: list of the distinct registry values
    """
    # The original computed this list but never returned it, so callers got None.
    return list(set(_batchSystemRegistry.values()))
4 changes: 4 additions & 0 deletions src/toil/batchSystems/singleMachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,10 @@ def getRescueBatchJobFrequency(cls):
"""
return 5400

@classmethod
def setOptions(cls, setOption):
    """
    Register the ``scale`` option with the run configuration, defaulting to 1.

    :param setOption: function used to update the run configuration; called with the
           option name and a ``default`` keyword argument
    """
    scaleDefault = 1
    setOption("scale", default=scaleDefault)

class Info(object):
# Can't use namedtuple here since killIntended needs to be mutable
def __init__(self, startTime, popen, killIntended):
Expand Down
Loading

0 comments on commit 7b1f22e

Please sign in to comment.