diff --git a/ganga/GangaCore/GPIDev/Adapters/IGangaFile.py b/ganga/GangaCore/GPIDev/Adapters/IGangaFile.py index 813a76debc..914b6364a8 100644 --- a/ganga/GangaCore/GPIDev/Adapters/IGangaFile.py +++ b/ganga/GangaCore/GPIDev/Adapters/IGangaFile.py @@ -15,7 +15,7 @@ class IGangaFile(GangaObject): - """IGangaFile represents base class for output files, such as MassStorageFile, LCGSEFile, DiracFile, LocalFile, etc + """IGangaFile represents base class for output files, such as MassStorageFile, DiracFile, LocalFile, etc """ _schema = Schema(Version(1, 1), {'namePattern': SimpleItem( defvalue="", doc='pattern of the file name')}) diff --git a/ganga/GangaCore/GPIDev/Adapters/StandardJobConfig.py b/ganga/GangaCore/GPIDev/Adapters/StandardJobConfig.py index cf529a93ad..552bdf2a54 100755 --- a/ganga/GangaCore/GPIDev/Adapters/StandardJobConfig.py +++ b/ganga/GangaCore/GPIDev/Adapters/StandardJobConfig.py @@ -13,7 +13,7 @@ class StandardJobConfig(object): """ - StandardJobConfig defines a standard input for many of the handlers: LSF, Localhost,LCG. + StandardJobConfig defines a standard input for many of the handlers: LSF, Localhost. It corresponds to a simplified JDL definition: specification of executable, arguments and input sandbox. Executable and arguments may be specified either as strings or File objects. In the second case they are automatically added to the input sandbox list. diff --git a/ganga/GangaCore/GPIDev/Credentials_old/GridProxy.py b/ganga/GangaCore/GPIDev/Credentials_old/GridProxy.py deleted file mode 100755 index dd7de5a8bb..0000000000 --- a/ganga/GangaCore/GPIDev/Credentials_old/GridProxy.py +++ /dev/null @@ -1,554 +0,0 @@ -########################################################################## -# Ganga Project. http://cern.ch/ganga -# -# $Id: GridProxy.py,v 1.5 2009/03/25 15:43:35 karl Exp $ -########################################################################## -# -# File: GridProxy.py -# Author: K. Harrison -# Created: 060519 -# -# 06/07/2006 KH: Changed to GangaCore.Utility.Shell for shell commands -# Added voms support -# -# 02/08/2006 KH: Modified GridProxy class to create one instance of -# VomsCommand and GridCommand -# -# 07/08/2006 KH: Added isValid() method -# -# 09/08/2006 KH: Use shell defined via GangaCore.Lib.LCG.GridShell.getShell() -# -# 25/08/2006 KH: Declare GridProxy class as hidden -# -# 06/09/2006 KH: Argument minValidity added to methods create() and renew() -# -# 25/09/2006 KH: Changed method isValid(), so that default validity is -# value of self.minValidity -# -# 13/11/2006 KH: Added method info() for obtaining proxy information, -# and changed location() to use this method -# -# 23/11/2006 KH: Added "pipe" keyword to option dictionaries of GridCommand -# and VomsCommand -# Added method to determine if credential is available -# with system/configuration used -# (requests from CLT) -# -# 28/02/2007 CLT: Replaced VomsCommand.options and GridCommand.options -# with dictionaries init_parameters, destroy_parameters, -# info_parameters, each providing independent options -# Added VomsCommand.currentOpts and GridCommand.currentOpts -# dictionaries, to add flexibility and assist in option -# construction (as opposed to direct string manipulation) -# Added GridProxy.buildOpts(), to consolidate the option -# building functionality from create(), destroy() and info() -# -# 02/03/2007 KH : Added method to determine user's identity -# (request from DL) -# -# 25/04/2007 KH : Modified GridProxy.identity method to be able to deal -# with new-style CERN certificates, with ambiguous CN definition -# -# 08/06/2007 KH : Added method GridProxy.voname, to allow name of -# virtual organisation to be determined from proxy -# -# 25/09/2007 KH: Changes for compatibility with multi-proxy handling -# => "middleware" argument introduced -# -# 08/12/2007 KH: Changes to take into account ICommandSet being made -# a component class -# -# 17/12/2007 KH: Made changes for handling of GridCommand and VomsCommand as -# component classes -# -# 15/01/2008 KH: Set initial null value for infoCommand in GridProxy.voname() -# -# 18/01/2008 KH : Modified GridProxy.identity method to disregard -# values of CN=proxy -# -# 27/02/2008 KH : Setup shell in GridProxy constructor, if middleware is defined -# -# 30/01/2009 KH : Added possibility to request that GridProxy.identity() -# returns string with non-alphanumeric characters stripped out -# -# 25/03/2009 KH : Correction to GridProxy.voname() to check that one-word -# VO name is returned -# -# 18/03/2009 MWS: Added the 'log' option to isValid -# Added method to retrieve the full identity as a dictionary -# Require consistency between VO in proxy and in configuration -# -# 15/10/2009 MWS: Added cache for proxy information -# -# -# 09/11/2009 MWS: Added check that proxy is valid before updating cache -# (addToProxyCache() method) -# -# 12/11/2009 MWS: Added additional checks that cached information -# is consistent with available proxy - -"""Module defining class for creating, querying and renewing Grid proxy""" -__author__ = "K.Harrison " -__date__ = "12 November 2009" -__version__ = "1.21" - -import os -import re -import time - -from GangaCore.GPIDev.Base.Proxy import isType -from GangaCore.GPIDev.Credentials_old.ICredential import ICommandSet, ICredential, registerCommandSet -from GangaCore.GPIDev.Schema import SimpleItem -from GangaCore.Utility.logging import getLogger -from GangaCore.Utility.GridShell import getShell - -logger = getLogger() - -allowed_exit_range = list(range(1000)) - - -class GridCommand(ICommandSet): - - """ - Class used to define shell commands and options for working with Grid proxy - """ - - _schema = ICommandSet._schema.inherit_copy() - _schema['init']._meta['defvalue'] = "grid-proxy-init" - _schema['info']._meta['defvalue'] = "grid-proxy-info" - _schema['destroy']._meta['defvalue'] = "grid-proxy-destroy" - _schema['init_parameters']._meta['defvalue'] = {"pipe": "-pwstdin", "valid": "-valid"} - _schema['destroy_parameters']._meta['defvalue'] = {} - _schema['info_parameters']._meta['defvalue'] = {} - - _name = "GridCommand" - _hidden = 1 - _enable_config = 1 - - def __init__(self): - super(GridCommand, self).__init__() - - self.currentOpts = {} - self.infoOpts = {} - self.destroyOpts = {} - - -class VomsCommand(ICommandSet): - - """ - Class used to define shell commands and options for working with Grid proxy, - using VOMS extensions - """ - - _schema = ICommandSet._schema.inherit_copy() - - _schema['init']._meta['defvalue'] = "voms-proxy-init" - _schema['info']._meta['defvalue'] = "voms-proxy-info" - _schema['destroy']._meta['defvalue'] = "voms-proxy-destroy" - _schema['init_parameters']._meta['defvalue'] = {"pipe": "-pwstdin", "valid": "-valid", "voms": "-voms"} - _schema['destroy_parameters']._meta['defvalue'] = {} - _schema['info_parameters']._meta['defvalue'] = {"vo": "-vo"} - - _name = "VomsCommand" - _hidden = 1 - _enable_config = 1 - - def __init__(self): - super(VomsCommand, self).__init__() - - self.currentOpts = {} - self.infoOpts = {} - self.destroyOpts = {} - - -for commandSet in [GridCommand, VomsCommand]: - registerCommandSet(commandSet) - -# global proxy info cache -_infoCache = {} - - -class GridProxy(ICredential): - - """ - Class for working with Grid proxy - """ - - _schema = ICredential._schema.inherit_copy() - _schema.datadict["voms"] = SimpleItem(defvalue="", doc="Virtual organisation managment system information") - _schema.datadict["init_opts"] = SimpleItem( - defvalue="", doc="String of options to be passed to command for proxy creation") - _schema.datadict["info_refresh_time"] = SimpleItem(defvalue="00:15", doc="Refresh time of proxy info cache") - _schema.datadict["maxTry"] = SimpleItem( - defvalue=5, doc="Number of password attempts allowed when creating credential") - _name = "GridProxy" - _hidden = 1 - _enable_config = 1 - _exportmethods = ["create", "destroy", "identity", "info", "isAvailable", - "isValid", "location", "renew", "timeleft", "voname", "fullIdentity"] - - def __init__(self): - super(GridProxy, self).__init__() - self.shell = getShell() - self.chooseCommandSet() - return - - def chooseCommandSet(self): - """ - Choose command set to be used for proxy-related commands - - No arguments other than self - - If self.voms has a null value then the GridCommand set of commands - is used. Otherwise the VomsCommand set of commands is used. - - Return value: None - """ - from GangaCore.GPIDev.Lib.GangaList.GangaList import GangaList - if self.voms: - # Make sure we don't overwrite a VomsCommand if it's already there - if not isinstance(self.command, VomsCommand): - self.command = VomsCommand() - else: - self.command = GridCommand() - - # Populate the self.command.currentOpts dictionary with - # GridProxy specific options. - def buildOpts(self, command, clear=True): - if command == self.command.init: - if clear: - self.command.currentOpts.clear() - if "voms" in self.command.init_parameters: - if self.voms: - self.command.currentOpts[self.command.init_parameters['voms']] = self.voms - if "valid" in self.command.init_parameters: - if self.validityAtCreation: - self.command.currentOpts[self.command.init_parameters['valid']] \ - = self.validityAtCreation - if self.init_opts: - self.command.currentOpts[''] = self.init_opts - elif command == self.command.destroy: - if clear: - self.command.destroyOpts.clear() - elif command == self.command.info: - if clear: - self.command.infoOpts.clear() - - def create(self, validity="", maxTry=0, minValidity="", check=False): - self.chooseCommandSet() - self.buildOpts(self.command.init) - status = ICredential.create(self, validity, maxTry, minValidity, check) - return status - - def destroy(self, allowed_exit=[0, 1]): - self.chooseCommandSet() - self.buildOpts(self.command.destroy) - return ICredential.destroy(self, allowed_exit) - - def isAvailable(self): - if self.shell: - return True - else: - return False - - def isValid(self, validity="", log=False, force_check=False): - - # Do parent check - if not ICredential.isValid(self, validity, log, force_check): - return False - - # check vo names - if self.voname() != self.voms: - if log: - logger.warning("Grid Proxy not valid. Certificate VO '%s' does not match requested '%s'" - % (self.voname(), self.voms)) - return False - - return True - - def location(self): - - proxyPath = self.info("-path").strip() - - if not os.path.exists(proxyPath): - proxyPath = "" - - return proxyPath - - def fullIdentity(self, safe=False): - """ - Return the users full identity as a dictionary - - Argument: - safe - logical flag - => False : return identity exactly as obtained from proxy - => True : return identity after stripping out - non-alphanumeric characters - - Return value: Dictionary of the various labels in the users DN - """ - - ele_dict = {} - - subjectList = self.info(opt="-identity").split("/") - - for subjectElement in subjectList: - element = subjectElement.strip() - if element.find("=") == -1: - continue - - field, val = element.split("=") - if safe: - val = re.sub("[^a-zA-Z0-9]", "", val) - ele_dict[field] = val - - return ele_dict - - def identity(self, safe=False): - """ - Return user's identify - - Argument: - safe - logical flag - => False : return identity exactly as obtained from proxy - => True : return identity after stripping out - non-alphanumeric characters - - => The identity is determined from the user proxy if possible, - or otherwise from the user's top-level directory - - Return value: String specifying user identity - """ - - cn = os.path.basename(os.path.expanduser("~")) - try: - subjectList = self.info(opt="-identity").split("/") - subjectList.reverse() - for subjectElement in subjectList: - element = subjectElement.strip() - try: - cn = element.split("CN=")[1].strip() - if cn != "proxy": - break - except IndexError: - pass - except Exception as err: - logger.debug("Err: %s" % err) - pass - - id = "".join(cn.split()) - if safe: - id = re.sub("[^a-zA-Z0-9]", "", id) - - return id - - def info(self, opt="", force_check=False): - """ - Obtain proxy information - - Arguments other than self: - opt - String of options to be used when querying - proxy information - => Help on valid options can be obtained using: - info( opt = "-help" ) - force_check - Force credential check, rather than relying on cache - - Return value: Output from result of querying proxy - """ - - # use cached version of this command call if possible - output = self.getProxyCacheValue(opt) - - if (force_check) or (output == ""): - self.chooseCommandSet() - infoCommand = " ".join([self.command.info, opt]) - logger.debug("Executing info Command: %s" % infoCommand) - status, output, message = self.shell.cmd1(cmd=infoCommand, allowed_exit=allowed_exit_range) - - self.addToProxyCache(status, output, opt) - - if not output: - output = "" - - return str("%s" % output) - - def renew(self, validity="", maxTry=0, minValidity="", check=True): - self.chooseCommandSet() - if self.voms: - if not self.voname(): - check = False - return ICredential.renew(self, validity, maxTry, minValidity, check) - - def timeleft(self, units="hh:mm:ss", force_check=False): - return ICredential.timeleft(self, units, force_check) - - def timeleftInHMS(self, force_check=False): - global _infoCache - - output = self.getProxyCacheValue("timeleftInHMS") - status = 0 - - if (force_check) or (output == ""): - # should really use the 'info' method - self.chooseCommandSet() - infoList = [self.command.info] - # Append option value pairs - for optName, optVal in self.command.infoOpts.items(): - infoList.append("%s %s" % (optName, optVal)) - logger.debug("Executing timeHMS Command: %s" % " ".join(infoList)) - status, output, message = self.shell.cmd1(cmd=" ".join(infoList), allowed_exit=allowed_exit_range) - - self.addToProxyCache(status, output, "timeleftInHMS") - - timeRemaining = "00:00:00" - - if status: - if (1 + output.lower().find("command not found")): - logger.warning("Command '" + self.command.info + "' not found") - logger.warning("Unable to obtain information on Grid proxy") - timeRemaining = "" - if "timeleftInHMS" in _infoCache: - del _infoCache["timeleftInHMS"] - - if timeRemaining: - lineList = output.split("\n") - for line in lineList: - if (1 + line.find("Couldn't find a valid proxy")): - timeRemaining = "-1" - if 'timeleftInHMS' in _infoCache: - del _infoCache['timeleftInHMS'] - break - elif (1 + line.find("timeleft")): - elementList = line.split() - timeRemaining = elementList[2] - break - - return timeRemaining - - def voname(self, force_check=False): - """ - Obtain name of virtual organisation from proxy - - Argument other than self: - force_check - Force credential check, rather than relying on cache - - Return value: Name of virtual organisation where this can be determined - (voms proxy), or empty string otherwise (globus proxy) - """ - global _infoCache - output = self.getProxyCacheValue("voname") - - if (force_check) or (output == ""): - self.chooseCommandSet() - infoCommand = "" - - if "vo" in self.command.info_parameters: - if self.command.info: - infoCommand = " ".join([self.command.info, - self.command.info_parameters["vo"]]) - else: - infoCommand = self.command.info - - if infoCommand: - logger.debug("Executing voname Command: %s" % infoCommand) - status, output, message = self.shell.cmd1(cmd=infoCommand, - allowed_exit=allowed_exit_range, capture_stderr=True) - - self.addToProxyCache(status, output, "voname") - - else: - output = "" - - if not output: - output = "" - if "voname" in _infoCache: - del _infoCache["voname"] - - output = output.strip() - - for error in ["VOMS extension not found", "unrecognized option"]: - if output.find(error) != -1: - output = "" - if 'voname' in _infoCache: - del _infoCache['voname'] - break - - # Check for reasonable output (single-word VO) - if len(output.split()) != 1: - output = self.voms - - return output - - def getProxyCacheValue(self, opt): - """ - Check the proxy cache for the required key. Make sure the proxy - file is older than the last check. - - opt - the key to check for - """ - - global _infoCache - - info_refresh = self.timeInSeconds(self.info_refresh_time) - output = '' - path = '' - - # check when the grid proxy was created - if '-path' not in _infoCache or (_infoCache['-path'][1] < (time.time() - info_refresh)): - self.chooseCommandSet() - infoCommand = " ".join([self.command.info, '-path']) - logger.debug("Executing cache Command: %s" % infoCommand) - status, output, message = self.shell.cmd1(cmd=infoCommand, allowed_exit=allowed_exit_range) - - if not status: - path = output - self.addToProxyCache(status, output, '-path') - - else: - path = _infoCache['-path'][0] - - path = path.strip() - if not os.path.exists(path): - # blank the cache as the proxy isn't there - _infoCache = {} - return '' - - # we're OK to use the cache - if opt in _infoCache and\ - (_infoCache[opt][1] > (time.time() - info_refresh)) and\ - (_infoCache[opt][1] > os.path.getmtime(path)): - logger.debug("Returning Cached Value %s" % opt) - output = _infoCache[opt][0] - else: - output = "" - - return output - - def addToProxyCache(self, status, output, opt): - """ - Test the result of grid proxy call - and add to the cache if all OK - - status - the status output - output - the output text - opt - opt to add - """ - - if (not status) and (output): - error = False - for line in output.split('\n'): - if (1 + line.find("Couldn't find a valid proxy")): - error = True - - if not error: - _infoCache[opt] = [output, time.time()] - - return None - - # Add documentation strings from base class - for method in [create, destroy, isAvailable, isValid, location, - renew, timeleft, timeleftInHMS]: - if hasattr(ICredential, method.__name__): - baseMethod = getattr(ICredential, method.__name__) - setattr(method, "__doc__", - baseMethod.__doc__.replace("credential", "Grid Proxy")) diff --git a/ganga/GangaCore/GPIDev/Lib/File/LCGSEFile.py b/ganga/GangaCore/GPIDev/Lib/File/LCGSEFile.py deleted file mode 100755 index 89d9689d06..0000000000 --- a/ganga/GangaCore/GPIDev/Lib/File/LCGSEFile.py +++ /dev/null @@ -1,357 +0,0 @@ - -########################################################################## -# Ganga Project. http://cern.ch/ganga -# -# $Id: LCGSEFile.py,v 0.1 2011-02-12 15:40:00 idzhunov Exp $ -########################################################################## - -import copy -import os -import re - -import GangaCore.Utility.Config -import GangaCore.Utility.logging -from GangaCore.GPIDev.Adapters.IGangaFile import IGangaFile -from GangaCore.GPIDev.Base.Proxy import GPIProxyObjectFactory, getName -from GangaCore.GPIDev.Credentials import VomsProxy, require_credential -from GangaCore.GPIDev.Schema import ComponentItem, Schema, SimpleItem, Version -from GangaCore.Utility.Config import getConfig -from GangaCore.Utility.GridShell import getShell - -logger = GangaCore.Utility.logging.getLogger() - - -regex = re.compile(r'[*?\[\]]') - - -def getLCGConfig(): - return getConfig('Output')['LCGSEFile']['uploadOptions'] - - -class LCGSEFile(IGangaFile): - - """LCGSEFile represents a class marking an output file to be written into LCG SE - """ - - _schema = Schema(Version(1, 1), { - 'namePattern': SimpleItem(defvalue="", doc='pattern of the file name'), - 'localDir': SimpleItem(defvalue="", copyable=1, doc='local dir where the file is stored, used from get and put methods'), - 'joboutputdir': SimpleItem(defvalue="", doc='outputdir of the job with which the outputsandbox file object is associated'), - 'se': SimpleItem(defvalue=getLCGConfig()['dest_SRM'], copyable=1, doc='the LCG SE hostname'), - 'se_type': SimpleItem(defvalue='', copyable=1, doc='the LCG SE type'), - 'se_rpath': SimpleItem(defvalue='', copyable=1, doc='the relative path to the file from the VO directory on the SE'), - 'lfc_host': SimpleItem(defvalue=getLCGConfig()['LFC_HOST'], copyable=1, doc='the LCG LFC hostname'), - 'srm_token': SimpleItem(defvalue='', copyable=1, doc='the SRM space token, meaningful only when se_type is set to srmv2'), - 'SURL': SimpleItem(defvalue='', copyable=1, doc='the LCG SE SURL'), - 'port': SimpleItem(defvalue='', copyable=1, doc='the LCG SE port'), - 'locations': SimpleItem(defvalue=[], copyable=1, typelist=[str], sequence=1, doc="list of locations where the outputfiles were uploaded"), - 'subfiles': ComponentItem(category='gangafiles', defvalue=[], hidden=1, sequence=1, copyable=0, doc="collected files from the wildcard namePattern"), - 'failureReason': SimpleItem(defvalue="", protected=1, copyable=0, doc='reason for the upload failure'), - 'compressed': SimpleItem(defvalue=False, typelist=[bool], protected=0, doc='wheather the output file should be compressed before sending somewhere'), - 'credential_requirements': ComponentItem('CredentialRequirement', defvalue='VomsProxy'), - }) - _category = 'gangafiles' - _name = "LCGSEFile" - _exportmethods = ["location", "setLocation", "get", "put", "getUploadCmd"] - - def __init__(self, namePattern='', localDir='', **kwds): - """ namePattern is the pattern of the output file that has to be written into LCG SE - """ - super(LCGSEFile, self).__init__() - self.namePattern = namePattern - self.localDir = localDir - - self.locations = [] - - def __setattr__(self, attr, value): - if attr == 'se_type' and value not in ['', 'srmv1', 'srmv2', 'se']: - raise AttributeError('invalid se_type: %s' % value) - super(LCGSEFile, self).__setattr__(attr, value) - - def _on_attribute__set__(self, obj_type, attrib_name): - r = copy.deepcopy(self) - if getName(obj_type) == 'Job' and attrib_name == 'outputfiles': - r.locations = [] - r.localDir = '' - r.failureReason = '' - return r - - def __repr__(self): - """Get the representation of the file.""" - - return "LCGSEFile(namePattern='%s')" % self.namePattern - - def __get_unique_fname__(self): - '''gets an unique filename''' - - import random - import time - - uuid = (str(random.uniform(0, 100000000)) + - '-' + str(time.time())).replace('.', '-') - user = getConfig('Configuration')['user'] - - fname = 'user.%s.%s' % (user, uuid) - return fname - - def setLocation(self): - """ - Sets the location of output files that were uploaded to lcg storage element from the WN - """ - - job = self.getJobObject() - - postprocessLocationsPath = os.path.join( - job.outputdir, getConfig('Output')['PostProcessLocationsFileName']) - if not os.path.exists(postprocessLocationsPath): - return - - def lcgse_line_processor(line, lcgse_file): - guid = line[line.find('->') + 2:] - pattern = line.split(' ')[1] - name = line.split(' ')[2].strip('.gz') - - if regex.search(lcgse_file.namePattern) is not None: - d = LCGSEFile(namePattern=name) - d.compressed = lcgse_file.compressed - d.lfc_host = lcgse_file.lfc_host - d.se = lcgse_file.se - # todo copy also the other attributes - lcgse_file.subfiles.append(GPIProxyObjectFactory(d)) - lcgse_line_processor(line, d) - elif pattern == lcgse_file.namePattern: - if guid.startswith('ERROR'): - logger.error("Failed to upload file to LCG SE") - logger.error(guid[6:]) - lcgse_file.failureReason = guid[6:] - return - lcgse_file.locations = guid - - for line in open(postprocessLocationsPath, 'r'): - - if line.strip() == '': - continue - - if line.startswith('lcgse'): - lcgse_line_processor(line.strip(), self) - - def location(self): - """ - Return list with the locations of the post processed files (if they were configured to upload the output somewhere) - """ - return self.locations - - def getUploadCmd(self): - - vo = self.credential_requirements.vo - - cmd = 'lcg-cr --vo %s ' % vo - if self.se != '': - cmd = cmd + ' -d %s' % self.se - if self.se_type == 'srmv2' and self.srm_token != '': - cmd = cmd + ' -D srmv2 -s %s' % self.srm_token - - # specify the physical location - if self.se_rpath != '': - cmd = cmd + \ - ' -P %s/ganga.%s/filename' % (self.se_rpath, - self.__get_unique_fname__()) - - return cmd - - @require_credential - def put(self): - """ - Executes the internally created command for file upload to LCG SE, this method will - be called on the client - """ - import glob - - sourceDir = '' - - # if used as a stand alone object - if self._getParent() is None: - if self.localDir == '': - logger.warning( - 'localDir attribute is empty, don\'t know from which dir to take the file') - return - else: - sourceDir = self.localDir - else: - job = self.getJobObject() - sourceDir = job.outputdir - import os - os.environ['LFC_HOST'] = self.lfc_host - - fileName = self.namePattern - - if self.compressed: - fileName = '%s.gz' % self.namePattern - - if regex.search(fileName) is not None: - for currentFile in glob.glob(os.path.join(sourceDir, fileName)): - cmd = self.getUploadCmd() - cmd = cmd.replace('filename', currentFile) - cmd = cmd + ' file:%s' % currentFile - - (exitcode, output, m) = getShell(self.credential_requirements).cmd1( - cmd, capture_stderr=True) - - d = LCGSEFile(namePattern=os.path.basename(currentFile)) - d.compressed = self.compressed - d.lfc_host = self.lfc_host - d.se = self.se - # todo copy also the other attributes - - if exitcode == 0: - - match = re.search(r'(guid:\S+)', output) - if match: - d.locations = output.strip() - - # Alex removed this as more general approach in job.py after put() is called - # remove file from output dir if this object is attached to a job - # if self._getParent() is not None: - # os.system('rm %s' % os.path.join(sourceDir, currentFile)) - - else: - d.failureReason = output - if self._getParent() is not None: - logger.error("Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s" % ( - str(self._getParent().fqid), self.failureReason)) - else: - logger.error( - "The file can't be uploaded because of %s" % (self.failureReason)) - - self.subfiles.append(GPIProxyObjectFactory(d)) - - else: - logger.debug("sourceDir: %s" % sourceDir) - logger.debug("fileName: %s" % fileName) - currentFile = os.path.join(sourceDir, fileName) - import os.path - if os.path.isfile(currentFile): - logger.debug("currentFile: %s exists!" % currentFile) - else: - logger.debug("currentFile: %s DOES NOT exist!" % currentFile) - - cmd = self.getUploadCmd() - cmd = cmd.replace('filename', currentFile) - cmd = cmd + ' file:%s' % currentFile - - logger.debug("cmd is: %s" % cmd) - - (exitcode, output, m) = getShell(self.credential_requirements).cmd1(cmd, capture_stderr=True) - - if exitcode == 0: - - match = re.search(r'(guid:\S+)', output) - if match: - self.locations = output.strip() - - # Alex removed this as more general approach in job.py after put() is called - # remove file from output dir if this object is attached to a job - # if self._getParent() is not None: - # os.system('rm %s' % os.path.join(sourceDir, currentFile)) - - else: - self.failureReason = output - if self._getParent() is not None: - logger.error("Job %s failed. One of the job.outputfiles couldn't be uploaded because of %s" % ( - str(self._getParent().fqid), self.failureReason)) - else: - logger.error( - "The file can't be uploaded because of %s" % (self.failureReason)) - - def getWNInjectedScript(self, outputFiles, indent, patternsToZip, postProcessLocationsFP): - """ - Returns script that have to be injected in the jobscript for postprocessing on the WN - """ - lcgCommands = [] - - for outputFile in outputFiles: - lcgCommands.append('lcgse %s %s %s' % ( - outputFile.namePattern, outputFile.lfc_host, outputFile.getUploadCmd())) - logger.debug("OutputFile (%s) cmd for WN script is: %s" % - (outputFile.namePattern, outputFile.getUploadCmd())) - - import inspect - script_location = os.path.join(os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))), - 'scripts/LCGSEFileWNScript.py.template') - - from GangaCore.GPIDev.Lib.File import FileUtils - script = FileUtils.loadScript(script_location, '###INDENT###') - - script = script.replace('###LCGCOMMANDS###', str(lcgCommands)) - script = script.replace('###PATTERNSTOZIP###', str(patternsToZip)) - script = script.replace('###INDENT###', indent) - script = script.replace('###POSTPROCESSLOCATIONSFP###', postProcessLocationsFP) - - return script - - @require_credential - def internalCopyTo(self, targetPath): - """ - Retrieves locally all files matching this LCGSEFile object pattern - Args: - targetPath (str): Target path where the file is copied to - """ - to_location = targetPath - - # set lfc host - os.environ['LFC_HOST'] = self.lfc_host - - vo = self.credential_requirements.vo - - for location in self.locations: - destFileName = os.path.join(to_location, self.namePattern) - cmd = 'lcg-cp --vo {vo} {remote_path} file:{local_path}'.format( - vo=vo, remote_path=location, local_path=destFileName) - (exitcode, output, m) = getShell(self.credential_requirements).cmd1(cmd, capture_stderr=True) - - if exitcode != 0: - logger.error('command %s failed to execute , reason for failure is %s' % (cmd, output)) - - def getWNScriptDownloadCommand(self, indent): - - script = """\n - -###INDENT###os.environ['LFC_HOST'] = '###LFC_HOST###' -###INDENT###cwDir = os.getcwd() -###INDENT###dwnCmd = 'lcg-cp --vo ###VO### lfn:/grid/###VO###/###LOCATION###/###NAMEPATTERN### file:%s' % os.path.join(cwDir, '###NAMEPATTERN###') -###INDENT###os.system(dwnCmd) -""" - - script = script.replace('###INDENT###', indent) - script = script.replace('###LFC_HOST###', self.lfc_host) - script = script.replace( - '###VO###', self.credential_requirements.vo) - script = script.replace('###LOCATION###', self.se_rpath) - script = script.replace('###NAMEPATTERN###', self.namePattern) - - return script - - @require_credential - def processWildcardMatches(self): - if self.subfiles: - return self.subfiles - - from fnmatch import fnmatch - - if regex.search(self.namePattern): - # TODO namePattern shouldn't contain slashes and se_rpath should not contain wildcards - cmd = 'lcg-ls lfn:/grid/{vo}/{se_rpath}'.format(vo=self.credential_requirements.vo, se_rpath=self.se_rpath) - exitcode, output, m = getShell(self.credential_requirements).cmd1(cmd, capture_stderr=True) - - for filename in output.split('\n'): - if fnmatch(filename, self.namePattern): - subfile = LCGSEFile(namePattern=filename) - subfile.se_rpath = self.se_rpath - subfile.lfc_host = self.lfc_host - - self.subfiles.append(GPIProxyObjectFactory(subfile)) - - -# add LCGSEFile objects to the configuration scope (i.e. it will be -# possible to write instatiate LCGSEFile() objects via config file) -GangaCore.Utility.Config.config_scope['LCGSEFile'] = LCGSEFile diff --git a/ganga/GangaCore/GPIDev/Lib/File/LocalFile.py b/ganga/GangaCore/GPIDev/Lib/File/LocalFile.py index 8876b2f085..fe8b4fe10b 100755 --- a/ganga/GangaCore/GPIDev/Lib/File/LocalFile.py +++ b/ganga/GangaCore/GPIDev/Lib/File/LocalFile.py @@ -28,7 +28,7 @@ class LocalFile(IGangaFile): - """LocalFile represents base class for output files, such as MassStorageFile, LCGSEFile, etc + """LocalFile represents base class for output files, such as MassStorageFile, etc """ _schema = Schema(Version(1, 1), {'namePattern': SimpleItem(defvalue="", doc='pattern of the file name'), 'localDir': SimpleItem(defvalue="", doc='local dir where the file is stored, used from get and put methods'), diff --git a/ganga/GangaCore/GPIDev/Lib/File/__init__.py b/ganga/GangaCore/GPIDev/Lib/File/__init__.py index 48a4011d74..6b5ab8095b 100755 --- a/ganga/GangaCore/GPIDev/Lib/File/__init__.py +++ b/ganga/GangaCore/GPIDev/Lib/File/__init__.py @@ -6,7 +6,6 @@ from GangaCore.GPIDev.Lib.File.LocalFile import LocalFile from GangaCore.GPIDev.Lib.File.MassStorageFile import MassStorageFile -from GangaCore.GPIDev.Lib.File.LCGSEFile import LCGSEFile from GangaCore.GPIDev.Lib.File.GoogleFile import GoogleFile import GangaCore.Utility.logging @@ -86,9 +85,6 @@ def string_file_shortcut(v, item): if key == 'MassStorageFile': from .MassStorageFile import MassStorageFile return stripProxy(MassStorageFile._proxyClass(v)) - elif key == 'LCGSEFile': - from .LCGSEFile import LCGSEFile - return stripProxy(LCGSEFile._proxyClass(v)) elif key == 'DiracFile': try: from GangaDirac.Lib.Files.DiracFile import DiracFile diff --git a/ganga/GangaCore/GPIDev/Lib/Tasks/ITransform.py b/ganga/GangaCore/GPIDev/Lib/Tasks/ITransform.py index 48f6f72551..13b2dabaf6 100644 --- a/ganga/GangaCore/GPIDev/Lib/Tasks/ITransform.py +++ b/ganga/GangaCore/GPIDev/Lib/Tasks/ITransform.py @@ -615,8 +615,7 @@ def __setattr__(self, attr, value): 'ITransform.outputsandbox is set, you can\'t set ITransform.outputfiles') return - # reduce duplicate values here, leave only duplicates for LCG, - # where we can have replicas + # reduce duplicate values here uniqueValuesDict = [] uniqueValues = [] @@ -625,8 +624,6 @@ def __setattr__(self, attr, value): if key not in uniqueValuesDict: uniqueValuesDict.append(key) uniqueValues.append(val) - elif getName(val) == 'LCGSEFile': - uniqueValues.append(val) super(ITransform, self).__setattr__(attr, uniqueValues) diff --git a/ganga/GangaCore/GPIDev/Lib/Tasks/TaskRegistry.py b/ganga/GangaCore/GPIDev/Lib/Tasks/TaskRegistry.py index 727865df18..63f1babbbf 100644 --- a/ganga/GangaCore/GPIDev/Lib/Tasks/TaskRegistry.py +++ b/ganga/GangaCore/GPIDev/Lib/Tasks/TaskRegistry.py @@ -409,7 +409,7 @@ def c(s): " Transform Backend : " + c("tf.backend")) print('') print(" Set parameter in all applications : " + c("t.setParameter(my_software_version='1.42.0')")) - print(" Set backend for all transforms : " + c("t.setBackend(backend) , p.e. t.setBackend(LCG())")) + print(" Set backend for all transforms : " + c("t.setBackend(backend) , p.e. t.setBackend(Dirac())")) print(" Limit on how often jobs are resubmitted : " + c("tf.run_limit = 4")) print(" Manually change the status of partitions: " + c("tf.setPartitionStatus(partition, 'status')")) print('') diff --git a/ganga/GangaCore/Lib/Executable/Executable.py b/ganga/GangaCore/Lib/Executable/Executable.py index b83de6c12e..50b26769f8 100755 --- a/ganga/GangaCore/Lib/Executable/Executable.py +++ b/ganga/GangaCore/Lib/Executable/Executable.py @@ -239,27 +239,6 @@ def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): convertIntToStringArgs(app.args), stripProxy(app).getJobObject().outputsandbox, app.env) return c - -class LCGRTHandler(IRuntimeHandler): - - def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - prepared_exe = app.exe - if app.is_prepared is not None: - shared_path = os.path.join(expandfilename(getConfig('Configuration')['gangadir']), - 'shared', getConfig('Configuration')['user']) - if isinstance(app.exe, str): - prepared_exe = app.exe - elif isinstance(app.exe, File): - logger.info("Submitting a prepared application; taking any input files from %s" % ( - app.is_prepared.name)) - prepared_exe = File(os.path.join( - os.path.join(shared_path, app.is_prepared.name), os.path.basename(app.exe.name))) - - return LCGJobConfig(prepared_exe, app._getParent().inputsandbox, convertIntToStringArgs(app.args), app._getParent().outputsandbox, app.env) - - class gLiteRTHandler(IRuntimeHandler): def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): @@ -287,15 +266,11 @@ def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): allHandlers.add('Executable', 'PBS', RTHandler) allHandlers.add('Executable', 'SGE', RTHandler) allHandlers.add('Executable', 'Condor', RTHandler) -allHandlers.add('Executable', 'LCG', LCGRTHandler) allHandlers.add('Executable', 'gLite', gLiteRTHandler) allHandlers.add('Executable', 'TestSubmitter', RTHandler) allHandlers.add('Executable', 'Interactive', RTHandler) allHandlers.add('Executable', 'Batch', RTHandler) allHandlers.add('Executable', 'Cronus', RTHandler) -allHandlers.add('Executable', 'Remote', LCGRTHandler) -allHandlers.add('Executable', 'CREAM', LCGRTHandler) -allHandlers.add('Executable', 'ARC', LCGRTHandler) allHandlers.add('Executable', 'Slurm', RTHandler) diff --git a/ganga/GangaCore/Lib/Executable/__init__.py b/ganga/GangaCore/Lib/Executable/__init__.py index 87bfd98365..0561d56c58 100755 --- a/ganga/GangaCore/Lib/Executable/__init__.py +++ b/ganga/GangaCore/Lib/Executable/__init__.py @@ -1,3 +1,2 @@ -from .Executable import (Executable, LCGRTHandler, RTHandler, gLiteRTHandler, - randomString) +from .Executable import (Executable, RTHandler, gLiteRTHandler, randomString) diff --git a/ganga/GangaCore/Lib/LCG/ARC.py b/ganga/GangaCore/Lib/LCG/ARC.py deleted file mode 100644 index 7cdb940aa4..0000000000 --- a/ganga/GangaCore/Lib/LCG/ARC.py +++ /dev/null @@ -1,1304 +0,0 @@ -# ARC backend -import os -import os.path -import math -import re -import mimetypes -import shutil -from collections import defaultdict - -from urllib.parse import urlparse - -from GangaCore.Core.GangaThread.MTRunner import MTRunner, Data, Algorithm -from GangaCore.Core.exceptions import GangaException - -from GangaCore.GPIDev.Schema import Schema, Version, SimpleItem, ComponentItem -from GangaCore.GPIDev.Lib.File import FileBuffer -from GangaCore.GPIDev.Adapters.IBackend import IBackend -from GangaCore.Utility.Config import getConfig -from GangaCore.Utility.logging import getLogger, log_user_exception -from GangaCore.Utility.logic import implies -from GangaCore.Lib.LCG.Utility import get_md5sum -from GangaCore.Lib.LCG.ElapsedTimeProfiler import ElapsedTimeProfiler - -from GangaCore.Lib.LCG import Grid -from GangaCore.Lib.LCG.GridftpSandboxCache import GridftpSandboxCache - -from GangaCore.GPIDev.Credentials import require_credential, credential_store, needed_credentials -from GangaCore.GPIDev.Credentials.VomsProxy import VomsProxy -from GangaCore.GPIDev.Base.Proxy import getName - -config = getConfig('LCG') - - -class ARC(IBackend): - - '''ARC backend - direct job submission to an ARC CE''' - _schema = Schema(Version(1, 0), { - 'CE': SimpleItem(defvalue='', doc='ARC CE endpoint'), - 'jobtype': SimpleItem(defvalue='Normal', doc='Job type: Normal, MPICH'), - 'requirements': ComponentItem('LCGRequirements', doc='Requirements for the resource selection'), - 'sandboxcache': ComponentItem('GridSandboxCache', copyable=1, doc='Interface for handling oversized input sandbox'), - 'id': SimpleItem(defvalue='', typelist=[str, list], protected=1, copyable=0, doc='Middleware job identifier'), - 'status': SimpleItem(defvalue='', typelist=[str, dict], protected=1, copyable=0, doc='Middleware job status'), - 'exitcode': SimpleItem(defvalue='', protected=1, copyable=0, doc='Application exit code'), - 'exitcode_arc': SimpleItem(defvalue='', protected=1, copyable=0, doc='Middleware exit code'), - 'actualCE': SimpleItem(defvalue='', protected=1, copyable=0, doc='The ARC CE where the job actually runs.'), - 'queue': SimpleItem(defvalue='', typelist=[str], doc='The queue to send the job to.'), - 'xRSLextras': SimpleItem(defvalue=None, typelist=[dict, None], doc='Extra things to put into the xRSL for submission.'), - 'reason': SimpleItem(defvalue='', protected=1, copyable=0, doc='Reason of causing the job status'), - 'workernode': SimpleItem(defvalue='', protected=1, copyable=0, doc='The worker node on which the job actually runs.'), - 'isbURI': SimpleItem(defvalue='', protected=1, copyable=0, doc='The input sandbox URI on ARC CE'), - 'osbURI': SimpleItem(defvalue='', protected=1, copyable=0, doc='The output sandbox URI on ARC CE'), - 'verbose': SimpleItem(defvalue=False, doc='Use verbose options for ARC commands'), - 'credential_requirements': ComponentItem('CredentialRequirement', defvalue=VomsProxy()), - }) - - _category = 'backends' - - _name = 'ARC' - - def __init__(self): - super(ARC, self).__init__() - - # dynamic requirement object loading - try: - reqName1 = config['Requirements'] - reqName = config['Requirements'].split('.').pop() - reqModule = __import__(reqName1, globals(), locals(), [reqName1]) - reqClass = vars(reqModule)[reqName] - self.requirements = reqClass() - - logger.debug('load %s as LCGRequirements' % reqName) - except: - logger.debug('load default LCGRequirements') - pass - - # dynamic sandbox cache object loading - # force to use GridftpSandboxCache - self.sandboxcache = GridftpSandboxCache() - try: - scName1 = config['SandboxCache'] - scName = config['SandboxCache'].split('.').pop() - scModule = __import__(scName1, globals(), locals(), [scName1]) - scClass = vars(scModule)[scName] - self.sandboxcache = scClass() - logger.debug('load %s as SandboxCache' % scName) - except: - logger.debug('load default SandboxCache') - pass - - def __refresh_jobinfo__(self, job): - '''Refresh the lcg jobinfo. It will be called after resubmission.''' - job.backend.status = '' - job.backend.reason = '' - job.backend.actualCE = '' - job.backend.exitcode = '' - job.backend.exitcode_arc = '' - job.backend.workernode = '' - job.backend.isbURI = '' - job.backend.osbURI = '' - - def __setup_sandboxcache__(self, job): - '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend''' - - re_token = re.compile('^token:(.*):(.*)$') - - self.sandboxcache.timeout = config['SandboxTransferTimeout'] - - if self.sandboxcache._name == 'GridftpSandboxCache': - # If the copy command is set in the config then use it. - if config['ArcCopyCommand']: - self.sandboxcache.copyCommand = config['ArcCopyCommand'] - - if self.sandboxcache._name == 'LCGSandboxCache': - if config['ArcCopyCommand']: - self.sandboxcache.copyCommand = config['ArcCopyCommand'] - - if not self.sandboxcache.lfc_host: - self.sandboxcache.lfc_host = Grid.__get_lfc_host__() - - if not self.sandboxcache.se: - - token = '' - se_host = config['DefaultSE'] - m = re_token.match(se_host) - if m: - token = m.group(1) - se_host = m.group(2) - - self.sandboxcache.se = se_host - - if token: - self.sandboxcache.srm_token = token - - if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token): - self.sandboxcache.srm_token = config['DefaultSRMToken'] - - return True - - def __check_and_prestage_inputfile__(self, file): - '''Checks the given input file size and if it's size is - over "BoundSandboxLimit", prestage it to a grid SE. - - The argument is a path of the local file. - - It returns a dictionary containing information to refer to the file: - - idx = {'lfc_host': lfc_host, - 'local': [the local file pathes], - 'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... } - } - - If prestaging failed, None object is returned. - - If the file has been previously uploaded (according to md5sum), - the prestaging is ignored and index to the previously uploaded file - is returned. - ''' - - idx = {'lfc_host': '', 'local': [], 'remote': {}} - - job = self.getJobObject() - - # read-in the previously uploaded files - uploadedFiles = [] - - # getting the uploaded file list from the master job - if job.master: - uploadedFiles += job.master.backend.sandboxcache.get_cached_files() - - # set and get the $LFC_HOST for uploading oversized sandbox - self.__setup_sandboxcache__(job) - - uploadedFiles += self.sandboxcache.get_cached_files() - - lfc_host = None - - # for LCGSandboxCache, take the one specified in the sansboxcache object. - # the value is exactly the same as the one from the local grid shell env. if - # it is not specified exclusively. - if self.sandboxcache._name == 'LCGSandboxCache': - lfc_host = self.sandboxcache.lfc_host - - # or in general, query it from the Grid object - if not lfc_host: - lfc_host = Grid.__get_lfc_host__() - - idx['lfc_host'] = lfc_host - - abspath = os.path.abspath(file) - fsize = os.path.getsize(abspath) - - if fsize > config['BoundSandboxLimit']: - - md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True) - - doUpload = True - for uf in uploadedFiles: - if uf.md5sum == md5sum: - # the same file has been uploaded to the iocache - idx['remote'][os.path.basename(file)] = uf.id - doUpload = False - break - - if doUpload: - - logger.warning( - 'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...' % (file, config['BoundSandboxLimit'])) - - if self.sandboxcache.upload([abspath]): - remote_sandbox = self.sandboxcache.get_cached_files()[-1] - idx['remote'][remote_sandbox.name] = remote_sandbox.id - else: - logger.error( - 'Oversized sandbox not successfully pre-staged') - return None - else: - idx['local'].append(abspath) - - return idx - - def __mt_job_prepare__(self, rjobs, subjobconfigs, masterjobconfig): - '''preparing jobs in multiple threads''' - - logger.warning( - 'preparing %d subjobs ... it may take a while' % len(rjobs)) - - # prepare the master job (i.e. create shared inputsandbox, etc.) - master_input_sandbox = IBackend.master_prepare(self, masterjobconfig) - - # uploading the master job if it's over the WMS sandbox limitation - for f in master_input_sandbox: - master_input_idx = self.__check_and_prestage_inputfile__(f) - - if not master_input_idx: - logger.error('master input sandbox perparation failed: %s' % f) - return None - - # the algorithm for preparing a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self): - Algorithm.__init__(self) - - def process(self, sj_info): - my_sc = sj_info[0] - my_sj = sj_info[1] - - try: - logger.debug("preparing job %s" % my_sj.getFQID('.')) - jdlpath = my_sj.backend.preparejob( - my_sc, master_input_sandbox) - - if (not jdlpath) or (not os.path.exists(jdlpath)): - raise GangaException( - 'job %s not properly prepared' % my_sj.getFQID('.')) - - self.__appendResult__(my_sj.id, jdlpath) - return True - except Exception as x: - log_user_exception() - return False - - mt_data = [] - for sc, sj in zip(subjobconfigs, rjobs): - mt_data.append([sc, sj]) - - myAlg = MyAlgorithm() - myData = Data(collection=mt_data) - - runner = MTRunner( - name='lcg_jprepare', algorithm=myAlg, data=myData, numThread=10) - runner.start() - runner.join(-1) - - if len(runner.getDoneList()) < len(mt_data): - return None - else: - # return a JDL file dictionary with subjob ids as keys, JDL file - # paths as values - return runner.getResults() - - @require_credential - def __mt_bulk_submit__(self, node_jdls): - '''submitting jobs in multiple threads''' - - job = self.getJobObject() - - logger.warning( - 'submitting %d subjobs ... it may take a while' % len(node_jdls)) - - # the algorithm for submitting a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self, cred_req, masterInputWorkspace, ce, arcverbose): - Algorithm.__init__(self) - self.inpw = masterInputWorkspace - self.cred_req = cred_req - self.ce = ce - self.arcverbose = arcverbose - - def process(self, jdl_info): - my_sj_id = jdl_info[0] - my_sj_jdl = jdl_info[1] - - my_sj_jid = Grid.arc_submit(my_sj_jdl, self.ce, self.arcverbose, self.cred_req) - - if not my_sj_jid: - return False - else: - self.__appendResult__(my_sj_id, my_sj_jid) - return True - - mt_data = [] - for id, jdl in node_jdls.items(): - mt_data.append((id, jdl)) - - myAlg = MyAlgorithm(cred_req=self.credential_requirements, masterInputWorkspace=job.getInputWorkspace( - ), ce=self.CE, arcverbose=self.verbose) - myData = Data(collection=mt_data) - - runner = MTRunner(name='arc_jsubmit', algorithm=myAlg, - data=myData, numThread=config['SubmissionThread']) - runner.start() - runner.join(timeout=-1) - - if len(runner.getDoneList()) < len(mt_data): - # not all bulk jobs are successfully submitted. canceling the - # submitted jobs on WMS immediately - logger.error( - 'some bulk jobs not successfully (re)submitted, canceling submitted jobs on WMS') - Grid.arc_cancel_multiple(list(runner.getResults().values()), self.credential_requirements) - return None - else: - return runner.getResults() - - def __jobWrapperTemplate__(self): - '''Create job wrapper''' - - script = """#!/usr/bin/env python -#----------------------------------------------------- -# This job wrapper script is automatically created by -# GANGA LCG backend handler. -# -# It controls: -# 1. unpack input sandbox -# 2. invoke application executable -# 3. invoke monitoring client -#----------------------------------------------------- -import os,os.path,shutil,tempfile -import sys,time,traceback - -#bugfix #36178: subprocess.py crashes if python 2.5 is used -#try to import subprocess from local python installation before an -#import from PYTHON_DIR is attempted some time later -try: - import subprocess -except ImportError: - pass - -## Utility functions ## -def timeString(): - return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) - -def printInfo(s): - out.write(timeString() + ' [Info]' + ' ' + str(s) + os.linesep) - out.flush() - -def printError(s): - out.write(timeString() + ' [Error]' + ' ' + str(s) + os.linesep) - out.flush() - -def lcg_file_download(vo,guid,localFilePath,timeout=60,maxRetry=3): - cmd = 'lcg-cp -t %d --vo %s %s file://%s' % (timeout,vo,guid,localFilePath) - - printInfo('LFC_HOST set to %s' % os.environ['LFC_HOST']) - printInfo('lcg-cp timeout: %d' % timeout) - - i = 0 - rc = 0 - isDone = False - try_again = True - - while try_again: - i = i + 1 - try: - ps = os.popen(cmd) - status = ps.close() - - if not status: - isDone = True - printInfo('File %s download from iocache' % os.path.basename(localFilePath)) - else: - raise IOError("Download file %s from iocache failed with error code: %d, trial %d." % (os.path.basename(localFilePath), status, i)) - - except IOError as e: - isDone = False - printError(str(e)) - - if isDone: - try_again = False - elif i == maxRetry: - try_again = False - else: - try_again = True - - return isDone - -## system command executor with subprocess -def execSyscmdSubprocess(cmd, wdir=os.getcwd()): - - import os, subprocess - - global exitcode - - outfile = open('stdout','w') - errorfile = open('stderr','w') - - try: - child = subprocess.Popen(cmd, cwd=wdir, shell=True, stdout=outfile, stderr=errorfile) - - while 1: - exitcode = child.poll() - if exitcode is not None: - break - else: - outfile.flush() - errorfile.flush() - time.sleep(0.3) - finally: - pass - - outfile.flush() - errorfile.flush() - outfile.close() - errorfile.close() - - return True - -## system command executor with multi-thread -## stderr/stdout handler -def execSyscmdEnhanced(cmd, wdir=os.getcwd()): - - import os, subprocess, threading - - cwd = os.getcwd() - - isDone = False - - try: - ## change to the working directory - os.chdir(wdir) - - child = subprocess.Popen(cmd,1) - child.tochild.close() # don't need stdin - - class PipeThread(threading.Thread): - - def __init__(self,infile,outfile,stopcb): - self.outfile = outfile - self.infile = infile - self.stopcb = stopcb - self.finished = 0 - threading.Thread.__init__(self) - - def run(self): - stop = False - while not stop: - buf = self.infile.read(10000) - self.outfile.write(buf) - self.outfile.flush() - time.sleep(0.01) - stop = self.stopcb() - #FIXME: should we do here?: self.infile.read() - #FIXME: this is to make sure that all the output is read (if more than buffer size of output was produced) - self.finished = 1 - - def stopcb(poll=False): - global exitcode - if poll: - exitcode = child.poll() - return exitcode != -1 - - out_thread = PipeThread(child.fromchild, sys.stdout, stopcb) - err_thread = PipeThread(child.childerr, sys.stderr, stopcb) - - out_thread.start() - err_thread.start() - while not out_thread.finished and not err_thread.finished: - stopcb(True) - time.sleep(0.3) - - sys.stdout.flush() - sys.stderr.flush() - - isDone = True - - except(Exception,e): - isDone = False - - ## return to the original directory - os.chdir(cwd) - - return isDone - -############################################################################################ - -###INLINEMODULES### - -############################################################################################ - -## Main program ## - -outputsandbox = ###OUTPUTSANDBOX### -input_sandbox = ###INPUTSANDBOX### -wrapperlog = ###WRAPPERLOG### -appexec = ###APPLICATIONEXEC### -appargs = ###APPLICATIONARGS### -appenvs = ###APPLICATIONENVS### -timeout = ###TRANSFERTIMEOUT### - -exitcode=-1 - -import sys, stat, os, os.path, commands - -# Change to scratch directory if provided -scratchdir = '' -tmpdir = '' - -orig_wdir = os.getcwd() - -# prepare log file for job wrapper -out = open(os.path.join(orig_wdir, wrapperlog),'w') - -if os.getenv('EDG_WL_SCRATCH'): - scratchdir = os.getenv('EDG_WL_SCRATCH') -elif os.getenv('TMPDIR'): - scratchdir = os.getenv('TMPDIR') - -if scratchdir: - (status, tmpdir) = commands.getstatusoutput('mktemp -d %s/gangajob_XXXXXXXX' % (scratchdir)) - if status == 0: - os.chdir(tmpdir) - else: - ## if status != 0, tmpdir should contains error message so print it to stderr - printError('Error making ganga job scratch dir: %s' % tmpdir) - printInfo('Unable to create ganga job scratch dir in %s. Run directly in: %s' % ( scratchdir, os.getcwd() ) ) - - ## reset scratchdir and tmpdir to disable the usage of Ganga scratch dir - scratchdir = '' - tmpdir = '' - -wdir = os.getcwd() - -if scratchdir: - printInfo('Changed working directory to scratch directory %s' % tmpdir) - try: - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stdout'), os.path.join(wdir, 'stdout'))) - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stderr'), os.path.join(wdir, 'stderr'))) - except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - printInfo('Linking stdout & stderr to original directory failed. Looking at stdout during job run may not be possible') - -os.environ['PATH'] = '.:'+os.environ['PATH'] - -vo = os.environ['GANGA_LCG_VO'] - -try: - printInfo('Job Wrapper start.') - -# download inputsandbox from remote cache - for f,guid in input_sandbox['remote'].iteritems(): - if not lcg_file_download(vo, guid, os.path.join(wdir,f), timeout=int(timeout)): - raise IOError('Download remote input %s:%s failed.' % (guid,f) ) - else: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(f) - else: - shutil.copy(f, os.path.join(os.getcwd(), os.path.basename(f))) - - printInfo('Download inputsandbox from iocache passed.') - -# unpack inputsandbox from wdir - for f in input_sandbox['local']: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(os.path.join(orig_wdir,f)) - - printInfo('Unpack inputsandbox passed.') - - #get input files - ###DOWNLOADINPUTFILES### - - printInfo('Loading Python modules ...') - - sys.path.insert(0,os.path.join(wdir,PYTHON_DIR)) - - # check the python library path - try: - printInfo(' ** PYTHON_DIR: %s' % os.environ['PYTHON_DIR']) - except KeyError: - pass - - try: - printInfo(' ** PYTHONPATH: %s' % os.environ['PYTHONPATH']) - except KeyError: - pass - - for lib_path in sys.path: - printInfo(' ** sys.path: %s' % lib_path) - -# execute application - - ## convern appenvs into environment setup script to be 'sourced' before executing the user executable - - printInfo('Prepare environment variables for application executable') - - env_setup_script = os.path.join(os.getcwd(), '__ganga_lcg_env__.sh') - - f = open( env_setup_script, 'w') - f.write('#!/bin/sh' + os.linesep ) - f.write('##user application environmet setup script generated by Ganga job wrapper' + os.linesep) - for k,v in appenvs.items(): - - str_env = 'export %s="%s"' % (k, v) - - printInfo(' ** ' + str_env) - - f.write(str_env + os.linesep) - f.close() - - try: #try to make shipped executable executable - os.chmod('%s/%s'% (wdir,appexec),stat.S_IXUSR|stat.S_IRUSR|stat.S_IWUSR) - except: - pass - - status = False - try: - # use subprocess to run the user's application if the module is available on the worker node - import subprocess - printInfo('Load application executable with subprocess module') - status = execSyscmdSubprocess('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - except ImportError as err: - # otherwise, use separate threads to control process IO pipes - printInfo('Load application executable with separate threads') - status = execSyscmdEnhanced('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - - os.system("cp %s/stdout stdout.1" % orig_wdir) - os.system("cp %s/stderr stderr.1" % orig_wdir) - - printInfo('GZipping stdout and stderr...') - - os.system("gzip stdout.1 stderr.1") - - # move them to the original wdir so they can be picked up - os.system("mv stdout.1.gz %s/stdout.gz" % orig_wdir) - os.system("mv stderr.1.gz %s/stderr.gz" % orig_wdir) - - if not status: - raise OSError('Application execution failed.') - printInfo('Application execution passed with exit code %d.' % exitcode) - - ###OUTPUTUPLOADSPOSTPROCESSING### - - for f in os.listdir(os.getcwd()): - command = "cp %s %s" % (os.path.join(os.getcwd(),f), os.path.join(orig_wdir,f)) - os.system(command) - - createPackedOutputSandbox(outputsandbox,None,orig_wdir) - -# pack outputsandbox -# printInfo('== check output ==') -# for line in os.popen('pwd; ls -l').readlines(): -# printInfo(line) - - printInfo('Pack outputsandbox passed.') - - # Clean up after us - All log files and packed outputsandbox should be in "wdir" - if scratchdir: - os.chdir(orig_wdir) - os.system("rm %s -rf" % wdir) -except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - -printInfo('Job Wrapper stop.') - -out.close() - -# always return exit code 0 so the in the case of application failure -# one can always get stdout and stderr back to the UI for debug. -sys.exit(0) -""" - return script - - def preparejob(self, jobconfig, master_job_sandbox): - '''Prepare the JDL''' - - script = self.__jobWrapperTemplate__() - - job = self.getJobObject() - inpw = job.getInputWorkspace() - - wrapperlog = '__jobscript__.log' - - import GangaCore.Core.Sandbox as Sandbox - - # FIXME: check what happens if 'stdout','stderr' are specified here - script = script.replace( - '###OUTPUTSANDBOX###', repr(jobconfig.outputbox)) - - script = script.replace( - '###APPLICATION_NAME###', getName(job.application)) - script = script.replace( - '###APPLICATIONEXEC###', repr(jobconfig.getExeString())) - script = script.replace( - '###APPLICATIONARGS###', repr(jobconfig.getArguments())) - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles - - script = script.replace( - '###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' ')) - - script = script.replace( - '###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' ')) - - if jobconfig.env: - script = script.replace( - '###APPLICATIONENVS###', repr(jobconfig.env)) - else: - script = script.replace('###APPLICATIONENVS###', repr({})) - - script = script.replace('###WRAPPERLOG###', repr(wrapperlog)) - import inspect - script = script.replace( - '###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox)) - - mon = job.getMonitoringService() - - self.monInfo = None - - # set the monitoring file by default to the stdout - if isinstance(self.monInfo, dict): - self.monInfo['remotefile'] = 'stdout' - - # try to print out the monitoring service information in debug mode - try: - logger.debug('job info of monitoring service: %s' % - str(self.monInfo)) - except: - pass - -# prepare input/output sandboxes - import GangaCore.Utility.files - from GangaCore.GPIDev.Lib.File import File - from GangaCore.Core.Sandbox.WNSandbox import PYTHON_DIR - import inspect - - fileutils = File(inspect.getsourcefile(GangaCore.Utility.files), subdir=PYTHON_DIR) - packed_files = jobconfig.getSandboxFiles() + [fileutils] - sandbox_files = job.createPackedInputSandbox(packed_files) - - # sandbox of child jobs should include master's sandbox - sandbox_files.extend(master_job_sandbox) - - # check the input file size and pre-upload larger inputs to the iocache - lfc_host = '' - - input_sandbox_uris = [] - input_sandbox_names = [] - - ick = True - - max_prestaged_fsize = 0 - for f in sandbox_files: - - idx = self.__check_and_prestage_inputfile__(f) - - if not idx: - logger.error('input sandbox preparation failed: %s' % f) - ick = False - break - else: - - if idx['lfc_host']: - lfc_host = idx['lfc_host'] - - if idx['remote']: - abspath = os.path.abspath(f) - fsize = os.path.getsize(abspath) - - if fsize > max_prestaged_fsize: - max_prestaged_fsize = fsize - - input_sandbox_uris.append( - idx['remote'][os.path.basename(f)]) - - input_sandbox_names.append( - os.path.basename(urlparse(f)[2])) - - if idx['local']: - input_sandbox_uris += idx['local'] - input_sandbox_names.append(os.path.basename(f)) - - if not ick: - logger.error('stop job submission') - return None - - # determin the lcg-cp timeout according to the max_prestaged_fsize - # - using the assumption of 1 MB/sec. - max_prestaged_fsize = 0 - lfc_host = '' - transfer_timeout = config['SandboxTransferTimeout'] - predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0)) - - if predict_timeout > transfer_timeout: - transfer_timeout = predict_timeout - - if transfer_timeout < 60: - transfer_timeout = 60 - - script = script.replace( - '###TRANSFERTIMEOUT###', '%d' % transfer_timeout) - - # update the job wrapper with the inputsandbox list - script = script.replace( - '###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names})) - - # write out the job wrapper and put job wrapper into job's inputsandbox - scriptPath = inpw.writefile( - FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1) - input_sandbox = input_sandbox_uris + [scriptPath] - - for isb in input_sandbox: - logger.debug('ISB URI: %s' % isb) - - # compose output sandbox to include by default the following files: - # - gzipped stdout (transferred only when the JobLogHandler is WMS) - # - gzipped stderr (transferred only when the JobLogHandler is WMS) - # - __jobscript__.log (job wrapper's log) - output_sandbox = [wrapperlog] - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns - for outputSandboxPattern in getOutputSandboxPatterns(job): - output_sandbox.append(outputSandboxPattern) - - if config['JobLogHandler'] in ['WMS']: - output_sandbox += ['stdout.gz', 'stderr.gz'] - - if len(jobconfig.outputbox): - output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME] - - # compose ARC XRSL - xrsl = { - # 'VirtualOrganisation' : config['VirtualOrganisation'], - 'executable': os.path.basename(scriptPath), - 'environment': {'GANGA_LCG_VO': config['VirtualOrganisation'], 'GANGA_LOG_HANDLER': config['JobLogHandler'], 'LFC_HOST': lfc_host}, - # 'stdout' : 'stdout', - # 'stderr' : 'stderr', - 'inputFiles': input_sandbox, - 'outputFiles': output_sandbox, - 'queue': self.queue, - # 'OutputSandboxBaseDestURI': 'gsiftp://localhost' - } - - xrsl['environment'].update({'GANGA_LCG_CE': self.CE}) - #xrsl['Requirements'] = self.requirements.merge(jobconfig.requirements).convert() - - if self.xRSLextras: - for key in self.xRSLextras: - if key in xrsl: - xrsl[key].update(self.xRSLextras[key]) - else: - xrsl[key] = self.xRSLextras[key] - - # if self.jobtype.upper() in ['NORMAL','MPICH']: - #xrsl['JobType'] = self.jobtype.upper() - # if self.jobtype.upper() == 'MPICH': - #xrsl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)') - # xrsl['Requirements'].append('Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)') - #xrsl['NodeNumber'] = self.requirements.nodenumber - # else: - # logger.warning('JobType "%s" not supported' % self.jobtype) - # return - -# additional settings from the job - if jobconfig.env: - xrsl['environment'].update(jobconfig.env) - - xrslText = Grid.expandxrsl(xrsl) - - # append any additional requirements from the requirements object - xrslText += '\n'.join(self.requirements.other) - - logger.debug('subjob XRSL: %s' % xrslText) - return inpw.writefile(FileBuffer('__xrslfile__', xrslText)) - - @require_credential - def kill(self): - '''Kill the job''' - job = self.getJobObject() - - logger.info('Killing job %s' % job.getFQID('.')) - - if not self.id: - logger.warning('Job %s is not running.' % job.getFQID('.')) - return False - - return Grid.arc_cancel([self.id], self.credential_requirements) - - def master_kill(self): - '''kill the master job to the grid''' - - job = self.getJobObject() - - if not job.master and len(job.subjobs) == 0: - return IBackend.master_kill(self) - elif job.master: - return IBackend.master_kill(self) - else: - return self.master_bulk_kill() - - @require_credential - def master_bulk_kill(self): - '''GLITE bulk resubmission''' - - job = self.getJobObject() - - # killing the individually re-submitted subjobs - logger.debug('cancelling running/submitted subjobs.') - - # 1. collect job ids - ids = [] - for sj in job.subjobs: - if sj.status in ['submitted', 'running'] and sj.backend.id: - ids.append(sj.backend.id) - - # 2. cancel the collected jobs - ck = Grid.arc_cancel_multiple(ids, self.credential_requirements) - if not ck: - logger.warning('Job cancellation failed') - return False - else: - for sj in job.subjobs: - if sj.backend.id in ids: - sj.updateStatus('killed') - - return True - - def master_bulk_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''submit multiple subjobs in parallel, by default using 10 concurrent threads''' - - from GangaCore.Utility.logic import implies - assert(implies(rjobs, len(subjobconfigs) == len(rjobs))) - - # prepare the subjobs, jdl repository before bulk submission - node_jdls = self.__mt_job_prepare__( - rjobs, subjobconfigs, masterjobconfig) - - if not node_jdls: - logger.error('Some jobs not successfully prepared') - return False - - # set all subjobs to submitting status - for sj in rjobs: - sj.updateStatus('submitting') - - node_jids = self.__mt_bulk_submit__(node_jdls) - - status = False - - if node_jids: - for sj in rjobs: - if sj.id in node_jids: - sj.backend.id = node_jids[sj.id] - sj.backend.CE = self.CE - sj.backend.actualCE = sj.backend.CE - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - else: - logger.warning( - 'subjob %s not successfully submitted' % sj.getFQID('.')) - - status = True - - return status - - def master_bulk_resubmit(self, rjobs): - '''ARC bulk resubmission''' - - from GangaCore.Utility.logging import log_user_exception - -# job = self.getJobObject() - - # compose master JDL for collection job - node_jdls = {} - for sj in rjobs: - jdlpath = os.path.join(sj.inputdir, '__jdlfile__') - node_jdls[sj.id] = jdlpath - - # set all subjobs to submitting status - for sj in rjobs: - sj.updateStatus('submitting') - - node_jids = self.__mt_bulk_submit__(node_jdls) - - status = False - - if node_jids: - for sj in rjobs: - if sj.id in node_jids: - self.__refresh_jobinfo__(sj) - sj.backend.id = node_jids[sj.id] - sj.backend.CE = self.CE - sj.backend.actualCE = sj.backend.CE - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - else: - logger.warning( - 'subjob %s not successfully submitted' % sj.getFQID('.')) - - status = True - -# # set all subjobs to submitted status -# # NOTE: this is just a workaround to avoid the unexpected transition -# # that turns the master job's status from 'submitted' to 'submitting'. -# # As this transition should be allowed to simulate a lock mechanism in Ganga 4, the workaround -# # is to set all subjobs' status to 'submitted' so that the transition can be avoided. -# # A more clear solution should be implemented with the lock mechanism introduced in Ganga 5. -# for sj in rjobs: -# sj.updateStatus('submitted') -# sj.info.submit_counter += 1 - - return status - - @require_credential - def master_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''Submit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - job = self.getJobObject() - - # finding ARC CE endpoint for job submission - #allowed_celist = [] - # try: - # allowed_celist = self.requirements.getce() - # if not self.CE and allowed_celist: - # self.CE = allowed_celist[0] - # except: - # logger.warning('ARC CE assigment from ARCRequirements failed.') - - # if self.CE and allowed_celist: - # if self.CE not in allowed_celist: - # logger.warning('submission to CE not allowed: %s, use %s instead' % ( self.CE, allowed_celist[0] ) ) - # self.CE = allowed_celist[0] - - # use arc info to check for any endpoints recorded in the config file - rc, output = Grid.arc_info(self.credential_requirements) - - if not self.CE and rc != 0: - raise GangaException( - "ARC CE endpoint not set and no default settings in '%s'. " % config['ArcConfigFile']) - elif self.CE: - logger.info('ARC CE endpoint set to: ' + str(self.CE)) - else: - logger.info("Using ARC CE endpoints defined in '%s'" % - config['ArcConfigFile']) - - # doing massive job preparation - if len(job.subjobs) == 0: - ick = IBackend.master_submit( - self, rjobs, subjobconfigs, masterjobconfig) - else: - ick = self.master_bulk_submit( - rjobs, subjobconfigs, masterjobconfig) - - profiler.check('==> master_submit() elapsed time') - - return ick - - @require_credential - def submit(self, subjobconfig, master_job_sandbox): - '''Submit the job to the grid''' - - ick = False - - xrslpath = self.preparejob(subjobconfig, master_job_sandbox) - - if xrslpath: - self.id = Grid.arc_submit(xrslpath, self.CE, self.verbose, self.credential_requirements) - - if self.id: - self.actualCE = self.CE - ick = True - - return ick - - def master_auto_resubmit(self, rjobs): - """ - Resubmit each subjob individually as bulk resubmission will overwrite - previous master job statuses - """ - - # check for master failure - in which case bulk resubmit - mj = self._getParent() - if mj.status == 'failed': - return self.master_resubmit(rjobs) - - for j in rjobs: - if not j.backend.master_resubmit([j]): - return False - - return True - - def master_resubmit(self, rjobs): - '''Resubmit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - job = self.getJobObject() - - ick = False - - if not job.master and len(job.subjobs) == 0: - # case 1: master job normal resubmission - logger.debug('rjobs: %s' % str(rjobs)) - logger.debug('mode: master job normal resubmission') - ick = IBackend.master_resubmit(self, rjobs) - - elif job.master: - # case 2: individual subjob resubmission - logger.debug('mode: individual subjob resubmission') - ick = IBackend.master_resubmit(self, rjobs) - - else: - # case 3: master job bulk resubmission - logger.debug('mode: master job resubmission') - - ick = self.master_bulk_resubmit(rjobs) - if not ick: - raise GangaException('ARC bulk submission failure') - - profiler.check('job re-submission elapsed time') - - return ick - - @require_credential - def resubmit(self): - '''Resubmit the job''' - - ick = False - - job = self.getJobObject() - - jdlpath = job.getInputWorkspace().getPath("__jdlfile__") - - if jdlpath: - self.id = Grid.arc_submit(jdlpath, self.CE, self.verbose, self.credential_requirements) - - if self.id: - # refresh the lcg job information - self.__refresh_jobinfo__(job) - self.actualCE = self.CE - ick = True - - return ick - - @staticmethod - def updateMonitoringInformation(jobs): - '''Monitoring loop for normal jobs''' - - import datetime - - ce_list = [] # type: List[str] - jobdict = {} # type: Mapping[str, Job] - for j in jobs: - if j.backend.id and ((datetime.datetime.utcnow() - j.time.timestamps["submitted"]).seconds > config["ArcWaitTimeBeforeStartingMonitoring"]): - jobdict[j.backend.id] = j - ce_list.append(j.backend.actualCE) - - if len(jobdict) == 0: - return - - # Group jobs by the backend's credential requirements - cred_to_backend_id_list = defaultdict(list) # type: Mapping[ICredentialRequirement, List[str]] - for jid, job in jobdict.items(): - cred_to_backend_id_list[job.backend.credential_requirements].append(jid) - - # Batch the status requests by credential requirement - jobInfoDict = {} - for cred_req, job_ids in cred_to_backend_id_list.items(): - # If the credential is not valid or doesn't exist then skip it - cred = credential_store.get(cred_req) - if not cred or not cred.is_valid(): - needed_credentials.add(cred_req) - continue - # Create a ``Grid`` for each credential requirement and request the relevant jobs through it - info = Grid.arc_status(job_ids, ce_list, cred_req) - jobInfoDict.update(info) - - jidListForPurge = [] - - # update job information for those available in jobInfoDict - for id, info in jobInfoDict.items(): - - if info: - - job = jobdict[id] - - if job.backend.actualCE != urlparse(id)[1].split(":")[0]: - job.backend.actualCE = urlparse(id)[1].split(":")[0] - - if job.backend.status != info['State']: - - doStatusUpdate = True - - # no need to update Ganga job status if backend status is - # not changed - if info['State'] == job.backend.status: - doStatusUpdate = False - - # download output sandboxes if final status is reached - elif info['State'] in ['Finished', '(FINISHED)', 'Finished (FINISHED)']: - - # grab output sandbox - if Grid.arc_get_output(job.backend.id, job.getOutputWorkspace(create=True).getPath(), job.backend.credential_requirements): - (ick, app_exitcode) = Grid.__get_app_exitcode__( - job.getOutputWorkspace(create=True).getPath()) - job.backend.exitcode = app_exitcode - - jidListForPurge.append(job.backend.id) - - else: - logger.error( - 'fail to download job output: %s' % jobdict[id].getFQID('.')) - - if doStatusUpdate: - job.backend.status = info['State'] - if 'Exit Code' in info: - try: - job.backend.exitcode_arc = int( - info['Exit Code']) - except: - job.backend.exitcode_arc = 1 - - if 'Job Error' in info: - try: - job.backend.reason = info['Job Error'] - except: - pass - - job.backend.updateGangaJobStatus() - else: - logger.warning( - 'fail to retrieve job informaton: %s' % jobdict[id].getFQID('.')) - - # purging the jobs the output has been fetched locally - if jidListForPurge: - for cred_req, job_ids in cred_to_backend_id_list.items(): - if not Grid.arc_purge_multiple(set(job_ids) & set(jidListForPurge), cred_req): - logger.warning("Failed to purge all ARC jobs.") - - def updateGangaJobStatus(self): - '''map backend job status to Ganga job status''' - - job = self.getJobObject() - - if self.status.startswith('Running') or self.status.startswith('Finishing'): - job.updateStatus('running') - elif self.status.startswith('Finished'): - if job.backend.exitcode and job.backend.exitcode != 0: - job.backend.reason = 'non-zero app. exit code: %s' % repr( - job.backend.exitcode) - job.updateStatus('failed') - elif job.backend.exitcode_arc and job.backend.exitcode_arc != 0: - job.backend.reason = 'non-zero ARC job exit code: %s' % repr( - job.backend.exitcode_arc) - job.updateStatus('failed') - else: - job.updateStatus('completed') - - elif self.status in ['DONE-FAILED', 'ABORTED', 'UNKNOWN', 'Failed']: - job.updateStatus('failed') - - elif self.status in ['CANCELLED']: - job.updateStatus('killed') - - elif self.status.startswith('Queuing'): - pass - - else: - logger.warning('Unexpected job status "%s"', self.status) - - -logger = getLogger() diff --git a/ganga/GangaCore/Lib/LCG/CREAM.py b/ganga/GangaCore/Lib/LCG/CREAM.py deleted file mode 100644 index 6bb7eb3767..0000000000 --- a/ganga/GangaCore/Lib/LCG/CREAM.py +++ /dev/null @@ -1,1331 +0,0 @@ -# CREAM backend -import os -import os.path -import math -import re -import mimetypes -import shutil -from collections import defaultdict - -from urllib.parse import urlparse - -from GangaCore.Core.GangaThread.MTRunner import MTRunner, Data, Algorithm -from GangaCore.Core.exceptions import GangaException - -from GangaCore.GPIDev.Schema import Schema, Version, SimpleItem, ComponentItem -from GangaCore.GPIDev.Lib.File import FileBuffer -from GangaCore.GPIDev.Adapters.IBackend import IBackend -from GangaCore.Utility.Config import getConfig -from GangaCore.Utility.logging import getLogger, log_user_exception -from GangaCore.Utility.logic import implies -from GangaCore.Lib.LCG.Utility import get_uuid -from GangaCore.Lib.LCG.Utility import get_md5sum -from GangaCore.Lib.LCG.ElapsedTimeProfiler import ElapsedTimeProfiler - -from GangaCore.Lib.LCG import Grid -from GangaCore.Lib.LCG.GridftpSandboxCache import GridftpSandboxCache - -from GangaCore.GPIDev.Base.Proxy import getName -from GangaCore.GPIDev.Credentials import require_credential, credential_store, needed_credentials -from GangaCore.GPIDev.Credentials.VomsProxy import VomsProxy -config = getConfig('LCG') - - -def __cream_resolveOSBList__(job, jdl): - - osbURIList = [] - - re_osb = re.compile(r'^.*OutputSandbox\s+\=\s+\{(.*)\}\s?\]?$') - - for l in jdl.split(';'): - m = re_osb.match(l) - if m: - osb = m.group(1) - osb = re.sub(r'\s?\"\s?', '', osb) - - for f in osb.split(','): - if not urlparse(f)[0]: - osbURIList.append( - '%s/%s' % (job.backend.osbURI, os.path.basename(f))) - else: - osbURIList.append(f) - break - - return osbURIList - - -class CREAM(IBackend): - - '''CREAM backend - direct job submission to gLite CREAM CE''' - _schema = Schema(Version(1, 0), { - 'CE': SimpleItem(defvalue='', doc='CREAM CE endpoint'), - 'jobtype': SimpleItem(defvalue='Normal', doc='Job type: Normal, MPICH'), - 'requirements': ComponentItem('LCGRequirements', doc='Requirements for the resource selection'), - 'sandboxcache': ComponentItem('GridSandboxCache', copyable=1, doc='Interface for handling oversized input sandbox'), - 'id': SimpleItem(defvalue='', typelist=[str, list], protected=1, copyable=0, doc='Middleware job identifier'), - 'status': SimpleItem(defvalue='', typelist=[str, dict], protected=1, copyable=0, doc='Middleware job status'), - 'exitcode': SimpleItem(defvalue='', protected=1, copyable=0, doc='Application exit code'), - 'exitcode_cream': SimpleItem(defvalue='', protected=1, copyable=0, doc='Middleware exit code'), - 'actualCE': SimpleItem(defvalue='', protected=1, copyable=0, doc='The CREAM CE where the job actually runs.'), - 'reason': SimpleItem(defvalue='', protected=1, copyable=0, doc='Reason of causing the job status'), - 'workernode': SimpleItem(defvalue='', protected=1, copyable=0, doc='The worker node on which the job actually runs.'), - 'isbURI': SimpleItem(defvalue='', protected=1, copyable=0, doc='The input sandbox URI on CREAM CE'), - 'osbURI': SimpleItem(defvalue='', protected=1, copyable=0, doc='The output sandbox URI on CREAM CE'), - 'credential_requirements': ComponentItem('CredentialRequirement', defvalue=VomsProxy()), - 'delegation_id': SimpleItem(defvalue='', typelist=[str], hidden=True), - }) - - _category = 'backends' - - _name = 'CREAM' - - def __init__(self): - super(CREAM, self).__init__() - - # dynamic requirement object loading - try: - reqName1 = config['Requirements'] - reqName = config['Requirements'].split('.').pop() - reqModule = __import__(reqName1, globals(), locals(), [reqName1]) - reqClass = vars(reqModule)[reqName] - self.requirements = reqClass() - - logger.debug('load %s as LCGRequirements' % reqName) - except: - logger.debug('load default LCGRequirements') - - # dynamic sandbox cache object loading - # force to use GridftpSandboxCache - self.sandboxcache = GridftpSandboxCache() - try: - scName1 = config['SandboxCache'] - scName = config['SandboxCache'].split('.').pop() - scModule = __import__(scName1, globals(), locals(), [scName1]) - scClass = vars(scModule)[scName] - self.sandboxcache = scClass() - logger.debug('load %s as SandboxCache' % scName) - except: - logger.debug('load default SandboxCache') - - def __refresh_jobinfo__(self, job): - '''Refresh the lcg jobinfo. It will be called after resubmission.''' - job.backend.status = '' - job.backend.reason = '' - job.backend.actualCE = '' - job.backend.exitcode = '' - job.backend.exitcode_cream = '' - job.backend.workernode = '' - job.backend.isbURI = '' - job.backend.osbURI = '' - - def __setup_sandboxcache__(self, job): - '''Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend''' - - re_token = re.compile('^token:(.*):(.*)$') - - self.sandboxcache.vo = config['VirtualOrganisation'] - self.sandboxcache.timeout = config['SandboxTransferTimeout'] - - if self.sandboxcache._name == 'LCGSandboxCache': - if not self.sandboxcache.lfc_host: - self.sandboxcache.lfc_host = Grid.__get_lfc_host__() - - if not self.sandboxcache.se: - - token = '' - se_host = config['DefaultSE'] - m = re_token.match(se_host) - if m: - token = m.group(1) - se_host = m.group(2) - - self.sandboxcache.se = se_host - - if token: - self.sandboxcache.srm_token = token - - if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token): - self.sandboxcache.srm_token = config['DefaultSRMToken'] - - elif self.sandboxcache._name == 'GridftpSandboxCache': - # If the copy command is set in the config then use it. - if config['CreamCopyCommand']: - self.sandboxcache.copyCommand = config['CreamCopyCommand'] - - if config['CreamInputSandboxBaseURI']: - self.sandboxcache.baseURI = config['CreamInputSandboxBaseURI'] - elif self.CE: - ce_host = re.sub(r'\:[0-9]+', '', self.CE.split('/cream')[0]) - self.sandboxcache.baseURI = 'gsiftp://%s/opt/glite/var/cream_sandbox/%s' % ( - ce_host, self.sandboxcache.vo) - else: - logger.error('baseURI not available for GridftpSandboxCache') - return False - - return True - - def __check_and_prestage_inputfile__(self, file): - '''Checks the given input file size and if it's size is - over "BoundSandboxLimit", prestage it to a grid SE. - - The argument is a path of the local file. - - It returns a dictionary containing information to refer to the file: - - idx = {'lfc_host': lfc_host, - 'local': [the local file pathes], - 'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... } - } - - If prestaging failed, None object is returned. - - If the file has been previously uploaded (according to md5sum), - the prestaging is ignored and index to the previously uploaded file - is returned. - ''' - - idx = {'lfc_host': '', 'local': [], 'remote': {}} - - job = self.getJobObject() - - # read-in the previously uploaded files - uploadedFiles = [] - - # getting the uploaded file list from the master job - if job.master: - uploadedFiles += job.master.backend.sandboxcache.get_cached_files() - - # set and get the $LFC_HOST for uploading oversized sandbox - self.__setup_sandboxcache__(job) - - uploadedFiles += self.sandboxcache.get_cached_files() - - lfc_host = None - - # for LCGSandboxCache, take the one specified in the sansboxcache object. - # the value is exactly the same as the one from the local grid shell env. if - # it is not specified exclusively. - if self.sandboxcache._name == 'LCGSandboxCache': - lfc_host = self.sandboxcache.lfc_host - - # or in general, query it from the Grid object - if not lfc_host: - lfc_host = Grid.__get_lfc_host__() - - idx['lfc_host'] = lfc_host - - abspath = os.path.abspath(file) - fsize = os.path.getsize(abspath) - - if fsize > config['BoundSandboxLimit']: - - md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True) - - doUpload = True - for uf in uploadedFiles: - if uf.md5sum == md5sum: - # the same file has been uploaded to the iocache - idx['remote'][os.path.basename(file)] = uf.id - doUpload = False - break - - if doUpload: - - logger.warning( - 'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...' % (file, config['BoundSandboxLimit'])) - - if self.sandboxcache.upload([abspath]): - remote_sandbox = self.sandboxcache.get_cached_files()[-1] - idx['remote'][remote_sandbox.name] = remote_sandbox.id - else: - logger.error( - 'Oversized sandbox not successfully pre-staged') - return None - else: - idx['local'].append(abspath) - - return idx - - def __mt_job_prepare__(self, rjobs, subjobconfigs, masterjobconfig): - '''preparing jobs in multiple threads''' - - logger.warning( - 'preparing %d subjobs ... it may take a while' % len(rjobs)) - - # prepare the master job (i.e. create shared inputsandbox, etc.) - master_input_sandbox = IBackend.master_prepare(self, masterjobconfig) - - # uploading the master job if it's over the WMS sandbox limitation - for f in master_input_sandbox: - master_input_idx = self.__check_and_prestage_inputfile__(f) - - if not master_input_idx: - logger.error('master input sandbox perparation failed: %s' % f) - return None - - # the algorithm for preparing a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self): - Algorithm.__init__(self) - - def process(self, sj_info): - my_sc = sj_info[0] - my_sj = sj_info[1] - - try: - logger.debug("preparing job %s" % my_sj.getFQID('.')) - jdlpath = my_sj.backend.preparejob( - my_sc, master_input_sandbox) - - if (not jdlpath) or (not os.path.exists(jdlpath)): - raise GangaException( - 'job %s not properly prepared' % my_sj.getFQID('.')) - - self.__appendResult__(my_sj.id, jdlpath) - return True - except Exception as x: - log_user_exception() - return False - - mt_data = [] - for sc, sj in zip(subjobconfigs, rjobs): - mt_data.append([sc, sj]) - - myAlg = MyAlgorithm() - myData = Data(collection=mt_data) - - runner = MTRunner( - name='lcg_jprepare', algorithm=myAlg, data=myData, numThread=10) - runner.start() - runner.join(-1) - - if len(runner.getDoneList()) < len(mt_data): - return None - else: - # return a JDL file dictionary with subjob ids as keys, JDL file - # paths as values - return runner.getResults() - - @require_credential - def __mt_bulk_submit__(self, node_jdls): - '''submitting jobs in multiple threads''' - - job = self.getJobObject() - - logger.warning( - 'submitting %d subjobs ... it may take a while' % len(node_jdls)) - - # the algorithm for submitting a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self, cred_req, masterInputWorkspace, ce, delid): - Algorithm.__init__(self) - self.inpw = masterInputWorkspace - self.cred_req = cred_req - self.ce = ce - self.delid = delid - - def process(self, jdl_info): - my_sj_id = jdl_info[0] - my_sj_jdl = jdl_info[1] - - my_sj_jid = Grid.cream_submit(my_sj_jdl, self.ce, self.delid, self.cred_req) - - if not my_sj_jid: - return False - else: - self.__appendResult__(my_sj_id, my_sj_jid) - return True - - mt_data = [] - for id, jdl in node_jdls.items(): - mt_data.append((id, jdl)) - - myAlg = MyAlgorithm(cred_req=self.credential_requirements, - masterInputWorkspace=job.getInputWorkspace(), - ce=self.CE, - delid=self.delegation_id) - myData = Data(collection=mt_data) - - runner = MTRunner(name='cream_jsubmit', algorithm=myAlg, - data=myData, numThread=config['SubmissionThread']) - runner.start() - runner.join(timeout=-1) - - if len(runner.getDoneList()) < len(mt_data): - # not all bulk jobs are successfully submitted. canceling the - # submitted jobs on WMS immediately - logger.error( - 'some bulk jobs not successfully (re)submitted, canceling submitted jobs on WMS') - Grid.cancel_multiple(list(runner.getResults().values())) - return None - else: - return runner.getResults() - - def __jobWrapperTemplate__(self): - '''Create job wrapper''' - - script = """#!/usr/bin/env python -#----------------------------------------------------- -# This job wrapper script is automatically created by -# GANGA LCG backend handler. -# -# It controls: -# 1. unpack input sandbox -# 2. invoke application executable -# 3. invoke monitoring client -#----------------------------------------------------- -import os,os.path,shutil,tempfile -import sys,time,traceback - -#bugfix #36178: subprocess.py crashes if python 2.5 is used -#try to import subprocess from local python installation before an -#import from PYTHON_DIR is attempted some time later -try: - import subprocess -except ImportError: - pass - -## Utility functions ## -def timeString(): - return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) - -def printInfo(s): - out.write(timeString() + ' [Info]' + ' ' + str(s) + os.linesep) - out.flush() - -def printError(s): - out.write(timeString() + ' [Error]' + ' ' + str(s) + os.linesep) - out.flush() - -def lcg_file_download(vo,guid,localFilePath,timeout=60,maxRetry=3): - cmd = 'lcg-cp -t %d --vo %s %s file://%s' % (timeout,vo,guid,localFilePath) - - printInfo('LFC_HOST set to %s' % os.environ['LFC_HOST']) - printInfo('lcg-cp timeout: %d' % timeout) - - i = 0 - rc = 0 - isDone = False - try_again = True - - while try_again: - i = i + 1 - try: - ps = os.popen(cmd) - status = ps.close() - - if not status: - isDone = True - printInfo('File %s download from iocache' % os.path.basename(localFilePath)) - else: - raise IOError("Download file %s from iocache failed with error code: %d, trial %d." % (os.path.basename(localFilePath), status, i)) - - except IOError as e: - isDone = False - printError(str(e)) - - if isDone: - try_again = False - elif i == maxRetry: - try_again = False - else: - try_again = True - - return isDone - -## system command executor with subprocess -def execSyscmdSubprocess(cmd, wdir=os.getcwd()): - - import os, subprocess - - global exitcode - - outfile = open('stdout','w') - errorfile = open('stderr','w') - - try: - child = subprocess.Popen(cmd, cwd=wdir, shell=True, stdout=outfile, stderr=errorfile) - - while 1: - exitcode = child.poll() - if exitcode is not None: - break - else: - outfile.flush() - errorfile.flush() - time.sleep(0.3) - finally: - pass - - outfile.flush() - errorfile.flush() - outfile.close() - errorfile.close() - - return True - -## system command executor with multi-thread -## stderr/stdout handler -def execSyscmdEnhanced(cmd, wdir=os.getcwd()): - - import os, subprocess, threading - - cwd = os.getcwd() - - isDone = False - - try: - ## change to the working directory - os.chdir(wdir) - - child = subprocess.Popen(cmd,1) - child.tochild.close() # don't need stdin - - class PipeThread(threading.Thread): - - def __init__(self,infile,outfile,stopcb): - self.outfile = outfile - self.infile = infile - self.stopcb = stopcb - self.finished = 0 - threading.Thread.__init__(self) - - def run(self): - stop = False - while not stop: - buf = self.infile.read(10000) - self.outfile.write(buf) - self.outfile.flush() - time.sleep(0.01) - stop = self.stopcb() - #FIXME: should we do here?: self.infile.read() - #FIXME: this is to make sure that all the output is read (if more than buffer size of output was produced) - self.finished = 1 - - def stopcb(poll=False): - global exitcode - if poll: - exitcode = child.poll() - return exitcode != -1 - - out_thread = PipeThread(child.fromchild, sys.stdout, stopcb) - err_thread = PipeThread(child.childerr, sys.stderr, stopcb) - - out_thread.start() - err_thread.start() - while not out_thread.finished and not err_thread.finished: - stopcb(True) - time.sleep(0.3) - - sys.stdout.flush() - sys.stderr.flush() - - isDone = True - - except(Exception,e): - isDone = False - - ## return to the original directory - os.chdir(cwd) - - return isDone - -############################################################################################ - -###INLINEMODULES### - -############################################################################################ - -## Main program ## - -outputsandbox = ###OUTPUTSANDBOX### -input_sandbox = ###INPUTSANDBOX### -wrapperlog = ###WRAPPERLOG### -appexec = ###APPLICATIONEXEC### -appargs = ###APPLICATIONARGS### -appenvs = ###APPLICATIONENVS### -timeout = ###TRANSFERTIMEOUT### - -exitcode=-1 - -import sys, stat, os, os.path, commands - -# Change to scratch directory if provided -scratchdir = '' -tmpdir = '' - -orig_wdir = os.getcwd() - -# prepare log file for job wrapper -out = open(os.path.join(orig_wdir, wrapperlog),'w') - -if os.getenv('EDG_WL_SCRATCH'): - scratchdir = os.getenv('EDG_WL_SCRATCH') -elif os.getenv('TMPDIR'): - scratchdir = os.getenv('TMPDIR') - -if scratchdir: - (status, tmpdir) = commands.getstatusoutput('mktemp -d %s/gangajob_XXXXXXXX' % (scratchdir)) - if status == 0: - os.chdir(tmpdir) - else: - ## if status != 0, tmpdir should contains error message so print it to stderr - printError('Error making ganga job scratch dir: %s' % tmpdir) - printInfo('Unable to create ganga job scratch dir in %s. Run directly in: %s' % ( scratchdir, os.getcwd() ) ) - - ## reset scratchdir and tmpdir to disable the usage of Ganga scratch dir - scratchdir = '' - tmpdir = '' - -wdir = os.getcwd() - -if scratchdir: - printInfo('Changed working directory to scratch directory %s' % tmpdir) - try: - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stdout'), os.path.join(wdir, 'stdout'))) - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stderr'), os.path.join(wdir, 'stderr'))) - except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - printInfo('Linking stdout & stderr to original directory failed. Looking at stdout during job run may not be possible') - -os.environ['PATH'] = '.:'+os.environ['PATH'] - -vo = os.environ['GANGA_LCG_VO'] - -try: - printInfo('Job Wrapper start.') - -# download inputsandbox from remote cache - for f,guid in input_sandbox['remote'].iteritems(): - if not lcg_file_download(vo, guid, os.path.join(wdir,f), timeout=int(timeout)): - raise IOError('Download remote input %s:%s failed.' % (guid,f) ) - else: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(f) - else: - shutil.copy(f, os.path.join(os.getcwd(), os.path.basename(f))) - - printInfo('Download inputsandbox from iocache passed.') - -# unpack inputsandbox from wdir - for f in input_sandbox['local']: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(os.path.join(orig_wdir,f)) - - printInfo('Unpack inputsandbox passed.') - - #get input files - ###DOWNLOADINPUTFILES### - - printInfo('Loading Python modules ...') - - sys.path.insert(0,os.path.join(wdir,PYTHON_DIR)) - - # check the python library path - try: - printInfo(' ** PYTHON_DIR: %s' % os.environ['PYTHON_DIR']) - except KeyError: - pass - - try: - printInfo(' ** PYTHONPATH: %s' % os.environ['PYTHONPATH']) - except KeyError: - pass - - for lib_path in sys.path: - printInfo(' ** sys.path: %s' % lib_path) - -# execute application - - ## convern appenvs into environment setup script to be 'sourced' before executing the user executable - - printInfo('Prepare environment variables for application executable') - - env_setup_script = os.path.join(os.getcwd(), '__ganga_lcg_env__.sh') - - f = open( env_setup_script, 'w') - f.write('#!/bin/sh' + os.linesep ) - f.write('##user application environmet setup script generated by Ganga job wrapper' + os.linesep) - for k,v in appenvs.items(): - - str_env = 'export %s="%s"' % (k, v) - - printInfo(' ** ' + str_env) - - f.write(str_env + os.linesep) - f.close() - - try: #try to make shipped executable executable - os.chmod('%s/%s'% (wdir,appexec),stat.S_IXUSR|stat.S_IRUSR|stat.S_IWUSR) - except: - pass - - status = False - try: - # use subprocess to run the user's application if the module is available on the worker node - import subprocess - printInfo('Load application executable with subprocess module') - status = execSyscmdSubprocess('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - except ImportError as err: - # otherwise, use separate threads to control process IO pipes - printInfo('Load application executable with separate threads') - status = execSyscmdEnhanced('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - - os.system("cp %s/stdout stdout.1" % orig_wdir) - os.system("cp %s/stderr stderr.1" % orig_wdir) - - printInfo('GZipping stdout and stderr...') - - os.system("gzip stdout.1 stderr.1") - - # move them to the original wdir so they can be picked up - os.system("mv stdout.1.gz %s/stdout.gz" % orig_wdir) - os.system("mv stderr.1.gz %s/stderr.gz" % orig_wdir) - - if not status: - raise OSError('Application execution failed.') - printInfo('Application execution passed with exit code %d.' % exitcode) - - ###OUTPUTUPLOADSPOSTPROCESSING### - - for f in os.listdir(os.getcwd()): - command = "cp %s %s" % (os.path.join(os.getcwd(),f), os.path.join(orig_wdir,f)) - os.system(command) - - createPackedOutputSandbox(outputsandbox,None,orig_wdir) - -# pack outputsandbox -# printInfo('== check output ==') -# for line in os.popen('pwd; ls -l').readlines(): -# printInfo(line) - - printInfo('Pack outputsandbox passed.') - - # Clean up after us - All log files and packed outputsandbox should be in "wdir" - if scratchdir: - os.chdir(orig_wdir) - os.system("rm %s -rf" % wdir) -except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - -printInfo('Job Wrapper stop.') - -out.close() - -# always return exit code 0 so the in the case of application failure -# one can always get stdout and stderr back to the UI for debug. -sys.exit(0) -""" - return script - - def preparejob(self, jobconfig, master_job_sandbox): - '''Prepare the JDL''' - - script = self.__jobWrapperTemplate__() - - job = self.getJobObject() - inpw = job.getInputWorkspace() - - wrapperlog = '__jobscript__.log' - - import GangaCore.Core.Sandbox as Sandbox - - # FIXME: check what happens if 'stdout','stderr' are specified here - script = script.replace( - '###OUTPUTSANDBOX###', repr(jobconfig.outputbox)) - - script = script.replace( - '###APPLICATION_NAME###', getName(job.application)) - script = script.replace( - '###APPLICATIONEXEC###', repr(jobconfig.getExeString())) - script = script.replace( - '###APPLICATIONARGS###', repr(jobconfig.getArguments())) - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles - - script = script.replace( - '###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' ')) - - script = script.replace( - '###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' ')) - - if jobconfig.env: - script = script.replace( - '###APPLICATIONENVS###', repr(jobconfig.env)) - else: - script = script.replace('###APPLICATIONENVS###', repr({})) - - script = script.replace('###WRAPPERLOG###', repr(wrapperlog)) - import inspect - script = script.replace( - '###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox)) - - mon = job.getMonitoringService() - - self.monInfo = None - - # set the monitoring file by default to the stdout - if isinstance(self.monInfo, dict): - self.monInfo['remotefile'] = 'stdout' - - # try to print out the monitoring service information in debug mode - try: - logger.debug('job info of monitoring service: %s' % - str(self.monInfo)) - except: - pass - -# prepare input/output sandboxes - import GangaCore.Utility.files - from GangaCore.GPIDev.Lib.File import File - from GangaCore.Core.Sandbox.WNSandbox import PYTHON_DIR - import inspect - - fileutils = File(inspect.getsourcefile(GangaCore.Utility.files), subdir=PYTHON_DIR) - packed_files = jobconfig.getSandboxFiles() + [fileutils] - sandbox_files = job.createPackedInputSandbox(packed_files) - - # sandbox of child jobs should include master's sandbox - sandbox_files.extend(master_job_sandbox) - - # check the input file size and pre-upload larger inputs to the iocache - lfc_host = '' - - input_sandbox_uris = [] - input_sandbox_names = [] - - ick = True - - max_prestaged_fsize = 0 - for f in sandbox_files: - - idx = self.__check_and_prestage_inputfile__(f) - - if not idx: - logger.error('input sandbox preparation failed: %s' % f) - ick = False - break - else: - - if idx['lfc_host']: - lfc_host = idx['lfc_host'] - - if idx['remote']: - abspath = os.path.abspath(f) - fsize = os.path.getsize(abspath) - - if fsize > max_prestaged_fsize: - max_prestaged_fsize = fsize - - input_sandbox_uris.append( - idx['remote'][os.path.basename(f)]) - - input_sandbox_names.append( - os.path.basename(urlparse(f)[2])) - - if idx['local']: - input_sandbox_uris += idx['local'] - input_sandbox_names.append(os.path.basename(f)) - - if not ick: - logger.error('stop job submission') - return None - - # determin the lcg-cp timeout according to the max_prestaged_fsize - # - using the assumption of 1 MB/sec. - max_prestaged_fsize = 0 - lfc_host = '' - transfer_timeout = config['SandboxTransferTimeout'] - predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0)) - - if predict_timeout > transfer_timeout: - transfer_timeout = predict_timeout - - if transfer_timeout < 60: - transfer_timeout = 60 - - script = script.replace( - '###TRANSFERTIMEOUT###', '%d' % transfer_timeout) - - # update the job wrapper with the inputsandbox list - script = script.replace( - '###INPUTSANDBOX###', repr({'remote': {}, 'local': input_sandbox_names})) - - # write out the job wrapper and put job wrapper into job's inputsandbox - scriptPath = inpw.writefile( - FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1) - input_sandbox = input_sandbox_uris + [scriptPath] - - for isb in input_sandbox: - logger.debug('ISB URI: %s' % isb) - - # compose output sandbox to include by default the following files: - # - gzipped stdout (transferred only when the JobLogHandler is WMS) - # - gzipped stderr (transferred only when the JobLogHandler is WMS) - # - __jobscript__.log (job wrapper's log) - output_sandbox = [wrapperlog] - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns - for outputSandboxPattern in getOutputSandboxPatterns(job): - output_sandbox.append(outputSandboxPattern) - - if config['JobLogHandler'] in ['WMS']: - output_sandbox += ['stdout.gz', 'stderr.gz'] - - if len(jobconfig.outputbox): - output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME] - - # compose LCG JDL - jdl = { - 'VirtualOrganisation': config['VirtualOrganisation'], - 'Executable': os.path.basename(scriptPath), - 'Environment': {'GANGA_LCG_VO': config['VirtualOrganisation'], 'GANGA_LOG_HANDLER': config['JobLogHandler'], 'LFC_HOST': lfc_host}, - 'StdOutput': 'stdout', - 'StdError': 'stderr', - 'InputSandbox': input_sandbox, - 'OutputSandbox': output_sandbox, - 'OutputSandboxBaseDestURI': 'gsiftp://localhost' - } - - jdl['Environment'].update({'GANGA_LCG_CE': self.CE}) - jdl['Requirements'] = self.requirements.merge( - jobconfig.requirements).convert() - - if self.jobtype.upper() in ['NORMAL', 'MPICH']: - jdl['JobType'] = self.jobtype.upper() - if self.jobtype.upper() == 'MPICH': - #jdl['Requirements'].append('(other.GlueCEInfoTotalCPUs >= NodeNumber)') - jdl['Requirements'].append( - 'Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)') - jdl['NodeNumber'] = self.requirements.nodenumber - else: - logger.warning('JobType "%s" not supported' % self.jobtype) - return - -# additional settings from the job -# if jobconfig.env: -# jdl['Environment'].update(jobconfig.env) - - jdlText = Grid.expandjdl(jdl) - logger.debug('subjob JDL: %s' % jdlText) - return inpw.writefile(FileBuffer('__jdlfile__', jdlText)) - - @require_credential - def kill(self): - '''Kill the job''' - job = self.getJobObject() - - logger.info('Killing job %s' % job.getFQID('.')) - - if not self.id: - logger.warning('Job %s is not running.' % job.getFQID('.')) - return False - - return Grid.cream_cancel_multiple([self.id], self.credential_requirements) - - def master_kill(self): - '''kill the master job to the grid''' - - job = self.getJobObject() - - if not job.master and len(job.subjobs) == 0: - return IBackend.master_kill(self) - elif job.master: - return IBackend.master_kill(self) - else: - return self.master_bulk_kill() - - @require_credential - def master_bulk_kill(self): - '''GLITE bulk resubmission''' - - job = self.getJobObject() - - # killing the individually re-submitted subjobs - logger.debug('cancelling running/submitted subjobs.') - - # 1. collect job ids - ids = [] - for sj in job.subjobs: - if sj.status in ['submitted', 'running'] and sj.backend.id: - ids.append(sj.backend.id) - - # 2. cancel the collected jobs - ck = Grid.cream_cancel_multiple(ids, self.credential_requirements) - if not ck: - logger.warning('Job cancellation failed') - return False - else: - for sj in job.subjobs: - if sj.backend.id in ids: - sj.updateStatus('killed') - - return True - - def master_bulk_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''submit multiple subjobs in parallel, by default using 10 concurrent threads''' - - assert(implies(rjobs, len(subjobconfigs) == len(rjobs))) - - # prepare the subjobs, jdl repository before bulk submission - node_jdls = self.__mt_job_prepare__( - rjobs, subjobconfigs, masterjobconfig) - - if not node_jdls: - logger.error('Some jobs not successfully prepared') - return False - - # set all subjobs to submitting status - for sj in rjobs: - sj.updateStatus('submitting') - - node_jids = self.__mt_bulk_submit__(node_jdls) - - status = False - - if node_jids: - for sj in rjobs: - if sj.id in node_jids: - sj.backend.id = node_jids[sj.id] - sj.backend.CE = self.CE - sj.backend.actualCE = sj.backend.CE - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - else: - logger.warning( - 'subjob %s not successfully submitted' % sj.getFQID('.')) - - status = True - - return status - - def master_bulk_resubmit(self, rjobs): - '''CREAM bulk resubmission''' - - from GangaCore.Utility.logging import log_user_exception - -# job = self.getJobObject() - - # compose master JDL for collection job - node_jdls = {} - for sj in rjobs: - jdlpath = os.path.join(sj.inputdir, '__jdlfile__') - node_jdls[sj.id] = jdlpath - - # set all subjobs to submitting status - for sj in rjobs: - sj.updateStatus('submitting') - - node_jids = self.__mt_bulk_submit__(node_jdls) - - status = False - - if node_jids: - for sj in rjobs: - if sj.id in node_jids: - self.__refresh_jobinfo__(sj) - sj.backend.id = node_jids[sj.id] - sj.backend.CE = self.CE - sj.backend.actualCE = sj.backend.CE - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - else: - logger.warning( - 'subjob %s not successfully submitted' % sj.getFQID('.')) - - status = True - -# # set all subjobs to submitted status -# # NOTE: this is just a workaround to avoid the unexpected transition -# # that turns the master job's status from 'submitted' to 'submitting'. -# # As this transition should be allowed to simulate a lock mechanism in Ganga 4, the workaround -# # is to set all subjobs' status to 'submitted' so that the transition can be avoided. -# # A more clear solution should be implemented with the lock mechanism introduced in Ganga 5. -# for sj in rjobs: -# sj.updateStatus('submitted') -# sj.info.submit_counter += 1 - - return status - - @require_credential - def master_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''Submit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - job = self.getJobObject() - - # finding CREAM CE endpoint for job submission - allowed_celist = [] - try: - allowed_celist = self.requirements.getce() - if not self.CE and allowed_celist: - self.CE = allowed_celist[0] - except: - logger.warning( - 'CREAM CE assigment from AtlasCREAMRequirements failed.') - - if self.CE and allowed_celist: - if self.CE not in allowed_celist: - logger.warning('submission to CE not allowed: %s, use %s instead' % ( - self.CE, allowed_celist[0])) - self.CE = allowed_celist[0] - - if not self.CE: - raise GangaException('CREAM CE endpoint not set') - - # delegate proxy to CREAM CE - self.delegation_id = Grid.cream_proxy_delegation(self.CE, self.delegation_id, self.credential_requirements) - if not self.delegation_id: - logger.warning('proxy delegation to %s failed' % self.CE) - - # doing massive job preparation - if len(job.subjobs) == 0: - ick = IBackend.master_submit( - self, rjobs, subjobconfigs, masterjobconfig) - else: - ick = self.master_bulk_submit( - rjobs, subjobconfigs, masterjobconfig) - - profiler.check('==> master_submit() elapsed time') - - return ick - - @require_credential - def submit(self, subjobconfig, master_job_sandbox): - '''Submit the job to the grid''' - - ick = False - - jdlpath = self.preparejob(subjobconfig, master_job_sandbox) - - if jdlpath: - self.id = Grid.cream_submit(jdlpath, self.CE, self.delegation_id, self.credential_requirements) - - if self.id: - self.actualCE = self.CE - ick = True - - return ick - - def master_auto_resubmit(self, rjobs): - """ - Resubmit each subjob individually as bulk resubmission will overwrite - previous master job statuses - """ - - # check for master failure - in which case bulk resubmit - mj = self._getParent() - if mj.status == 'failed': - return self.master_resubmit(rjobs) - - for j in rjobs: - if not j.backend.master_resubmit([j]): - return False - - return True - - @require_credential - def master_resubmit(self, rjobs): - '''Resubmit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - job = self.getJobObject() - - ick = False - - # delegate proxy to CREAM CE - self.delegation_id = Grid.cream_proxy_delegation(self.CE, self.delegation_id, self.credential_requirements) - if not self.delegation_id: - logger.warning('proxy delegation to %s failed' % self.CE) - - if not job.master and len(job.subjobs) == 0: - # case 1: master job normal resubmission - logger.debug('rjobs: %s' % str(rjobs)) - logger.debug('mode: master job normal resubmission') - ick = IBackend.master_resubmit(self, rjobs) - - elif job.master: - # case 2: individual subjob resubmission - logger.debug('mode: individual subjob resubmission') - ick = IBackend.master_resubmit(self, rjobs) - - else: - # case 3: master job bulk resubmission - logger.debug('mode: master job resubmission') - - ick = self.master_bulk_resubmit(rjobs) - if not ick: - raise GangaException('CREAM bulk submission failure') - - profiler.check('job re-submission elapsed time') - - return ick - - @require_credential - def resubmit(self): - '''Resubmit the job''' - - ick = False - - job = self.getJobObject() - - jdlpath = job.getInputWorkspace().getPath("__jdlfile__") - - if jdlpath: - self.id = Grid.cream_submit(jdlpath, self.CE, self.delegation_id, self.credential_requirements) - - if self.id: - # refresh the lcg job information - self.__refresh_jobinfo__(job) - self.actualCE = self.CE - ick = True - - return ick - - @staticmethod - def updateMonitoringInformation(jobs): - '''Monitoring loop for normal jobs''' - - jobdict = dict([(job.backend.id, job) for job in jobs if job.backend.id]) - - # Group jobs by the backend's credential requirements - cred_to_backend_id_list = defaultdict(list) - for job in jobs: - cred_to_backend_id_list[job.backend.credential_requirements].append(job.backend.id) - - # Batch the status requests by credential requirement - jobInfoDict = {} - for cred_req, job_ids in cred_to_backend_id_list.items(): - # If the credential is not valid or doesn't exist then skip it - cred = credential_store.get(cred_req) - if not cred or not cred.is_valid(): - needed_credentials.add(cred_req) - continue - # Create a ``Grid`` for each credential requirement and request the relevant jobs through it - info = Grid.cream_status(job_ids, cred_req) - jobInfoDict.update(info) - - jidListForPurge = [] - - # update job information for those available in jobInfoDict - for id, info in jobInfoDict.items(): - - if info: - - job = jobdict[id] - - if job.backend.status != info['Current Status'] and ('ExitCode' not in info or ('ExitCode' in info and info['ExitCode'].isdigit())): - - if 'Worker Node' in info: - job.backend.workernode = info['Worker Node'] - - if 'CREAM ISB URI' in info: - job.backend.isbURI = info['CREAM ISB URI'] - - if 'CREAM OSB URI' in info: - job.backend.osbURI = info['CREAM OSB URI'] - - doStatusUpdate = True - - # no need to update Ganga job status if backend status is - # not changed - if info['Current Status'] == job.backend.status: - doStatusUpdate = False - - # download output sandboxes if final status is reached - elif info['Current Status'] in ['DONE-OK', 'DONE-FAILED']: - - # resolve output sandbox URIs based on the JDL - # information - osbURIList = __cream_resolveOSBList__(job, info['JDL']) - - logger.debug('OSB list:') - for f in osbURIList: - logger.debug(f) - - if osbURIList: - - if Grid.cream_get_output(osbURIList, job.getOutputWorkspace(create=True).getPath(), job.backend.credential_requirements): - (ick, app_exitcode) = Grid.__get_app_exitcode__( - job.getOutputWorkspace(create=True).getPath()) - job.backend.exitcode = app_exitcode - - jidListForPurge.append(job.backend.id) - - else: - logger.error( - 'fail to download job output: %s' % jobdict[id].getFQID('.')) - - if doStatusUpdate: - job.backend.status = info['Current Status'] - if 'ExitCode' in info and info['ExitCode'] != "W": - try: - job.backend.exitcode_cream = int( - info['ExitCode']) - except: - job.backend.exitcode_cream = 1 - - if 'FailureReason' in info: - try: - job.backend.reason = info['FailureReason'] - except: - pass - - job.backend.updateGangaJobStatus() - else: - logger.warning( - 'fail to retrieve job informaton: %s' % jobdict[id].getFQID('.')) - - # purging the jobs the output has been fetched locally - if jidListForPurge: - for cred_req, job_ids in cred_to_backend_id_list.items(): - Grid.cream_purge_multiple(set(job_ids) & set(jidListForPurge), cred_req) - - def updateGangaJobStatus(self): - '''map backend job status to Ganga job status''' - - job = self.getJobObject() - - if self.status in ['RUNNING', 'REALLY-RUNNING']: - job.updateStatus('running') - - elif self.status == 'DONE-OK': - if job.backend.exitcode and job.backend.exitcode != 0: - job.backend.reason = 'non-zero app. exit code: %s' % repr( - job.backend.exitcode) - job.updateStatus('failed') - elif job.backend.exitcode_cream and job.backend.exitcode_cream != 0: - job.backend.reason = 'non-zero CREAM job exit code: %s' % repr( - job.backend.exitcode_cream) - job.updateStatus('failed') - else: - job.updateStatus('completed') - - elif self.status in ['DONE-FAILED', 'ABORTED', 'UNKNOWN']: - job.updateStatus('failed') - - elif self.status in ['CANCELLED']: - job.updateStatus('killed') - - elif self.status in ['REGISTERED', 'PENDING', 'IDLE', 'HELD']: - pass - - else: - logger.warning('Unexpected job status "%s"', self.status) - - -logger = getLogger() diff --git a/ganga/GangaCore/Lib/LCG/ElapsedTimeProfiler.py b/ganga/GangaCore/Lib/LCG/ElapsedTimeProfiler.py deleted file mode 100755 index 0f773e3dce..0000000000 --- a/ganga/GangaCore/Lib/LCG/ElapsedTimeProfiler.py +++ /dev/null @@ -1,37 +0,0 @@ -############################################################################### -# Ganga Project. http://cern.ch/ganga -# -# $Id: ElapsedTimeProfiler.py,v 1.1 2008-07-17 16:40:57 moscicki Exp $ -############################################################################### -# -# LCG backend profiler -# -# ATLAS/ARDA -# -# Date: November 2007 -import time - -from GangaCore.Utility.logging import getLogger - - -class ElapsedTimeProfiler(object): - - '''Elapsed time profiler''' - - def __init__(self, logger=None): - - if not logger: - logger = getLogger(name='GangaCore.Lib.LCG.ElapsedTimeProfiler') - - self.logger = logger - - def start(self): - self.beg = time.time() - - def check(self, message): - etime = time.time() - self.beg - self.logger.debug('%s: %f sec.' % (message, etime)) - - def checkAndStart(self, message): - self.check(message) - self.start() diff --git a/ganga/GangaCore/Lib/LCG/Grid.py b/ganga/GangaCore/Lib/LCG/Grid.py deleted file mode 100755 index f1477004c2..0000000000 --- a/ganga/GangaCore/Lib/LCG/Grid.py +++ /dev/null @@ -1,1175 +0,0 @@ -import os -import re -import shutil -import tempfile -import datetime - -from GangaCore.GPIDev.Credentials import credential_store - -from GangaCore.Utility.Config import getConfig -from GangaCore.Utility.logging import getLogger - -from GangaCore.Utility.GridShell import getShell - -from GangaCore.Lib.LCG.GridftpSandboxCache import GridftpFileIndex, GridftpSandboxCache - -from GangaCore.Lib.LCG.Utility import get_uuid -from GangaCore.Lib.Executable import randomString - -# global variables -logger = getLogger() - -config = getConfig('LCG') - - -def __set_submit_option__(): - - submit_option = '' - - if config['Config']: - submit_option += ' --config %s' % config['Config'] - elif config['GLITE_ALLOWED_WMS_LIST']: - wms_conf_path = os.path.join(os.environ['GLITE_WMS_LOCATION'], 'etc', - config['VirtualOrganisation'], 'glite_wmsui.conf') - temp_wms_conf = tempfile.NamedTemporaryFile(suffix='.conf', delete=False) - - with open(wms_conf_path, "r") as this_file: - orig_text = this_file.read() - - # find the last bracket and add in the new text - pos = orig_text.rfind("]") - wms_text = "\nWMProxyEndpoints = {" + \ - ",".join("\"%s\"" % wms for wms in config['GLITE_ALLOWED_WMS_LIST']) + \ - "};\n]\n" - new_text = orig_text[:pos] + wms_text - - # write the new config file - with open(temp_wms_conf, "w") as this_file: - this_file.write(new_text) - - submit_option += ' --config %s' % temp_wms_conf.name - - submit_option = ' %s ' % submit_option - - return submit_option - - -def __resolve_gridcmd_log_path__(regxp_logfname, cmd_output): - match_log = re.search(regxp_logfname, cmd_output) - - logfile = None - if match_log: - logfile = match_log.group(1) - return logfile - - -def __clean_gridcmd_log__(regxp_logfname, cmd_output): - - logfile = __resolve_gridcmd_log_path__(regxp_logfname, cmd_output) - - if logfile and os.path.exists(logfile): - os.remove(logfile) - - return True - - -def __print_gridcmd_log__(regxp_logfname, cmd_output): - - logfile = __resolve_gridcmd_log_path__(regxp_logfname, cmd_output) - - if logfile: - for l in open(logfile, 'r'): - logger.warning(l.strip()) - - # here we assume the logfile is no longer needed at this point - - # remove it - os.remove(logfile) - else: - logger.warning('output\n%s\n', cmd_output) - logger.warning('end of output') - - -def __get_proxy_voname__(cred_req): - """Check validity of proxy vo""" - - vo = credential_store[cred_req].vo - - logger.debug('voms of credential: %s' % vo) - return vo - - -def __get_lfc_host__(): - """Gets the LFC_HOST: from current shell or querying BDII on demand""" - lfc_host = None - - if 'LFC_HOST' in getShell().env: - lfc_host = getShell().env['LFC_HOST'] - - if not lfc_host: - lfc_host = __get_default_lfc__() - - return lfc_host - - -def __get_default_lfc__(): - """Gets the default lfc host from lcg-infosites""" - - output = wrap_lcg_infosites('lfc') - - if output == '': - return None - else: - lfc_list = output.strip().split('\n') - return lfc_list[0] - - -def __resolve_no_matching_jobs__(cmd_output): - """Parsing the glite-wms-job-status log to get the glite jobs which have been removed from the WMS""" - - logfile = __resolve_gridcmd_log_path__( - r'(.*-job-status.*\.log)', cmd_output) - - glite_ids = [] - - if logfile: - - re_jid = re.compile(r'^Unable to retrieve the status for: (https://\S+:9000/[0-9A-Za-z_.-]+)\s*$') - re_key = re.compile(r'^.*(no matching jobs found)\s*$') - - myjid = '' - for line in open(logfile, 'r'): - m_jid = re_jid.match(line) - if m_jid: - myjid = m_jid.group(1) - - if myjid: - m_key = re_key.match(line) - if m_key: - glite_ids.append(myjid) - myjid = '' - - return glite_ids - - -def list_match(jdlpath, cred_req, ce=None): - """Returns a list of computing elements can run the job""" - - re_ce = re.compile(r'^\s*\-\s*(\S+:(2119|8443)/\S+)\s*$') - - matched_ces = [] - - cmd = 'glite-wms-job-list-match -a' - - submit_opt = __set_submit_option__() - - if not submit_opt: - return matched_ces - else: - cmd += submit_opt - - cmd = '%s --noint "%s"' % (cmd, jdlpath) - - logger.debug('job list match command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - for l in output.split('\n'): - - matches = re_ce.match(l) - - if matches: - matched_ces.append(matches.group(1)) - - if ce: - if matched_ces.count(ce) > 0: - matched_ces = [ce] - else: - matched_ces = [] - - logger.debug('== matched CEs ==') - for myce in matched_ces: - logger.debug(myce) - logger.debug('== matched CEs ==') - - return matched_ces - - -def submit(jdlpath, cred_req, ce=None, perusable=False): - """Submit a JDL file to LCG""" - - # doing job submission - cmd = 'glite-wms-job-submit -a' - - submit_opt = __set_submit_option__() - - if not submit_opt: - return - else: - cmd += submit_opt - - if ce: - cmd += ' -r %s' % ce - - cmd = '%s --nomsg "%s" < /dev/null' % (cmd, jdlpath) - - logger.debug('job submit command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - - if output: - output = "%s" % output.strip() - - match = re.search(r'.*(https://\S+:9000/[0-9A-Za-z_\.\-]+)', output) - - if match: - logger.debug('job id: %s' % match.group(1)) - if perusable: - logger.info("Enabling perusal") - getShell(cred_req).cmd1("glite-wms-job-perusal --set -f stdout %s" % match.group(1)) - - # remove the glite command log if it exists - __clean_gridcmd_log__(r'(.*-job-submit.*\.log)', output) - return match.group(1) - - else: - logger.warning('Job submission failed.') - __print_gridcmd_log__(r'(.*-job-submit.*\.log)', output) - return - - -def native_master_cancel(jobids, cred_req): - """Native bulk cancellation supported by GLITE middleware.""" - - cmd = 'glite-wms-job-cancel' - - if not __set_submit_option__(): - return False - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - cmd = '%s --noint -i %s' % (cmd, idsfile) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - # clean up tempfile - if os.path.exists(idsfile): - os.remove(idsfile) - - if rc != 0: - logger.warning('Job cancellation failed.') - __print_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return False - else: - # job cancellation succeeded, try to remove the glite command - # logfile if it exists - __clean_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return True - - -def status(jobids, cred_req, is_collection=False): - """Query the status of jobs on the grid""" - - if not jobids: - return [], [] - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - cmd = 'glite-wms-job-status' - - if is_collection: - cmd = '%s -v 3' % cmd - - cmd = '%s --noint -i %s' % (cmd, idsfile) - logger.debug('job status command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['StatusPollingTimeout']) - os.remove(idsfile) - - missing_glite_jids = [] - if rc != 0: - missing_glite_jids = __resolve_no_matching_jobs__(output) - - if missing_glite_jids: - logger.info( - 'some jobs removed from WMS, will set corresponding Ganga job to \'failed\' status') - logger.debug('jobs removed from WMS: %s' % - repr(missing_glite_jids)) - else: - __print_gridcmd_log__(r'(.*-job-status.*\.log)', output) - - # job status query succeeded, try to remove the glite command logfile - # if it exists - __clean_gridcmd_log__(r'(.*-job-status.*\.log)', output) - - re_id = re.compile(r'^\s*Status info for the Job : (https://.*\S)\s*$') - re_status = re.compile(r'^\s*Current Status:\s+(.*\S)\s*$') - - # from glite UI version 1.5.14, the attribute 'Node Name:' is no longer available - # for distinguishing master and node jobs. A new way has to be applied. - re_exit = re.compile(r'^\s*Exit code:\s+(.*\S)\s*$') - re_reason = re.compile(r'^\s*Status Reason:\s+(.*\S)\s*$') - re_dest = re.compile(r'^\s*Destination:\s+(.*\S)\s*$') - - # pattern to distinguish master and node jobs - re_master = re.compile(r'^BOOKKEEPING INFORMATION:\s*$') - re_node = re.compile(r'^- Nodes information.*\s*$') - - # pattern for node jobs - re_nodename = re.compile(r'^\s*NodeName\s*=\s*"(gsj_[0-9]+)";\s*$') - - info = [] - is_node = False - - for line in output.split('\n'): - - match = re_master.match(line) - if match: - is_node = False - continue - - match = re_node.match(line) - if match: - is_node = True - continue - - match = re_id.match(line) - if match: - info += [{'id': match.group(1), - 'name': '', - 'is_node': False, - 'status': '', - 'exit': '', - 'reason': '', - 'destination': ''}] - if is_node: - info[-1]['is_node'] = True - continue - - match = re_nodename.match(line) - if match and is_node: - info[-1]['name'] = match.group(1) - continue - - match = re_status.match(line) - if match: - info[-1]['status'] = match.group(1) - continue - - match = re_exit.match(line) - if match: - info[-1]['exit'] = match.group(1) - continue - - match = re_reason.match(line) - if match: - info[-1]['reason'] = match.group(1) - continue - - match = re_dest.match(line) - if match: - info[-1]['destination'] = match.group(1) - continue - - return info, missing_glite_jids - - -def get_loginfo(jobids, directory, cred_req, verbosity=1): - """Fetch the logging info of the given job and save the output in the job's outputdir""" - - cmd = 'glite-wms-job-logging-info -v %d' % verbosity - - log_output = directory + '/__jobloginfo__.log' - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - cmd = '%s --noint -o %s -i %s' % (cmd, log_output, idsfile) - - logger.debug('job logging info command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - os.remove(idsfile) - - if rc != 0: - __print_gridcmd_log__(r'(.*-logging-info.*\.log)', output) - return False - else: - # logging-info checking succeeded, try to remove the glite command - # logfile if it exists - __clean_gridcmd_log__(r'(.*-logging-info.*\.log)', output) - # returns the path to the saved logging info if success - return log_output - - -def get_output(jobid, directory, cred_req): - """Retrieve the output of a job on the grid""" - - cmd = 'glite-wms-job-output' - # general WMS options (somehow used by the glite-wms-job-output - # command) - if config['Config']: - cmd += ' --config %s' % config['Config'] - - cmd = '%s --noint --dir %s %s' % (cmd, directory, jobid) - - logger.debug('job get output command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - match = re.search(r'directory:\n\s*([^\t\n\r\f\v]+)\s*\n', output) - - if not match: - logger.warning('Job output fetch failed.') - __print_gridcmd_log__(r'(.*-output.*\.log)', output) - return False, 'cannot fetch job output' - - # job output fetching succeeded, try to remove the glite command - # logfile if it exists - __clean_gridcmd_log__(r'(.*-output.*\.log)', output) - - outdir = match.group(1) - -# some versions of LCG middleware create an extra output directory (named _) -# inside the job.outputdir. Try to match the jid_hash in the outdir. Do output movement -# if the is found in the path of outdir. - import urllib.parse - jid_hash = urllib.parse.urlparse(jobid)[2][1:] - - if outdir.count(jid_hash): - if getShell(cred_req).system('mv "%s"/* "%s"' % (outdir, directory)) == 0: - try: - os.rmdir(outdir) - except Exception as msg: - logger.warning( - "Error trying to remove the empty directory %s:\n%s" % (outdir, msg)) - else: - logger.warning("Error moving output from %s to %s.\nOutput is left in %s." % ( - outdir, directory, outdir)) - else: - pass - - return __get_app_exitcode__(directory) - - -def cancel_multiple(jobids, cred_req): - """Cancel multiple jobs in one LCG job cancellation call""" - - # compose a temporary file with job ids in it - if not jobids: - return True - - # do the cancellation using a proper LCG command - cmd = 'glite-wms-job-cancel' - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - # compose the cancel command - cmd = '%s --noint -i %s' % (cmd, idsfile) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - # clean up tempfile - if os.path.exists(idsfile): - os.remove(idsfile) - - if rc == 0: - # job cancelling succeeded, try to remove the glite command logfile - # if it exists - __clean_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return True - else: - logger.warning("Failed to cancel jobs.\n%s" % output) - __print_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return False - - -def cancel(jobid, cred_req): - """Cancel a job""" - - cmd = 'glite-wms-job-cancel' - - cmd = '%s --noint %s' % (cmd, jobid) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - if rc == 0: - # job cancelling succeeded, try to remove the glite command logfile - # if it exists - __clean_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return True - else: - logger.warning("Failed to cancel job %s.\n%s" % (jobid, output)) - __print_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return False - - -def __cream_parse_job_status__(log): - """Parsing job status report from CREAM CE status query""" - - job_info_dict = {} - - re_jid = re.compile(r'^\s+JobID=\[(https://.*[:0-9]?/CREAM.*)\]$') - re_log = re.compile(r'^\s+(\S+.*\S+)\s+=\s+\[(.*)\]$') - - re_jts = re.compile(r'^\s+Job status changes:$') - re_ts = re.compile(r'^\s+Status\s+=\s+\[(.*)\]\s+\-\s+\[(.*)\]\s+\(([0-9]+)\)$') - re_cmd = re.compile(r'^\s+Issued Commands:$') - - # in case of status retrieval failed - re_jnf = re.compile(r'^.*job not found.*$') - - jid = None - - for jlog in log.split('******')[1:]: - - for l in jlog.split('\n'): - l.strip() - - m = re_jid.match(l) - - if m: - jid = m.group(1) - job_info_dict[jid] = {} - continue - - if re_jnf.match(l): - break - - m = re_log.match(l) - if m: - att = m.group(1) - val = m.group(2) - job_info_dict[jid][att] = val - continue - - if re_jts.match(l): - job_info_dict[jid]['Timestamps'] = {} - continue - - m = re_ts.match(l) - if m: - s = m.group(1) - t = int(m.group(3)) - job_info_dict[jid]['Timestamps'][s] = t - continue - - if re_cmd.match(l): - break - - return job_info_dict - - -def cream_proxy_delegation(ce, delid, cred_req): - """CREAM CE proxy delegation""" - - if not ce: - logger.warning('No CREAM CE endpoint specified') - return - - if not delid: - - logger.debug('making new proxy delegation to %s' % ce) - - cmd = 'glite-ce-delegate-proxy' - - cmd += ' -e %s' % ce.split('/cream')[0] - - delid = '%s_%s' % (credential_store[cred_req].identity, get_uuid()) - - cmd = '%s "%s"' % (cmd, delid) - - logger.debug('proxy delegation command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - if rc != 0: - # failed to delegate proxy - logger.error('proxy delegation error: %s' % output) - delid = '' - else: - # proxy delegated successfully - t_expire = datetime.datetime.now() + credential_store[cred_req].time_left() - - logger.debug('new proxy at %s valid until %s' % (ce, t_expire)) - - return delid - - -def cream_submit(jdlpath, ce, delid, cred_req): - """CREAM CE direct job submission""" - - if not ce: - logger.warning('No CREAM CE endpoint specified') - return - - cmd = 'glite-ce-job-submit' - - delid = cream_proxy_delegation(ce, delid, cred_req) - - if delid: - cmd += ' -D "%s"' % delid - else: - cmd += ' -a' - - cmd += ' -r %s' % ce - - cmd = '%s --nomsg "%s" < /dev/null' % (cmd, jdlpath) - - logger.debug('job submit command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - - if output: - output = "%s" % output.strip() - - match = re.search(r'^(https://\S+:8443/[0-9A-Za-z_\.\-]+)$', output) - - if match: - logger.debug('job id: %s' % match.group(1)) - return match.group(1) - else: - logger.warning('Job submission failed.') - return - - -def cream_status(jobids, cred_req): - """CREAM CE job status query""" - - if not jobids: - return [], [] - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('##CREAMJOBS##\n' + '\n'.join(jobids) + '\n') - - cmd = 'glite-ce-job-status' - - cmd = '%s -L 2 -n -i %s' % (cmd, idsfile) - logger.debug('job status command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['StatusPollingTimeout']) - job_info_dict = {} - if rc == 0 and output: - job_info_dict = __cream_parse_job_status__(output) - - # clean up tempfile - if os.path.exists(idsfile): - os.remove(idsfile) - - return job_info_dict - - -def cream_purge_multiple(jobids, cred_req): - """CREAM CE job purging""" - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('##CREAMJOBS##\n' + '\n'.join(jobids) + '\n') - - cmd = 'glite-ce-job-purge' - - cmd = '%s -n -N -i %s' % (cmd, idsfile) - - logger.debug('job purge command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - logger.debug(output) - - # clean up tempfile - if os.path.exists(idsfile): - os.remove(idsfile) - - if rc == 0: - return True - else: - return False - - -def cream_cancel_multiple(jobids, cred_req): - """CREAM CE job cancelling""" - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('##CREAMJOBS##\n' + '\n'.join(jobids) + '\n') - - cmd = 'glite-ce-job-cancel' - - cmd = '%s -n -N -i %s' % (cmd, idsfile) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - logger.debug(output) - - # clean up tempfile - if os.path.exists(idsfile): - os.remove(idsfile) - - if rc == 0: - return True - else: - return False - - -def cream_get_output(osb_uri_list, directory, cred_req): - """CREAM CE job output retrieval""" - - gfiles = [] - for uri in osb_uri_list: - gf = GridftpFileIndex() - gf.id = uri - gfiles.append(gf) - - cache = GridftpSandboxCache() - cache.uploaded_files = gfiles - - return cache.download(cred_req=cred_req, files=[x.id for x in gfiles], dest_dir=directory) - - -def __get_app_exitcode__(outputdir): - import GangaCore.Core.Sandbox as Sandbox - - Sandbox.getPackedOutputSandbox(outputdir, outputdir) - - # check the application exit code - app_exitcode = -1 - runtime_log = os.path.join(outputdir, '__jobscript__.log') - pat = re.compile(r'.*exit code (\d+).') - - if not os.path.exists(runtime_log): - logger.warning('job runtime log not found: %s' % runtime_log) - return False, 'job runtime log not found: %s' % runtime_log - - for line in open(runtime_log, 'r'): - mat = pat.match(line) - if mat: - app_exitcode = eval(mat.groups()[0]) - break - - # returns False if the exit code of the real executable is not zero - # the job status of GANGA will be changed to 'failed' if the return - # value is False - if app_exitcode != 0: - logger.debug( - 'job\'s executable returns non-zero exit code: %d' % app_exitcode) - return False, app_exitcode - else: - return True, 0 - - -def expandxrsl(items): - """Expand xrsl items""" - - xrsl = "&\n" - for key, value in items.items(): - - if key == "inputFiles": - # special case for input files - xrsl += "(inputFiles=" - - for f in value: - xrsl += "(\"%s\" \"%s\")\n" % (os.path.basename(f), f) - - xrsl += ")\n" - - elif key == "outputFiles": - # special case for input files - xrsl += "(outputFiles=" - - for f in value: - xrsl += "(\"%s\" \"\")\n" % (os.path.basename(f)) - - xrsl += ")\n" - - elif isinstance(value, dict): - # expand if a dictionary - xrsl += "(%s=" % key - for key2, value2 in value.items(): - xrsl += "(\"%s\" \"%s\")\n" % (key2, value2) - - xrsl += ")\n" - else: - # straight key pair - xrsl += "(%s=\"%s\")\n" % (key, value) - - return xrsl - - -def expandjdl(items): - """Expand jdl items""" - - text = "[\n" - for key, value in items.items(): - - if key == 'Requirements': - if value: - text += 'Requirements = \n %s;\n' % ' &&\n '.join( - value) - - elif key in ['ShallowRetryCount', 'RetryCount', 'NodeNumber', 'ExpiryTime', 'PerusalTimeInterval']: - try: - value = int(value) - if value < 0: - raise ValueError - text += '%s = %d;\n' % (key, value) - except ValueError: - logger.warning('%s is not positive integer.' % key) - - elif key == 'Environment': - if value: - text += 'Environment = {\n "%s"\n};\n' % '",\n "'.join( - ['%s=\'%s\'' % var for var in value.items()]) - - elif key == 'DataRequirements': - text += 'DataRequirements = {\n' - for entry in value: - text += ' [\n' - text += ' InputData = {\n' - for datafile in entry['InputData']: - text += ' "%s",\n' % datafile - # Get rid of trailing comma - text = text.rstrip(',\n') + '\n' - text += ' };\n' - text += ' DataCatalogType = "%s";\n' % entry[ - 'DataCatalogType'] - if 'DataCatalog' in entry: - text += ' DataCatalog = "%s";\n' % entry[ - 'DataCatalog'] - text += ' ],\n' - text = text.rstrip(',\n') + '\n' # Get rid of trailing comma - text += '};\n' - - elif isinstance(value, list): - if value: - text += '%s = {\n "%s"\n};\n' % (key, - '",\n "'.join(value)) - - elif key == 'Rank': - text += 'Rank = ( %s );\n' % value - - elif key == 'Nodes': - text += 'Nodes = %s;\n' % value - - elif key in ['PerusalFileEnable', 'AllowZippedISB']: - text += '%s = %s;\n' % (key, value) - - else: - text += '%s = "%s";\n' % (key, value) - - text += "\n]\n" - return text - - -def wrap_lcg_infosites(opts=""): - """Wrap the lcg-infosites command""" - - cmd = 'lcg-infosites --vo %s %s' % ( - config['VirtualOrganisation'], opts) - - logger.debug('lcg-infosites command: %s' % cmd) - - rc, output, m = getShell().cmd1('%s' % cmd, allowed_exit=[0, 255]) - - if rc != 0: - return "" - else: - return output - - -def __arc_get_config_file_arg__(): - """Helper function to return the config file argument""" - if config['ArcConfigFile']: - return "-z " + config['ArcConfigFile'] - - return "" - - -def arc_submit(jdlpath, ce, verbose, cred_req): - """ARC CE direct job submission""" - - # No longer need to specify CE if available in client.conf - # if not ce: - # logger.warning('No CREAM CE endpoint specified') - # return - - # write to a temporary XML file as otherwise can't submit in parallel - tmpstr = '/tmp/' + randomString() + '.arcsub.xml' - cmd = 'arcsub %s -S org.nordugrid.gridftpjob -j %s' % (__arc_get_config_file_arg__(), tmpstr) - - if verbose: - cmd += ' -d DEBUG ' - - if ce: - cmd += ' -c %s' % ce - - cmd = '%s "%s" < /dev/null' % (cmd, jdlpath) - - logger.debug('job submit command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - - if output: - output = "%s" % output.strip() - getShell().system('rm ' + tmpstr) - - # Job submitted with jobid: - # gsiftp://lcgce01.phy.bris.ac.uk:2811/jobs/vSoLDmvvEljnvnizHq7yZUKmABFKDmABFKDmCTGKDmABFKDmfN955m - match = re.search(r'(gsiftp://\S+:2811/jobs/[0-9A-Za-z_\.\-]+)$', output) - - # Job submitted with jobid: https://ce2.dur.scotgrid.ac.uk:8443/arex/.. - if not match: - match = re.search(r'(https://\S+:8443/arex/[0-9A-Za-z_\.\-]+)$', output) - - if match: - logger.debug('job id: %s' % match.group(1)) - return match.group(1) - else: - logger.warning('Job submission failed.') - return - - -def arc_status(jobids, ce_list, cred_req): - """ARC CE job status query""" - - if not jobids: - return [], [] - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - cmd = 'arcstat' - - cmd += ' %s -i %s -j %s' % (__arc_get_config_file_arg__(), idsfile, config["ArcJobListFile"]) - logger.debug('job status command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 1, 255], - timeout=config['StatusPollingTimeout']) - job_info_dict = {} - - if rc != 0: - logger.warning('jobs not found in XML file: arcsync will be executed to update the job information') - __arc_sync__(ce_list, cred_req) - - if rc == 0 and output: - job_info_dict = __arc_parse_job_status__(output) - - return job_info_dict - - -def __arc_parse_job_status__(log): - """Parsing job status report from CREAM CE status query""" - - # Job: gsiftp://lcgce01.phy.bris.ac.uk:2811/jobs/FowMDmswEljnvnizHq7yZUKmABFKDmABFKDmxbGKDmABFKDmlw9pKo - # State: Finished (FINISHED) - # Exit Code: 0 - - # Job: https://ce2.dur.scotgrid.ac.uk:8443/arex/jNxMDmXTj7jnVDJaVq17x81mABFKDmABFKDmhfRKDmjBFKDmLaCRVn - # State: Finished (terminal:client-stageout-possible) - # Exit Code: 0 - - job_info_dict = {} - jid = None - - for ln in log.split('\n'): - - ln.strip() - - # do we have a failed retrieval? - if ln.find("Job not found") != -1: - logger.warning("Could not find info for job id '%s'" % jid) - jid = None - elif ln.find("Job:") != -1 and ln.find("gsiftp") != -1: - # new job info block - jid = ln[ln.find("gsiftp"):].strip() - job_info_dict[jid] = {} - elif ln.find("Job:") != -1 and ln.find("https") != -1: - # new job info block - jid = ln[ln.find("https"):].strip() - job_info_dict[jid] = {} - - # get info - if ln.find("State:") != -1: - job_info_dict[jid]['State'] = ln[ln.find("State:") + len("State:"):].strip() - - if ln.find("Exit Code:") != -1: - job_info_dict[jid]['Exit Code'] = ln[ln.find("Exit Code:") + len("Exit Code:"):].strip() - - if ln.find("Job Error:") != -1: - job_info_dict[jid]['Job Error'] = ln[ln.find("Job Error:") + len("Job Error:"):].strip() - - return job_info_dict - - -def __arc_sync__(ce_list, cred_req): - """Collect jobs to jobs.xml""" - - if ce_list[0]: - cmd = 'arcsync %s -j %s -f -c %s' % (__arc_get_config_file_arg__( - ), config["ArcJobListFile"], ' -c '.join(ce_list)) - else: - cmd = 'arcsync %s -j %s -f ' % ( - __arc_get_config_file_arg__(), config["ArcJobListFile"]) - - logger.debug('sync ARC jobs list with: %s' % cmd) - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['StatusPollingTimeout']) - - if rc != 0: - logger.error('Unable to sync ARC jobs. Error: %s' % output) - - -def arc_get_output(jid, directory, cred_req): - """ARC CE job output retrieval""" - - # construct URI list from ID and output from arcls - cmd = 'arcls %s %s' % (__arc_get_config_file_arg__(), jid) - logger.debug('arcls command: %s' % cmd) - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - if rc: - logger.error( - "Could not find directory associated with ARC job ID '%s'" % jid) - return False - - tmpdir = tempfile.gettempdir() - jobhash = jid.split('/')[-1] - - copy_cmd = 'arcget -j %s %s -D %s' % (config["ArcJobListFile"], jid, tmpdir) - rc, output, m = getShell(cred_req).cmd1(copy_cmd, - allowed_exit=[0, 255], - timeout=config['SubmissionTimeout']) - # By now the job's output should be in the temp directory - if rc: - logger.error( - "Problem downloading output for job '%s'" % jid) - return False - - files_location = os.path.join(tmpdir, jobhash) - files_to_copy = os.listdir(files_location) - for _f in files_to_copy: - _f_path = os.path.join(files_location, _f) - shutil.copy(_f_path, directory) - shutil.rmtree(files_location) - - return True - - -def arc_purge_multiple(jobids, cred_req): - """ARC CE job purging""" - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - cmd = 'arcclean' - - cmd = '%s %s -i %s -j %s' % ( - cmd, __arc_get_config_file_arg__(), idsfile, config["ArcJobListFile"]) - - logger.debug('job purge command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - logger.debug(output) - - if rc == 0: - return True - else: - return False - - -def arc_cancel(jobid, cred_req): - """Cancel a job""" - - cmd = 'arckill' - - cmd = '%s %s %s -j %s' % (cmd, str( - jobid)[1:-1], __arc_get_config_file_arg__(), config["ArcJobListFile"]) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - if rc == 0: - # job cancelling succeeded, try to remove the glite command logfile - # if it exists - __clean_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return True - else: - logger.warning("Failed to cancel job %s.\n%s" % (jobid, output)) - __print_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return False - - -def arc_cancel_multiple(jobids, cred_req): - """Cancel multiple jobs in one LCG job cancellation call""" - - # compose a temporary file with job ids in it - if not jobids: - return True - - cmd = 'arckill' - - idsfile = tempfile.mktemp('.jids') - with open(idsfile, 'w') as ids_file: - ids_file.write('\n'.join(jobids) + '\n') - - # compose the cancel comman - cmd = '%s %s -i %s -j %s' % ( - cmd, __arc_get_config_file_arg__(), idsfile, config["ArcJobListFile"]) - - logger.debug('job cancel command: %s' % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, allowed_exit=[0, 255]) - - if rc == 0: - # job cancelling succeeded, try to remove the glite command logfile - # if it exists - __clean_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return True - else: - logger.warning("Failed to cancel jobs.\n%s" % output) - __print_gridcmd_log__(r'(.*-job-cancel.*\.log)', output) - return False - - -def arc_info(cred_req): - """Run the arcinfo command""" - - cmd = 'arcinfo %s > /dev/null' % __arc_get_config_file_arg__() - logger.debug("Running arcinfo command '%s'" % cmd) - - rc, output, m = getShell(cred_req).cmd1(cmd, - allowed_exit=[0, 1, 255], - timeout=config['StatusPollingTimeout']) - return rc, output diff --git a/ganga/GangaCore/Lib/LCG/GridSandboxCache.py b/ganga/GangaCore/Lib/LCG/GridSandboxCache.py deleted file mode 100755 index bf97a67ef2..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridSandboxCache.py +++ /dev/null @@ -1,362 +0,0 @@ -############################################################################### -# Ganga Project. http://cern.ch/ganga -# -# $Id: GridSandboxCache.py,v 1.10 2009-07-16 10:41:17 hclee Exp $ -############################################################################### -# -# LCG backend -# -# ATLAS/ARDA -# -# Date: January 2007 -import re - -import GangaCore.Utility.logging -from GangaCore.GPIDev.Base import GangaObject -from GangaCore.GPIDev.Base.Proxy import getName -from GangaCore.GPIDev.Schema import ComponentItem, Schema, SimpleItem, Version -from GangaCore.Lib.LCG.Utility import get_uuid -from GangaCore.Utility.ColourText import (ANSIMarkup, Effects, Foreground, - NoMarkup) -from GangaCore.Utility.logging import getLogger - -logger = GangaCore.Utility.logging.getLogger() - - -class GridFileIndex(GangaObject): - - ''' - Data object for indexing a file on the grid. - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - ''' - - _schema = Schema(Version(1, 0), { - 'id': SimpleItem(defvalue='', doc='the main identity of the file'), - 'name': SimpleItem(defvalue='', doc='the name of the file'), - 'md5sum': SimpleItem(defvalue='', doc='the md5sum of the file'), - 'attributes': SimpleItem(defvalue={}, doc='a key:value pairs of file metadata') - }) - - _category = 'GridFileIndex' - _name = 'GridFileIndex' - - logger = getLogger() - - def __init__(self): - super(GridFileIndex, self).__init__() - - def __eq__(self, other): - return other.id == self.id - - -class GridSandboxCache(GangaObject): - - ''' - Helper class for upladong/downloading/deleting sandbox files on a grid cache. - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - ''' - - _schema = Schema(Version(1, 1), { - 'protocol': SimpleItem(defvalue='', copyable=1, doc='file transfer protocol'), - 'max_try': SimpleItem(defvalue=1, doc='max. number of tries in case of failures'), - 'timeout': SimpleItem(defvalue=180, copyable=0, hidden=1, doc='transfer timeout in seconds'), - 'uploaded_files': ComponentItem('GridFileIndex', defvalue=[], sequence=1, protected=1, copyable=0, hidden=1, doc='a repository record for the uploaded files') - }) - - _category = 'GridSandboxCache' - _name = 'GridSandboxCache' - _exportmethods = ['upload', 'download', 'delete', - 'get_cached_files', 'list_cached_files', 'cleanup'] - - logger = getLogger() - - def __init__(self): - super(GridSandboxCache, self).__init__() - - def upload(self, cred_req, files=[], opts=''): - """ - Uploads multiple files to a remote grid storage. - - @param files is a list of local files to be uploaded to the grid. - The element can be a file path or a File object. - - @return True if files are successfully uploaded; otherwise it returns False - """ - status = False - - paths = [] - for f in files: - if getName(f) == 'File': - paths.append('file://%s' % f.name) - elif getName(f) == 'str': - paths.append('file://%s' % f) - else: - self.logger.warning('unknown file expression: %s' % repr(f)) - - uploaded_files = self.impl_upload(cred_req=cred_req, files=paths, opts=opts) - - if len(uploaded_files) == len(files): - status = self.impl_bookkeepUploadedFiles( - uploaded_files, append=True, opts=opts) - else: - status = False - - if len(uploaded_files) == len(files): - status = self.impl_bookkeepUploadedFiles( - uploaded_files, append=True, opts=opts) - else: - status = False - - return status - - def download(self, cred_req, files=[], dest_dir=None, opts=''): - """ - Downloads multiple files from remote grid storages to - a local directory. - - If the file is successfully downloaded, the local file path would be: - - - os.path.join(dest_dir, os.path.basename(local_fname_n) - - @param files is a list of files to be downloaded from the grid. - The data format of it should be: - - [index_grid_file_1, index_grid_file_2, ...] - - @param dest_dir is a local destination directory to store the downloaded files. - - @return True if files are successfully downloaded; otherwise it returns False - """ - status = False - myFiles = self.__get_file_index_objects__(files) - downloadedFiles = self.impl_download(cred_req=cred_req, files=myFiles, dest_dir=dest_dir, opts=opts) - - if len(downloadedFiles) == len(myFiles): - status = True - else: - self.logger.warning('some files not successfully downloaded') - - return status - - def delete(self, cred_req, files=[], opts=''): - """ - Deletes multiple files from remote grid storages. - - @param files is a list of files to be deleted from the grid. - The data format of it should be: - - [index_grid_file_1, index_grid_file_2, ...] - - @return True if files are successfully deleted; otherwise it returns False - """ - status = False - myFiles = self.__get_file_index_objects__(files) - deletedFiles = self.impl_delete(cred_req=cred_req, files=myFiles, opts=opts) - - if len(deletedFiles) == len(myFiles): - status = True - else: - self.logger.warning('some files not successfully deleted') - - return status - - def cleanup(self, cred_req, opts=''): - """ - Cleans up the uploaded files. - - @return True if all grid files listed in the index file are successfully deleted. - """ - status = False - - all_files = self.get_cached_files() - - f_ids = [] - for f in all_files: - f_ids.append(f.id) - - return self.delete(cred_req=cred_req, files=f_ids) - - def get_cached_files(self, opts=''): - """ - Gets the indexes of the uploaded files on the grid. - - @return the dictionary indexing the uploaded files on the grid. - The key of the dictionary should be the main index (e.g. GUID) of the grid files. - """ - return self.impl_getUploadedFiles(opts=opts) - - def list_cached_files(self, loop=True, opts=''): - """ - Lists the uploaded files. - - if loop = True, it prints also the uploaded files associated with subjobs. - """ - - fc = 0 - ds = '' - - doColoring = True - - fg = Foreground() - fx = Effects() - - status_colors = {'inuse': fg.orange, - 'free': fg.blue, - 'gone': fg.red} - - status_mapping = {'new': 'inuse', - 'submitted': 'inuse', - 'submitting': 'inuse', - 'running': 'inuse', - 'completed': 'free', - 'completing': 'free', - 'failed': 'free', - 'killed': 'free'} - - if doColoring: - markup = ANSIMarkup() - else: - markup = NoMarkup() - - def __markup_by_status__(fileIndex, counter, status): - - fmtStr = '\n%4d\t%-30s\t%-12s\t%s' % ( - counter, fileIndex.name, status, fileIndex.id) - - try: - return markup(fmtStr, status_colors[status]) - except KeyError: - return markup(fmtStr, fx.normal) - - j = self.getJobObject() - - for f in self.get_cached_files(opts=opts): - - my_status = 'unknown' - - if j: - try: - my_status = status_mapping[j.status] - except KeyError: - pass - - ds += __markup_by_status__(f, fc, my_status) - - fc += 1 - - if j and loop: - for sj in j.subjobs: - for f in sj.backend.sandboxcache.get_cached_files(opts=opts): - - my_status = 'unknown' - - try: - my_status = status_mapping[sj.status] - except KeyError: - pass - - ds += __markup_by_status__(f, fc, my_status) - - fc += 1 - - return ds - - # methods to be implemented in the child classes - def impl_upload(self, cred_req, files=[], opts=''): - """ - Uploads multiple files to a remote grid storage. - - @param files is a list of files in URL format (i.e. file://...) - - @return a list of successfully uploaded files represented by GridFileIndex objects - """ - raise NotImplementedError - - def impl_download(self, cred_req, files=[], dest_dir=None, opts=''): - """ - Downloads multiple files from remote grid storages to - a local directory. - - @param files is a list of files represented by GridFileIndex objects - @param dest_dir is a local destination directory to store the downloaded files. - - @return a list of successfully downloaded files represented by GridFileIndex objects - """ - raise NotImplementedError - - def impl_delete(self, cred_req, files=[], opts=''): - """ - Deletes multiple files from remote grid storages. - - @param files is a list of files represented by GridFileIndex objects - @return a list of successfully deleted files represented by GridFileIndex objects - """ - raise NotImplementedError - - def impl_bookkeepUploadedFiles(self, files=[], append=True, opts=''): - """ - basic implementation for bookkeeping the uploaded files. - It simply keeps the GridFileIndex objects in the job repository. - - @param files is a list of files represented by GridFileIndex objects - @return True if files are successfully logged in the local index file - """ - - self.uploaded_files = files - - return True - - def impl_getUploadedFiles(self, opts=''): - """ - basic implementation for getting the previously uploaded files from the - job repository. - - @return a list of files represented by GridFileIndex objects - """ - files = self.uploaded_files - - return files - - # private methods - def __get_file_index_objects__(self, files=[]): - '''Gets file index object according to the given file list - - try to get the GridFileIndex object from the local index file. - - @param files is a list of file indexes - @return a list of files represented by GridFileIndex objects - ''' - - cachedFiles = self.get_cached_files() - myFiles = [] - for f in cachedFiles: - if f.id in files: - myFiles.append(f) - - return myFiles - - def __get_unique_fname__(self): - '''gets an unique filename''' - fname = 'user.%s' % (get_uuid()) - return fname - - def __cmd_retry_loop__(self, shell, cmd, maxRetry=3): - '''Executing system command with retry feature''' - i = 0 - rc = 0 - output = None - m = None - try_again = True - while try_again: - i = i + 1 - self.logger.debug('run cmd: %s' % cmd) - rc, output, m = shell.cmd1(cmd, allowed_exit=[0, 255]) - if rc in [0, 255]: - try_again = False - elif i == maxRetry: - try_again = False - else: - self.logger.warning("trial %d: error: %s" % (i, output)) - - return (rc, output, m) diff --git a/ganga/GangaCore/Lib/LCG/GridSimulator/GridSimulator.py b/ganga/GangaCore/Lib/LCG/GridSimulator/GridSimulator.py deleted file mode 100755 index a3d51dfd5b..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridSimulator/GridSimulator.py +++ /dev/null @@ -1,273 +0,0 @@ -from GangaCore.Utility.Config import getConfig -from GangaCore.Utility.logging import getLogger - -from GangaCore.Utility.GridShell import getShell - -logger = getLogger() - -logger.critical('LCG Grid Simulator ENABLED') - -########################################################################## -# GRID SIMULATOR -########################################################################## - -config = getConfig("GridSimulator") - - -def sleep(val): - import time - time.sleep(get_number(val)) - - -def failed(val): - t = get_number(val) - import random - return random.random() < t - - -def get_number(val): - import random - if isinstance(val, str): - t = eval(val, {'random': random}) - else: - t = val - if not type(t) in [type(1.0), type(1)]: - # print 'problem with configuration option, invalid value: %s'%val - logger.error( - 'problem with configuration option, invalid value: %s', val) - return 0 - # print t - return t - - -import os -import time - -cmd = 'simulation' - - -class GridSimulator(object): - - '''Simulator of LCG interactions''' - - credential = None - - def __init__(self): - self.active = True - #import GangaCore.Core.FileWorkspace - #basedir = GangaCore.Core.FileWorkspace.gettop() - #basedir = '/tmp' - basedir = '.' - self.gridmap_filename = '%s/lcg_simulator_gridmap' % basedir - import shelve - # map Grid job id into inputdir (where JDL file is) - self.jobid_map = shelve.open(self.gridmap_filename, writeback=False) - self.jobid_map.setdefault('_job_count', 0) - - # here we store the job finish times as seen by ganga - self.finished_jobs_filename = '%s/lcg_simulator_finished_jobs' % basedir - self.ganga_finish_time = shelve.open( - self.finished_jobs_filename, writeback=False) - - self.shell = getShell('GLITE') - - logger.critical('Grid Simulator data files: %s %s', - self.gridmap_filename, self.finished_jobs_filename) - - def check_proxy(self): - return True - - def submit(self, jdlpath, ce=None): - '''This method is used for normal and native bulk submission supported by GLITE middleware.''' - - logger.debug( - 'job submit command: submit(jdlpath=%s,ce=%s)', jdlpath, ce) - - jdl = eval(file(jdlpath).read()) - - subjob_ids = [] - if jdl['Type'] == 'collection': - import re - # we need to parse the Nodes attribute string here - r = re.compile(r'.*NodeName = "(gsj_\d+)"; file="(\S*)"*') - for line in jdl['Nodes'].splitlines()[1:-1]: - m = r.match(line) - if m: - nodename, sjdl_path = m.groups() - subjob_ids.append( - self._submit(sjdl_path, ce, [], nodename=nodename)) - - masterid = self._submit(jdlpath, ce, subjob_ids) - - return masterid - - def _params_filename(self, jobid): - inputdir = os.path.realpath(self.jobid_map[jobid]) - return os.path.join(inputdir, 'params') - - def _submit(self, jdlpath, ce, subjob_ids, nodename=None): - '''Submit a JDL file to LCG''' - - logger.debug( - 'job submit command: _submit(jdlpath=%s,ce=%s,subjob_ids=%s)', jdlpath, ce, subjob_ids) - - inputdir = os.path.dirname(os.path.realpath(jdlpath)) - - def write(): - with open(os.path.join(inputdir, 'params'), 'w') as file_: - file_.write(repr(runtime_params)) - - runtime_params = {} - runtime_params['submission_time_start'] = time.time() - - sleep(config['submit_time']) - runtime_params['submission_time_stop'] = time.time() - - if failed(config['submit_failure_rate']): - runtime_params['status'] = 'failed_to_submit' - write() - logger.warning('Job submission failed.') - return - - jobid = self._make_new_id() - - self.jobid_map[jobid] = inputdir - - runtime_params['jobid'] = jobid - runtime_params['status'] = 'submitted' - runtime_params['should_fail'] = failed(config['job_failure_rate']) - runtime_params['expected_job_id_resolve_time'] = get_number( - config['job_id_resolved_time']) - runtime_params['expected_finish_time'] = time.time( - ) + get_number(config['job_finish_time']) - runtime_params['subjob_ids'] = subjob_ids - runtime_params['nodename'] = nodename - write() - return jobid - - def _make_new_id(self): - self.jobid_map['_job_count'] += 1 - jobid = 'https://ganga.simulator.cern.ch/%d' % self.jobid_map[ - '_job_count'] - return jobid - - def _cancel(self, jobid): - inputdir = self.jobid_map[jobid] - - sleep(config['cancel_time']) - if failed(config['cancel_failure_rate']): - with open(self._params_filename(jobid), 'a') as file_: - file_.write('\n failed to cancel: %d' % time.time()) - return False - with open(self._params_filename(jobid), 'a') as file_: - file_.write('\ncancelled: %d' % time.time()) - return True - - def native_master_cancel(self, jobid): - '''Native bulk cancellation supported by GLITE middleware.''' - - logger.debug( - 'job cancel command: native_master_cancel(jobid=%s', jobid) - - # FIXME: TODO: emulate bulk! - return self._cancel(jobid) - - def _status(self, jobid, has_id): - logger.debug( - 'job status command: _status(jobid=%s,has_id=%d)', jobid, has_id) - - info = {'id': None, - 'name': None, - 'status': None, - 'exit': None, - 'reason': None, - 'is_node': False, - 'destination': 'anywhere'} - - params = eval(file(self._params_filename(jobid)).read()) - - sleep(config['single_status_time']) - - assert params['jobid'] == jobid - - if has_id: - info['id'] = params['jobid'] - info['name'] = params['nodename'] - - # if is_collection and time.time() > params['expected_job_id_resolve_time']: - # info['name'] = 'node_%d' % 0 # FIXME: some number (read from jdl?) - - logger.debug('current_time-expected_finish_time = %d', - time.time() - params['expected_finish_time']) - - if time.time() > params['expected_finish_time']: - if params['should_fail']: - info['status'] = 'Aborted' - info['reason'] = 'for no reason' - info['exit'] = -1 - self.ganga_finish_time[jobid] = time.time() - else: - info['status'] = 'Done (Success)' - info['exit'] = 0 - info['reason'] = 'for a reason' - - logger.debug('_status (jobid=%s) -> %s', jobid, repr(info)) - - # PENDING: handle other statuses: 'Running','Aborted','Cancelled','Done - # (Exit Code !=0)','Cleared' - return info - - def status(self, jobids, is_collection=False): - '''Query the status of jobs on the grid. - If is_collection is False then jobids is a list of non-split jobs or emulated bulk subjobs of a single master job. - If is_collection is True then jobids is a list of master jobs which are natively bulk. - ''' - - logger.debug( - 'job status command: status(jobid=%s,is_collection=%d)', jobids, is_collection) - - info = [] - - for id in jobids: - if is_collection: - # print 'master _status' - sleep(config['master_status_time']) - info.append(self._status(id, True)) - # print 'master _status done' - params = eval(file(self._params_filename(id)).read()) - # print 'master params',params - has_id = time.time() > params['expected_job_id_resolve_time'] - for sid in params['subjob_ids']: - info.append(self._status(sid, has_id)) - info[-1]['is_node'] = True - else: - has_id = False - info.append(self._status(id, True)) - - return info - - def get_loginfo(self, jobid, directory, verbosity=1): - '''Fetch the logging info of the given job and save the output in the jobs outputdir''' - - return "" - - def get_output(self, jobid, directory, wms_proxy=False): - '''Retrieve the output of a job on the grid''' - - logger.debug( - 'job get output command: get_output(jobid=%s,directory=%s)', jobid, directory) - sleep(config['get_output_time']) - self.ganga_finish_time[jobid] = time.time() - return (True, None) - - def cancel(self, jobid): - '''Cancel a job''' - logger.debug('job cancel command: cancel(jobid=%s)', jobid) - - return self._cancel(jobid) - - @staticmethod - def expandjdl(items): - '''Expand jdl items''' - - return repr(items) diff --git a/ganga/GangaCore/Lib/LCG/GridSimulator/__init__.py b/ganga/GangaCore/Lib/LCG/GridSimulator/__init__.py deleted file mode 100755 index 9e667a1199..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridSimulator/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ - -from .GridSimulator import GridSimulator diff --git a/ganga/GangaCore/Lib/LCG/GridSimulator/simulator-analyze.py b/ganga/GangaCore/Lib/LCG/GridSimulator/simulator-analyze.py deleted file mode 100755 index 677e3c3307..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridSimulator/simulator-analyze.py +++ /dev/null @@ -1,89 +0,0 @@ - - -#from GangaCore.Lib.LCG.GridSimulator import GridSimulator -#g = GridSimulator() -#gridmap_filename = g.gridmap_filename -#finished_jobs_filename = g.finished_jobs_filename - -import os.path -import shelve -import sys - -try: - basedir = sys.argv[1] -except IndexError: - basedir = '.' - -gridmap_filename = "%s/lcg_simulator_gridmap" % basedir -finished_jobs_filename = "%s/lcg_simulator_finished_jobs" % basedir - - -gridmap = shelve.open(gridmap_filename) -finished_jobs = shelve.open(finished_jobs_filename) - -deltas = [] -job_finished_times = [] -ganga_finished_times = [] - -for gid in gridmap: - if gid[0] == '_': - continue - params = eval(file(os.path.join(gridmap[gid], 'params')).read()) - try: - job_finished_times.append(params['expected_finish_time']) - ganga_finished_times.append(finished_jobs[gid]) - deltas.append(ganga_finished_times[-1] - job_finished_times[-1]) - except KeyError: - print('Missing data for:', gid, file=sys.stderr) - -idle_cnt = 0 -idle = [] - -job_finished_times.sort() -ganga_finished_times.sort() - -i = 0 -j = 0 - -INF = 1e40 - -start_t = min(job_finished_times[0], ganga_finished_times[0]) - -while i < len(job_finished_times) and j < len(ganga_finished_times): - - try: - a = job_finished_times[i] - except IndexError: - a = INF - - try: - b = ganga_finished_times[j] - except IndexError: - b = INF - - if a < b: - idle_cnt += 1 - idle.append((a - start_t, idle_cnt)) - i += 1 - elif a > b: - idle_cnt -= 1 - idle.append((b - start_t, idle_cnt)) - j += 1 - else: - idle.append((a - start_t, idle_cnt)) - i += 1 - j += 1 - - -with open('%s/idle.dat' % basedir, 'w') as f: - f.write( - "# time-based counter of jobs which were reported by the grid as finished but not completed/failed in ganga\n") - f.write( - "# x = time in seconds from the beginning of the analysis, y = counter of 'idle' jobs\n") - for i in idle: - f.write("%d %d\n" % i) -with open('%s/deltas.dat' % basedir, 'w') as f: - f.write( - "# time difference (for each individual job) between the job was reported by the grid as finished and completed/failed in ganga\n") - for d in deltas: - f.write(d + '\n') diff --git a/ganga/GangaCore/Lib/LCG/GridSimulator/simulator.py b/ganga/GangaCore/Lib/LCG/GridSimulator/simulator.py deleted file mode 100755 index f9b0fa6886..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridSimulator/simulator.py +++ /dev/null @@ -1,75 +0,0 @@ - -# this is a ganga grid simulator driver script -# usage: -# env GANGA_GRID_SIMULATOR=1 ganga -o[LCG]GLITE_ENABLE=True -# -oGLITE_SETUP=/dev/null -o[PollThread]autostart=True simulator.py - -# when the simulator is enabled it will produce data files in the current working directory -# these files may be further processed with the simulator-analyze.py -# script to extract timing data - -# recommended way is to have the driver + simulation parameters + results all in one directory e.g. -# -# mkdir simu -# cd simu -# cp path/to/simulation.py . -# run simulation.py as described above - -from GangaCore.GPIDev.Lib.Job.Job import Job -from GangaCore.Lib.LCG import LCG -from GangaCore.Lib.Splitters import GenericSplitter -from GangaCore.Utility.logging import getLogger -logger = getLogger(modulename=True) - -config = config['GridSimulator'] -config['submit_time'] = '0.2' -config['submit_failure_rate'] = 0.0 -config['cancel_time'] = 'random.uniform(0,1)' -config['cancel_failure_rate'] = 0.0 -config['single_status_time'] = 0.0 # * number of subjobs -config['master_status_time'] = 'random.uniform(2,5)' # constant -config['get_output_time'] = '0.0' -config['job_id_resolved_time'] = 'random.uniform(10,50)' # up to 800s -config['job_finish_time'] = '10+random.uniform(10,10)' -config['job_failure_rate'] = 'random.uniform(0,0.05)' - -# submit K parallel master jobs with N subjobs each - - -def submit(N, K): - jobs = [] - for i in range(K): - j = Job() - j._auto__init__() - j.backend = LCG() - j.backend.middleware = 'GLITE' - j.splitter = GenericSplitter() - j.splitter.attribute = 'application.args' - j.splitter.values = [['x']] * N - j.submit() - jobs.append(j) - import time - - def finished(): - for j in jobs: - if not j.status in ['failed', 'completed']: - return False - return True - - while not finished(): - time.sleep(1) - - return jobs - -# repeat M times for better statistics (and repository scalability) - - -M = 5 - -for i in range(M): - logger.info('*' * 80) - logger.info('starting %d out of %d' % (i, M)) - logger.info('*' * 80) - submit(50, 10) - -logger.info('finished!') diff --git a/ganga/GangaCore/Lib/LCG/GridftpSandboxCache.py b/ganga/GangaCore/Lib/LCG/GridftpSandboxCache.py deleted file mode 100644 index 2e3b9313fb..0000000000 --- a/ganga/GangaCore/Lib/LCG/GridftpSandboxCache.py +++ /dev/null @@ -1,270 +0,0 @@ -import os -import os.path -import re -from urllib.parse import urlparse - -from GangaCore.Core.GangaThread.MTRunner import Algorithm, Data, MTRunner -from GangaCore.GPIDev.Schema import Schema, SimpleItem, Version -from GangaCore.Lib.LCG.GridSandboxCache import GridFileIndex, GridSandboxCache -from GangaCore.Lib.LCG.Utility import get_md5sum, urisplit -from GangaCore.Utility.GridShell import getShell - -gridftp_sandbox_cache_schema_datadict = GridSandboxCache._schema.inherit_copy( -).datadict -gridftp_file_index_schema_datadict = GridFileIndex._schema.inherit_copy( -).datadict - - -from GangaCore.Utility.logging import getLogger - - -class GridftpFileIndex(GridFileIndex): - - """ - Data object containing Gridftp file index information. - - - id: gsiftp URI - - name: basename of the file - - md5sum: md5 checksum - - attributes['fpath']: path of the file on local machine - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - """ - - _schema = Schema(Version(1, 0), gridftp_file_index_schema_datadict) - _category = 'GridFileIndex' - _name = 'GridftpFileIndex' - - def __init__(self): - super(GridftpFileIndex, self).__init__() - - -class GridftpSandboxCache(GridSandboxCache): - - ''' - Helper class for upladong/downloading/deleting sandbox files using lcg-cp/lcg-del commands with gsiftp protocol. - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - ''' - - gridftp_sandbox_cache_schema_datadict.update({ - 'baseURI': SimpleItem(defvalue='', copyable=1, doc='the base URI for storing cached files'), - 'copyCommand': SimpleItem(defvalue='globus-copy-url', typelist=[str], copyable=1, doc='the command to be exectued to copy files'), - }) - - _schema = Schema(Version(1, 0), gridftp_sandbox_cache_schema_datadict) - _category = 'GridSandboxCache' - _name = 'GridftpSandboxCache' - - logger = getLogger() - - def __init__(self): - super(GridftpSandboxCache, self).__init__() - self.protocol = 'gsiftp' - - def impl_upload(self, cred_req, files=[], opts=''): - """ - Uploads multiple files to a remote gridftp server. - """ - - shell = getShell(cred_req) - - # making the directory on remove storage at destURI - dirname = self.__get_unique_fname__() - - # creating subdirectory - - dir_ok = False - - destURI = '%s/%s' % (self.baseURI, dirname) - - uri_info = urisplit(destURI) - - cmd = 'uberftp %s "cd %s"' % (uri_info[1], uri_info[2]) - - rc, output, m = self.__cmd_retry_loop__(shell, cmd, 1) - - if rc != 0: - - for l in output.split('\n'): - l.strip() - if re.match(r'^550.*', l): - # the directory is not found (error code 550), try to creat - # the lowest level one - cmd = 'uberftp %s "mkdir %s"' % (uri_info[1], uri_info[2]) - - rc, output, m = self.__cmd_retry_loop__(shell, cmd, 1) - - if rc != 0: - self.logger.error(output) - else: - dir_ok = True - - break - else: - self.logger.debug( - 'parent directory already available: %s' % destURI) - dir_ok = True - - if not dir_ok: - self.logger.error('parent directory not available: %s' % destURI) - return [] - - # the algorithm of uploading one file - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj): - Algorithm.__init__(self) - self.cacheObj = cacheObj - - def process(self, file): - # decide number of parallel stream to be used - fsize = os.path.getsize(urlparse(file)[2]) - fname = os.path.basename(urlparse(file)[2]) - fpath = os.path.abspath(urlparse(file)[2]) - - md5sum = get_md5sum(fpath, ignoreGzipTimestamp=True) - nbstream = int((fsize * 1.0) / (10.0 * 1024 * 1024 * 1024)) - - if nbstream < 1: - nbstream = 1 # min stream - if nbstream > 8: - nbstream = 8 # max stream - - myDestURI = '%s/%s' % (destURI, fname) - - # uploading the file - cmd = 'uberftp' - if nbstream > 1: - cmd += ' -c %d' % nbstream - - cmd += ' file:%s %s' % (fpath, myDestURI) - - rc, output, m = self.cacheObj.__cmd_retry_loop__( - shell, cmd, self.cacheObj.max_try) - - if rc != 0: - self.cacheObj.logger.error(output) - return False - else: - fidx = GridftpFileIndex() - fidx.id = myDestURI - fidx.name = fname - fidx.md5sum = md5sum - fidx.attributes['fpath'] = fpath - - self.__appendResult__(file, fidx) - return True - - myAlg = MyAlgorithm(cacheObj=self) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_gridftp', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - return list(runner.getResults().values()) - - def impl_download(self, cred_req, files=[], dest_dir=None, opts=''): - """ - Downloads multiple files from gridftp server to - a local directory. - """ - if not dest_dir: - dest_dir = os.getcwd() - self.logger.debug('download file to: %s', dest_dir) - - shell = getShell(cred_req) - - # the algorithm of downloading one file to a local directory - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj, copyCommand): - Algorithm.__init__(self) - self.cacheObj = cacheObj - self.copyCommand = copyCommand - - def process(self, file): - - srcURI = file.id - fname = os.path.basename(urisplit(srcURI)[2]) - destURI = 'file:%s/%s' % (dest_dir, fname) - - #cmd = 'uberftp %s %s' % (srcURI, destURI) - cmd = '%s %s %s' % (self.copyCommand, srcURI, destURI) - - rc, output, m = self.cacheObj.__cmd_retry_loop__( - shell, cmd, self.cacheObj.max_try) - - if rc != 0: - self.cacheObj.logger.error(output) - return False - else: - self.__appendResult__(file.id, file) - return True - - myAlg = MyAlgorithm(cacheObj=self, copyCommand=self.copyCommand) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_gridftp', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - return list(runner.getResults().values()) - - def impl_delete(self, cred_req, files=[], opts=''): - """ - Deletes multiple files from remote gridftp server - """ - - shell = getShell(cred_req) - - # the algorithm of downloading one file to a local directory - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj): - Algorithm.__init__(self) - self.cacheObj = cacheObj - - def process(self, file): - - destURI = file.id - - uri_info = urisplit(destURI) - - cmd = 'uberftp %s "rm %s"' % (uri_info[1], uri_info[2]) - - rc, output, m = self.cacheObj.__cmd_retry_loop__( - shell, cmd, self.cacheObj.max_try) - - if rc != 0: - self.cacheObj.logger.error(output) - return False - else: - self.__appendResult__(file.id, file) - return True - - myAlg = MyAlgorithm(cacheObj=self) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_lcgdel', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - # update the local index file - del_files = list(runner.getResults().values()) - all_files = self.get_cached_files() - - left_files = [] - for f in all_files: - if f not in del_files: - left_files.append(f) - - self.impl_bookkeepUploadedFiles(left_files, append=False) - - return del_files diff --git a/ganga/GangaCore/Lib/LCG/LCG.py b/ganga/GangaCore/Lib/LCG/LCG.py deleted file mode 100755 index e5d8446e17..0000000000 --- a/ganga/GangaCore/Lib/LCG/LCG.py +++ /dev/null @@ -1,2743 +0,0 @@ - -############################################################################### -# Ganga Project. http://cern.ch/ganga -# -# $Id: LCG.py,v 1.39 2009-07-16 10:39:27 hclee Exp $ -############################################################################### -# -# LCG backend -# -# ATLAS/ARDA -# -# Date: August 2005 - -import os -import re -import math -import mimetypes -import shutil -from collections import defaultdict - -from GangaCore.Core.GangaThread.MTRunner import MTRunner, Data, Algorithm -from GangaCore.Core.exceptions import GangaException - -from GangaCore.GPIDev.Schema import Schema, Version, SimpleItem, ComponentItem -from GangaCore.GPIDev.Lib.File import FileBuffer -from GangaCore.GPIDev.Adapters.IBackend import IBackend -from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig -from GangaCore.Utility.Config import getConfig -import GangaCore.Utility.Config -from GangaCore.Utility.logging import getLogger, log_user_exception -from GangaCore.Utility.util import isStringLike -from GangaCore.Lib.LCG.ElapsedTimeProfiler import ElapsedTimeProfiler -from GangaCore.Lib.LCG.LCGOutputDownloader import LCGOutputDownloader -from GangaCore.Lib.LCG.Utility import get_uuid, get_md5sum -from GangaCore.Utility.logic import implies -from GangaCore.GPIDev.Base.Proxy import isType, getName -from GangaCore.Utility.GridShell import getShell - -from GangaCore.GPIDev.Credentials import require_credential, credential_store, needed_credentials -from GangaCore.GPIDev.Credentials.VomsProxy import VomsProxy - -from . import Grid - -logger = getLogger() -config = getConfig('LCG') - -_lcg_output_downloader = None - - -def get_lcg_output_downloader(): - global _lcg_output_downloader - - if not _lcg_output_downloader: - - number_of_threads = config['OutputDownloaderThread'] - - _lcg_output_downloader = LCGOutputDownloader(numThread=number_of_threads) - _lcg_output_downloader.start() - - return _lcg_output_downloader - -# helper routines - - -def __fail_missing_jobs__(missing_glite_jids, jobdict): - """failing the Ganga jobs if the associated glite job id is appearing in missing_glite_jids""" - - for glite_jid in missing_glite_jids: - if glite_jid in jobdict: - j = jobdict[glite_jid] - - if j.master: - # this is a subjob - j.backend.status = 'Removed' - j.backend.reason = 'job removed from WMS' - j.updateStatus('failed') - - else: - # this is a master job - for sj in j.subjobs: - if sj.backend.parent_id == glite_jid: - sj.backend.status = 'Removed' - sj.backend.reason = 'job removed from WMS' - sj.updateStatus('failed') - - j.updateStatus('failed') - - -class LCG(IBackend): - - """LCG backend - submit jobs to the EGEE/LCG Grid using gLite middleware. - - If the input sandbox exceeds the limit specified in the ganga - configuration, it is automatically uploaded to a storage element. This - overcomes sandbox size limits on the resource broker. - - For gLite middleware bulk (faster) submission is supported so splitting - jobs may be more efficient than submitting bunches of individual jobs. - - For more options see help on LCGRequirements. - - See also: http://cern.ch/glite/documentation - """ - - # internal usage of the flag: - # - 0: job without the need of special control - # - 1: job (normally a subjob) resubmitted individually. The monitoring of those jobs should be separated. - _schema = Schema(Version(1, 9), { - 'CE': SimpleItem(defvalue='', doc='Request a specific Computing Element'), - 'jobtype': SimpleItem(defvalue='Normal', doc='Job type: Normal, MPICH'), - 'requirements': ComponentItem('LCGRequirements', doc='Requirements for the resource selection'), - 'sandboxcache': ComponentItem('GridSandboxCache', copyable=1, doc='Interface for handling oversized input sandbox'), - 'parent_id': SimpleItem(defvalue='', protected=1, copyable=0, hidden=1, doc='Middleware job identifier for its parent job'), - 'id': SimpleItem(defvalue='', typelist=[str, list], protected=1, copyable=0, doc='Middleware job identifier'), - 'status': SimpleItem(defvalue='', typelist=[str, dict], protected=1, copyable=0, doc='Middleware job status'), - 'middleware': SimpleItem(defvalue='GLITE', protected=0, copyable=1, doc='Middleware type', checkset='__checkset_middleware__'), - 'exitcode': SimpleItem(defvalue='', protected=1, copyable=0, doc='Application exit code'), - 'exitcode_lcg': SimpleItem(defvalue='', protected=1, copyable=0, doc='Middleware exit code'), - 'reason': SimpleItem(defvalue='', protected=1, copyable=0, doc='Reason of causing the job status'), - 'perusable': SimpleItem(defvalue=False, protected=0, copyable=1, doc='Enable the job perusal feature of GLITE'), - 'actualCE': SimpleItem(defvalue='', protected=1, copyable=0, doc='Computing Element where the job actually runs.'), - 'monInfo': SimpleItem(defvalue={}, protected=1, copyable=0, hidden=1, doc='Hidden information of the monitoring service.'), - 'flag': SimpleItem(defvalue=0, protected=1, copyable=0, hidden=1, doc='Hidden flag for internal control.'), - 'credential_requirements': ComponentItem('CredentialRequirement', defvalue=VomsProxy()), - }) - - _category = 'backends' - _name = 'LCG' - _exportmethods = ['loginfo', 'inspect', - 'match', 'get_wms_list', 'get_ce_list', 'get_se_list'] - - _final_ganga_states = ['completing', 'completed', 'failed'] - - def __init__(self): - super(LCG, self).__init__() - - # Disable GLITE perusal by default, since it can be dangerous - self.perusable = False - - # dynamic requirement object loading - try: - reqName1 = config['Requirements'] - reqName = config['Requirements'].split('.').pop() - reqModule = __import__(reqName1, globals(), locals(), [reqName1]) - reqClass = vars(reqModule)[reqName] - self.requirements = reqClass() - - logger.debug('load %s as LCGRequirements' % reqName) - except: - logger.debug('load default LCGRequirements') - - # dynamic sandbox cache object loading - try: - scName1 = config['SandboxCache'] - scName = config['SandboxCache'].split('.').pop() - scModule = __import__(scName1, globals(), locals(), [scName1]) - scClass = vars(scModule)[scName] - self.sandboxcache = scClass() - logger.debug('load %s as SandboxCache' % scName) - except: - logger.debug('load default LCGSandboxCAche') - - def __setup_sandboxcache__(self, job): - """Sets up the sandbox cache object to adopt the runtime configuration of the LCG backend""" - - re_token = re.compile('^token:(.*):(.*)$') - - self.sandboxcache.vo = config['VirtualOrganisation'] - self.sandboxcache.timeout = config['SandboxTransferTimeout'] - - from GangaCore.Lib.LCG.LCGSandboxCache import LCGSandboxCache - if isType(self.sandboxcache, LCGSandboxCache): - if not self.sandboxcache.lfc_host: - self.sandboxcache.lfc_host = Grid.__get_lfc_host__() - - if not self.sandboxcache.se: - - token = '' - se_host = config['DefaultSE'] - m = re_token.match(se_host) - if m: - token = m.group(1) - se_host = m.group(2) - - self.sandboxcache.se = se_host - - if token: - self.sandboxcache.srm_token = token - - if (self.sandboxcache.se_type in ['srmv2']) and (not self.sandboxcache.srm_token): - self.sandboxcache.srm_token = config['DefaultSRMToken'] - - return True - - @require_credential - def __check_and_prestage_inputfile__(self, file): - """Checks the given input file size and if it's size is - over "BoundSandboxLimit", prestage it to a grid SE. - - The argument is a path of the local file. - - It returns a dictionary containing information to refer to the file: - - idx = {'lfc_host': lfc_host, - 'local': [the local file pathes], - 'remote': {'fname1': 'remote index1', 'fname2': 'remote index2', ... } - } - - If prestaging failed, None object is returned. - - If the file has been previously uploaded (according to md5sum), - the prestaging is ignored and index to the previously uploaded file - is returned. - """ - - idx = {'lfc_host': '', 'local': [], 'remote': {}} - - job = self.getJobObject() - - # read-in the previously uploaded files - uploadedFiles = [] - - # getting the uploaded file list from the master job - if job.master: - uploadedFiles += job.master.backend.sandboxcache.get_cached_files() - - # set and get the $LFC_HOST for uploading oversized sandbox - self.__setup_sandboxcache__(job) - - uploadedFiles += self.sandboxcache.get_cached_files() - - lfc_host = None - - # for LCGSandboxCache, take the one specified in the sansboxcache object. - # the value is exactly the same as the one from the local grid shell env. if - # it is not specified exclusively. - from GangaCore.Lib.LCG.LCGSandboxCache import LCGSandboxCache - if isType(self.sandboxcache, LCGSandboxCache): - lfc_host = self.sandboxcache.lfc_host - - # or in general, query it from the Grid object - if not lfc_host: - lfc_host = Grid.__get_lfc_host__() - - idx['lfc_host'] = lfc_host - - abspath = os.path.abspath(file) - fsize = os.path.getsize(abspath) - if fsize > config['BoundSandboxLimit']: - - md5sum = get_md5sum(abspath, ignoreGzipTimestamp=True) - - doUpload = True - for uf in uploadedFiles: - if uf.md5sum == md5sum: - # the same file has been uploaded to the iocache - idx['remote'][os.path.basename(file)] = uf.id - doUpload = False - break - - if doUpload: - - logger.warning( - 'The size of %s is larger than the sandbox limit (%d byte). Please wait while pre-staging ...' % (file, config['BoundSandboxLimit'])) - - if self.sandboxcache.upload([abspath]): - remote_sandbox = self.sandboxcache.get_cached_files()[-1] - idx['remote'][remote_sandbox.name] = remote_sandbox.id - else: - logger.error( - 'Oversized sandbox not successfully pre-staged') - return None - else: - idx['local'].append(abspath) - - return idx - - def __refresh_jobinfo__(self, job): - '''Refresh the lcg jobinfo. It will be called after resubmission.''' - job.backend.status = '' - job.backend.reason = '' - job.backend.actualCE = '' - job.backend.exitcode = '' - job.backend.exitcode_lcg = '' - job.backend.flag = 0 - - def __print_no_resource_error__(self, jdl): - '''Prints out the error message when no matched resource''' - - logger.error('No matched resource: check/report the JDL below') - - logger.error('=== JDL ===\n' + '\n'.join(l.strip() - for l in open(jdl, 'r'))) - - return - - def master_auto_resubmit(self, rjobs): - """ - Resubmit each subjob individually as bulk resubmission will overwrite - previous master job statuses - """ - - # check for master failure - in which case bulk resubmit - mj = self._getParent() - if mj.status == 'failed': - return self.master_resubmit(rjobs) - - for j in rjobs: - if not j.backend.master_resubmit([j]): - return False - - return True - - def master_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''Submit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - -# if config['DrySubmit']: -# logger.warning('No job will be submitted in DrySubmit mode') - - job = self.getJobObject() - - ick = False - if len(job.subjobs) == 0: - ick = IBackend.master_submit(self, rjobs, subjobconfigs, masterjobconfig) - else: - ick = self.master_bulk_submit(rjobs, subjobconfigs, masterjobconfig) - if not ick: - raise GangaException('GLITE bulk submission failure') - - profiler.check('==> master_submit() elapsed time') - -# if config['DrySubmit']: -# ick = False - - return ick - - def master_resubmit(self, rjobs): - '''Resubmit the master job to the grid''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - -# if config['DrySubmit']: -# logger.warning('No job will be submitted in DrySubmit mode') - - job = self.getJobObject() - - ick = False - if not job.master and len(job.subjobs) == 0: - # case 1: master job normal resubmission - logger.debug('rjobs: %s' % str(rjobs)) - logger.debug('mode: master job normal resubmission') - ick = IBackend.master_resubmit(self, rjobs) - - elif job.master: - # case 2: individual subjob resubmission - logger.debug('mode: individual subjob resubmission') - status = IBackend.master_resubmit(self, rjobs) - if status: - # set the backend flag to 1 if the job is individually submitted - # the monitoring loop on the master job shouldn't taken - # into account this job - job.backend.flag = 1 - ick = status - - else: - # case 3: master job bulk resubmission - logger.debug('mode: master job bulk resubmission') - ick = self.master_bulk_resubmit(rjobs) - if not ick: - raise GangaException('GLITE bulk submission failure') - - profiler.check('job re-submission elapsed time') - -# if config['DrySubmit']: -# ick = False - - return ick - - def master_kill(self): - '''kill the master job to the grid''' - job = self.getJobObject() - - if not job.master and len(job.subjobs) == 0: - return IBackend.master_kill(self) - elif job.master: - #logger.warning('Killing individual subjob in GLITE middleware is an experimental function.') - return IBackend.master_kill(self) - else: - return self.master_bulk_kill() - - @require_credential - def __mt_bulk_submit__(self, node_jdls, max_node): - '''submitting bulk jobs in multiple threads''' - - job = self.getJobObject() - - logger.info('submitting %d subjobs ... it may take a while' % - len(node_jdls)) - - # the algorithm for submitting a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self, cred_req, masterInputWorkspace): - Algorithm.__init__(self) - self.cred_req = cred_req - self.inpw = masterInputWorkspace - - def process(self, node_info): - my_node_offset = node_info['offset'] - my_node_jdls = node_info['jdls'] - coll_jdl_name = '__jdlfile__%d_%d__' % (my_node_offset, my_node_offset + len(my_node_jdls)) - # compose master JDL for collection job - jdl_cnt = self.__make_collection_jdl__(my_node_jdls, offset=my_node_offset) - jdl_path = self.inpw.writefile(FileBuffer(coll_jdl_name, jdl_cnt)) - - master_jid = Grid.submit(jdl_path, self.cred_req, ce=None) - if not master_jid: - return False - else: - self.__appendResult__(my_node_offset, master_jid) - return True - - def __make_collection_jdl__(self, nodeJDLFiles=[], offset=0): - '''Compose the collection JDL for the master job''' - - nodes = ',\n'.join( - ['[file = "%s";]' % x for x in nodeJDLFiles]) - - jdl = { - 'Type': 'collection', - 'VirtualOrganisation': config['VirtualOrganisation'], - 'Nodes': '' - } - - # specification of the node jobs - node_cnt = offset - node_str = '' - jdl['Nodes'] = '{\n' - for f in nodeJDLFiles: - node_str += '[NodeName = "gsj_%d"; file="%s";],\n' % ( - node_cnt, f) - node_cnt += 1 - if node_str: - jdl['Nodes'] += node_str.strip()[:-1] - jdl['Nodes'] += '\n}' - - jdlText = Grid.expandjdl(jdl) - logger.debug('master job JDL: %s' % jdlText) - return jdlText - - # split to multiple glite bulk jobs - num_chunks = len(node_jdls) / max_node - if len(node_jdls) % max_node > 0: - num_chunks += 1 - - mt_data = [] - - for i in range(num_chunks): - data = {} - ibeg = i * max_node - iend = min(ibeg + max_node, len(node_jdls)) - data['offset'] = ibeg - data['jdls'] = node_jdls[ibeg:iend] - mt_data.append(data) - - myAlg = MyAlgorithm(self.credential_requirements, masterInputWorkspace=job.getInputWorkspace()) - myData = Data(collection=mt_data) - - runner = MTRunner(name='lcg_jsubmit', algorithm=myAlg, - data=myData, numThread=config['SubmissionThread']) - runner.start() - runner.join(timeout=-1) - - if len(runner.getDoneList()) < num_chunks: - # not all bulk jobs are successfully submitted. canceling the - # submitted jobs on WMS immediately - logger.error('some bulk jobs not successfully (re)submitted, canceling submitted jobs on WMS') - Grid.cancel_multiple(list(runner.getResults().values()), self.credential_requirements) - return None - else: - return runner.getResults() - - def __mt_job_prepare__(self, rjobs, subjobconfigs, masterjobconfig): - '''preparing jobs in multiple threads''' - - logger.info('preparing %d subjobs ... it may take a while' % - len(rjobs)) - - job = self.getJobObject() - - # prepare the master job (i.e. create shared inputsandbox, etc.) - master_input_sandbox = IBackend.master_prepare(self, masterjobconfig) - - # uploading the master job if it's over the WMS sandbox limitation - for f in master_input_sandbox: - master_input_idx = self.__check_and_prestage_inputfile__(f) - - if not master_input_idx: - logger.error('master input sandbox perparation failed: %s' % f) - return None - - # the algorithm for preparing a single bulk job - class MyAlgorithm(Algorithm): - - def __init__(self): - Algorithm.__init__(self) - - def process(self, sj_info): - my_sc = sj_info[0] - my_sj = sj_info[1] - - try: - logger.debug("preparing job %s" % my_sj.getFQID('.')) - jdlpath = my_sj.backend.preparejob( - my_sc, master_input_sandbox) - - if (not jdlpath) or (not os.path.exists(jdlpath)): - raise GangaException( - 'job %s not properly prepared' % my_sj.getFQID('.')) - - self.__appendResult__(my_sj.id, jdlpath) - return True - except Exception as x: - log_user_exception() - return False - - mt_data = [] - for sc, sj in zip(subjobconfigs, rjobs): - mt_data.append([sc, sj]) - - myAlg = MyAlgorithm() - myData = Data(collection=mt_data) - - runner = MTRunner( - name='lcg_jprepare', algorithm=myAlg, data=myData, numThread=10) - runner.start() - runner.join(-1) - - if len(runner.getDoneList()) < len(mt_data): - return None - else: - # the result should be sorted - results = runner.getResults() - sc_ids = sorted(results.keys()) - - node_jdls = [] - for id in sc_ids: - node_jdls.append(results[id]) - return node_jdls - - @require_credential - def master_bulk_submit(self, rjobs, subjobconfigs, masterjobconfig): - '''GLITE bulk submission''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - assert(implies(rjobs, len(subjobconfigs) == len(rjobs))) - - # prepare the subjobs, jdl repository before bulk submission - node_jdls = self.__mt_job_prepare__( - rjobs, subjobconfigs, masterjobconfig) - - if not node_jdls: - logger.error('Some jobs not successfully prepared') - return False - - profiler.checkAndStart('job preparation elapsed time') - - if config['MatchBeforeSubmit']: - matches = Grid.list_match(node_jdls[-1], self.credential_requirements, ce=self.CE) - if not matches: - self.__print_no_resource_error__(node_jdls[-1]) - return False - - profiler.checkAndStart('job list-match elapsed time') - - # set all subjobs to submitting status - for sj in rjobs: - sj.updateStatus('submitting') - - profiler.checkAndStart( - 'job state transition (submitting) elapsed time') - - max_node = config['GliteBulkJobSize'] - results = self.__mt_bulk_submit__(node_jdls, max_node=max_node) - - profiler.checkAndStart('job submission elapsed time') - - status = False - if results: - offsets = sorted(results.keys()) - - self.id = [] - self.status = {} - for ibeg in offsets: - mid = results[ibeg] - self.id.append(mid) - self.status[mid] = '' - iend = min(ibeg + max_node, len(node_jdls)) - for i in range(ibeg, iend): - sj = rjobs[i] - sj.backend.parent_id = mid - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - - status = True - - return status - - @require_credential - def master_bulk_resubmit(self, rjobs): - '''GLITE bulk resubmission''' - - from GangaCore.Utility.logging import log_user_exception - - job = self.getJobObject() - - # compose master JDL for collection job - node_jdls = [] - for sj in rjobs: - jdlpath = os.path.join(sj.inputdir, '__jdlfile__') - node_jdls.append(jdlpath) - - if config['MatchBeforeSubmit']: - matches = Grid.list_match(node_jdls[-1], self.credential_requirements, ce=self.CE) - if not matches: - self.__print_no_resource_error__(node_jdls[-1]) - return False - - max_node = config['GliteBulkJobSize'] - - results = self.__mt_bulk_submit__(node_jdls, max_node=max_node) - - status = False - if results: - offsets = sorted(results.keys()) - - self.__refresh_jobinfo__(job) - self.id = [] - self.status = {} - for ibeg in offsets: - mid = results[ibeg] - self.id.append(mid) - self.status[mid] = '' - iend = min(ibeg + max_node, len(node_jdls)) - for i in range(ibeg, iend): - sj = rjobs[i] - sj.backend.id = None - sj.backend.parent_id = mid - self.__refresh_jobinfo__(sj) - sj.updateStatus('submitting') - - # set all subjobs to submitted status - # NOTE: this is just a workaround to avoid the unexpected transition - # that turns the master job's status from 'submitted' to 'submitting'. - # As this transition should be allowed to simulate a lock mechanism in Ganga 4, the workaround - # is to set all subjobs' status to 'submitted' so that the transition can be avoided. - # A more clear solution should be implemented with the lock - # mechanism introduced in Ganga 5. - for sj in rjobs: - sj.updateStatus('submitted') - sj.info.submit_counter += 1 - - status = True - - return status - - @require_credential - def master_bulk_kill(self): - '''GLITE bulk resubmission''' - - job = self.getJobObject() - - # killing the individually re-submitted subjobs - logger.debug('cancelling individually resubmitted subjobs.') - - # 1. collect job ids - ids = [] - for sj in job.subjobs: - if sj.backend.flag == 1 and sj.status in ['submitted', 'running']: - ids.append(sj.backend.id) - - # 2. cancel the collected jobs - ck = Grid.cancel_multiple(ids, self.credential_requirements) - if not ck: - logger.warning('Job cancellation failed') - return False - else: - for sj in job.subjobs: - if sj.backend.flag == 1 and sj.status in ['submitted', 'running']: - sj.updateStatus('killed') - - # killing the master job - logger.debug('cancelling the master job.') - - # avoid killing master jobs in the final state - final_states = ['Aborted', 'Cancelled', 'Cleared', - 'Done (Success)', 'Done (Failed)', 'Done (Exit Code !=0)', 'Done(Success)', 'Done(Failed)', 'Done(Exit Code !=0)'] - myids = [] - if isStringLike(self.id): - if job.backend.status not in final_states: - myids.append(self.id) - else: - for myid in self.id: - try: - if job.backend.status[myid] not in final_states: - myids.append(myid) - except KeyError: - pass - - ck = Grid.native_master_cancel(myids, self.credential_requirements) - - if not ck: - logger.warning('Job cancellation failed: %s' % self.id) - return False - else: - for sj in job.subjobs: - if sj.backend.flag != 1 and sj.status in ['submitted', 'running']: - sj.updateStatus('killed') - return True - - @require_credential - def loginfo(self, verbosity=1): - """Get the job's logging info""" - - job = self.getJobObject() - - logger.debug('Getting logging info of job %s' % job.getFQID('.')) - - if not self.id: - logger.warning('Job %s is not running.' % job.getFQID('.')) - return None - - if isStringLike(self.id): - my_ids = [self.id] - else: - my_ids = self.id - - # successful logging info fetching returns a file path to the - # information - loginfo_output = Grid.get_loginfo(my_ids, job.outputdir, self.credential_requirements, verbosity) - - if loginfo_output: - - # returns the name of the file where the logging info is saved - return loginfo_output - else: - logger.debug('Getting logging info of job %s failed.' % - job.getFQID('.')) - return None - - @require_credential - def match(self): - '''Match the job against available grid resources''' - - # - grabe the existing __jdlfile__ for failed/completed jobs - # - simulate the job preparation procedure (for jobs never been submitted) - # - subjobs from job splitter are not created (as its not essential for match-making) - # - create a temporary JDL file for match making - # - call job list match - # - clean up the job's inputdir - - job = self.getJobObject() - - # check job status - if job.status not in ['new', 'submitted', 'failed', 'completed']: - msg = 'only jobs in \'new\', \'failed\', \'submitted\' or \'completed\' state can do match' - logger.warning(msg) - return - - doPrepareEmulation = False - - matches = [] - - # catch the files that are already in inputdir - existing_files = os.listdir(job.inputdir) - - app = job.application - - # select the runtime handler - from GangaCore.GPIDev.Adapters.ApplicationRuntimeHandlers import allHandlers - try: - rtHandler = allHandlers.get(app._name, 'LCG')() - except KeyError: - msg = 'runtime handler not found for application=%s and backend=%s' % ( - app._name, 'LCG') - logger.warning(msg) - return - - try: - logger.info('matching job %d' % job.id) - - jdlpath = '' - - # try to pick up the created jdlfile in a failed job - if job.status in ['submitted', 'failed', 'completed']: - - logger.debug('picking up existing JDL') - - # looking for existing jdl file - # this is a subjob, take the __jdlfile__ in the job's dir - if job.master: - jdlpath = os.path.join(job.inputdir, '__jdlfile__') - else: - if len(job.subjobs) > 0: # there are subjobs - jdlpath = os.path.join( - job.subjobs[0].inputdir, '__jdlfile__') - else: - jdlpath = os.path.join(job.inputdir, '__jdlfile__') - - if not os.path.exists(jdlpath): - jdlpath = '' - - # simulate the job preparation procedure - if not jdlpath: - - logger.debug( - 'emulating the job preparation procedure to create JDL') - - doPrepareEmulation = True - - # FIXME: obsoleted "modified" flag - appmasterconfig = app.master_configure()[1] - - # here we don't do job splitting - presuming the JDL for - # non-splitted job is the same as the splitted jobs - rjobs = [job] - - # configure the application of each subjob - # FIXME: obsoleted "modified" flag - appsubconfig = [ - j.application.configure(appmasterconfig)[1] for j in rjobs] - - # prepare the master job with the runtime handler - jobmasterconfig = rtHandler.master_prepare( - app, appmasterconfig) - - # prepare the subjobs with the runtime handler - jobsubconfig = [rtHandler.prepare(j.application, s, appmasterconfig, jobmasterconfig) for ( - j, s) in zip(rjobs, appsubconfig)] - - # prepare masterjob's inputsandbox - master_input_sandbox = self.master_prepare(jobmasterconfig) - - # prepare JDL - jdlpath = self.preparejob( - jobsubconfig[0], master_input_sandbox) - - logger.debug('JDL used for match-making: %s' % jdlpath) - - matches = Grid.list_match(jdlpath, self.credential_requirements, ce=self.CE) - - except Exception as x: - logger.warning('job match failed: %s', str(x)) - - # clean up the job's inputdir - if doPrepareEmulation: - logger.debug('clean up job inputdir') - files = os.listdir(job.inputdir) - for f in files: - if f not in existing_files: - os.remove(os.path.join(job.inputdir, f)) - - return matches - - @require_credential - def submit(self, subjobconfig, master_job_sandbox): - '''Submit the job to the grid''' - - jdlpath = self.preparejob(subjobconfig, master_job_sandbox) - - if config['MatchBeforeSubmit']: - matches = Grid.list_match(jdlpath, self.credential_requirements, ce=self.CE) - if not matches: - self.__print_no_resource_error__(jdlpath) - return None - - self.id = Grid.submit(jdlpath, self.credential_requirements, ce=self.CE, perusable=self.perusable) - - self.parent_id = self.id - - return not self.id is None - - @require_credential - def resubmit(self): - '''Resubmit the job''' - job = self.getJobObject() - - jdlpath = job.getInputWorkspace().getPath("__jdlfile__") - - # fix for savannah 76289, update the excludedCEs in jdl file - self.updateExcudedCEsInJdl(jdlpath) - - if config['MatchBeforeSubmit']: - matches = Grid.list_match(jdlpath, self.credential_requirements, ce=self.CE) - if not matches: - self.__print_no_resource_error__(jdlpath) - return None - - self.id = Grid.submit(jdlpath, self.credential_requirements, ce=self.CE, perusable=self.perusable) - self.parent_id = self.id - - if self.id: - # refresh the lcg job information - self.__refresh_jobinfo__(job) - - return not self.id is None - - @require_credential - def kill(self): - '''Kill the job''' - - job = self.getJobObject() - - logger.info('Killing job %s' % job.getFQID('.')) - - if not self.id: - logger.warning('Job %s is not running.' % job.getFQID('.')) - return False - - return Grid.cancel(self.id, self.credential_requirements) - - def __jobWrapperTemplate__(self): - '''Create job wrapper''' - - script = """#!/usr/bin/env python -#----------------------------------------------------- -# This job wrapper script is automatically created by -# GANGA LCG backend handler. -# -# It controls: -# 1. unpack input sandbox -# 2. invoke application executable -# 3. invoke monitoring client -#----------------------------------------------------- -import os,os.path,shutil,tempfile -import sys,time,traceback -import re - -#bugfix #36178: subprocess.py crashes if python 2.5 is used -#try to import subprocess from local python installation before an -#import from PYTHON_DIR is attempted some time later -try: - import subprocess -except ImportError: - pass - -## Utility functions ## -def timeString(): - return time.strftime('%a %b %d %H:%M:%S %Y',time.gmtime(time.time())) - -def printInfo(s): - out.write(timeString() + ' [Info]' + ' ' + str(s) + os.linesep) - out.flush() - -def printError(s): - out.write(timeString() + ' [Error]' + ' ' + str(s) + os.linesep) - out.flush() - -def lcg_file_download(vo,guid,localFilePath,timeout=60,maxRetry=3): - cmd = 'lcg-cp -t %d --vo %s %s file://%s' % (timeout,vo,guid,localFilePath) - - printInfo('LFC_HOST set to %s' % os.environ['LFC_HOST']) - printInfo('lcg-cp timeout: %d' % timeout) - - i = 0 - rc = 0 - isDone = False - try_again = True - - while try_again: - i = i + 1 - try: - ps = os.popen(cmd) - status = ps.close() - - if not status: - isDone = True - printInfo('File %s download from iocache' % os.path.basename(localFilePath)) - else: - raise IOError("Download file %s from iocache failed with error code: %d, trial %d." % (os.path.basename(localFilePath), status, i)) - - except IOError as e: - isDone = False - printError(str(e)) - - if isDone: - try_again = False - elif i == maxRetry: - try_again = False - else: - try_again = True - - return isDone - -## system command executor with subprocess -def execSyscmdSubprocess(cmd, wdir=os.getcwd()): - - import os, subprocess - - global exitcode - - outfile = open('stdout','w') - errorfile = open('stderr','w') - - try: - child = subprocess.Popen(cmd, cwd=wdir, shell=True, stdout=outfile, stderr=errorfile) - - while 1: - exitcode = child.poll() - if exitcode is not None: - break - else: - outfile.flush() - errorfile.flush() - time.sleep(0.3) - finally: - pass - - outfile.flush() - errorfile.flush() - outfile.close() - errorfile.close() - - return True - -## system command executor with multi-thread -## stderr/stdout handler -def execSyscmdEnhanced(cmd, wdir=os.getcwd()): - - import os, subprocess, threading - - cwd = os.getcwd() - - isDone = False - - try: - ## change to the working directory - os.chdir(wdir) - - child = subprocess.Popen(cmd,1) - child.tochild.close() # don't need stdin - - class PipeThread(threading.Thread): - - def __init__(self,infile,outfile,stopcb): - self.outfile = outfile - self.infile = infile - self.stopcb = stopcb - self.finished = 0 - threading.Thread.__init__(self) - - def run(self): - stop = False - while not stop: - buf = self.infile.read(10000) - self.outfile.write(buf) - self.outfile.flush() - time.sleep(0.01) - stop = self.stopcb() - #FIXME: should we do here?: self.infile.read() - #FIXME: this is to make sure that all the output is read (if more than buffer size of output was produced) - self.finished = 1 - - def stopcb(poll=False): - global exitcode - if poll: - exitcode = child.poll() - return exitcode != -1 - - out_thread = PipeThread(child.fromchild, sys.stdout, stopcb) - err_thread = PipeThread(child.childerr, sys.stderr, stopcb) - - out_thread.start() - err_thread.start() - while not out_thread.finished and not err_thread.finished: - stopcb(True) - time.sleep(0.3) - - sys.stdout.flush() - sys.stderr.flush() - - isDone = True - - except(Exception,e): - isDone = False - - ## return to the original directory - os.chdir(cwd) - - return isDone - -############################################################################################ - -###INLINEMODULES### - -############################################################################################ - -## Main program ## - -outputsandbox = ###OUTPUTSANDBOX### -input_sandbox = ###INPUTSANDBOX### -wrapperlog = ###WRAPPERLOG### -appexec = ###APPLICATIONEXEC### -appargs = ###APPLICATIONARGS### -appenvs = ###APPLICATIONENVS### -timeout = ###TRANSFERTIMEOUT### - -exitcode=-1 - -import sys, stat, os, os.path, commands - -# Change to scratch directory if provided -scratchdir = '' -tmpdir = '' - -orig_wdir = os.getcwd() - -# prepare log file for job wrapper -out = open(os.path.join(orig_wdir, wrapperlog),'w') - -if os.getenv('EDG_WL_SCRATCH'): - scratchdir = os.getenv('EDG_WL_SCRATCH') -elif os.getenv('TMPDIR'): - scratchdir = os.getenv('TMPDIR') - -if scratchdir: - (status, tmpdir) = commands.getstatusoutput('mktemp -d %s/gangajob_XXXXXXXX' % (scratchdir)) - if status == 0: - os.chdir(tmpdir) - else: - ## if status != 0, tmpdir should contains error message so print it to stderr - printError('Error making ganga job scratch dir: %s' % tmpdir) - printInfo('Unable to create ganga job scratch dir in %s. Run directly in: %s' % ( scratchdir, os.getcwd() ) ) - - ## reset scratchdir and tmpdir to disable the usage of Ganga scratch dir - scratchdir = '' - tmpdir = '' - -wdir = os.getcwd() - -if scratchdir: - printInfo('Changed working directory to scratch directory %s' % tmpdir) - try: - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stdout'), os.path.join(wdir, 'stdout'))) - os.system("ln -s %s %s" % (os.path.join(orig_wdir, 'stderr'), os.path.join(wdir, 'stderr'))) - except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - printInfo('Linking stdout & stderr to original directory failed. Looking at stdout during job run may not be possible') - -os.environ['PATH'] = '.:'+os.environ['PATH'] - -vo = os.environ['GANGA_LCG_VO'] - -try: - printInfo('Job Wrapper start.') - -# download inputsandbox from remote cache - for f,guid in input_sandbox['remote'].iteritems(): - if not lcg_file_download(vo, guid, os.path.join(wdir,f), timeout=int(timeout)): - raise IOError('Download remote input %s:%s failed.' % (guid,f) ) - else: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(f) - else: - shutil.copy(f, os.path.join(os.getcwd(), os.path.basename(f))) - - printInfo('Download inputsandbox from iocache passed.') - -# unpack inputsandbox from wdir - for f in input_sandbox['local']: - if mimetypes.guess_type(f)[1] in ['gzip', 'bzip2']: - getPackedInputSandbox(os.path.join(orig_wdir,f)) - - printInfo('Unpack inputsandbox passed.') - - #get input files - ###DOWNLOADINPUTFILES### - - printInfo('Loading Python modules ...') - - sys.path.insert(0,os.path.join(wdir,PYTHON_DIR)) - - # check the python library path - try: - printInfo(' ** PYTHON_DIR: %s' % os.environ['PYTHON_DIR']) - except KeyError: - pass - - try: - printInfo(' ** PYTHONPATH: %s' % os.environ['PYTHONPATH']) - except KeyError: - pass - - for lib_path in sys.path: - printInfo(' ** sys.path: %s' % lib_path) - -# execute application - - ## convern appenvs into environment setup script to be 'sourced' before executing the user executable - - printInfo('Prepare environment variables for application executable') - - env_setup_script = os.path.join(os.getcwd(), '__ganga_lcg_env__.sh') - - f = open( env_setup_script, 'w') - f.write('#!/bin/sh' + os.linesep ) - f.write('##user application environmet setup script generated by Ganga job wrapper' + os.linesep) - for k,v in appenvs.items(): - - str_env = 'export %s="%s"' % (k, v) - - printInfo(' ** ' + str_env) - - f.write(str_env + os.linesep) - f.close() - - try: #try to make shipped executable executable - os.chmod('%s/%s'% (wdir,appexec),stat.S_IXUSR|stat.S_IRUSR|stat.S_IWUSR) - except: - pass - - status = False - try: - # use subprocess to run the user's application if the module is available on the worker node - import subprocess - printInfo('Load application executable with subprocess module') - status = execSyscmdSubprocess('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - printInfo('source %s; %s %s' % (env_setup_script, appexec, appargs)) - printInfo(wdir) - except ImportError as err: - # otherwise, use separate threads to control process IO pipes - printInfo('Load application executable with separate threads') - status = execSyscmdEnhanced('source %s; %s %s' % (env_setup_script, appexec, appargs), wdir) - - os.system("cp %s/stdout stdout.1" % orig_wdir) - os.system("cp %s/stderr stderr.1" % orig_wdir) - - printInfo('GZipping stdout and stderr...') - - os.system("gzip stdout.1 stderr.1") - - # move them to the original wdir so they can be picked up - os.system("mv stdout.1.gz %s/stdout.gz" % orig_wdir) - os.system("mv stderr.1.gz %s/stderr.gz" % orig_wdir) - - if not status: - raise OSError('Application execution failed.') - printInfo('Application execution passed with exit code %d.' % exitcode) - - ###OUTPUTUPLOADSPOSTPROCESSING### - - createPackedOutputSandbox(outputsandbox,None,orig_wdir) - -# pack outputsandbox -# printInfo('== check output ==') -# for line in os.popen('pwd; ls -l').readlines(): -# printInfo(line) - - printInfo('Pack outputsandbox passed.') - - # Clean up after us - All log files and packed outputsandbox should be in "wdir" - if scratchdir: - os.chdir(orig_wdir) - os.system("rm %s -rf" % wdir) -except Exception as e: - printError(sys.exc_info()[0]) - printError(sys.exc_info()[1]) - str_traceback = traceback.format_tb(sys.exc_info()[2]) - for str_tb in str_traceback: - printError(str_tb) - -printInfo('Job Wrapper stop.') - -out.close() - -# always return exit code 0 so the in the case of application failure -# one can always get stdout and stderr back to the UI for debug. -sys.exit(0) -""" - return script - - def peek(self, filename='', command=''): - """ - Allow peeking of this job's stdout on the WN - (i.e. while job is in 'running' state) - - Return value: None - """ - if filename and filename != 'stdout': - logger.warning( - 'Arbitrary file peeking not supported for a running LCG job') - else: - self.inspect(command) - - def inspect(self, cmd=None): - """ - Allow viewing of this job's stdout on the WN - (i.e. while job is in 'running' state) - - Return value: None - """ - - job = self.getJobObject() - - # Use GLITE's job perusal feature if enabled - if self.status == "Running" and self.perusable: - fname = os.path.join(job.outputdir, '_peek.dat') - - sh = getShell(self.credential_requirements) - re, output, m = sh.cmd("glite-wms-job-perusal --get --all -f stdout %s" % self.id, fname) - job.viewFile(fname, cmd) - - return None - - def preparejob(self, jobconfig, master_job_sandbox): - '''Prepare the JDL''' - - script = self.__jobWrapperTemplate__() - - job = self.getJobObject() - inpw = job.getInputWorkspace() - - wrapperlog = '__jobscript__.log' - - import GangaCore.Core.Sandbox as Sandbox - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getOutputSandboxPatterns - - # FIXME: check what happens if 'stdout','stderr' are specified here - script = script.replace('###OUTPUTSANDBOX###', repr( - jobconfig.outputbox + getOutputSandboxPatterns(job))) - - script = script.replace( - '###APPLICATION_NAME###', getName(job.application)) - script = script.replace( - '###APPLICATIONEXEC###', repr(jobconfig.getExeString())) - script = script.replace( - '###APPLICATIONARGS###', repr(jobconfig.getArguments())) - - from GangaCore.GPIDev.Lib.File.OutputFileManager import getWNCodeForOutputPostprocessing, getWNCodeForDownloadingInputFiles - - script = script.replace( - '###OUTPUTUPLOADSPOSTPROCESSING###', getWNCodeForOutputPostprocessing(job, ' ')) - - script = script.replace( - '###DOWNLOADINPUTFILES###', getWNCodeForDownloadingInputFiles(job, ' ')) - - if jobconfig.env: - script = script.replace( - '###APPLICATIONENVS###', repr(jobconfig.env)) - else: - script = script.replace('###APPLICATIONENVS###', repr({})) - - script = script.replace('###WRAPPERLOG###', repr(wrapperlog)) - import inspect - script = script.replace( - '###INLINEMODULES###', inspect.getsource(Sandbox.WNSandbox)) - - mon = job.getMonitoringService() - - self.monInfo = None - - # set the monitoring file by default to the stdout - if isinstance(self.monInfo, dict): - self.monInfo['remotefile'] = 'stdout' - - # try to print out the monitoring service information in debug mode - try: - logger.debug('job info of monitoring service: %s' % - str(self.monInfo)) - except: - pass - - -# prepare input/output sandboxes - import GangaCore.Utility.files - from GangaCore.GPIDev.Lib.File import File - from GangaCore.Core.Sandbox.WNSandbox import PYTHON_DIR - import inspect - - fileutils = File(inspect.getsourcefile(GangaCore.Utility.files), subdir=PYTHON_DIR) - packed_files = jobconfig.getSandboxFiles() + [fileutils] - sandbox_files = job.createPackedInputSandbox(packed_files) - - # sandbox of child jobs should include master's sandbox - sandbox_files.extend(master_job_sandbox) - - # check the input file size and pre-upload larger inputs to the iocache - inputs = {'remote': {}, 'local': []} - lfc_host = '' - - ick = True - - max_prestaged_fsize = 0 - for f in sandbox_files: - - idx = self.__check_and_prestage_inputfile__(f) - - if not idx: - logger.error('input sandbox preparation failed: %s' % f) - ick = False - break - else: - if idx['lfc_host']: - lfc_host = idx['lfc_host'] - - if idx['remote']: - abspath = os.path.abspath(f) - fsize = os.path.getsize(abspath) - - if fsize > max_prestaged_fsize: - max_prestaged_fsize = fsize - - inputs['remote'].update(idx['remote']) - - if idx['local']: - inputs['local'] += idx['local'] - - if not ick: - logger.error('stop job submission') - return None - else: - logger.debug('LFC: %s, input file indices: %s' % - (lfc_host, repr(inputs))) - - # determin the lcg-cp timeout according to the max_prestaged_fsize - # - using the assumption of 1 MB/sec. - transfer_timeout = config['SandboxTransferTimeout'] - predict_timeout = int(math.ceil(max_prestaged_fsize / 1000000.0)) - - if predict_timeout > transfer_timeout: - transfer_timeout = predict_timeout - - if transfer_timeout < 60: - transfer_timeout = 60 - - script = script.replace( - '###TRANSFERTIMEOUT###', '%d' % transfer_timeout) - - # update the job wrapper with the inputsandbox list - script = script.replace('###INPUTSANDBOX###', repr( - {'remote': inputs['remote'], 'local': [os.path.basename(f) for f in inputs['local']]})) - - # write out the job wrapper and put job wrapper into job's inputsandbox - scriptPath = inpw.writefile( - FileBuffer('__jobscript_%s__' % job.getFQID('.'), script), executable=1) - input_sandbox = inputs['local'] + [scriptPath] - - # compose output sandbox to include by default the following files: - # - gzipped stdout (transferred only when the JobLogHandler is WMS) - # - gzipped stderr (transferred only when the JobLogHandler is WMS) - # - __jobscript__.log (job wrapper's log) - output_sandbox = [wrapperlog] - - if config['JobLogHandler'] == 'WMS': - output_sandbox += ['stdout.gz', 'stderr.gz'] - - if len(jobconfig.outputbox + getOutputSandboxPatterns(job)): - output_sandbox += [Sandbox.OUTPUT_TARBALL_NAME] - - # compose LCG JDL - jdl = { - 'VirtualOrganisation': config['VirtualOrganisation'], - 'Executable': os.path.basename(scriptPath), - 'Environment': {'GANGA_LCG_VO': config['VirtualOrganisation'], 'GANGA_LOG_HANDLER': config['JobLogHandler'], 'LFC_HOST': lfc_host}, - 'StdOutput': 'stdout', - 'StdError': 'stderr', - 'InputSandbox': input_sandbox, - 'OutputSandbox': output_sandbox - } - - if config['GLITE_WMS_WMPROXY_ENDPOINT'] != '': - jdl['Environment']['GLITE_WMS_WMPROXY_ENDPOINT'] = config[ - 'GLITE_WMS_WMPROXY_ENDPOINT'] - - # workaround of glite WMS bug: - # https://savannah.cern.ch/bugs/index.php?32345 - jdl['AllowZippedISB'] = 'false' - - if self.perusable: - logger.debug("Adding persual info to JDL") - # remove the ExpiryTime attribute as it's absolute timestamp that will cause the re-submitted job being - # ignored by the WMS. TODO: fix it in a better way. - # jdl['ExpiryTime'] = time.time() + config['JobExpiryTime'] - jdl['PerusalFileEnable'] = 'true' - jdl['PerusalTimeInterval'] = 120 - - if self.CE: - jdl['Requirements'] = ['other.GlueCEUniqueID=="%s"' % self.CE] - # send the CE name as an environmental variable of the job if CE is specified - # this is basically for monitoring purpose - jdl['Environment'].update({'GANGA_LCG_CE': self.CE}) - else: - jdl['Requirements'] = self.requirements.merge( - jobconfig.requirements).convert() - # Set DataRequirements directly from the input information in - # requirements.datarequirements - if self.requirements.datarequirements: - jdl['DataRequirements'] = self.requirements.datarequirements - elif jobconfig.inputdata: - # If we have no DataRequirements then fall back to the - # deprecated method of InputData - jdl['InputData'] = jobconfig.inputdata - # This must be set and will have a sensible default - jdl['DataAccessProtocol'] = self.requirements.dataaccessprotocol - - if self.jobtype.upper() in ['MPICH', 'NORMAL', 'INTERACTIVE']: - jdl['JobType'] = self.jobtype.upper() - if self.jobtype.upper() == 'MPICH': - jdl['Requirements'].append( - '(other.GlueCEInfoTotalCPUs >= NodeNumber)') - jdl['Requirements'].append( - 'Member("MPICH",other.GlueHostApplicationSoftwareRunTimeEnvironment)') - jdl['NodeNumber'] = self.requirements.nodenumber - else: - logger.warning('JobType "%s" not supported' % self.jobtype) - return - -# additional settings from the job -# if jobconfig.env: -# jdl['Environment'].update(jobconfig.env) - -# the argument of JDL should be the argument for the wrapper script -# application argument has been put into the wrapper script -# if jobconfig.args: jdl['Arguments'] = jobconfig.getArguments() - -# additional settings from the configuration - for name in ['ShallowRetryCount', 'RetryCount']: - if config[name] >= 0: - jdl[name] = config[name] - - for name in ['Rank', 'ReplicaCatalog', 'StorageIndex', 'MyProxyServer']: - if config[name]: - jdl[name] = config[name] - - jdlText = Grid.expandjdl(jdl) - logger.debug('subjob JDL: %s' % jdlText) - return inpw.writefile(FileBuffer('__jdlfile__', jdlText)) - - @staticmethod - def updateGangaJobStatus(job, status): - """map backend job status to Ganga job status""" - - if status == 'Running': - job.updateStatus('running') - - elif status == 'Done (Success)' or status == 'Done(Success)': - job.updateStatus('completed') - - elif status in ['Aborted', 'Cancelled', 'Done (Exit Code !=0)', 'Done(Exit Code !=0)']: - job.updateStatus('failed') - - elif status == 'Cleared': - if job.status in LCG._final_ganga_states: - # do nothing in this case as it's in the middle of the - # corresponding job downloading task - return - logger.warning( - 'The job %d has reached unexpected the Cleared state and Ganga cannot retrieve the output.', job.getFQID('.')) - job.updateStatus('failed') - - elif status in ['Submitted', 'Waiting', 'Scheduled', 'Ready', 'Done (Failed)', 'Done(Failed)']: - pass - - else: - logger.warning('Unexpected job status "%s"', status) - - @staticmethod - def master_updateMonitoringInformation(jobs): - """Main Monitoring loop""" - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - emulated_bulk_jobs = [] - native_bulk_jobs = [] - - for j in jobs: - - if len(j.subjobs) == 0: - emulated_bulk_jobs.append(j) - else: - native_bulk_jobs.append(j) - # put the individually submitted subjobs into the emulated_bulk_jobs list - # those jobs should be checked individually as a single job - for sj in j.subjobs: - if sj.backend.flag == 1 and sj.status in ['submitted', 'running']: - logger.debug( - 'job %s submitted individually. separate it in a different monitoring loop.' % sj.getFQID('.')) - emulated_bulk_jobs.append(sj) - - # invoke normal monitoring method for normal jobs - for j in emulated_bulk_jobs: - logger.debug('emulated bulk job to be monitored: %s' % j.getFQID('.')) - IBackend.master_updateMonitoringInformation(emulated_bulk_jobs) - - # invoke special monitoring method for glite bulk jobs - for j in native_bulk_jobs: - logger.debug('native bulk job to be monitored: %s' % j.getFQID('.')) - LCG.master_bulk_updateMonitoringInformation(native_bulk_jobs) - - # should went through all jobs to update overall master job status - for j in jobs: - if (len(j.subjobs) > 0) and j.backend.id: - logger.debug('updating overall master job status: %s' % j.getFQID('.')) - j.updateMasterJobStatus() - - profiler.check('==> master_updateMonitoringInformation() elapsed time') - - @staticmethod - def updateMonitoringInformation(jobs): - """Monitoring loop for normal jobs""" - - jobdict = dict([(job.backend.id, job) for job in jobs if job.backend.id]) - - # Group jobs by the backend's credential requirements - cred_to_backend_id_list = defaultdict(list) - for job in jobs: - cred_to_backend_id_list[job.backend.credential_requirements].append(job.backend.id) - - # Batch the status requests by credential requirement - status_info = [] - missing_glite_jids = [] - for cred_req, job_ids in cred_to_backend_id_list.items(): - # If the credential is not valid or doesn't exist then skip it - cred = credential_store.get(cred_req) - if not cred or not cred.is_valid(): - needed_credentials.add(cred_req) - continue - status, missing = Grid.status(job_ids, cred_req) - status_info += status - missing_glite_jids += missing - - __fail_missing_jobs__(missing_glite_jids, jobdict) - - cnt_new_download_task = 0 - for info in status_info: - - create_download_task = False - - job = jobdict[info['id']] - - if job.backend.actualCE != info['destination']: - logger.info('job %s has been assigned to %s', job.getFQID('.'), info['destination']) - job.backend.actualCE = info['destination'] - - if job.backend.status != info['status']: - logger.info('job %s has changed status to %s', job.getFQID('.'), info['status']) - job.backend.status = info['status'] - job.backend.reason = info['reason'] - job.backend.exitcode_lcg = info['exit'] - if info['status'] in ['Done (Success)', 'Done(Success)']: - create_download_task = True - else: - LCG.updateGangaJobStatus(job, info['status']) - elif (info['status'] in ['Done (Success)', 'Done(Success)']) and (job.status not in LCG._final_ganga_states): - create_download_task = True - - if create_download_task: - # update to 'running' before changing to 'completing' - if job.status == 'submitted': - job.updateStatus('running') - - downloader = get_lcg_output_downloader() - downloader.addTask(job) - - cnt_new_download_task += 1 - - if cnt_new_download_task > 0: - downloader = get_lcg_output_downloader() - logger.debug('%d new downloading tasks; %d alive downloading agents' % ( - cnt_new_download_task, downloader.countAliveAgent())) - - @staticmethod - def master_bulk_updateMonitoringInformation(jobs): - '''Monitoring loop for glite bulk jobs''' - - # split up the master job into several LCG bulk job ids - # - checking subjob status and excluding the master jobs with all subjobs in a final state) - # - excluding the resubmitted jobs - # - checking master jobs with the status not being properly updated while all subjobs are in final states - jobdict = {} - #mjob_status_updatelist = [] - for j in jobs: - #cnt_sj_final = 0 - if j.backend.id: - - # collect master jobs need to be updated by polling the status - # from gLite WMS - for sj in j.subjobs: - # if (sj.status in ['completed','failed']): - # cnt_sj_final += 1 - - if (sj.status not in LCG._final_ganga_states) and \ - (sj.backend.parent_id in j.backend.id) and \ - (sj.backend.parent_id not in jobdict): - jobdict[sj.backend.parent_id] = j - - # if j not in mjob_status_updatelist: - # mjob_status_updatelist.append(j) - - # collect master jobs with status not being updated even when all subjobs are in final states - # if (j.status not in ['completed','failed']) and (cnt_sj_final == len(j.subjobs)): - # if j not in mjob_status_updatelist: - # mjob_status_updatelist.append(j) - - job = None - subjobdict = {} - - # Group jobs by the backend's credential requirements - cred_to_job_list = defaultdict(list) - for job in jobs: - cred_to_job_list[job.backend.credential_requirements].append(job) - - # Batch the status requests by credential requirement - status_info = [] - missing_glite_jids = [] - for cred_req, job_list in cred_to_job_list.items(): - # If the credential is not valid or doesn't exist then skip it - cred = credential_store.get(cred_req) - if not cred or not cred.is_valid(): - needed_credentials.add(cred_req) - continue - # Create a ``Grid`` for each credential requirement and request the relevant jobs through it - status, missing = Grid.status(job_list, cred_req, is_collection=True) - status_info += status - missing_glite_jids += missing - - __fail_missing_jobs__(missing_glite_jids, jobdict) - - # update GANGA job repository according to the available job - # information - cnt_new_download_task = 0 - for info in status_info: - if not info['is_node']: # this is the info for the master job - - cachedParentId = info['id'] - master_jstatus = info['status'] - - job = jobdict[cachedParentId] - - # update master job's status if needed - if cachedParentId not in job.backend.status: - # if this happens, something must be seriously wrong - logger.warning( - 'job id not found in the submitted master job: %s' % cachedParentId) - elif master_jstatus != job.backend.status[cachedParentId]: - job.backend.status[cachedParentId] = master_jstatus - - subjobdict = dict([[str(subjob.id), subjob] - for subjob in job.subjobs]) - - else: # this is the info for the node job - - # subjob's node name is not available - if not info['name']: - continue - - subjob = subjobdict[info['name'].replace('gsj_', '')] - - create_download_task = False - - # skip updating the resubmitted jobs by comparing: - # - the subjob's parent job id - # - the parent id returned from status - if cachedParentId != subjob.backend.parent_id: - logger.debug( - 'job %s has been resubmitted, ignore the status update.' % subjob.getFQID('.')) - continue - - # skip updating the cleared jobs - if info['status'] == 'Cleared' and subjob.status in LCG._final_ganga_states: - continue - - # skip updating the jobs that are individually resubmitted - # after the original bulk submission - if subjob.backend.flag == 1: - logger.debug( - 'job %s was resubmitted individually. skip updating it from the monitoring of its master job.' % subjob.getFQID('.')) - # skip updating the jobs that are individually killed - elif subjob.status == 'killed': - logger.debug( - 'job %s was killed individually. skip updating it from the monitoring of its master job.' % subjob.getFQID('.')) - else: - if not subjob.backend.id: - # send out the subjob's id which is becoming available at the first time. - # (a temporary workaround for fixing the monitoring issue of getting the job id) - # Note: As the way of sending job id is implemented as an generic hook triggered - # by the transition from 'submitting' to 'submitted'. For gLite bulk submission - # the id is not available immediately right after the submission, therefore a late - # job id transmission is needed. - # This issue linked to the temporary workaround of setting subjob's status to 'submitted' - # in the master_bulk_(re)submit() methods. In Ganga 5, a clear implementation should be - # applied with the new lock mechanism. - logger.debug( - 'job %s obtained backend id, transmit it to monitoring service.' % subjob.getFQID('.')) - subjob.backend.id = info['id'] - subjob.getMonitoringService().submit() - - # in the temporary workaround, there is no need to set job status to 'submitted' - # subjob.updateStatus('submitted') - - if subjob.backend.actualCE != info['destination']: - logger.info( - 'job %s has been assigned to %s', subjob.getFQID('.'), info['destination']) - subjob.backend.actualCE = info['destination'] - - if subjob.backend.status != info['status']: - logger.info( - 'job %s has changed status to %s', subjob.getFQID('.'), info['status']) - subjob.backend.status = info['status'] - subjob.backend.reason = info['reason'] - subjob.backend.exitcode_lcg = info['exit'] - if info['status'] == 'Done (Success)' or info['status'] == 'Done(Success)': - create_download_task = True - else: - LCG.updateGangaJobStatus(subjob, info['status']) - elif (info['status'] == 'Done (Success)' or info['status'] == 'Done(Success)') and (subjob.status not in LCG._final_ganga_states): - create_download_task = True - - if create_download_task: - # update to 'running' before changing to 'completing' - if subjob.status == 'submitted': - subjob.updateStatus('running') - downloader = get_lcg_output_downloader() - downloader.addTask(subjob) - - cnt_new_download_task += 1 - - if cnt_new_download_task > 0: - downloader = get_lcg_output_downloader() - logger.debug('%d new downloading tasks; %d alive downloading agents' % ( - cnt_new_download_task, downloader.countAliveAgent())) - - # update master job status - # if updateMasterStatus: - # for mj in mjob_status_updatelist: - # logger.debug('updating overall master job status: %s' % mj.getFQID('.')) - # mj.updateMasterJobStatus() - - @require_credential - def get_requirement_matches(self, jdl_file=None, spec_ce=''): - """Return any matches using the requirements or given jdlfile""" - - jdl_file2 = jdl_file - if not jdl_file: - # create a dummy jdl file from the given requirements - import tempfile - jdl = {'VirtualOrganisation': config['VirtualOrganisation'], - 'Executable': os.path.basename(__file__), - 'InputSandbox': [__file__], - 'Requirements': self.requirements.convert()} - - jdl_file_txt = Grid.expandjdl(jdl) - - jdl_file2 = tempfile.mktemp('.jdl') - with open(jdl_file2, 'w') as file_: - file_.write(jdl_file_txt) - - matches = Grid.list_match(jdl_file2, self.credential_requirements, ce=spec_ce) - - # clean up - if not jdl_file: - os.remove(jdl_file2) - - return matches - - def get_wms_list(self): - """Grab a list of WMSs""" - out = Grid.wrap_lcg_infosites("WMS") - - if out == "": - logger.warning("get_wms_list returned no results!") - return [] - - # parse the output - # assume: WMSNAME - wms_list = [] - for wms in out.split("\n"): - if len(wms) > 0: - wms_list.append(wms) - - return wms_list - - def get_ce_list(self): - """Grab a list of CEs""" - out = Grid.wrap_lcg_infosites("CE") - - if out == "": - logger.warning("get_ce_list returned no results!") - return {} - - # parse the output - # assume: CPU Free Total Jobs Running Waiting ComputingElement - # 360 4 289 289 0 - # abaddon.hec.lancs.ac.uk:8443/cream-lsf-hex - - ce_list = {} - for ce in out.split("\n"): - if len(ce) > 0 and ce.find("Running") == -1: - toks = ce.split() - if len(toks) != 6: - continue - ce_list[toks[5]] = {'CPU': int(toks[0]), 'Free': int(toks[1]), 'Total Jobs': int( - toks[2]), 'Running': int(toks[3]), 'Waiting': int(toks[4])} - - return ce_list - - def get_se_list(self): - """Grab a list of SEs""" - out = Grid.wrap_lcg_infosites("SE") - - if out == "": - logger.warning("get_se_list returned no results!") - return {} - - # parse the output - # assume: Avail Space(kB) Used Space(kB) Type SE - # 2713301090 2082969419 gftp AGLT2_TEST_classicSE - - se_list = {} - for se in out.split("\n"): - if len(se) > 0 and se.find("Used Space") == -1: - toks = se.split() - - if len(toks) != 4: - continue - - if toks[3] not in se_list: - se_list[toks[3]] = [] - - if toks[0] == "n.a": - avail = -1 - else: - avail = int(toks[0]) - - if toks[1] == "n.a": - used = -1 - else: - used = int(toks[1]) - - se_list[toks[3]].append( - {'Avail': avail, 'Used': used, 'Type': toks[2]}) - - return se_list - - def updateExcudedCEsInJdl(self, jdlpath): - - import re - configexcludedCEs = config['ExcludedCEs'] - - with open(jdlpath, 'r') as jdlFileRead: - jdlText = jdlFileRead.read() - hasRequirements = jdlText.find("Requirements =") > -1 - - if hasRequirements == False: - - if configexcludedCEs != '': - - linesToAppend = [] - - with open(jdlpath, 'a') as jdlFileAppend: - linesToAppend.append("Requirements = \n") - excludedCEs = re.split(r'\s+', configexcludedCEs) - index = 1 - - for excludedCE in excludedCEs: - # if not the last one - if index != len(excludedCEs): - linesToAppend.append( - ' (!RegExp("%s",other.GlueCEUniqueID)) &&\n' % excludedCE) - else: - linesToAppend.append( - ' (!RegExp("%s",other.GlueCEUniqueID));\n' % excludedCE) - - index += 1 - - jdlFileAppend.writelines(linesToAppend) - else: - - with open(jdlpath, 'r') as jdlFileRead: - originalLines = jdlFileRead.readlines() - - index = 0 - thereAreExcudedCEs = False - for line in originalLines: - - # find the index of the first line with excludedCE - if line.find('!RegExp') > -1 and line.find('other.GlueCEUniqueID') > -1: - thereAreExcudedCEs = True - break - - index += 1 - - if (thereAreExcudedCEs == False): - index = 2 - for line in originalLines: - - if line == "Requirements = \n": - break - - index += 1 - - newLines = [] - removedLines = [] - - for line in originalLines: - - if line.find('!RegExp') > -1 and line.find('other.GlueCEUniqueID') > -1: - removedLines.append(line) - continue - - newLines.append(line) - - endOfRequirements = False - - for line in removedLines: - if line.endswith(';\n'): - endOfRequirements = True - break - - if configexcludedCEs != '': - excludedCEs = re.split(r'\s+', configexcludedCEs) - innerIndex = 1 - for excludedCE in excludedCEs: - if innerIndex != len(excludedCEs): - newLines.insert( - index + innerIndex - 1, ' (!RegExp("%s",other.GlueCEUniqueID)) &&\n' % excludedCE) - else: - if endOfRequirements and thereAreExcudedCEs: - newLines.insert( - index + innerIndex - 1, ' (!RegExp("%s",other.GlueCEUniqueID));\n' % excludedCE) - else: - newLines.insert( - index + innerIndex - 1, ' (!RegExp("%s",other.GlueCEUniqueID)) &&\n' % excludedCE) - - innerIndex += 1 - - i = 0 - for line in newLines: - if line == 'Requirements = \n': - break - i += 1 - - if newLines[-1] == 'Requirements = \n': - newLines.remove('Requirements = \n') - elif (not newLines[i + 1].startswith(' ')): - newLines.remove('Requirements = \n') - else: - i += 1 - while i < len(newLines): - if newLines[i].startswith(' '): - i += 1 - else: - break - - if newLines[i - 1].endswith(' &&\n'): - newLines[i - 1] = newLines[i - 1][:-4] + ';\n' - - with open(jdlpath, 'w') as jdlFileWrite: - jdlFileWrite.writelines(newLines) - - -class LCGJobConfig(StandardJobConfig): - - '''Extends the standard Job Configuration with additional attributes''' - - def __init__(self, exe=None, inputbox=[], args=[], outputbox=[], env={}, inputdata=[], requirements=None): - - self.inputdata = inputdata - self.requirements = requirements - - StandardJobConfig.__init__(self, exe, inputbox, args, outputbox) - - def getArguments(self): - - return ' '.join(self.getArgStrings()) - - def getExecutable(self): - - exe = self.getExeString() - if os.path.dirname(exe) == '.': - return os.path.basename(exe) - else: - return exe - -# $Log: not supported by cvs2svn $ -# Revision 1.38 2009/07/15 08:23:29 hclee -# add resource match-making as an option before doing real job submission to WMS. -# - this option can be activated by setting config.LCG.MatchBeforeSubmit = True -# -# Revision 1.37 2009/06/24 19:12:48 hclee -# add support for two JDL attributes: DataRequirements & DataAccessProtocol -# -# Revision 1.36 2009/06/09 15:41:44 hclee -# bugfix: https://savannah.cern.ch/bugs/?50589 -# -# Revision 1.35 2009/06/05 12:23:15 hclee -# bugfix for https://savannah.cern.ch/bugs/?51298 -# -# Revision 1.34 2009/03/27 10:14:33 hclee -# fix race condition issue: https://savannah.cern.ch/bugs/?48435 -# -# Revision 1.33 2009/03/12 12:26:16 hclee -# merging bug fixes from branch Ganga-LCG-old-MTRunner to trunk -# -# Revision 1.32 2009/03/12 12:17:31 hclee -# adopting GangaThread in GangaCore.Core -# -# Revision 1.31 2009/02/25 08:39:20 hclee -# introduce and adopt the basic class for Ganga multi-thread handler -# -# Revision 1.30.2.2 2009/03/03 13:23:43 hclee -# failing Ganga jobs if the corresponding glite jobs have been removed from WMS -# -# Revision 1.30.2.1 2009/03/03 12:42:54 hclee -# set Ganga job to fail if the corresponding glite jobs have been removed from WMS -# -# Revision 1.30 2009/02/16 14:10:05 hclee -# change basedir of DQ2SandboxCache from users to userxx where xx represents the last two digits of year -# -# Revision 1.29 2009/02/05 19:35:36 hclee -# GridSandboxCache enhancement: -# - put cached file information in job repository (instead of __iocache__ file) -# - add and expose method: list_cached_files() -# -# Revision 1.28 2009/02/05 09:00:40 hclee -# add AllowZippedISB=false to glite JDL -# - workaround for WMS bug: https://savannah.cern.ch/bugs/index.php?32345 -# -# Revision 1.27 2009/02/04 17:01:02 hclee -# enhancement for bug: https://savannah.cern.ch/bugs/?43502 -# -# Revision 1.26 2009/01/26 16:11:33 hclee -# modification for handling stdout/err in different ways -# - add config.LCG.JobLogHandler, default value is 'WMS', meaning that stdout/err -# will be shipped back to user via WMS's output sandbox mechanism -# - set config.LCG.JobLogHandler to other values will remove stdout/err from WMS's output sandbox -# and the application can pick it up accordingly to handle stdout/err in different ways -# (e.g. store it in a DQ2 dataset) -# -# Revision 1.25 2009/01/16 09:15:11 hclee -# fix for glite perusable function -# -# Revision 1.24 2009/01/15 13:16:31 hclee -# killing partially submitted bulk jobs on WMS immediately if the whole job submission is not done properly -# -# Revision 1.23 2008/12/11 11:14:33 hclee -# clean up logging messages -# -# Revision 1.22 2008/12/11 09:15:31 hclee -# allow to set the max. node number of a glite bulk job -# -# Revision 1.21 2008/12/08 08:44:52 hclee -# make the number of output downloader threads configurable -# -# Revision 1.20 2008/11/25 15:26:07 hclee -# introducing "SubmissionThread" configuration variable for setting the concurrent -# number of job submission threads -# -# Revision 1.19 2008/11/13 11:34:23 hclee -# update master job's status at the end of the master_updateMonitorInformation() in any case -# -# Revision 1.18 2008/11/07 13:02:25 hclee -# expand $VAR and '~' when setting path-like options -# -# Revision 1.17 2008/11/05 13:51:03 hclee -# fix the bug in passing LFC_HOST to the job wrapper while using LCGSandboxCache -# -# Revision 1.16 2008/11/05 10:20:58 hclee -# fix the bug triggering the annoying warning message after subjob resubmission -# -# Revision 1.15 2008/11/03 15:27:48 hclee -# enhance the internal setup for the SandboxCache -# -# Revision 1.14 2008/10/08 07:42:47 hclee -# avoid doing glite-wms-job-cancel on jobs which is in a final state -# - glite bulk job status is now correctly stored as master job's status -# -# Revision 1.13 2008/09/30 17:51:08 hclee -# fine tune the typelist attribute in the schema -# -# Revision 1.12 2008/09/29 13:17:55 hclee -# fix the type checking issue -# -# Revision 1.11 2008/09/23 12:29:32 hclee -# fix the status update logic -# -# Revision 1.10 2008/09/22 22:43:41 hclee -# cache the logging information coming out from the LCGOutputDownloader threads -# -# Revision 1.9 2008/09/19 11:45:19 hclee -# turn off debug message of the MTRunner objects -# try to avoid the race condition amoung concurrent threads -# -# Revision 1.8 2008/09/18 16:34:58 hclee -# improving job submission/output fetching performance -# -# Revision 1.7 2008/09/15 20:42:38 hclee -# improve sandbox cache handler and adopt it in the LCG backend -# -# Revision 1.6 2008/09/04 14:00:34 hclee -# fix the type-checking issue when setting up CE attribute -# -# Revision 1.5 2008/08/12 13:57:42 hclee -# - remove redundant functions -# - set minimum timeout of downloading oversized inputsandbox to 60 secs. -# -# Revision 1.4 2008/08/12 12:37:37 hclee -# - improving oversized inputsandbox downloading -# * add more debug information -# * automatically determine the lcg-cp timeout assuming the rate of 1MB/sec -# * add config.LCG.SandboxTransferTimeout allowing user to set it manually -# -# Revision 1.3 2008/07/30 10:27:22 hclee -# fix indentation issue in the code -# -# Revision 1.2 2008/07/28 11:00:55 hclee -# patching up to the up-to-date development after CVS migration -# -# Revision 1.95.4.12 2008/07/15 11:51:42 hclee -# bug fix: https://savannah.cern.ch/bugs/?37825https://savannah.cern.ch/bugs/?37825 -# -# Revision 1.95.4.11 2008/07/09 13:26:08 hclee -# bug fix of https://savannah.cern.ch/bugs/index.php?38368 -# - ignoring configuration postprocess on the grid object corresponding to a -# disabled middleware -# -# Revision 1.95.4.10 2008/07/09 13:10:18 hclee -# apply the patch of feature request: https://savannah.cern.ch/bugs/?37825 -# - using scratch directory as job's working directory -# -# Revision 1.95.4.9 2008/05/15 16:01:08 hclee -# - bugfix #36178 (subprocess in python2.5) -# -# Revision 1.95.4.8 2008/05/08 13:28:06 hclee -# gzipped stdout stderr -# -# Revision 1.95.4.7 2008/03/31 15:56:27 hclee -# merge the srmv2 space token support made in Ganga4 branch -# -# Revision 1.95.4.6 2008/03/07 12:27:31 hclee -# distinguish application exitcode and middleware exitcode in schema -# - exitcode: application exitcode -# - exitcode_lcg: middleware exitcode -# -# Revision 1.95.4.5 2008/02/06 17:05:01 hclee -# add descriptions of configuration attributes -# -# Revision 1.95.4.4 2008/02/06 11:21:20 hclee -# merge 4.4 and 5.0 and fix few issues -# -# Revision 1.95.4.3 2007/12/11 09:54:30 amuraru -# moved GLITE_SETUP and EDG_SETUP to LCG module -# -# Revision 1.95.4.2 2007/12/10 18:05:13 amuraru -# merged the 4.4.4 changes -# -# Revision 1.95.4.1 2007/10/12 13:56:25 moscicki -# merged with the new configuration subsystem -# -# Revision 1.95.6.3 2007/10/12 08:16:50 roma -# Migration to new Config -# -# Revision 1.95.6.2 2007/10/09 15:06:47 roma -# Migration to new Config -# -# Revision 1.95.6.1 2007/09/25 09:45:12 moscicki -# merged from old config branch -# -# Revision 1.111 2007/12/04 17:26:19 hclee -# fix small typo -# -# Revision 1.110 2007/12/04 17:19:42 hclee -# - fix bugs in updating bulk job's status -# - fix status parser for gLite 3.1 -# -# Revision 1.109 2007/12/04 15:53:49 moscicki -# sparated Grid class into another module -# added optional import of GridSimulator class -# -# Revision 1.108 2007/11/30 11:31:12 hclee -# - improve the job id parser in the submit method -# - remove the warning message for individual subjob submission/killing -# -# Revision 1.107 2007/11/29 13:57:40 hclee -# fill up subjob ids in the monitoring loop -# -# Revision 1.106 2007/11/23 15:22:52 hclee -# add performance profiler -# -# Revision 1.105 2007/11/09 03:12:39 hclee -# bug fix on job id parser for edg-job-submit command -# -# Revision 1.104 2007/11/08 02:40:31 hclee -# fix the bug of parsing job id of edg-job-submit, remove the heading white spaces before parsing -# -# Revision 1.103 2007/10/23 12:18:43 hclee -# fix the subjob ordering issue of the glite collective job -# -# Revision 1.102 2007/10/19 14:43:14 hclee -# use -i in LCG command to kill multiple subjobs which are individually resubmitted -# -# Revision 1.101 2007/10/19 14:32:39 hclee -# bug fix for resubmission and kill on individual subjob -# -# Revision 1.100 2007/10/19 12:34:21 hclee -# - improving the control of the resubmission of each individual subjob submitted through glite-bulk job -# - enabling kill() on each individual subjob submitted through glite-bulk job -# - updating job.info.submit_count on subjobs in submit and resubmit methods -# -# Revision 1.99 2007/10/11 12:00:16 hclee -# support job resubmission on the glite subjobs -# -# Revision 1.98 2007/10/08 16:21:01 hclee -# - introduce "ShallowRetryCount" JDL attribute and set default to 10 -# - use the subprocess module to launch the application executable in the job wrapper -# -# Revision 1.97 2007/09/25 13:22:19 hclee -# implement the peek method with Octopus monitoring service -# -# Revision 1.114 2008/01/18 15:24:16 hclee -# - integrate job perusal feature implemented by Philip -# - fix bugs in backend.loginfo() and backend.inspect() -# -# Revision 1.113 2008/01/10 11:46:54 hclee -# - disable the JDL attribute "ExpiryTime" to avoid the immediate crash of the resubmitted jobs -# - merge the modification for enabling glite job perusal feature (contributed by Philip Rodrigues) -# -# Revision 1.112 2007/12/14 11:32:58 hclee -# fix the broken bulk submission - add temporary workaround to avoid the master job's state transition from 'submitted' to 'submitting' -# -# Revision 1.111 2007/12/04 17:26:19 hclee -# fix small typo -# -# Revision 1.110 2007/12/04 17:19:42 hclee -# - fix bugs in updating bulk job's status -# - fix status parser for gLite 3.1 -# -# Revision 1.109 2007/12/04 15:53:49 moscicki -# sparated Grid class into another module -# added optional import of GridSimulator class -# -# Revision 1.108 2007/11/30 11:31:12 hclee -# - improve the job id parser in the submit method -# - remove the warning message for individual subjob submission/killing -# -# Revision 1.107 2007/11/29 13:57:40 hclee -# fill up subjob ids in the monitoring loop -# -# Revision 1.106 2007/11/23 15:22:52 hclee -# add performance profiler -# -# Revision 1.105 2007/11/09 03:12:39 hclee -# bug fix on job id parser for edg-job-submit command -# -# Revision 1.104 2007/11/08 02:40:31 hclee -# fix the bug of parsing job id of edg-job-submit, remove the heading white spaces before parsing -# -# Revision 1.103 2007/10/23 12:18:43 hclee -# fix the subjob ordering issue of the glite collective job -# -# Revision 1.102 2007/10/19 14:43:14 hclee -# use -i in LCG command to kill multiple subjobs which are individually resubmitted -# -# Revision 1.101 2007/10/19 14:32:39 hclee -# bug fix for resubmission and kill on individual subjob -# -# Revision 1.100 2007/10/19 12:34:21 hclee -# - improving the control of the resubmission of each individual subjob submitted through glite-bulk job -# - enabling kill() on each individual subjob submitted through glite-bulk job -# - updating job.info.submit_count on subjobs in submit and resubmit methods -# -# Revision 1.99 2007/10/11 12:00:16 hclee -# support job resubmission on the glite subjobs -# -# Revision 1.98 2007/10/08 16:21:01 hclee -# - introduce "ShallowRetryCount" JDL attribute and set default to 10 -# - use the subprocess module to launch the application executable in the job wrapper -# -# Revision 1.97 2007/09/25 13:22:19 hclee -# implement the peek method with Octopus monitoring service -# -# Revision 1.95 2007/08/09 14:01:45 kuba -# fixed the logic of dynamic requirements loading (fix from Johannes) -# -# Revision 1.94 2007/08/09 11:03:45 kuba -# protection for passing non-strings to printError and printWarning functions -# -# Revision 1.93 2007/08/01 13:39:27 hclee -# replace old glite-job-* commands with glite-wms-job-* commands -# -# Revision 1.92 2007/07/27 15:13:39 moscicki -# merged the monitoring services branch from kuba -# -# Revision 1.91 2007/07/25 14:08:07 hclee -# - combine the query for glite subjob id (right after the job submission) with the hook of sending monitoring information to Dashboard -# - improve the debug message in the job wrapper -# -# Revision 1.90 2007/07/24 13:53:11 hclee -# query for subjob ids right after the glite bulk submission -# -# Revision 1.89 2007/07/16 15:42:16 hclee -# - move LCGRequirements out from LCG class -# - add config['LCG']['Requirements'] attribute, default to the LCGRequirements class -# - dynamic loading of the requirements module, allowing applications to override merge() and convert() methods for app specific requirement based on the GLUE schema -# -# Revision 1.88 2007/07/10 13:08:32 moscicki -# docstring updates (ganga devdays) -# -# Revision 1.87 2007/07/03 10:05:10 hclee -# pass the GridShell instance to GridCache for pre-staging oversized inputsandbox -# -# Revision 1.86.2.1 2007/06/21 15:04:24 moscicki -# improvement of the monitoring services interface -# -# Revision 1.86 2007/06/15 08:42:59 hclee -# - adopt the Credential plugin to get the voname from the voms proxy -# - modify the logic of the Grid.check_proxy() method -# -# Revision 1.85 2007/06/06 18:56:38 hclee -# bug fix -# -# Revision 1.84 2007/06/06 15:21:52 hclee -# fix the issue that if the grids['EDG'] and Grid not properly created on the machine without UI installation -# -# Revision 1.83 2007/06/05 16:43:06 hclee -# get default lfc_host from lcg-infosites utility -# -# Revision 1.82 2007/06/05 15:06:22 hclee -# add a post-config hook for setting corresponding env. variables of the cached GridShells -# - for instance, only config['LCG']['DefaultLFC'] affects GridShell.env['LFC_HOST'] -# -# Revision 1.81 2007/05/30 16:17:26 hclee -# check the exit code of the real executable (bug #26290) -# -# Revision 1.80 2007/05/23 15:43:24 hclee -# - introduce 'DefaultLFC' configuration property -# - check the exit code from real executable (bug #26290) -# - pass local 'LFC_HOST' environment variable to grid WNs (bug #26443) -# -# Revision 1.79 2007/05/10 10:05:14 liko -# Use srm.cern.ch for big sandbox and do not overwrite X509_USER_PROXY -# -# Revision 1.78.4.1 2007/06/18 07:44:56 moscicki -# config prototype -# -# Revision 1.78 2007/04/05 14:30:19 hclee -# - fix the bug in distinguishing master and node jobs of the glite bulk submission -# - add logic for handling master_resubmit and master_cancel for glite bulk jobs -# -# Revision 1.77 2007/04/05 07:13:01 hclee -# allow users to call the 'cleanup_iocache()' method when job is in 'completed' and 'failed' status -# -# Revision 1.76 2007/03/23 03:45:02 hclee -# remove CVS confliction marks -# -# Revision 1.75 2007/03/23 03:41:24 hclee -# merge modifications in 4.2.2-bugfix-branch -# -# Revision 1.74 2007/01/31 11:13:52 hclee -# remove the python path prepending when calling edg or glite UI commands -# -# Revision 1.73 2007/01/23 17:32:44 hclee -# input sandbox pre-upload is workable for gLite bulk submission -# -# Revision 1.72 2007/01/23 11:45:58 hclee -# the inputsandbox pre-upload takes into account the shared inputsandbox -# - the shared inputsandbox will not be uploaded again if it has been existing on the remote iocache -# add and export cleanup_iocache() method for deleting the pre-uploaded input sandboxes -# - if the job is not "completed", the operation will be simply ignored with some warning message -# -# Revision 1.71 2007/01/22 16:22:10 hclee -# the workable version for remote file cache using lcg-utils -# -# Revision 1.70 2007/01/17 17:54:36 hclee -# working for file upload -# -# Revision 1.69 2007/01/16 16:58:37 hclee -# In the middle of implementing large inputsandbox support -# -# Revision 1.68 2007/01/16 15:31:11 hclee -# Adopt the GridCache object for remote file I/O -# -# Revision 1.67 2006/12/14 08:53:03 hclee -# add file upload/download/delete methods -# -# Revision 1.66 2006/12/13 13:17:19 hclee -# merge the modifications in the 4-2-2 bugfix branch -# -# Revision 1.65 2006/11/02 13:35:49 hclee -# add resubmission implementations -# -# Revision 1.63.2.8 2006/12/13 12:52:40 hclee -# add _GPI_Prefs -# -# Revision 1.63.2.7 2006/11/22 20:39:10 hclee -# make sure the numerical values of requirements are correctly converted into string -# -# Revision 1.63.2.6 2006/11/22 15:40:16 hclee -# Make a more clear instruction for calling check_proxy method -# -# Revision 1.63.2.5 2006/11/03 15:57:18 hclee -# introduce the environmental variable, GANGA_LCG_CE, for monitoring purpose -# if the backend.CE is specified by the user -# -# Revision 1.63.2.4 2006/11/03 13:19:09 hclee -# rollback unintentional commit to exclude the resubmission feature -# -# Revision 1.63.2.3 2006/11/02 13:25:27 hclee -# implements the resubmit methods for both EDG and GLITE modes -# -# Revision 1.63.2.2 2006/10/26 14:14:46 hclee -# include the monitoring component -# -# Revision 1.63.2.1 2006/10/26 13:33:36 hclee -# - accept the verbosity argument when use backend.loginfo() -# - the backend.loginfo() method returns a filename of the saved logging info instead of printing out of the plain text of the logging info -# -# Revision 1.63 2006/10/24 12:53:48 hclee -# skip taking VO name from the voms proxy if using EDG middleware -# -# Revision 1.62 2006/10/12 13:00:27 hclee -# - for subjobs, change to status 'submitting' before changing to 'submitted' -# -# Revision 1.61 2006/10/09 10:38:39 hclee -# Simplify the usage of the "Grid" objects -# -# Revision 1.60 2006/10/09 09:37:43 hclee -# voms attributes in the proxy takes precedence for VO detection in composing job submission command -# -# Revision 1.59 2006/10/09 09:14:43 hclee -# Appending "MPICH" requirements instead of overriding -# -# Revision 1.58 2006/10/06 08:05:08 hclee -# Add supports for multiple job types (Normal, MPICH, Interactive) -# -# Revision 1.57 2006/10/05 09:12:42 hclee -# - add default value of the configurable parameters -# - simplify the code accordingly by removing the checking of the existence of the configurable parameters -# - expose the exitcode of the real executable inside the job wrapper -# -# Revision 1.56 2006/09/28 14:36:56 hclee -# remove the redundant __credential_validity__ method -# change some message in the submit function to debug level info -# -# Revision 1.55 2006/09/18 09:48:46 hclee -# add "-r" option in job submission command if CE is specified (bypassing the RB match-making) -# change the name of some private method: Grid.proxy_voname() -> Grid.__get_proxy_voname__() -# change the argument of the Grid.__credential_validity__() method. Replace "value" with "type". -# -# Revision 1.54 2006/09/11 12:33:29 hclee -# job status rolls back to "failed" if output fetching fails. -# -# Revision 1.53 2006/09/06 15:08:54 hclee -# Catch and print the log file of grid commands -# -# Revision 1.52 2006/08/28 15:20:32 hclee -# - integrate shared inputsandbox for glite bulk submission -# - small fixes in job wrapper -# -# Revision 1.51 2006/08/24 16:48:24 moscicki -# - master/subjob sandbox support -# - fixes in the config for setting VO -# -# Revision 1.50 2006/08/22 12:06:30 hclee -# unpack the output sandbox tarball after getting output -# -# Revision 1.49 2006/08/21 10:31:55 hclee -# set PATH environment to search current working directory in the job wrapper -# -# Revision 1.48 2006/08/18 16:08:05 hclee -# small fix for vo switching -# -# Revision 1.47 2006/08/18 13:46:00 hclee -# update for the bugs: -# - #19122: use jobconfig.getExeString() to get correct path of exeutable -# - #19067: use an enhanced system call handler implemented in the Local handler to better control the stdout/stderr -# - #19155: job submission/cancelling/monitoring will be just failed if no proxy is available -# -# Revision 1.46 2006/08/16 15:15:33 hclee -# fix the path problem of the actual executable in job wrapper -# -# Revision 1.45 2006/08/15 11:10:01 hclee -# - reduce verbosity -# - correct the way to specify default configuration attributes -# -# Revision 1.44 2006/08/10 13:39:50 moscicki -# using Sandbox mechanism -# -# Revision 1.43 2006/08/09 14:36:10 hclee -# - use ProxyTimeLeft and ProxyTimeValid in proxy creating and checking -# - in submit and cancel methods, check_proxy is called if no valid proxy available -# -# Revision 1.42 2006/08/09 11:07:32 hclee -# - use getCredential method to create a credential -# - enhancement in get_output() method -# -# Revision 1.41 2006/08/08 21:44:23 hclee -# change wrapper log format -# -# Revision 1.40 2006/08/08 21:23:40 hclee -# Change format of the wrapper log -# -# Revision 1.39 2006/08/08 20:02:36 hclee -# - Add job wrapper -# - modify the loop of backend status update -# - use GridShell module to create Shell object -# -# Revision 1.38 2006/08/08 14:23:49 hclee -# - Integrate with Credential module -# - Add method for getting Shell objects -# - In the middle of the job wrapper implementation -# -# Revision 1.37 2006/07/31 13:25:55 hclee -# replace the code of master job update with the factored out method: updateMasterJobStatus() -# -# Revision 1.36 2006/07/31 13:06:21 hclee -# Integration with state machine -# few bug fixes -# -# Revision 1.35 2006/07/20 21:06:15 hclee -# - remove existing "jdlrepos" directory of bulk job -# -# Revision 1.34 2006/07/20 20:51:59 hclee -# - return False if bulk submission failed -# -# Revision 1.33 2006/07/19 17:06:20 hclee -# initial implementation for gLite bulk submission -# -# Revision 1.32 2006/07/18 15:09:59 hclee -# Supporting both EDG and GLITE middlewares in LCG handler -# -# Revision 1.31 2006/07/17 10:14:29 hclee -# merge Alvin's patch for the version (Ganga-LCG-1-1) in Ganga release 4-2-0-beta2 -# -# Revision 1.30 2006/07/10 13:12:59 moscicki -# changes from Johannes: outputdata handling and a bugfix -# -# Revision 1.29 2006/07/07 14:27:01 hclee -# Fix the scenario of VO check in the __avoidVOSwitch__ function -# -# Revision 1.28 2006/07/07 12:04:11 hclee -# Avoid VO switching in GANGA session -# -# Revision 1.27 2006/07/04 11:41:36 hclee -# Add internal function in Grid object for setting up the edg-job-submit options -# - effective configurations are used in composing the options -# - more virtual organisation checks -# - the function will be called everytime the submit() function is called -# -# Revision 1.27 2006/07/03 13:55:30 hclee -# Add internal function in Grid object for setting up the edg-job-submit options -# - effective configurations are used in composing the options -# - more virtual organisation checks -# - the function will be called everytime the submit() function is called -# -# Revision 1.26 2006/06/07 17:16:02 liko -# Additional logic for the cleared state -# -# Revision 1.25 2006/06/07 17:15:44 liko -# Additional logic for the cleared state -# -# Revision 1.24 2006/05/31 10:12:17 liko -# Add Cleared -# -# Revision 1.23 2006/05/19 22:11:59 liko -# Add status Submitted -# -# Revision 1.22 2006/05/18 15:38:31 liko -# : -# -# Revision 1.21 2006/05/15 16:39:30 liko -# Done (Failed) is not final state ... -# -# Revision 1.20 2006/05/08 11:50:53 liko -# Include changes by Johannes -# -# Revision 1.19 2006/04/27 09:13:25 moscicki -# -# PREFIX_HACK: -# work around inconsistency of LCG setup script and commands: -# LCG commands require python2.2 but the setup script does not set this version of python. If another version of python is used (like in GUI), then python2.2 runs against wrong python libraries possibly should be fixed in LCG: either remove python2.2 from command scripts or make setup script force correct version of python -# -# Revision 1.18 2006/04/24 17:30:02 liko -# Several bug fixes -# -# Revision 1.17 2006/03/20 10:01:53 liko -# Fix retry count -# -# Revision 1.16 2006/03/17 00:55:19 liko -# Fix problem with replica catalog -# -# Revision 1.15 2006/03/17 00:06:55 liko -# defaults for config attributes ReplicaCatalog -# -# Revision 1.14 2006/03/16 23:53:12 liko -# Fix stupid proxy message -# -# Revision 1.13 2006/02/10 14:38:37 moscicki -# replaced KeyError by ConfigError -# -# fixed: bug #13462 overview: stdin and stdout are unconditionally added to OutputSandbox -# -# fixed: edg-job-cancel with the new release of LCG asks an interactive questions which made Ganga to "hang" on it, --noint option added wherever possible -# -# Revision 1.12 2006/02/07 13:02:33 liko -# -# 1) Fix problem with conflicting requirements definitions -# 2) Fix problem with AllowedCEs in configuration -# 3) Support for LFC in Athena handler -# -# Revision 1.11 2005/11/08 09:15:05 liko -# Fix a bug in the handling of the environment -# -# Revision 1.10 2005/10/21 13:19:09 moscicki -# fixed: kill should return the boolean sucess code -# -# Revision 1.9 2005/10/11 11:56:37 liko -# Default values for new configuration file -# -# Revision 1.8 2005/09/22 21:41:15 liko -# Add Cleared status -# -# Revision 1.7 2005/09/21 09:05:58 andrew -# Added a retry mechanism to the 'proxy-init' call. Now the user has -# 3 retries before giving up. -# -# Revision 1.6 2005/09/06 11:37:13 liko -# Mainly the Athena handler -# -# Revision 1.5 2005/09/02 12:46:10 liko -# Extensively updated version diff --git a/ganga/GangaCore/Lib/LCG/LCGOutputDownloader.py b/ganga/GangaCore/Lib/LCG/LCGOutputDownloader.py deleted file mode 100755 index 178a5dbadd..0000000000 --- a/ganga/GangaCore/Lib/LCG/LCGOutputDownloader.py +++ /dev/null @@ -1,115 +0,0 @@ -from GangaCore.Core.GangaThread.MTRunner import Algorithm, Data, MTRunner -from GangaCore.Lib.LCG import Grid -from GangaCore.Utility.logging import getLogger - -logger = getLogger() - - -class LCGOutputDownloadTask(object): - - """ - Class for defining a data object for each output downloading task. - """ - - _attributes = ('gridObj', 'jobObj') - - def __init__(self, jobObj): - self.jobObj = jobObj - - def __eq__(self, other): - """ - download task comparison based on job's FQID. - """ - if self.jobObj.getFQID('.') == other.jobObj.getFQID('.'): - return True - else: - return False - - def __str__(self): - """ - represents the task by the job object - """ - return 'downloading task for job %s' % self.jobObj.getFQID('.') - - -class LCGOutputDownloadAlgorithm(Algorithm): - - """ - Class for implementing the logic of each downloading task. - """ - - def process(self, item): - """ - downloads output of one LCG job - """ - - pps_check = (True, None) - - job = item.jobObj - - # it is very likely that the job's downloading task has been - # created and assigned in a previous monitoring loop - # ignore such kind of cases - if job.status in ['completing', 'completed', 'failed']: - return True - - # it can also happen that the job was killed/removed by user between - # the downloading task was created in queue and being taken by one of - # the downloading thread. Ignore suck kind of cases - if job.status in ['removed', 'killed']: - return True - - job.updateStatus('completing') - outw = job.getOutputWorkspace() - - pps_check = Grid.get_output(job.backend.id, outw.getPath(), job.backend.credential_requirements) - - if pps_check[0]: - job.updateStatus('completed') - job.backend.exitcode = 0 - else: - job.updateStatus('failed') - # update the backend's reason if the failure detected in the - # Ganga's pps - if pps_check[1] != 0: - job.backend.reason = 'non-zero app. exit code: %s' % pps_check[ - 1] - job.backend.exitcode = pps_check[1] - - # needs to update the master job's status to give an up-to-date status - # of the whole job - if job.master: - job.master.updateMasterJobStatus() - - self.__appendResult__(job.getFQID('.'), True) - - return True - - -class LCGOutputDownloader(MTRunner): - - """ - Class for managing the LCG output downloading activities based on MTRunner. - """ - - def __init__(self, numThread=10): - - MTRunner.__init__(self, name='lcg_output_downloader', data=Data( - collection=[]), algorithm=LCGOutputDownloadAlgorithm()) - - self.keepAlive = True - self.numThread = numThread - - def countAliveAgent(self): - - return self.__cnt_alive_threads__() - - def addTask(self, job): - - task = LCGOutputDownloadTask(job) - - logger.debug('add output downloading task: job %s' % job.getFQID('.')) - - self.addDataItem(task) - - return True diff --git a/ganga/GangaCore/Lib/LCG/LCGRequirements.py b/ganga/GangaCore/Lib/LCG/LCGRequirements.py deleted file mode 100755 index a034bcd7db..0000000000 --- a/ganga/GangaCore/Lib/LCG/LCGRequirements.py +++ /dev/null @@ -1,130 +0,0 @@ -import re - -from GangaCore.GPIDev.Base import GangaObject -from GangaCore.GPIDev.Schema import Schema, SimpleItem, Version -from GangaCore.Utility.Config import getConfig - - -class LCGRequirements(GangaObject): - - '''Helper class to group LCG requirements. - - See also: JDL Attributes Specification at http://cern.ch/glite/documentation - ''' - - _schema = Schema(Version(1, 2), { - 'software': SimpleItem(defvalue=[], typelist=[str], sequence=1, doc='Software Installations'), - 'nodenumber': SimpleItem(defvalue=1, doc='Number of Nodes for MPICH jobs'), - 'memory': SimpleItem(defvalue=0, doc='Mininum available memory (MB)'), - 'cputime': SimpleItem(defvalue=0, doc='Minimum available CPU time (min)'), - 'walltime': SimpleItem(defvalue=0, doc='Mimimum available total time (min)'), - 'ipconnectivity': SimpleItem(defvalue=False, doc='External connectivity'), - 'allowedCEs': SimpleItem(defvalue='', doc='allowed CEs in regular expression'), - 'excludedCEs': SimpleItem(defvalue='', doc='excluded CEs in regular expression'), - 'datarequirements': SimpleItem(defvalue=[], typelist=[dict], sequence=1, doc='The DataRequirements entry for the JDL. A list of dictionaries, each with "InputData", "DataCatalogType" and optionally "DataCatalog" entries'), - 'dataaccessprotocol': SimpleItem(defvalue=['gsiftp'], typelist=[str], sequence=1, doc='A list of strings giving the available DataAccessProtocol protocols'), - 'other': SimpleItem(defvalue=[], typelist=[str], sequence=1, doc='Other Requirements') - }) - - _category = 'LCGRequirements' - _name = 'LCGRequirements' - - def __init__(self): - - super(LCGRequirements, self).__init__() - - def merge(self, other): - '''Merge requirements objects''' - - if not other: - return self - - merged = LCGRequirements() - for name in ['software', 'nodenumber', 'memory', 'cputime', 'walltime', 'ipconnectivity', 'allowedCEs', 'excludedCEs', 'datarequirements', 'dataaccessprotocol', 'other']: - - attr = '' - - try: - attr = getattr(other, name) - except KeyError as e: - pass - - if not attr: - attr = getattr(self, name) - setattr(merged, name, attr) - - return merged - - def convert(self): - '''Convert the condition in a JDL specification''' - - import re - - requirements = [ - 'Member("%s",other.GlueHostApplicationSoftwareRunTimeEnvironment)' % sw for sw in self.software] - if self.memory: - requirements += ['other.GlueHostMainMemoryVirtualSize >= %s' % - str(self.memory)] - if self.cputime: - requirements += [ - '(other.GlueCEPolicyMaxCPUTime >= %s || other.GlueCEPolicyMaxCPUTime == 0)' % str(self.cputime)] - if self.walltime: - requirements += [ - '(other.GlueCEPolicyMaxWallClockTime >= %s || other.GlueCEPolicyMaxWallClockTime == 0)' % str(self.walltime)] - if self.ipconnectivity: - requirements += ['other.GlueHostNetworkAdapterOutboundIP==true'] - requirements += self.other - - config = getConfig('LCG') - - # retrieve allowed_ces and excluded_ces from LCGRequirement object and - # the config['Allowed/ExcludedCEs'] - allowed_ces = [] - excluded_ces = [] - - # from Ganga configuration - if config['AllowedCEs']: - ce_req = config['AllowedCEs'].strip() - allowed_ces += re.split(r'\s+', ce_req) - - if config['ExcludedCEs']: - ce_req = config['ExcludedCEs'].strip() - excluded_ces += re.split(r'\s+', ce_req) - - # from LCGRequirements object - # if string starts with '+', it means the requirement to be appeneded - re_append = re.compile(r'^(\++)\s*(.*)') - try: - ce_req = self.allowedCEs.strip() - if ce_req: - m = re_append.match(ce_req) - if m: - allowed_ces += re.split(r'\s+', m.group(2)) - else: - allowed_ces = re.split(r'\s+', ce_req) - except KeyError as e: - pass - - try: - ce_req = self.excludedCEs.strip() - if ce_req: - m = re_append.match(ce_req) - if m: - excluded_ces += re.split(r'\s+', m.group(2)) - else: - excluded_ces = re.split(r'\s+', ce_req) - except KeyError as e: - pass - - # composing the requirements given the list of allowed_ces and - # excluded_ces - if allowed_ces: - requirements += ['( %s )' % ' || '.join( - ['RegExp("%s",other.GlueCEUniqueID)' % ce for ce in allowed_ces])] - - if excluded_ces: - #requirements += [ '(!RegExp("%s",other.GlueCEUniqueID))' % ce for ce in excluded_ces ] - requirements += ['( %s )' % ' && '.join( - ['(!RegExp("%s",other.GlueCEUniqueID))' % ce for ce in excluded_ces])] - - return requirements diff --git a/ganga/GangaCore/Lib/LCG/LCGSandboxCache.py b/ganga/GangaCore/Lib/LCG/LCGSandboxCache.py deleted file mode 100755 index dc091545a7..0000000000 --- a/ganga/GangaCore/Lib/LCG/LCGSandboxCache.py +++ /dev/null @@ -1,293 +0,0 @@ -############################################################################### -# Ganga Project. http://cern.ch/ganga -# -# $Id: LCGSandboxCache.py,v 1.8 2009-03-12 12:17:31 hclee Exp $ -############################################################################### -# -# LCG backend -# -# ATLAS/ARDA -# -# Date: January 2007 -import os -import os.path -import re -from urllib.parse import urlparse - -from GangaCore.Core.GangaThread.MTRunner import Algorithm, Data, MTRunner -from GangaCore.GPIDev.Schema import Schema, SimpleItem, Version -from GangaCore.Lib.LCG.GridSandboxCache import GridFileIndex, GridSandboxCache -from GangaCore.Lib.LCG.Utility import get_md5sum -from GangaCore.Utility.GridShell import getShell -from GangaCore.Utility.logging import getLogger - -lcg_sandbox_cache_schema_datadict = GridSandboxCache._schema.inherit_copy().datadict -lcg_file_index_schema_datadict = GridFileIndex._schema.inherit_copy().datadict - - -class LCGFileIndex(GridFileIndex): - - """ - Data object containing LCG file index information. - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - """ - - lcg_file_index_schema_datadict.update({ - 'lfc_host': SimpleItem(defvalue='', copyable=1, doc='the LFC hostname'), - 'local_fpath': SimpleItem(defvalue='', copyable=1, doc='the original file path on local machine') - }) - - _schema = Schema(Version(1, 0), lcg_file_index_schema_datadict) - _category = 'GridFileIndex' - _name = 'LCGFileIndex' - - def __init__(self): - super(LCGFileIndex, self).__init__() - - -class LCGSandboxCache(GridSandboxCache): - - ''' - Helper class for upladong/downloading/deleting sandbox files using lcg-cr/lcg-cp/lcg-del commands. - - @author: Hurng-Chun Lee - @contact: hurngchunlee@gmail.com - ''' - - lcg_sandbox_cache_schema_datadict.update({ - 'se': SimpleItem(defvalue='', copyable=1, doc='the LCG SE hostname'), - 'se_type': SimpleItem(defvalue='srmv2', copyable=1, doc='the LCG SE type'), - 'se_rpath': SimpleItem(defvalue='generated', copyable=1, doc='the relative path to the VO directory on the SE'), - 'lfc_host': SimpleItem(defvalue='', copyable=1, doc='the LCG LFC hostname'), - 'srm_token': SimpleItem(defvalue='', copyable=1, doc='the SRM space token, meaningful only when se_type is set to srmv2') - }) - - _schema = Schema(Version(1, 0), lcg_sandbox_cache_schema_datadict) - _category = 'GridSandboxCache' - _name = 'LCGSandboxCache' - - logger = getLogger() - - def __init__(self): - super(LCGSandboxCache, self).__init__() - self.protocol = 'lcg' - - def __setattr__(self, attr, value): - if attr == 'se_type' and value not in ['', 'srmv1', 'srmv2', 'se']: - raise AttributeError('invalid se_type: %s' % value) - super(LCGSandboxCache, self).__setattr__(attr, value) - - def impl_upload(self, cred_req, files=[], opts=''): - """ - Uploads multiple files to a remote grid storage. - """ - - shell = getShell(cred_req) - - if self.lfc_host: - shell.env['LFC_HOST'] = self.lfc_host - - self.logger.debug( - 'upload file with LFC_HOST: %s', shell.env['LFC_HOST']) - - # the algorithm of uploading one file - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj): - Algorithm.__init__(self) - self.cacheObj = cacheObj - self.dirname = self.cacheObj.__get_unique_fname__() - - def process(self, file): - # decide number of parallel stream to be used - fsize = os.path.getsize(urlparse(file)[2]) - fname = os.path.basename(urlparse(file)[2]) - fpath = os.path.abspath(urlparse(file)[2]) - - md5sum = get_md5sum(fpath, ignoreGzipTimestamp=True) - nbstream = int((fsize * 1.0) / (10.0 * 1024 * 1024 * 1024)) - - if nbstream < 1: - nbstream = 1 # min stream - if nbstream > 8: - nbstream = 8 # max stream - - cmd = 'lcg-cr -t 180 --vo %s -n %d' % ( - self.cacheObj.vo, nbstream) - if self.cacheObj.se is not None: - cmd = cmd + ' -d %s' % self.cacheObj.se - if self.cacheObj.se_type == 'srmv2' and self.cacheObj.srm_token: - cmd = cmd + ' -D srmv2 -s %s' % self.cacheObj.srm_token - - # specify the physical location - cmd = cmd + \ - ' -P %s/ganga.%s/%s' % (self.cacheObj.se_rpath, - self.dirname, fname) - - # specify the logical filename - # NOTE: here we assume the root dir for VO is /grid/ - lfc_dir = '/grid/%s/ganga.%s' % ( - self.cacheObj.vo, self.dirname) - if not self.cacheObj.__lfc_mkdir__(shell, lfc_dir): - self.cacheObj.logger.warning( - 'cannot create LFC directory: %s' % lfc_dir) - return None - - cmd = cmd + ' -l %s/%s %s' % (lfc_dir, fname, file) - rc, output, m = self.cacheObj.__cmd_retry_loop__( - shell, cmd, self.cacheObj.max_try) - - if rc != 0: - return False - else: - match = re.search(r'(guid:\S+)', output) - if match: - guid = match.group(1) - - fidx = LCGFileIndex() - fidx.id = guid - fidx.name = fname - fidx.md5sum = md5sum - fidx.lfc_host = self.cacheObj.lfc_host - fidx.local_fpath = fpath - - self.__appendResult__(file, fidx) - return True - else: - return False - - myAlg = MyAlgorithm(cacheObj=self) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_lcgcr', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - return list(runner.getResults().values()) - - def impl_download(self, cred_req, files=[], dest_dir=None, opts=''): - """ - Downloads multiple files from remote grid storages to - a local directory. - """ - if not dest_dir: - dest_dir = os.getcwd() - self.logger.debug('download file to: %s', dest_dir) - - # the algorithm of downloading one file to a local directory - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj, cred_req): - Algorithm.__init__(self) - self.cacheObj = cacheObj - self.shell = getShell(cred_req) - - def process(self, file): - - guid = file.id - lfn = file.attributes['local_fpath'] - lfc_host = file.attributes['lfc_host'] - fname = os.path.basename(urlparse(lfn)[2]) - - self.shell.env['LFC_HOST'] = lfc_host - self.cacheObj.logger.debug( - 'download file with LFC_HOST: %s', self.shell.env['LFC_HOST']) - - cmd = 'lcg-cp -t %d --vo %s ' % ( - self.cacheObj.timeout, self.cacheObj.vo) - if self.cacheObj.se_type: - cmd += '-T %s ' % self.cacheObj.se_type - cmd += '%s file://%s/%s' % (guid, dest_dir, fname) - - self.cacheObj.logger.debug('download file: %s', cmd) - - rc, output, m = self.cacheObj.__cmd_retry_loop__( - self.shell, cmd, self.cacheObj.max_try) - - if rc != 0: - return False - else: - self.__appendResult__(file.id, file) - return True - - myAlg = MyAlgorithm(cacheObj=self, cred_req=cred_req) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_lcgcp', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - return list(runner.getResults().values()) - - def impl_delete(self, cred_req, files=[], opts=''): - """ - Deletes multiple files from remote grid storages. - """ - - # the algorithm of downloading one file to a local directory - class MyAlgorithm(Algorithm): - - def __init__(self, cacheObj, cred_req): - Algorithm.__init__(self) - self.cacheObj = cacheObj - self.shell = getShell(cred_req) - - def process(self, file): - - guid = file.id - - lfc_host = file.attributes['lfc_host'] - - self.shell.env['LFC_HOST'] = lfc_host - - self.cacheObj.logger.debug( - 'delete file with LFC_HOST: %s' % self.shell.env['LFC_HOST']) - - cmd = 'lcg-del -a -t 60 --vo %s %s' % (self.cacheObj.vo, guid) - - rc, output, m = self.cacheObj.__cmd_retry_loop__( - self.shell, cmd, self.cacheObj.max_try) - - if rc != 0: - return False - else: - self.__appendResult__(file.id, file) - return True - - myAlg = MyAlgorithm(cacheObj=self, cred_req=cred_req) - myData = Data(collection=files) - - runner = MTRunner( - name='sandboxcache_lcgdel', algorithm=myAlg, data=myData) - runner.start() - runner.join(-1) - - # update the local index file - del_files = list(runner.getResults().values()) - all_files = self.get_cached_files() - - left_files = [] - for f in all_files: - if f not in del_files: - left_files.append(f) - - self.impl_bookkeepUploadedFiles(left_files, append=False) - - return del_files - - # For GUID protocol - def __lfc_mkdir__(self, shell, path, mode='775'): - '''Creates a directory in LFC''' - - cmd = 'lfc-mkdir -p -m %s %s' % (mode, path) - - (rc, output, m) = self.__cmd_retry_loop__(shell, cmd, 1) - - if rc != 0: - return False - else: - return True diff --git a/ganga/GangaCore/Lib/LCG/Utility.py b/ganga/GangaCore/Lib/LCG/Utility.py deleted file mode 100755 index 73fddd4611..0000000000 --- a/ganga/GangaCore/Lib/LCG/Utility.py +++ /dev/null @@ -1,79 +0,0 @@ -#!/usr/bin/env python -import gzip -import hashlib -import random -import re -import socket -import time - -from GangaCore.Lib.LCG.ElapsedTimeProfiler import ElapsedTimeProfiler -from GangaCore.Utility.logging import getLogger - - -def get_uuid(*args): - ''' Generates a universally unique ID. ''' - t = time.time() * 1000 - r = random.random() * 100000000000000000 - try: - a = socket.gethostbyname(socket.gethostname()) - except: - # if we can't get a network address, just imagine one - a = random.random() * 100000000000000000 - data = str(t) + ' ' + str(r) + ' ' + str(a) + ' ' + str(args) - - md5_obj = hashlib.md5() - md5_obj.update(data) - data = md5_obj.hexdigest() - - return data - - -def urisplit(uri): - """ - Basic URI Parser according to STD66 aka RFC3986 - - >>> urisplit("scheme://authority/path?query#fragment") - ('scheme', 'authority', 'path', 'query', 'fragment') - - """ - # regex straight from STD 66 section B - regex = r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?' - p = re.match(regex, uri).groups() - scheme, authority, path, query, fragment = p[1], p[3], p[4], p[6], p[8] - #if not path: path = None - return (scheme, authority, path, query, fragment) - - -def get_md5sum(fname, ignoreGzipTimestamp=False): - ''' Calculates the MD5 checksum of a file ''' - - profiler = ElapsedTimeProfiler(getLogger(name='Profile.LCG')) - profiler.start() - - # if the file is a zipped format (determined by extension), - # try to get checksum from it's content. The reason is that - # gzip file contains a timestamp in the header, which causes - # different md5sum value even the contents are the same. - #re_gzipfile = re.compile('.*[\.tgz|\.gz].*$') - - f = None - - if ignoreGzipTimestamp and (fname.find('.tgz') > 0 or fname.find('.gz') > 0): - f = gzip.open(fname, 'rb') - else: - f = open(fname, 'rb') - - m = hashlib.md5() - - while True: - d = f.read(8096) - if not d: - break - m.update(d) - f.close() - - md5sum = m.hexdigest() - - profiler.check('md5sum calculation time') - - return md5sum diff --git a/ganga/GangaCore/Lib/LCG/__init__.py b/ganga/GangaCore/Lib/LCG/__init__.py deleted file mode 100755 index e6afc42061..0000000000 --- a/ganga/GangaCore/Lib/LCG/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ - -from .ARC import ARC -from .CREAM import CREAM -from .ElapsedTimeProfiler import ElapsedTimeProfiler -from .GridftpSandboxCache import GridftpFileIndex, GridftpSandboxCache -from .GridSandboxCache import GridFileIndex, GridSandboxCache -from .LCG import LCG, LCGJobConfig -from .LCGOutputDownloader import (LCGOutputDownloadAlgorithm, - LCGOutputDownloader, LCGOutputDownloadTask) -from .LCGRequirements import LCGRequirements -from .LCGSandboxCache import LCGFileIndex, LCGSandboxCache diff --git a/ganga/GangaCore/Lib/Notebook/Notebook.py b/ganga/GangaCore/Lib/Notebook/Notebook.py index f713cf465d..6d1f4ce66f 100755 --- a/ganga/GangaCore/Lib/Notebook/Notebook.py +++ b/ganga/GangaCore/Lib/Notebook/Notebook.py @@ -142,7 +142,6 @@ def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): allHandlers.add('Notebook', 'PBS', NotebookRTHandler) allHandlers.add('Notebook', 'SGE', NotebookRTHandler) allHandlers.add('Notebook', 'Condor', NotebookRTHandler) -allHandlers.add('Notebook', 'LCG', NotebookRTHandler) allHandlers.add('Notebook', 'gLite', NotebookRTHandler) allHandlers.add('Notebook', 'TestSubmitter', NotebookRTHandler) allHandlers.add('Notebook', 'Interactive', NotebookRTHandler) diff --git a/ganga/GangaCore/Runtime/plugins.py b/ganga/GangaCore/Runtime/plugins.py index d06b4724df..1774453405 100644 --- a/ganga/GangaCore/Runtime/plugins.py +++ b/ganga/GangaCore/Runtime/plugins.py @@ -38,9 +38,6 @@ logger.debug("Loading LocalHost") import GangaCore.Lib.Localhost -logger.debug("Loading LCG") -import GangaCore.Lib.LCG - logger.debug("Loading Condor") import GangaCore.Lib.Condor diff --git a/ganga/GangaCore/Utility/GridShell.py b/ganga/GangaCore/Utility/GridShell.py deleted file mode 100644 index 8d8a3d398f..0000000000 --- a/ganga/GangaCore/Utility/GridShell.py +++ /dev/null @@ -1,125 +0,0 @@ -########################################################################## -# Ganga - a computational task management tool for easy access to Grid resources -# https://github.com/ganga-devs/ganga -# -# $Id: GridShell.py,v 1.1 2008-07-17 16:41:00 moscicki Exp $ -# -# Copyright (C) 2003 The Ganga Project -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -# -# LCG backend -# -# ATLAS/ARDA -# -# Date: August 2006 -########################################################################## - -import os - -from GangaCore.Utility.Shell import Shell -from GangaCore.Utility.Config import getConfig, ConfigError -from GangaCore.Utility.logging import getLogger - -from GangaCore.GPIDev.Credentials import credential_store -from GangaCore.Core.exceptions import InvalidCredentialError - -_allShells = {} - -logger = getLogger() - - -def constructShell(): - """ - Construct a grid shell based upon either the GLITE_SETUP or GLITE_LOCATION as possibly defined by the user - """ - - values = {} - for key in ['X509_CERT_DIR', 'X509_VOMS_DIR']: - try: - values[key] = os.environ[key] - except KeyError: - pass - - config = getConfig('LCG') - - # 1. check if the GLITE_SETUP is changed by user -> take the user's value as session value - # 2. else check if GLITE_LOCATION is defined as env. variable -> do nothing (ie. create shell without any lcg setup) - # 3. else take the default GLITE_SETUP as session value - - if config.getEffectiveLevel('GLITE_SETUP') == 2 and 'GLITE_LOCATION' in os.environ: - s = Shell() - else: - if os.path.exists(config['GLITE_SETUP']): - s = Shell(config['GLITE_SETUP']) - else: - logger.error("Configuration of GLITE for LCG: ") - logger.error("File not found: %s" % config['GLITE_SETUP']) - return None - - for key, val in values.items(): - s.env[key] = val - - # check and set env. variables for default LFC setup - if 'LFC_HOST' not in s.env: - try: - s.env['LFC_HOST'] = config['DefaultLFC'] - except ConfigError: - pass - - if 'LFC_CONNTIMEOUT' not in s.env: - s.env['LFC_CONNTIMEOUT'] = '20' - - if 'LFC_CONRETRY' not in s.env: - s.env['LFC_CONRETRY'] = '0' - - if 'LFC_CONRETRYINT' not in s.env: - s.env['LFC_CONRETRYINT'] = '1' - - return s - - -def getShell(cred_req=None): - """ - Utility function for getting Grid Shell. - - If a cred_req is given then the grid shell which has been cached for this credential requirement is returned. - If a cred_req is given and the credential does not exist in the credential_store then an InvalidCredentialError - exception is raised - - If no cred_req is given then a grid shell is contructed based upon either the GLITE_SETUP or GLITE_LOCATION - as possibly defined by the user - THERE IS NO CACHING MADE HERE IN THIS CASE!!! - - Arguments: - cred_req (ICredentialRequirement): This is the credential requirement required. - """ - - if cred_req is not None: - if not credential_store[cred_req].is_valid(): - logger.info('GridShell.getShell given credential which is invalid') - raise InvalidCredentialError() - - if cred_req in _allShells.keys(): - return _allShells[cred_req] - - constructed_shell = constructShell() - - if cred_req is not None: - constructed_shell.env['X509_USER_PROXY'] = credential_store[cred_req].location - - _allShells[cred_req] = constructed_shell - - return constructed_shell diff --git a/ganga/GangaCore/__init__.py b/ganga/GangaCore/__init__.py index 2f70fca981..f5865f9545 100644 --- a/ganga/GangaCore/__init__.py +++ b/ganga/GangaCore/__init__.py @@ -214,7 +214,7 @@ def getLCGRootPath(): Set the default plugin in a given category. For example: default_applications = DaVinci -default_backends = LCG +default_backends = Dirac ''') # ------------------------------------------------ @@ -253,7 +253,6 @@ def getLCGRootPath(): poll_config.addOption('default_backend_poll_rate', 30, 'Default rate for polling job status in the thread pool. This is the default value for all backends.') poll_config.addOption('Local', 10, 'Poll rate for Local backend.') -poll_config.addOption('LCG', 30, 'Poll rate for LCG backend.') poll_config.addOption('Condor', 30, 'Poll rate for Condor backend.') poll_config.addOption('gLite', 30, 'Poll rate for gLite backend.') poll_config.addOption('LSF', 20, 'Poll rate for LSF backend.') @@ -327,17 +326,9 @@ def getLCGRootPath(): # ------------------------------------------------ # Root root_config = makeConfig('ROOT', "Options for Root backend") -# Not needed when we can't do option substitution internally but support it at the .gangarc level!!!!! 27-09-2015 rcurrie -# config.addOption('lcgpath', getLCGRootPath(), -# 'Path of the LCG release that the ROOT project and it\'s externals are taken from') root_config.addOption('arch', 'x86_64-slc6-gcc48-opt', 'Architecture of ROOT') -# Auto-Interporatation doesn't appear to work when setting the default value -# config.addOption('location', '${lcgpath}/ROOT/${version}/${arch}/', 'Location of ROOT') root_config.addOption('location', '%s/ROOT/6.04.02/x86_64-slc6-gcc48-opt' % getLCGRootPath(), 'Location of ROOT') root_config.addOption('path', '', 'Set to a specific ROOT version. Will override other options.') -# Doesn't appear to work see above ^^^ -# config.addOption('pythonhome', '${lcgpath}/Python/${pythonversion}/${arch}/', -# 'Location of the python used for execution of PyROOT script') root_config.addOption('pythonhome', '%s/Python/2.7.9.p1/x86_64-slc6-gcc48-opt' % getLCGRootPath(), 'Location of the python used for execution of PyROOT script') root_config.addOption('pythonversion', '2.7.9.p1', "Version number of python used for execution python ROOT script") @@ -351,111 +342,6 @@ def getLCGRootPath(): local_config.addOption( 'location', None, 'The location where the workdir will be created. If None it defaults to the value of $TMPDIR') -# ------------------------------------------------ -# LCG -lcg_config = makeConfig('LCG', 'LCG/gLite/EGEE configuration parameters') -# gproxy_config = getConfig('GridProxy_Properties') - -lcg_config.addOption('GLITE_SETUP', '/afs/cern.ch/sw/ganga/install/config/grid_env_auto.sh', - 'sets the LCG-UI environment setup script for the GLITE middleware', - filter=GangaCore.Utility.Config.expandvars) - -lcg_config.addOption('VirtualOrganisation', '', - 'sets the name of the grid virtual organisation') - -lcg_config.addOption('Config', '', 'sets the generic LCG-UI configuration script for the GLITE workload management system', - filter=GangaCore.Utility.Config.expandvars) - -lcg_config.addOption( - 'AllowedCEs', '', 'sets allowed computing elements by a regular expression') -lcg_config.addOption( - 'ExcludedCEs', '', 'sets excluded computing elements by a regular expression') - -lcg_config.addOption( - 'GLITE_WMS_WMPROXY_ENDPOINT', '', 'sets the WMProxy service to be contacted') -lcg_config.addOption('GLITE_ALLOWED_WMS_LIST', [], '') - -lcg_config.addOption('MyProxyServer', 'myproxy.cern.ch', 'sets the myproxy server') -lcg_config.addOption('RetryCount', 3, 'sets maximum number of job retry') -lcg_config.addOption( - 'ShallowRetryCount', 10, 'sets maximum number of job shallow retry') - -lcg_config.addOption( - 'Rank', '', 'sets the ranking rule for picking up computing element') -lcg_config.addOption('ReplicaCatalog', '', 'sets the replica catalogue server') -lcg_config.addOption('StorageIndex', '', 'sets the storage index') - -lcg_config.addOption( - 'DefaultSE', 'srm.cern.ch', 'sets the default storage element') -lcg_config.addOption('DefaultSRMToken', '', - 'sets the space token for storing temporary files (e.g. oversized input sandbox)') -lcg_config.addOption( - 'DefaultLFC', 'prod-lfc-shared-central.cern.ch', 'sets the file catalogue server') -lcg_config.addOption('BoundSandboxLimit', 10 * 1024 * 1024, - ('sets the size limitation of the input sandbox, oversized input sandbox will be pre-uploaded ' - 'to the storage element specified by \'DefaultSE\' in the area specified by \'DefaultSRMToken\'')) - -lcg_config.addOption('Requirements', 'GangaCore.Lib.LCG.LCGRequirements', - 'sets the full qualified class name for other specific LCG job requirements') - -lcg_config.addOption('SandboxCache', 'GangaCore.Lib.LCG.LCGSandboxCache', - 'sets the full qualified class name for handling the oversized input sandbox') - -lcg_config.addOption('GliteBulkJobSize', 50, - 'sets the maximum number of nodes (i.e. subjobs) in a gLite bulk job') - -lcg_config.addOption('SubmissionThread', 10, - 'sets the number of concurrent threads for job submission to gLite WMS') - -lcg_config.addOption( - 'SubmissionTimeout', 300, 'sets the gLite job submission timeout in seconds') - -lcg_config.addOption('StatusPollingTimeout', 300, - 'sets the gLite job status polling timeout in seconds') - -lcg_config.addOption('OutputDownloaderThread', 10, - 'sets the number of concurrent threads for downloading job\'s output sandbox from gLite WMS') - -lcg_config.addOption('SandboxTransferTimeout', 60, - 'sets the transfer timeout of the oversized input sandbox') - -lcg_config.addOption( - 'JobLogHandler', 'WMS', 'sets the way the job\'s stdout/err are being handled.') - -lcg_config.addOption('MatchBeforeSubmit', False, - ('sets to True will do resource matching before submitting jobs, ' - 'jobs without any matched resources will fail the submission')) - -lcg_config.addOption('IgnoreGliteScriptHeader', False, - ('sets to True will load script-based glite-wms-* commands forcely with current python, ' - 'a trick for 32/64 bit compatibility issues.')) - -# add ARC specific configuration options -# lcg_config.addOption('ArcInputSandboxBaseURI', '', 'sets the baseURI for getting the input sandboxes for the job') -# lcg_config.addOption('ArcOutputSandboxBaseURI', '', 'sets the baseURI for putting the output sandboxes for the job') -lcg_config.addOption('ArcWaitTimeBeforeStartingMonitoring', 240, - 'Time (s) to wait after submission before starting to monitor ARC jobs to ensure they are in the system') -lcg_config.addOption('ArcJobListFile', "~/.arc/gangajobs.xml", - ('File to store ARC job info in when submitting and monitoring, i.e. argument to "-j" option in arcsub. ' - 'Ganga default is different to ARC default (~/.arc/jobs.xml) to keep them separate.')) -lcg_config.addOption('ArcConfigFile', "", - ('Config file for ARC submission. Use to specify CEs, etc. Default is blank which will mean no config ' - 'file is specified and the default (~/arc/client.conf) is used')) -lcg_config.addOption('ArcCopyCommand', 'arcget', - 'sets the copy command for ARC when dealing with sandboxes') -# lcg_config.addOption('ArcPrologue','','sets the prologue script') -# lcg_config.addOption('ArcEpilogue','','sets the epilogue script') - -# add CREAM specific configuration options -lcg_config.addOption('CreamInputSandboxBaseURI', '', - 'sets the baseURI for getting the input sandboxes for the job') -lcg_config.addOption('CreamOutputSandboxBaseURI', '', - 'sets the baseURI for putting the output sandboxes for the job') -lcg_config.addOption('CreamCopyCommand', 'gfal-copy-url', - 'sets the copy command for CREAM when dealing with sandboxes') -# lcg_config.addOption('CreamPrologue','','sets the prologue script') -# lcg_config.addOption('CreamEpilogue','','sets the epilogue script') - # ------------------------------------------------ # GridSimulator gridsim_config = makeConfig('GridSimulator', 'Grid Simulator configuration parameters') @@ -801,29 +687,6 @@ def filefilter(fn): 'uploadOptions': LocalUpOpt}, LocalFileExt) - -# LCGSEFILE - -LCGSEBakPost = {'LSF': 'client', - 'PBS': 'client', - 'SGE': 'client', - 'Slurm': 'client', - 'Condor': 'client', - 'LCG': 'WN', - 'CREAM': 'WN', - 'ARC': 'WN', - 'Local': 'WN', - 'Interactive': 'WN' - } -LCGSEUpOpt = {'LFC_HOST': 'lfc-dteam.cern.ch', 'dest_SRM': 'srm-public.cern.ch'} -LCGSEFileExt = docstr_Ext % ('LCG SE', 'LCG') - -output_config.addOption('LCGSEFile', - {'fileExtensions': ['*.root', '*.asd'], - 'backendPostprocess': LCGSEBakPost, - 'uploadOptions': LCGSEUpOpt}, - LCGSEFileExt) - # DiracFile # TODO MOVE ME TO GANGADIRAC!!! # Should this be in Core or elsewhere? @@ -833,8 +696,6 @@ def filefilter(fn): 'SGE': 'WN', 'Slurm': 'WN', 'Condor': 'WN', - 'LCG': 'WN', - 'CREAM': 'WN', 'ARC': 'WN', 'Local': 'WN', 'Interactive': 'WN'} @@ -855,7 +716,6 @@ def filefilter(fn): 'SGE': 'client', 'Slurm': 'client', 'Condor': 'client', - 'LCG': 'client', 'CREAM': 'client', 'ARC': 'client', 'Local': 'client', @@ -927,7 +787,6 @@ def filefilter(fn): 'Condor': 'WN', 'SGE': 'WN', 'Slurm': 'WN', - 'LCG': 'client', 'CREAM': 'client', 'ARC': 'client', 'Local': 'WN', @@ -942,7 +801,6 @@ def filefilter(fn): massStorageFileExt) sharedFileBackendPost = {'LSF': 'WN', - 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'WN', diff --git a/ganga/GangaCore/old_test/Internals/TestObjectConfig.py b/ganga/GangaCore/old_test/Internals/TestObjectConfig.py index 37e03ecfdf..87f5b98ee8 100755 --- a/ganga/GangaCore/old_test/Internals/TestObjectConfig.py +++ b/ganga/GangaCore/old_test/Internals/TestObjectConfig.py @@ -115,7 +115,7 @@ def test003_failures(self): ## jobConfig = getConfig( "Job_Properties" ) # first set something legal -# jobConfig.setSessionOption("backend","LCG") +# jobConfig.setSessionOption("backend","Dirac") ## job1 = Job() # now something illegal diff --git a/ganga/GangaCore/test/GPI/Bugs/TestSavannah40220.py b/ganga/GangaCore/test/GPI/Bugs/TestSavannah40220.py deleted file mode 100644 index 05a8c8e478..0000000000 --- a/ganga/GangaCore/test/GPI/Bugs/TestSavannah40220.py +++ /dev/null @@ -1,22 +0,0 @@ - - -from GangaCore.testlib.GangaUnitTest import GangaUnitTest - - -class TestSavannah40220(GangaUnitTest): - def test_Savannah40220(self): - from GangaCore.GPI import LCG, Job, export, load - - j = Job(backend=LCG()) - import tempfile - f, self.fname = tempfile.mkstemp() - - self.assertTrue(export(j, self.fname)) - - self.assertTrue(load(self.fname)) - - def tearDown(self): - import os - os.remove(self.fname) - - super(TestSavannah40220, self).tearDown() diff --git a/ganga/GangaCore/test/GPI/Credentials/TestCredentials.py b/ganga/GangaCore/test/GPI/Credentials/TestCredentials.py index cbde67b32f..9a44a8613a 100644 --- a/ganga/GangaCore/test/GPI/Credentials/TestCredentials.py +++ b/ganga/GangaCore/test/GPI/Credentials/TestCredentials.py @@ -39,51 +39,3 @@ def test_voms_proxy_life_cycle(gpi): explicit_default_cred = credential_store.create(VomsProxy(vo=getConfig('LCG')['VirtualOrganisation'])) assert explicit_default_cred == default_cred assert credential_store[VomsProxy(vo=getConfig('LCG')['VirtualOrganisation'])] - - -@external -def test_lcg(gpi): - from GangaCore.GPI import Job, LCG, VomsProxy, credential_store, jobs - - logger.info('Submitting first job') - j1 = Job() - j1.backend = LCG() - j1.submit() - - logger.info('Submitting second job') - j2 = Job() - j2.backend = LCG(credential_requirements=VomsProxy(vo='lhcb')) - j2.submit() - - # Wipe out all the credentials to make sure they can be created on cue - for cred in credential_store: - cred.destroy() - - logger.info('Monitoring jobs') - for j in jobs: - stripProxy(j).backend.master_updateMonitoringInformation([stripProxy(j)]) - - # Wipe out all the credentials to make sure they can be created on cue - credential_store.clear() - - logger.info('Monitoring jobs') - for j in jobs: - stripProxy(j).backend.master_updateMonitoringInformation([stripProxy(j)]) - - # Wipe out all the credentials to make sure they can be created on cue - for cred in credential_store: - cred.destroy() - credential_store.clear() - - logger.info('Monitoring jobs') - for j in jobs: - stripProxy(j).backend.master_updateMonitoringInformation([stripProxy(j)]) - - # Wipe out all the credentials to make sure they can be created on cue - for cred in credential_store: - cred.destroy() - credential_store.clear() - - logger.info('Killing jobs') - for j in jobs: - j.kill() diff --git a/ganga/GangaCore/test/GPI/FileTests/TestLocalFileClient.py b/ganga/GangaCore/test/GPI/FileTests/TestLocalFileClient.py index 609c733544..dd65a17a47 100644 --- a/ganga/GangaCore/test/GPI/FileTests/TestLocalFileClient.py +++ b/ganga/GangaCore/test/GPI/FileTests/TestLocalFileClient.py @@ -19,7 +19,7 @@ class TestLocalFileClient(GangaUnitTest): # This sets up a LocalFileConfiguration which works by placing a file on local storage somewhere we can test using standard tools LocalFileConfig = {'fileExtensions': [''], 'uploadOptions': {}, - 'backendPostprocess': {'LSF': 'client', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', + 'backendPostprocess': {'LSF': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'client', 'Interactive': 'client', 'Local': 'client', 'CREAM': 'client'}} _ext = '.root' diff --git a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageClient.py b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageClient.py index c610cfa3b2..dc327b8f0e 100644 --- a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageClient.py +++ b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageClient.py @@ -29,7 +29,7 @@ class TestMassStorageClient(GangaUnitTest): MassStorageTestConfig = {'defaultProtocol': 'file://', 'fileExtensions': [''], 'uploadOptions': {'path': outputFilePath, 'cp_cmd': 'cp', 'ls_cmd': 'ls', 'mkdir_cmd': 'mkdir'}, - 'backendPostprocess': {'LSF': 'client', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', + 'backendPostprocess': {'LSF': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'client', 'Interactive': 'client', 'Local': 'client', 'CREAM': 'client'}} _ext = '.root' diff --git a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageGetPut.py b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageGetPut.py index 330271c44f..339d10d441 100644 --- a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageGetPut.py +++ b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageGetPut.py @@ -31,7 +31,7 @@ class TestMassStorageGetPut(GangaUnitTest): MassStorageTestConfig = {'defaultProtocol': 'file://', 'fileExtensions': [''], 'uploadOptions': {'path': outputFilePath, 'cp_cmd': 'cp', 'ls_cmd': 'ls', 'mkdir_cmd': 'mkdir -p'}, - 'backendPostprocess': {'LSF': 'client', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', + 'backendPostprocess': {'LSF': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'client', 'Interactive': 'client', 'Local': 'client', 'CREAM': 'client'}} def setUp(self): diff --git a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageInput.py b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageInput.py index 91dc41fe2b..bf45cc5b88 100644 --- a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageInput.py +++ b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageInput.py @@ -29,7 +29,7 @@ class TestMassStorageClientInput(GangaUnitTest): MassStorageTestConfig = {'defaultProtocol': 'file://', 'fileExtensions': [''], 'uploadOptions': {'path': outputFilePath, 'cp_cmd': 'cp', 'ls_cmd': 'ls', 'mkdir_cmd': 'mkdir -p'}, - 'backendPostprocess': {'LSF': 'client', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', + 'backendPostprocess': {'LSF': 'client', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'client', 'Interactive': 'client', 'Local': 'client', 'CREAM': 'client'}} def setUp(self): diff --git a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageWN.py b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageWN.py index 4d487fa43a..d69a265dbc 100644 --- a/ganga/GangaCore/test/GPI/FileTests/TestMassStorageWN.py +++ b/ganga/GangaCore/test/GPI/FileTests/TestMassStorageWN.py @@ -29,7 +29,7 @@ class TestMassStorageWN(GangaUnitTest): MassStorageTestConfig = {'defaultProtocol': 'file://', 'fileExtensions': [''], 'uploadOptions': {'path': outputFilePath, 'cp_cmd': 'cp', 'ls_cmd': 'ls', 'mkdir_cmd': 'mkdir'}, - 'backendPostprocess': {'LSF': 'WN', 'LCG': 'client', 'ARC': 'client', 'Dirac': 'client', + 'backendPostprocess': {'LSF': 'WN', 'ARC': 'client', 'Dirac': 'client', 'PBS': 'WN', 'Interactive': 'client', 'Local': 'WN', 'CREAM': 'client'}} standardFormat = '{jid}/{fname}' diff --git a/ganga/GangaCore/test/GPI/LCG/TestCREAM.py b/ganga/GangaCore/test/GPI/LCG/TestCREAM.py deleted file mode 100644 index 0bd6fe7bef..0000000000 --- a/ganga/GangaCore/test/GPI/LCG/TestCREAM.py +++ /dev/null @@ -1,37 +0,0 @@ -import re -import subprocess - -from GangaCore.Utility.Config import getConfig - -from GangaCore.testlib.mark import external, requires_cred - -from GangaCore.GPIDev.Credentials.VomsProxy import VomsProxy - - -@external -@requires_cred(VomsProxy(), 'CREAM requires a Voms proxy to submit a job') -def test_job_kill(gpi): - from GangaCore.GPI import Job, CREAM - - vo = getConfig('LCG')['VirtualOrganisation'] - call = subprocess.Popen(['lcg-infosites', 'ce', 'cream', '--vo', vo], stdout=subprocess.PIPE) - stdout, stderr = call.communicate() - - # Based on output of: - # - # # CPU Free Total Jobs Running Waiting ComputingElement - # ---------------------------------------------------------------- - # 19440 2089 17760 17351 409 arc-ce01.gridpp.rl.ac.uk:2811/nordugrid-Condor-grid3000M - # 3240 0 1594 1250 344 carceri.hec.lancs.ac.uk:8443/cream-sge-grid - # 1176 30 1007 587 420 ce01.tier2.hep.manchester.ac.uk:8443/cream-pbs-long - # - # Select the CREAM CEs (URL path starts with '/cream') and how many free slots they have - ces = re.findall( - r'^\s*\d+\s*(?P\d+)\s*\d+\s*\d+\s*\d+\s*(?P[^:/\s]+uk:\d+/cream.*)$', stdout, re.MULTILINE) - # Grab the one with the most empty slots - ce = sorted(ces)[-1][1] - - j = Job() - j.backend = CREAM(CE=ce) - j.submit() - j.kill() diff --git a/ganga/GangaCore/test/GPI/LCG/TestLCG.py b/ganga/GangaCore/test/GPI/LCG/TestLCG.py deleted file mode 100644 index 04d453c7fd..0000000000 --- a/ganga/GangaCore/test/GPI/LCG/TestLCG.py +++ /dev/null @@ -1,104 +0,0 @@ -from GangaCore.GPIDev.Base.Proxy import stripProxy - -try: - from unittest.mock import patch -except ImportError: - from mock import patch - -from GangaCore.testlib.mark import external, requires_cred -from GangaCore.testlib.monitoring import run_until_completed - -from GangaCore.GPIDev.Credentials.VomsProxy import VomsProxy - - -@external -@requires_cred(VomsProxy(), 'LCG Requires a Voms proxy for testing') -def test_job_submit_and_monitor(gpi): - from GangaCore.GPI import Job, LCG - - j = Job() - j.backend = LCG() - j.submit() - - assert j.status != 'new' - stripProxy(LCG).master_updateMonitoringInformation([stripProxy(j)]) - - -@external -@requires_cred(VomsProxy(), 'LCG Requires a Voms proxy for testing') -def test_job_kill(gpi): - from GangaCore.GPI import Job, LCG - - j = Job() - j.backend = LCG() - j.submit() - j.kill() - - -@requires_cred(VomsProxy(), 'LCG Requires a Voms proxy for testing') -def test_submit_kill_resubmit(gpi): - """ - Test that a simple submit-kill-resubmit-kill cycle works - """ - - from GangaCore.GPI import Job, LCG - j = Job() - j.backend = LCG() - - with patch('GangaCore.Lib.LCG.Grid.submit', return_value='https://example.com:9000/42') as submit: - j.submit() - submit.assert_called_once() - assert j.backend.id == 'https://example.com:9000/42' - - with patch('GangaCore.Lib.LCG.Grid.cancel', return_value=True) as cancel: - j.kill() - cancel.assert_called_once() - assert j.status == 'killed' - - with patch('GangaCore.Lib.LCG.Grid.submit', return_value='https://example.com:9000/43') as submit: - j.resubmit() - submit.assert_called_once() - assert j.backend.id == 'https://example.com:9000/43' - - with patch('GangaCore.Lib.LCG.Grid.cancel', return_value=True): - j.kill() - - -@requires_cred(VomsProxy(), 'LCG Requires a Voms proxy for testing') -def test_submit_monitor(gpi): - """ - Test that an LCG job can be monitored - """ - - from GangaCore.GPI import Job, LCG - j = Job() - j.backend = LCG() - - job_id = 'https://example.com:9000/42' - - with patch('GangaCore.Lib.LCG.Grid.submit', return_value=job_id) as submit: - j.submit() - submit.assert_called_once() - assert j.backend.id == job_id - - status_info = { - 'status': 'Submitted', - 'name': '', - 'destination': '', - 'reason': '', - 'exit': '', - 'is_node': False, - 'id': job_id - } - - status_results = [ - ([status_info], []), # Once for the proper status call - ([], []) # Once for the bulk monitoring call - ] - - with patch('GangaCore.Lib.LCG.Grid.status', side_effect=status_results) as status: - stripProxy(j).backend.master_updateMonitoringInformation([stripProxy(j)]) - assert status.call_count >= 1 - - with patch('GangaCore.Lib.LCG.Grid.cancel', return_value=True): - j.kill() diff --git a/ganga/GangaCore/test/GPI/LCG/__init__.py b/ganga/GangaCore/test/GPI/LCG/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ganga/GangaCore/test/Unit/Files/TestLCGSEFile.py b/ganga/GangaCore/test/Unit/Files/TestLCGSEFile.py deleted file mode 100644 index f27da06bdf..0000000000 --- a/ganga/GangaCore/test/Unit/Files/TestLCGSEFile.py +++ /dev/null @@ -1,29 +0,0 @@ -import importlib -import os -from unittest.mock import MagicMock, patch - -from ganga.GangaCore.testlib.GangaUnitTest import GangaUnitTest - - -class TestLCGSEFile(GangaUnitTest): - @patch('GangaCore.GPIDev.Credentials.CredentialStore.CredentialStore.__getitem__') - def test_put_lcsgefile(self, creds_mock): - """Test LCGSEFile's put method which is used to upload a local file to the grid.""" - from GangaCore.GPI import LCGSEFile - - # Manually import the LCGSEFile module due to shared module and class name confusion - l_module = importlib.import_module('GangaCore.GPIDev.Lib.File.LCGSEFile') - grid_shell_mock = MagicMock() - l_module.getShell = grid_shell_mock - grid_shell_mock.return_value.cmd1.return_value = (0, '', '') - - filename = 'test.txt' - f = LCGSEFile(filename) - localdir = '/' - f.localDir = localdir - f.put() - - grid_shell_mock.assert_called() - upload_cmd = grid_shell_mock.return_value.cmd1.call_args[0][0] - self.assertTrue(f.getUploadCmd() in upload_cmd) - self.assertTrue(os.path.join(localdir, filename) in upload_cmd) diff --git a/ganga/GangaCore/test/Unit/LCG/TestGrid.py b/ganga/GangaCore/test/Unit/LCG/TestGrid.py deleted file mode 100644 index 9244d4e06a..0000000000 --- a/ganga/GangaCore/test/Unit/LCG/TestGrid.py +++ /dev/null @@ -1,41 +0,0 @@ - - -from GangaCore.Utility.GridShell import Shell - -from ..Credentials.TestCredentialStore import FakeCred - - -def test_submit_bad_output(mocker): - """ - Test that the external command returning bad data causes the job to fail - """ - __set_submit_option__ = mocker.patch('GangaCore.Lib.LCG.Grid.__set_submit_option__', return_value=' ') - mocker.patch('GangaCore.Lib.LCG.Grid.getShell', return_value=Shell) - cmd1 = mocker.patch('GangaCore.Utility.GridShell.Shell.cmd1', return_value=(0, 'some bad output', False)) - - from GangaCore.Lib.LCG import Grid - job_url = Grid.submit('/some/path', cred_req=FakeCred()) - - assert __set_submit_option__.call_count == 1 - assert cmd1.call_count == 1 - - assert job_url is None - - -def test_submit(mocker): - """ - Test that a job submit succeeds with valid input - """ - __set_submit_option__ = mocker.patch('GangaCore.Lib.LCG.Grid.__set_submit_option__', return_value=' ') - mocker.patch('GangaCore.Lib.LCG.Grid.getShell', return_value=Shell) - cmd1 = mocker.patch('GangaCore.Utility.GridShell.Shell.cmd1', - return_value=(0, 'https://example.com:9000/some_url', False)) - - from GangaCore.Lib.LCG import Grid - job_url = Grid.submit('/some/path', cred_req=FakeCred()) - - assert __set_submit_option__.call_count == 1 - assert cmd1.call_count == 1 - - assert '/some/path' in cmd1.call_args[0][0], 'JDL path was not passed correctly' - assert job_url == 'https://example.com:9000/some_url' diff --git a/ganga/GangaCore/test/Unit/LCG/__init__.py b/ganga/GangaCore/test/Unit/LCG/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/ganga/GangaND280/Highland/Highland.py b/ganga/GangaND280/Highland/Highland.py index a42248e5ab..f968603893 100644 --- a/ganga/GangaND280/Highland/Highland.py +++ b/ganga/GangaND280/Highland/Highland.py @@ -135,13 +135,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -157,12 +150,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('Highland','SGE', RTHandler) allHandlers.add('Highland','Slurm', RTHandler) allHandlers.add('Highland','Condor', RTHandler) -allHandlers.add('Highland','LCG', LCGRTHandler) allHandlers.add('Highland','gLite', gLiteRTHandler) allHandlers.add('Highland','TestSubmitter', RTHandler) allHandlers.add('Highland','Interactive', RTHandler) allHandlers.add('Highland','Batch', RTHandler) allHandlers.add('Highland','Cronus', RTHandler) -allHandlers.add('Highland','Remote', LCGRTHandler) -allHandlers.add('Highland','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280.ini b/ganga/GangaND280/ND280.ini index 21e629d03e..26c6b46b1d 100644 --- a/ganga/GangaND280/ND280.ini +++ b/ganga/GangaND280/ND280.ini @@ -6,18 +6,7 @@ RUNTIME_PATH = GangaND280 ND280DCacheDatasetCommandStr = {'TRIUMF' : 'curl --compressed -sk --header "Accept: text/plain" https://user:password@nd280web.nd280.org/full/path/to/https/nd280data/%s'} ND280DCacheDatasetFileBasePath = {'TRIUMF' : 'dcap://t2ksrm.nd280.org/full/path/to/dcap/nd280data/'} -[LCG] -DefaultLFC = lfc.gridpp.rl.ac.uk -#EDG_ENABLE = False -#GLITE_ENABLE = False -MyProxyServer = myproxy.gridpp.rl.ac.uk -VirtualOrganisation = t2k.org - [defaults_GridProxy] minValidity = 10:00 validityAtCreation = 24:00 voms = t2k.org - -#[defaults_LCG] -#middleware=GLITE - diff --git a/ganga/GangaND280/ND280Control/runND280.py b/ganga/GangaND280/ND280Control/runND280.py index 1f1e9a40c5..f2571eb3bb 100644 --- a/ganga/GangaND280/ND280Control/runND280.py +++ b/ganga/GangaND280/ND280Control/runND280.py @@ -157,13 +157,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -179,12 +172,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280','SGE', RTHandler) allHandlers.add('runND280','Slurm', RTHandler) allHandlers.add('runND280','Condor', RTHandler) -allHandlers.add('runND280','LCG', LCGRTHandler) allHandlers.add('runND280','gLite', gLiteRTHandler) allHandlers.add('runND280','TestSubmitter', RTHandler) allHandlers.add('runND280','Interactive', RTHandler) allHandlers.add('runND280','Batch', RTHandler) allHandlers.add('runND280','Cronus', RTHandler) -allHandlers.add('runND280','Remote', LCGRTHandler) -allHandlers.add('runND280','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Control/runND280CosMC.py b/ganga/GangaND280/ND280Control/runND280CosMC.py index c8e4412c70..77c5345899 100644 --- a/ganga/GangaND280/ND280Control/runND280CosMC.py +++ b/ganga/GangaND280/ND280Control/runND280CosMC.py @@ -157,13 +157,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -179,12 +172,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280CosMC','SGE', RTHandler) allHandlers.add('runND280CosMC','Slurm', RTHandler) allHandlers.add('runND280CosMC','Condor', RTHandler) -allHandlers.add('runND280CosMC','LCG', LCGRTHandler) allHandlers.add('runND280CosMC','gLite', gLiteRTHandler) allHandlers.add('runND280CosMC','TestSubmitter', RTHandler) allHandlers.add('runND280CosMC','Interactive', RTHandler) allHandlers.add('runND280CosMC','Batch', RTHandler) allHandlers.add('runND280CosMC','Cronus', RTHandler) -allHandlers.add('runND280CosMC','Remote', LCGRTHandler) -allHandlers.add('runND280CosMC','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Control/runND280CtrlSmpl.py b/ganga/GangaND280/ND280Control/runND280CtrlSmpl.py index b65a8d99d6..477643be49 100644 --- a/ganga/GangaND280/ND280Control/runND280CtrlSmpl.py +++ b/ganga/GangaND280/ND280Control/runND280CtrlSmpl.py @@ -155,13 +155,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -177,12 +170,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280CtrlSmpl','SGE', RTHandler) allHandlers.add('runND280CtrlSmpl','Slurm', RTHandler) allHandlers.add('runND280CtrlSmpl','Condor', RTHandler) -allHandlers.add('runND280CtrlSmpl','LCG', LCGRTHandler) allHandlers.add('runND280CtrlSmpl','gLite', gLiteRTHandler) allHandlers.add('runND280CtrlSmpl','TestSubmitter', RTHandler) allHandlers.add('runND280CtrlSmpl','Interactive', RTHandler) allHandlers.add('runND280CtrlSmpl','Batch', RTHandler) allHandlers.add('runND280CtrlSmpl','Cronus', RTHandler) -allHandlers.add('runND280CtrlSmpl','Remote', LCGRTHandler) -allHandlers.add('runND280CtrlSmpl','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Control/runND280Kin.py b/ganga/GangaND280/ND280Control/runND280Kin.py index e6ff02fcc7..0cf2f0a676 100644 --- a/ganga/GangaND280/ND280Control/runND280Kin.py +++ b/ganga/GangaND280/ND280Control/runND280Kin.py @@ -147,13 +147,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -169,12 +162,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280Kin','SGE', RTHandler) allHandlers.add('runND280Kin','Slurm', RTHandler) allHandlers.add('runND280Kin','Condor', RTHandler) -allHandlers.add('runND280Kin','LCG', LCGRTHandler) allHandlers.add('runND280Kin','gLite', gLiteRTHandler) allHandlers.add('runND280Kin','TestSubmitter', RTHandler) allHandlers.add('runND280Kin','Interactive', RTHandler) allHandlers.add('runND280Kin','Batch', RTHandler) allHandlers.add('runND280Kin','Cronus', RTHandler) -allHandlers.add('runND280Kin','Remote', LCGRTHandler) -allHandlers.add('runND280Kin','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Control/runND280RDP.py b/ganga/GangaND280/ND280Control/runND280RDP.py index 9a83f24043..5f3fed3a11 100644 --- a/ganga/GangaND280/ND280Control/runND280RDP.py +++ b/ganga/GangaND280/ND280Control/runND280RDP.py @@ -157,13 +157,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.Lib.gLite import gLiteJobConfig @@ -178,12 +171,9 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280RDP','SGE', RTHandler) allHandlers.add('runND280RDP','Slurm', RTHandler) allHandlers.add('runND280RDP','Condor', RTHandler) -allHandlers.add('runND280RDP','LCG', LCGRTHandler) allHandlers.add('runND280RDP','gLite', gLiteRTHandler) allHandlers.add('runND280RDP','TestSubmitter', RTHandler) allHandlers.add('runND280RDP','Interactive', RTHandler) allHandlers.add('runND280RDP','Batch', RTHandler) allHandlers.add('runND280RDP','Cronus', RTHandler) -allHandlers.add('runND280RDP','Remote', LCGRTHandler) -allHandlers.add('runND280RDP','CREAM', LCGRTHandler) allHandlers.add('runND280RDP','Batch', RTHandler) diff --git a/ganga/GangaND280/ND280Control/runND280SandMC.py b/ganga/GangaND280/ND280Control/runND280SandMC.py index d2b5a8d18c..ebb9b24d64 100644 --- a/ganga/GangaND280/ND280Control/runND280SandMC.py +++ b/ganga/GangaND280/ND280Control/runND280SandMC.py @@ -162,13 +162,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): c = StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) return c - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -184,12 +177,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('runND280SandMC','SGE', RTHandler) allHandlers.add('runND280SandMC','Slurm', RTHandler) allHandlers.add('runND280SandMC','Condor', RTHandler) -allHandlers.add('runND280SandMC','LCG', LCGRTHandler) allHandlers.add('runND280SandMC','gLite', gLiteRTHandler) allHandlers.add('runND280SandMC','TestSubmitter', RTHandler) allHandlers.add('runND280SandMC','Interactive', RTHandler) allHandlers.add('runND280SandMC','Batch', RTHandler) allHandlers.add('runND280SandMC','Cronus', RTHandler) -allHandlers.add('runND280SandMC','Remote', LCGRTHandler) -allHandlers.add('runND280SandMC','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Executable/ND280Executable.py b/ganga/GangaND280/ND280Executable/ND280Executable.py index 49deecd3ee..757a76c030 100644 --- a/ganga/GangaND280/ND280Executable/ND280Executable.py +++ b/ganga/GangaND280/ND280Executable/ND280Executable.py @@ -141,13 +141,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -163,12 +156,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('ND280Executable','SGE', RTHandler) allHandlers.add('ND280Executable','Slurm', RTHandler) allHandlers.add('ND280Executable','Condor', RTHandler) -allHandlers.add('ND280Executable','LCG', LCGRTHandler) allHandlers.add('ND280Executable','gLite', gLiteRTHandler) allHandlers.add('ND280Executable','TestSubmitter', RTHandler) allHandlers.add('ND280Executable','Interactive', RTHandler) allHandlers.add('ND280Executable','Batch', RTHandler) allHandlers.add('ND280Executable','Cronus', RTHandler) -allHandlers.add('ND280Executable','Remote', LCGRTHandler) -allHandlers.add('ND280Executable','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280RecoValidation/RecoPlusVFT.py b/ganga/GangaND280/ND280RecoValidation/RecoPlusVFT.py index 667941a644..a0f5b57b90 100644 --- a/ganga/GangaND280/ND280RecoValidation/RecoPlusVFT.py +++ b/ganga/GangaND280/ND280RecoValidation/RecoPlusVFT.py @@ -185,13 +185,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -207,12 +200,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('RecoPlusVFT','SGE', RTHandler) allHandlers.add('RecoPlusVFT','Slurm', RTHandler) allHandlers.add('RecoPlusVFT','Condor', RTHandler) -allHandlers.add('RecoPlusVFT','LCG', LCGRTHandler) allHandlers.add('RecoPlusVFT','gLite', gLiteRTHandler) allHandlers.add('RecoPlusVFT','TestSubmitter', RTHandler) allHandlers.add('RecoPlusVFT','Interactive', RTHandler) allHandlers.add('RecoPlusVFT','Batch', RTHandler) allHandlers.add('RecoPlusVFT','Cronus', RTHandler) -allHandlers.add('RecoPlusVFT','Remote', LCGRTHandler) -allHandlers.add('RecoPlusVFT','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280RecoValidation/VFT_make_ana.py b/ganga/GangaND280/ND280RecoValidation/VFT_make_ana.py index 1982852266..f8f7fef520 100644 --- a/ganga/GangaND280/ND280RecoValidation/VFT_make_ana.py +++ b/ganga/GangaND280/ND280RecoValidation/VFT_make_ana.py @@ -182,13 +182,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -204,12 +197,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('VFT_make_ana','SGE', RTHandler) allHandlers.add('VFT_make_ana','Slurm', RTHandler) allHandlers.add('VFT_make_ana','Condor', RTHandler) -allHandlers.add('VFT_make_ana','LCG', LCGRTHandler) allHandlers.add('VFT_make_ana','gLite', gLiteRTHandler) allHandlers.add('VFT_make_ana','TestSubmitter', RTHandler) allHandlers.add('VFT_make_ana','Interactive', RTHandler) allHandlers.add('VFT_make_ana','Batch', RTHandler) allHandlers.add('VFT_make_ana','Cronus', RTHandler) -allHandlers.add('VFT_make_ana','Remote', LCGRTHandler) -allHandlers.add('VFT_make_ana','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280RecoValidation/oaReconPlusoaAnalysis.py b/ganga/GangaND280/ND280RecoValidation/oaReconPlusoaAnalysis.py index 166954cde8..4bf0e0222f 100644 --- a/ganga/GangaND280/ND280RecoValidation/oaReconPlusoaAnalysis.py +++ b/ganga/GangaND280/ND280RecoValidation/oaReconPlusoaAnalysis.py @@ -172,13 +172,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -194,12 +187,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('oaReconPlusoaAnalysis','SGE', RTHandler) allHandlers.add('oaReconPlusoaAnalysis','Slurm', RTHandler) allHandlers.add('oaReconPlusoaAnalysis','Condor', RTHandler) -allHandlers.add('oaReconPlusoaAnalysis','LCG', LCGRTHandler) allHandlers.add('oaReconPlusoaAnalysis','gLite', gLiteRTHandler) allHandlers.add('oaReconPlusoaAnalysis','TestSubmitter', RTHandler) allHandlers.add('oaReconPlusoaAnalysis','Interactive', RTHandler) allHandlers.add('oaReconPlusoaAnalysis','Batch', RTHandler) allHandlers.add('oaReconPlusoaAnalysis','Cronus', RTHandler) -allHandlers.add('oaReconPlusoaAnalysis','Remote', LCGRTHandler) -allHandlers.add('oaReconPlusoaAnalysis','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280Skimmer/ND280Skimmer.py b/ganga/GangaND280/ND280Skimmer/ND280Skimmer.py index 90ef11f48a..e3c0ff5e49 100644 --- a/ganga/GangaND280/ND280Skimmer/ND280Skimmer.py +++ b/ganga/GangaND280/ND280Skimmer/ND280Skimmer.py @@ -177,12 +177,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.Lib.gLite import gLiteJobConfig @@ -197,12 +191,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('ND280RecoSkimmer','SGE', RTHandler) allHandlers.add('ND280RecoSkimmer','Slurm', RTHandler) allHandlers.add('ND280RecoSkimmer','Condor', RTHandler) -allHandlers.add('ND280RecoSkimmer','LCG', LCGRTHandler) allHandlers.add('ND280RecoSkimmer','gLite', gLiteRTHandler) allHandlers.add('ND280RecoSkimmer','TestSubmitter', RTHandler) allHandlers.add('ND280RecoSkimmer','Interactive', RTHandler) allHandlers.add('ND280RecoSkimmer','Batch', RTHandler) allHandlers.add('ND280RecoSkimmer','Cronus', RTHandler) -allHandlers.add('ND280RecoSkimmer','Remote', LCGRTHandler) -allHandlers.add('ND280RecoSkimmer','CREAM', LCGRTHandler) - diff --git a/ganga/GangaND280/ND280TPCGasInteractions/TRExPlusOAAnalysis.py b/ganga/GangaND280/ND280TPCGasInteractions/TRExPlusOAAnalysis.py index 93f71fe90d..7e1f91c190 100644 --- a/ganga/GangaND280/ND280TPCGasInteractions/TRExPlusOAAnalysis.py +++ b/ganga/GangaND280/ND280TPCGasInteractions/TRExPlusOAAnalysis.py @@ -165,13 +165,6 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): from GangaCore.GPIDev.Adapters.StandardJobConfig import StandardJobConfig return StandardJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) - - -class LCGRTHandler(IRuntimeHandler): - def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - return LCGJobConfig(app._scriptname,app._getParent().inputsandbox,[],app._getParent().outputsandbox,app.env) class gLiteRTHandler(IRuntimeHandler): def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): @@ -187,12 +180,8 @@ def prepare(self,app,appconfig,appmasterconfig,jobmasterconfig): allHandlers.add('TRExPlusOAAnalysis','SGE', RTHandler) allHandlers.add('TRExPlusOAAnalysis','Slurm', RTHandler) allHandlers.add('TRExPlusOAAnalysis','Condor', RTHandler) -allHandlers.add('TRExPlusOAAnalysis','LCG', LCGRTHandler) allHandlers.add('TRExPlusOAAnalysis','gLite', gLiteRTHandler) allHandlers.add('TRExPlusOAAnalysis','TestSubmitter', RTHandler) allHandlers.add('TRExPlusOAAnalysis','Interactive', RTHandler) allHandlers.add('TRExPlusOAAnalysis','Batch', RTHandler) allHandlers.add('TRExPlusOAAnalysis','Cronus', RTHandler) -allHandlers.add('TRExPlusOAAnalysis','Remote', LCGRTHandler) -allHandlers.add('TRExPlusOAAnalysis','CREAM', LCGRTHandler) - diff --git a/ganga/GangaTutorial/Lib/PrimeFactorizer.py b/ganga/GangaTutorial/Lib/PrimeFactorizer.py index a4beb050a8..c388e64a10 100755 --- a/ganga/GangaTutorial/Lib/PrimeFactorizer.py +++ b/ganga/GangaTutorial/Lib/PrimeFactorizer.py @@ -140,22 +140,11 @@ def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): return c -class LCGRTHandler(IRuntimeHandler): - def prepare(self, app, appconfig, appmasterconfig, jobmasterconfig): - from GangaCore.Lib.LCG import LCGJobConfig - - c = LCGJobConfig(app.exe, app.inputs, app.args, app.outputs, app.envs) - c.monitoring_svc = mc['GangaTutorial'] - - return c - - allHandlers.add('PrimeFactorizer', 'LSF', RTHandler) allHandlers.add('PrimeFactorizer', 'Local', RTHandler) allHandlers.add('PrimeFactorizer', 'PBS', RTHandler) allHandlers.add('PrimeFactorizer', 'SGE', RTHandler) allHandlers.add('PrimeFactorizer', 'Condor', RTHandler) -allHandlers.add('PrimeFactorizer', 'LCG', LCGRTHandler) allHandlers.add('PrimeFactorizer', 'TestSubmitter', RTHandler) allHandlers.add('PrimeFactorizer', 'Interactive', RTHandler) allHandlers.add('PrimeFactorizer', 'Batch', RTHandler)