Source code for fyrd.batch_systems

# -*- coding: utf-8 -*-
"""
Modular batch system handling.

All batch system specific functions are contained within files in the
batch_systems folder.  The files must have the same name as the batch system,
and possible batch systems are set in the DEFINED_SYSTEMS set. Most batch
system functions are set in the modules in this package, but system detection
methods are hardcoded into get_cluster_environment() also.

To add new systems, create a new batch system with identical function and
classes names and return/yield values to those in an existing definition. You
will also need to update the options.py script to include keywords for your
system and the get_cluster_environment() function to include autodetection.
"""
from importlib import import_module as _import

from .. import run as _run
from .. import logme as _logme
from .. import ClusterError as _ClusterError

DEFINED_SYSTEMS = {'torque', 'slurm', 'local'}

MODE = None

# Define job states all batch systems must return one of these states
GOOD_STATES      = ['complete', 'completed', 'special_exit']
ACTIVE_STATES    = ['configuring', 'completing', 'pending',
                    'held', 'running']
BAD_STATES       = ['boot_fail', 'cancelled', 'failed', 'killed',
                    'node_fail', 'timeout', 'disappeared']
UNCERTAIN_STATES = ['preempted', 'stopped',
                    'suspended']
ALL_STATES = GOOD_STATES + ACTIVE_STATES + BAD_STATES + UNCERTAIN_STATES
DONE_STATES = GOOD_STATES + BAD_STATES

_default_batches = None


[docs]def get_batch_system(qtype=None): """Return a batch_system module.""" qtype = qtype if qtype else get_cluster_environment() if qtype not in DEFINED_SYSTEMS: raise _ClusterError( 'qtype value {0} is not recognized, '.format(qtype) + 'should be one of {0}'.format(DEFINED_SYSTEMS) ) global _default_batches if not _default_batches: _default_batches = {} if not qtype in _default_batches: _default_batches[qtype] = _import( 'fyrd.batch_systems.{}'.format(qtype) ) return _default_batches[qtype]
################################# # Set the global cluster type # #################################
[docs]def get_cluster_environment(overwrite=False): """Detect the local cluster environment and set MODE globally. Detect the current batch system by looking for command line utilities. Order is important here, so we hard code the batch system lookups. Paths to files can also be set in the config file. Parameters ---------- overwrite : bool, optional If True, run checks anyway, otherwise just accept MODE if it is already set. Returns ------- MODE : str """ global MODE if not overwrite and MODE and MODE in list(DEFINED_SYSTEMS): return MODE from .. import conf as _conf conf_queue = _conf.get_option('queue', 'queue_type', 'auto') if conf_queue not in list(DEFINED_SYSTEMS) + ['auto']: _logme.log('queue_type in the config file is {}, '.format(conf_queue) + 'but it should be one of {}'.format(DEFINED_SYSTEMS) + ' or auto. Resetting it to auto', 'warn') _conf.set_option('queue', 'queue_type', 'auto') conf_queue = 'auto' if conf_queue == 'auto': # Hardcode queue lookups here sbatch_cmnd = _conf.get_option('queue', 'sbatch') qsub_cmnd = _conf.get_option('queue', 'qsub') sbatch_cmnd = sbatch_cmnd if sbatch_cmnd else 'sbatch' qsub_cmnd = qsub_cmnd if qsub_cmnd else 'qsub' if _run.which(sbatch_cmnd): MODE = 'slurm' elif _run.which(qsub_cmnd): MODE = 'torque' else: MODE = 'local' else: MODE = conf_queue if MODE is None: _logme.log('No functional batch system detected, will not be able to' 'run', 'error') elif MODE == 'local': _logme.log('No cluster environment detected, using multiprocessing', 'debug') else: _logme.log('{0} detected, using for cluster submissions'.format(MODE), 'debug') return MODE
############################## # Check if queue is usable # ##############################
[docs]def check_queue(qtype=None): """Check if *both* MODE and qtype are valid. First checks the MODE global and autodetects its value, if that fails, no other tests are done, the qtype argument is ignored. After MODE is found to be a reasonable value, the queried queue is tested for functionality. If qtype is defined, this queue is tested, else the queue in MODE is tested. Tests are defined per batch system. Parameters ---------- qtype : str Returns ------ batch_system_functional : bool Raises ------ ClusterError If MODE or qtype is not in DEFINED_SYSTEMS See Also -------- get_cluster_environment : Auto detect the batch environment get_batch_system : Return the batch system module """ if 'MODE' not in globals(): global MODE MODE = get_cluster_environment() if not MODE: MODE = get_cluster_environment() if not MODE: _logme.log('Queue system not detected', 'error') return False if MODE not in DEFINED_SYSTEMS: raise _ClusterError( 'MODE value {0} is not recognized, '.format(MODE) + 'should be one of {0}'.format(DEFINED_SYSTEMS) ) if qtype and qtype not in DEFINED_SYSTEMS: raise _ClusterError( 'qtype value {0} is not recognized, '.format(qtype) + 'should be one of {0}'.format(DEFINED_SYSTEMS) ) qtype = qtype if qtype else MODE batch_system = get_batch_system(qtype) return batch_system.queue_test(warn=True)
# Make options easily available everywhere from . import options