[global]
#
# How many distributed copies of source data should be stored.
# If you set this, set syncdir (see below)
#
# replication = 1
#
# Build a work queue by hostname rather than by [Nodename]
# (Use this if you have multiple "nodes" (e.g. disks) on one hostname
# and sshd is rejecting connections because of their rate. It is usually
# better to raise MaxStartups in sshd_config.)
#
# queuebyhost = True
#
# Number of times to retry an all2all command.
# Default is 7 to accommodate all2all over ssh with the default MaxStartups value.
# For better performance when there is no MaxStartups cap, set this to 1
#
# all2all_replicate = 7
#
# Command to execute to connect to a machine where we can do SLURM scheduling.
# If not set, then assume that we are not using SLURM.
# If set to localhost, then SLURM commands will be run locally.
#
# slurmfe = ssh cluster
# slurmfe = localhost
#
# Extra directories to add to the search path for executables
# (colon-delimited, like PATH)
#
# pathextras =
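#
# For example (hypothetical paths):
# pathextras = /opt/local/bin:/usr/local/bin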
##
## Global defaults: (can be overridden per node)
##
#
# Determine the number of items to run concurrently based on CPU and I/O load.
# The value is the minimum time (in seconds) to wait after starting a new
# process before checking whether the system still has headroom for more.
# Set to 0/False/None to use static scheduling.
#
# Default: dynamicload = 2.0
#
# If not using dynamic load scheduling (see above), each node will run
# up to <processes> total concurrent processes shared across all concurrent jobs.
# Each job is further limited to (<cpusperjob> * <procspercpu>) processes.
# <procspercpu> is a job-specific argument to "fm map" and defaults to 1.
# The default limit of 1 process per node is appropriate for I/O-bound
# jobs. For CPU-bound jobs on multicore nodes, either change these settings
# or leave dynamicload set so that processes are added automatically until
# the node is busy.
#
# Default: processes = 1000
# Default: cpusperjob = 1
#
# Directory used for synchronization files. Set this if replication is > 1,
# or data will be processed redundantly. This directory must be accessible
# and coherent across all nodes (but not necessarily from this client
# machine); it need not be large or fast. The path is interpreted on the
# nodes, not on this client machine. If the location is mounted at different
# paths on different nodes, override the setting in the node definitions.
#
# Experimental redis support:
# If Python redis support is installed (pip install redis), you may set
# "syncdir = redis://server:port" to use a redis server instead.
#
# If no syncdir is specified, then nodes will process all of the data they see.
#
# syncdir =
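#
# For example (hypothetical shared path):
# syncdir = /net/shared/fm-sync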
#
# An optional location where source data may be found.
# Nothing will be written to this directory; any derived files will be placed
# in the node's normal rootdir. This can be a shared NFS mount.
#
# cowdir =
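#
# For example (hypothetical NFS mount):
# cowdir = /mnt/nfs/source-data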
#
# When reading from cowdir, how many nodes should attempt any given file they see.
# If cowdir is a global directory, every node could attempt every file,
# but that creates extra work on the syncdir, which can cause delays.
# 0 means nodes should attempt every file they can see (use this if cowdir
# is not global).
#
# demand = 3
#
# FM command (specify alternate path)
# Note: many versions of SSH will not execute your .login equivalent,
# so all commands must be referenced by explicit path or found via a PATH
# set in your ~/.ssh/environment file (see ssh(1)).
#
# fm = %(ROOTDIR)/sys/fm
#
# PYTHON command (specify alternate path)
# The fm script will be executed under the interpreter specified here.
#
# python = python
#
# SSH command (specify alternate path or options)
# Note: for reduce operations to function, credential or agent forwarding must work
# Note: older versions of rsync don't support quoted arguments in the ssh command
# For improved performance, use -S (ControlMaster connection sharing; see
# the example below)
# Default: ssh = ssh -o GSSAPIDelegateCredentials=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no -Ax
#
# When running under SLURM:
#ssh = srun -N 1 -n 1 --unbuffered -w
#ssh = ssh -o GSSAPIDelegateCredentials=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o ControlMaster=auto -S ~/.ssh/%l-%r@%h:%p -Ax
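# (in that example, -S names the ControlMaster socket; %l, %r, %h, and %p
#  are ssh ControlPath escapes for the local hostname, remote user, remote
#  host, and port)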
#
# RSYNC command (specify alternate path or options)
# Default: rsync = rsync -tO
#
# rsync = /opt/local/bin/rsync -t
##
## List your nodes here.
## Each [Nodename] must be unique, but is otherwise unused.
## If a 'hostname' is specified, the node is a remote host, and
## the given hostname must be accessible from all nodes.
## Most of the global settings can be overridden with per-node values
## (see the example under Node2 below).
##
[Node1]
rootdir = /tmp/foo
[Node4]
rootdir = /tmp/bar
[Node2]
#hostname = localhost
rootdir = /var/tmp/baz
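# Example per-node override (hypothetical path): if the shared syncdir is
# mounted at a different location on this node, point to it here.
#syncdir = /net/baz/fm-sync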
##
## An example cluster config using ranges and lists.
## This example defines 6000 "nodes" (6 disks on each of 1000 hosts).
## Reference elements of the list using Python "replacement field" syntax.
## In this example hostnames run node0001 - node1000, so the host number
## must be zero-padded.
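## ({0} refers to the first range, the host number, and {1} to the second,
## the disk number)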
##
#[(1-1000).(1-6)]
#hostname = node{0:04d}
#rootdir = /data{1}
#
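# Ranges and lists can also mark nodes inactive; this example disables
# all 6 disks on hosts 13, 18, 86, and 112 (e.g. hosts that are down).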
#[(13,18,86,112).(1-6)]
#inactive = True