diff --git a/doc/user_manual/src/Howtorun.tex b/doc/user_manual/src/Howtorun.tex
index 17d50aa2..e05b3928 100644
--- a/doc/user_manual/src/Howtorun.tex
+++ b/doc/user_manual/src/Howtorun.tex
@@ -25,3 +25,32 @@ \subsection{How to run}
\end{lstlisting}
Alternatively, you can use Python to run \texttt{HERON/src/main.py} with the HERON XML input as
argument; however, this will bypass loading the \texttt{raven\_libraries} and other initialization.
+
+\subsection{Parallel Notes}
+
+HERON uses RAVEN's parallel tools. Since running on different clusters
+can require somewhat different commands, HERON allows the commands
+used for parallel running to be chosen based on the hostname.
+
+These are stored in the directory \texttt{templates/parallel}. Example:
+
+\begin{lstlisting}[style=XML]
+<parallel hostregexp="sawtooth[0-9].*">
+  <useParallel>
+    <mode>
+      mpi
+      <runQSUB/>
+    </mode>
+  </useParallel>
+  <outer>
+    <parallelMethod>ray</parallelMethod>
+  </outer>
+</parallel>
+\end{lstlisting}
+
+The \texttt{hostregexp} attribute is a regular expression; the first
+template whose expression matches the hostname is used for running in
+parallel. If parallel running is requested, the contents of the
+\texttt{useParallel} section are added to the \texttt{RunInfo} of the
+RAVEN input. If the batch size is greater than one, the contents of
+the \texttt{outer} section are used as well.
diff --git a/templates/parallel/bitterroot.xml b/templates/parallel/bitterroot.xml
new file mode 100644
index 00000000..d8b282e8
--- /dev/null
+++ b/templates/parallel/bitterroot.xml
@@ -0,0 +1,11 @@
+<parallel hostregexp="bitterroot[0-9].*">
+  <useParallel>
+    <mode>slurm</mode>
+  </useParallel>
+  <outer>
+    <internalParallel>True</internalParallel>
+    <parallelMethod>
+      dask
+    </parallelMethod>
+  </outer>
+</parallel>
diff --git a/templates/parallel/sawtooth.xml b/templates/parallel/sawtooth.xml
new file mode 100644
index 00000000..eabc23dd
--- /dev/null
+++ b/templates/parallel/sawtooth.xml
@@ -0,0 +1,11 @@
+<parallel hostregexp="sawtooth[0-9].*">
+  <useParallel>
+    <mode>
+      mpi
+      <runQSUB/>
+    </mode>
+  </useParallel>
+  <outer>
+    <parallelMethod>ray</parallelMethod>
+  </outer>
+</parallel>
diff --git a/templates/template_driver.py b/templates/template_driver.py
index 54046104..8ae47e25 100644
--- a/templates/template_driver.py
+++ b/templates/template_driver.py
@@ -10,6 +10,9 @@
import shutil
import xml.etree.ElementTree as ET
import itertools as it
+import socket
+import glob
+import re
import numpy as np
import dill as pk
@@ -237,6 +240,23 @@ def _modify_outer_mode(self, template, case, components, sources):
elif case.get_mode() == 'opt':
template.find('Samplers').remove(template.find(".//Grid[@name='grid']"))
+ def _get_parallel_xml(self, hostname):
+ """
+ Finds the xml file to go with the given hostname.
+ @ In, hostname, str, the hostname to match against the templates' hostregexp
+ @ Out, xml, xml.etree.ElementTree.Element or None, root of the first template whose hostregexp matches the hostname, otherwise None
+ """
+ # TODO: should this allow loading from another directory (such as one
+ # next to the input file)?
+ path = os.path.join(os.path.dirname(__file__),"parallel","*.xml")
+ filenames = glob.glob(path)
+ for filename in filenames:
+ cur_xml = ET.parse(filename).getroot()
+ regexp = cur_xml.attrib['hostregexp']
+ if re.match(regexp, hostname):
+ return cur_xml
+ return None
+
def _modify_outer_runinfo(self, template, case):
"""
Defines modifications to the RunInfo of outer.xml RAVEN input file.
@@ -257,15 +277,26 @@ def _modify_outer_runinfo(self, template, case):
elif case.get_mode() == 'opt':
run_info.find('Sequence').text = 'optimize, plot'
# parallel
+ # TODO: should there be a way to override the hostname (such as if we
+ # are generating the files to run on a different computer)?
+ hostname = socket.gethostbyaddr(socket.gethostname())[0]
+ self.parallel_xml = self._get_parallel_xml(hostname)
+ # NOTE: parallel_xml may be None if no template matched the hostname
if case.outerParallel:
- # set outer batchsize and InternalParallel
- batchSize = run_info.find('batchSize')
- batchSize.text = f'{case.outerParallel}'
- run_info.append(xmlUtils.newNode('internalParallel', text='True'))
+ self._modify_outer_parallel(template, case)
if case.useParallel:
- #XXX this doesn't handle non-mpi modes like torque or other custom ones
- mode = xmlUtils.newNode('mode', text='mpi')
- mode.append(xmlUtils.newNode('runQSUB'))
+ if self.parallel_xml is None:
+ #this doesn't handle non-mpi modes like torque or other custom ones
+ # so it is highly recommended that a parallel xml template be created
+ # for hosts that are using those.
+ mode = xmlUtils.newNode('mode', text='mpi')
+ mode.append(xmlUtils.newNode('runQSUB'))
+ else:
+ for child in self.parallel_xml.find('useParallel'):
+ if child.tag == 'mode':
+ mode = child
+ else:
+ run_info.append(child)
if 'memory' in case.parallelRunInfo:
mode.append(xmlUtils.newNode('memory', text=case.parallelRunInfo.pop('memory')))
for sub in case.parallelRunInfo:
@@ -274,6 +305,26 @@ def _modify_outer_runinfo(self, template, case):
if case.innerParallel:
run_info.append(xmlUtils.newNode('NumMPI', text=case.innerParallel))
+ def _modify_outer_parallel(self, template, case):
+ """
+ Modifies the outer parallel stuff. This should only be called if
+ case.outerParallel > 0
+ @ In, template, xml.etree.ElementTree.Element, root of XML to modify
+ @ In, case, HERON Case, defining Case instance
+ @ Out, None
+
+ """
+ run_info = template.find('RunInfo')
+ # set outer batchsize and InternalParallel
+ batchSize = run_info.find('batchSize')
+ batchSize.text = f'{case.outerParallel}'
+ if self.parallel_xml is None:
+ run_info.append(xmlUtils.newNode('internalParallel', text='True'))
+ else:
+ #append all the children in the 'outer' element
+ for child in self.parallel_xml.find('outer'):
+ run_info.append(child)
+
def _modify_outer_vargroups(self, template, case, components, sources):
"""
Defines modifications to the VariableGroups of outer.xml RAVEN input file.
@@ -703,12 +754,8 @@ def _modify_outer_samplers(self, template, case, components):
#XXX if we had a way to calculate this ahead of time,
# this could be done in _modify_outer_runinfo
#Need to update the outerParallel number
- run_info = template.find('RunInfo')
case.outerParallel = len(self.__sweep_vars) + 1
- #XXX duplicate of code in _modify_outer_runinfo
- batchSize = run_info.find('batchSize')
- batchSize.text = f'{case.outerParallel}'
- run_info.append(xmlUtils.newNode('internalParallel', text='True'))
+ self._modify_outer_parallel(template, case)
def _modify_outer_optimizers(self, template, case):
"""