Skip to content

Commit

Permalink
Merge pull request #1228 from ComparativeGenomicsToolkit/oneshot
Browse files Browse the repository at this point in the history
Lower default minigraph construct memory; add override option
  • Loading branch information
glennhickey authored Nov 15, 2023
2 parents 2343a84 + c34177b commit 06872ce
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/pangenome.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ The Minigraph-Cactus pipeline is run via the `cactus-pangenome` command. It cons
**Before running large jobs, it is important to consider the following options:**

* `--mgCores` the number of cores for `minigraph` construction (default: all available)
* `--mgMemory` the amount of memory for `minigraph` construction. The default estimate can be quite conservative (i.e., high), so if it is too high for your system, you can lower it with this option (default: estimate based on input size).
* `--mapCores` the number of cores for each `minigraph` mapping job (default: up to 6)
* `--consCores` the number of cores for each `cactus-consolidated` job (default: all available)
* `--consMemory` the amount of memory for each `cactus-consolidated` job. By default, it is estimated from the data, but a wrong estimate can be catastrophic on [SLURM](./progressive.md#running-on-a-cluster). Consider setting it to the maximum memory you have available when running on a cluster to be extra safe (this seems to be more of an issue for non-human data).
Expand Down
14 changes: 11 additions & 3 deletions src/cactus/refmap/cactus_minigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
from cactus.shared.common import cactus_call
from cactus.shared.common import getOptionalAttrib, findRequiredNode
from cactus.shared.version import cactus_commit
from cactus.progressive.cactus_prepare import human2bytesN
from cactus.preprocessor.checkUniqueHeaders import sanitize_fasta_headers
from toil.job import Job
from toil.common import Toil
from toil.statsAndLogging import logger
from toil.statsAndLogging import set_logging_from_options
from toil.realtimeLogger import RealtimeLogger
from toil.lib.conversions import bytes2human
from cactus.shared.common import cactus_cpu_count
from cactus.progressive.multiCactusTree import MultiCactusTree
from sonLib.bioio import getTempDirectory
Expand All @@ -43,7 +45,10 @@ def main():
parser.add_argument("--reference", required=True, nargs='+', type=str,
help = "Reference genome name(s) (added to minigraph first). Mash distance to 1st reference to determine order of other genomes (use minigraphSortInput in the config xml to toggle this behavior).")
parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).")

parser.add_argument("--mgMemory", type=human2bytesN,
help="Memory in bytes for the minigraph construction job (defaults to an estimate based on the input data size). "
"Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None)

#Progressive Cactus Options
parser.add_argument("--configFile", dest="configFile",
help="Specify cactus configuration file",
Expand Down Expand Up @@ -295,8 +300,11 @@ def minigraph_construct(job, options, config_node, seq_id_map, seq_order, gfa_pa
max_size = max([x.size for x in seq_id_map.values()])
total_size = sum([x.size for x in seq_id_map.values()])
disk = total_size * 2
mem = 128 * max_size + int(total_size / 4)
mem = max(mem, 2**30)
mem = 60 * max_size + int(total_size / 4)
mem = max(mem, 2**31)
if options.mgMemory is not None:
RealtimeLogger.info('Overriding minigraph_construct memory estimate of {} with {} value {} from --mgMemory'.format(bytes2human(mem), 'greater' if options.mgMemory > mem else 'lesser', bytes2human(options.mgMemory)))
mem = options.mgMemory
return job.addChildJobFn(minigraph_construct, options, config_node, seq_id_map, seq_order, gfa_path,
has_resources=True, disk=disk, memory=mem, cores=options.mgCores).rv()

Expand Down
3 changes: 3 additions & 0 deletions src/cactus/refmap/cactus_pangenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def main():

# cactus-minigraph options
parser.add_argument("--mgCores", type=int, help = "Number of cores for minigraph construction (defaults to the same as --maxCores).")
parser.add_argument("--mgMemory", type=human2bytesN,
help="Memory in bytes for the minigraph construction job (defaults to an estimate based on the input data size). "
"Standard suffixes like K, Ki, M, Mi, G or Gi are supported (default=bytes))", default=None)

# cactus-graphmap options
parser.add_argument("--mapCores", type=int, help = "Number of cores for minigraph map. Overrides graphmap cpu in configuration")
Expand Down

0 comments on commit 06872ce

Please sign in to comment.