-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Loading status checks…
adding kegalign
Richard Burhans
committed
Aug 29, 2024
1 parent
167e4bf
commit f032b04
Showing
16 changed files
with
1,638 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
categories: | ||
- Next Gen Mappers | ||
description: A Scalable GPU System for Pairwise Whole Genome Alignments based on LASTZ's seed-filter-extend paradigm. | ||
homepage_url: https://github.com/galaxyproject/KegAlign | ||
long_description: | | ||
KegAlign is a scalable, GPU-accelerated system for computing pairwise | ||
WGA. KegAlign is based on the standard seed-filter-extend heuristic, | ||
in which the filtering stage dominates the runtime. | ||
name: kegalign | ||
owner: richard-burhans | ||
remote_repository_url: https://github.com/richard-burhans/galaxytools/tree/main/tools/kegalign | ||
type: unrestricted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,278 @@ | ||
#!/usr/bin/env python | ||
|
||
|
||
""" | ||
Diagonal partitioning for segment files output by KegAlign. | ||
Usage: | ||
diagonal_partition.py <max-segments> <lastz-command> | ||
set <max-segments> = 0 to skip partitioning, -1 to estimate best parameter | ||
""" | ||
|
||
import collections | ||
import os | ||
import statistics | ||
import sys | ||
import typing | ||
|
||
|
||
def chunks(lst: tuple[str, ...], n: int) -> typing.Iterator[tuple[str, ...]]: | ||
"""Yield successive n-sized chunks from list.""" | ||
for i in range(0, len(lst), n): | ||
yield lst[i:i + n] | ||
|
||
|
||
if __name__ == "__main__": | ||
# TODO: make these optional user defined parameters | ||
|
||
# deletes original segment file after splitting | ||
DELETE_AFTER_CHUNKING = True | ||
|
||
# don't partition segment files with line count below this value | ||
MIN_CHUNK_SIZE = 5000 | ||
|
||
# only used when segment size is being estimated | ||
MAX_CHUNK_SIZE = 50000 | ||
|
||
# include chosen split size in file name | ||
DEBUG = False | ||
|
||
# first parameter contains chunk size | ||
chunk_size = int(sys.argv[1]) | ||
params = sys.argv[2:] | ||
|
||
# don't do anything if 0 chunk size | ||
if chunk_size == 0: | ||
print(" ".join(params), flush=True) | ||
sys.exit(0) | ||
|
||
# Parsing command output from KegAlign | ||
segment_key = "--segments=" | ||
segment_index = None | ||
input_file = None | ||
|
||
for index, value in enumerate(params): | ||
if value[:len(segment_key)] == segment_key: | ||
segment_index = index | ||
input_file = value[len(segment_key):] | ||
break | ||
|
||
if segment_index is None: | ||
sys.exit(f"Error: could not get segment key {segment_key} from parameters {params}") | ||
|
||
if input_file is None: | ||
sys.exit(f"Error: could not get segment file from parameters {params}") | ||
|
||
if not os.path.isfile(input_file): | ||
sys.exit(f"Error: File {input_file} does not exist") | ||
|
||
# each char is 1 byte | ||
line_size = None | ||
file_size = os.path.getsize(input_file) | ||
with open(input_file, "r") as f: | ||
# add 1 for newline | ||
line_size = len(f.readline()) | ||
|
||
estimated_lines = file_size // line_size | ||
|
||
# check if chunk size should be estimated | ||
if chunk_size < 0: | ||
# optimization, do not need to get each file size in this case | ||
if estimated_lines < MIN_CHUNK_SIZE: | ||
print(" ".join(params), flush=True) | ||
sys.exit(0) | ||
|
||
# get size of each segment assuming DELETE_AFTER_CHUNKING == True | ||
# takes into account already split segments | ||
files = [i for i in os.listdir(".") if i.endswith(".segments")] | ||
|
||
if len(files) < 2: | ||
# if not enough segment files for estimation, use MAX_CHUNK_SIZE | ||
chunk_size = MAX_CHUNK_SIZE | ||
else: | ||
fdict: typing.DefaultDict[str, int] = collections.defaultdict(int) | ||
for filename in files: | ||
size = os.path.getsize(filename) | ||
f_ = filename.split(".split", 1)[0] | ||
fdict[f_] += size | ||
|
||
if len(fdict) < 7: | ||
# outliers can heavily skew prediction if <7 data points | ||
# to be safe, use 50% quantile | ||
chunk_size = int(statistics.quantiles(fdict.values())[1] // line_size) | ||
else: | ||
# otherwise use 75% quantile | ||
chunk_size = int(statistics.quantiles(fdict.values())[-1] // line_size) | ||
# if not enough data points, there is a chance of getting unlucky | ||
# minimize worst case by using MAX_CHUNK_SIZE | ||
|
||
chunk_size = min(chunk_size, MAX_CHUNK_SIZE) | ||
|
||
# no need to sort if number of lines <= chunk_size | ||
if (estimated_lines <= chunk_size): | ||
print(" ".join(params), flush=True) | ||
sys.exit(0) | ||
|
||
# Find rest of relevant parameters | ||
output_key = "--output=" | ||
output_index = None | ||
output_alignment_file = None | ||
output_alignment_file_base = None | ||
output_format = None | ||
|
||
strand_key = "--strand=" | ||
strand_index = None | ||
for index, value in enumerate(params): | ||
if value[:len(output_key)] == output_key: | ||
output_index = index | ||
output_alignment_file = value[len(output_key):] | ||
output_alignment_file_base, output_format = output_alignment_file.rsplit(".", 1) | ||
|
||
if value[:len(strand_key)] == strand_key: | ||
strand_index = index | ||
|
||
if output_alignment_file_base is None: | ||
sys.exit(f"Error: could not get output alignment file base from parameters {params}") | ||
|
||
if output_format is None: | ||
sys.exit(f"Error: could not get output format from parameters {params}") | ||
|
||
if output_index is None: | ||
sys.exit(f"Error: could not get output key {output_key} from parameters {params}") | ||
|
||
if strand_index is None: | ||
sys.exit(f"Error: could not get strand key {strand_key} from parameters {params}") | ||
|
||
# error file is at very end | ||
err_index = -1 | ||
err_name_base = params[-1].split(".err", 1)[0] | ||
|
||
# dict of list of tuple (x, y, str) | ||
data: dict[tuple[str, str], list[tuple[int, int, str]]] = {} | ||
|
||
direction = None | ||
if "plus" in params[strand_index]: | ||
direction = "f" | ||
elif "minus" in params[strand_index]: | ||
direction = "r" | ||
else: | ||
sys.exit(f"Error: could not figure out direction from strand value {params[strand_index]}") | ||
|
||
for line in open(input_file, "r"): | ||
if line == "": | ||
continue | ||
seq1_name, seq1_start, seq1_end, seq2_name, seq2_start, seq2_end, _dir, score = line.split() | ||
# data.append((int(seq1_start), int(seq2_start), line)) | ||
half_dist = int((int(seq1_end) - int(seq1_start)) // 2) | ||
assert int(seq1_end) > int(seq1_start) | ||
assert int(seq2_end) > int(seq2_start) | ||
seq1_mid = int(seq1_start) + half_dist | ||
seq2_mid = int(seq2_start) + half_dist | ||
data.setdefault((seq1_name, seq2_name), []).append((seq1_mid, seq2_mid, line)) | ||
|
||
# If there are chromosome pairs with segment count <= chunk_size | ||
# then no need to sort and split these pairs into separate files. | ||
# It is better to keep these pairs in a single segment file. | ||
|
||
# pairs that have count <= chunk_size. these will not be sorted | ||
skip_pairs = [] | ||
|
||
# save query key order | ||
# for lastz segment files: 'Query sequence names must appear in the same | ||
# order as they do in the query file' | ||
|
||
# NOTE: assuming data.keys() preserves order of keys. Requires Python 3.7+ | ||
|
||
query_key_order = list(dict.fromkeys([i[1] for i in data.keys()])) | ||
|
||
if len(data.keys()) > 1: | ||
for pair in data.keys(): | ||
if len(data[pair]) <= chunk_size: | ||
skip_pairs.append(pair) | ||
|
||
# sorting for forward segments | ||
if direction == "r": | ||
for pair in data.keys(): | ||
if pair not in skip_pairs: | ||
data[pair] = sorted(data[pair], key=lambda coord: (coord[1] - coord[0], coord[0])) | ||
|
||
# sorting for reverse segments | ||
elif direction == "f": | ||
for pair in data.keys(): | ||
if pair not in skip_pairs: | ||
data[pair] = sorted(data[pair], key=lambda coord: (coord[1] + coord[0], coord[0])) | ||
else: | ||
sys.exit(f"INVALID DIRECTION VALUE: {direction}") | ||
|
||
# Writing file in chunks | ||
ctr = 0 | ||
# [i for i in data_keys if i not in set(skip_pairs)]: | ||
for pair in (data.keys() - skip_pairs): | ||
for chunk in chunks(list(zip(*data[pair]))[2], chunk_size): | ||
ctr += 1 | ||
name_addition = f".split{ctr}" | ||
|
||
if DEBUG: | ||
name_addition = f".{chunk_size}{name_addition}" | ||
|
||
fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" | ||
|
||
assert len(chunk) != 0 | ||
with open(fname, "w") as f: | ||
f.writelines(chunk) | ||
# update segment file in command | ||
params[segment_index] = segment_key + fname | ||
# update output file in command | ||
params[output_index] = output_key + output_alignment_file_base + name_addition + "." + output_format | ||
# update error file in command | ||
params[-1] = err_name_base + name_addition + ".err" | ||
print(" ".join(params), flush=True) | ||
|
||
# writing unsorted skipped pairs | ||
if len(skip_pairs) > 0: | ||
# list of tuples of (pair length, pair) | ||
skip_pairs_with_len = sorted([(len(data[p]), p) for p in skip_pairs]) | ||
# NOTE: This sorting can violate lastz query key order requirement, this is fixed later | ||
|
||
# used for sorting | ||
query_key_order_table = {item: idx for idx, item in enumerate(query_key_order)} | ||
|
||
# list of list of pair names | ||
aggregated_skip_pairs: list[list[tuple[str, str]]] = [] | ||
current_count = 0 | ||
aggregated_skip_pairs.append([]) | ||
for count, pair in skip_pairs_with_len: | ||
if current_count + count <= chunk_size: | ||
current_count += count | ||
aggregated_skip_pairs[-1].append(pair) | ||
else: | ||
aggregated_skip_pairs.append([]) | ||
current_count = count | ||
aggregated_skip_pairs[-1].append(pair) | ||
|
||
for aggregate in aggregated_skip_pairs: | ||
ctr += 1 | ||
name_addition = f".split{ctr}" | ||
|
||
if DEBUG: | ||
name_addition = f".{chunk_size}{name_addition}" | ||
|
||
fname = input_file.split(".segments", 1)[0] + name_addition + ".segments" | ||
|
||
with open(fname, "w") as f: | ||
# fix possible lastz query key order violations | ||
# p[1] is query key | ||
for pair in sorted(aggregate, key=lambda p: query_key_order_table[p[1]]): | ||
chunk = list(zip(*data[pair]))[2] | ||
f.writelines(chunk) | ||
# update segment file in command | ||
params[segment_index] = segment_key + fname | ||
# update output file in command | ||
params[output_index] = output_key + output_alignment_file_base + name_addition + "." + output_format | ||
# update error file in command | ||
params[-1] = err_name_base + name_addition + ".err" | ||
print(" ".join(params), flush=True) | ||
|
||
if DELETE_AFTER_CHUNKING: | ||
os.remove(input_file) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<macros> | ||
<xml name="gapped_extension_options"> | ||
<section name="gapped_extension_options" expanded="false" title="Gapped Extension Options"> | ||
<param argument="--ydrop" type="integer" value="9430" label="Y-drop value for gapped extension"/> | ||
<param argument="--gappedthresh" type="integer" value="" optional="true" label="Score threshold for gapped alignments"/> | ||
<param argument="--notrivial" type="boolean" checked="false" label="Don't output a trivial self-alignment block if the target and query sequences are identical"/> | ||
</section> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
<tool id="kegalign" name="KegAlign" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>A Scalable GPU System for Pairwise Whole Genome Alignments based on LASTZ's seed-filter-extend paradigm</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
<import>sequence_options.xml</import> | ||
<import>scoring_options.xml</import> | ||
<import>seeding_options.xml</import> | ||
<import>ungapped_extension_options.xml</import> | ||
<import>gapped_extension_options.xml</import> | ||
<import>output_options.xml</import> | ||
<import>system_options.xml</import> | ||
</macros> | ||
<expand macro="edam_ontology"/> | ||
<expand macro="requirements"/> | ||
<required_files> | ||
<include path="diagonal_partition.py"/> | ||
<include path="lastz-cmd.ini"/> | ||
<include path="package_output.py"/> | ||
<include path="runner.py"/> | ||
</required_files> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
## Convert input sequences to 2bit ------------------------------------- | ||
mkdir -p "\$(pwd)/work" && | ||
faToTwoBit <(gzip -cdfq '$target') "\$(pwd)/work/ref.2bit" && | ||
faToTwoBit <(gzip -cdfq '$query') "\$(pwd)/work/query.2bit" && | ||
## Run KegAlign -------------------------------------------------------- | ||
## explicitly calling python to bypass a pulsar bug | ||
## https://github.com/galaxyproject/pulsar/issues/341 | ||
python '$__tool_directory__/runner.py' | ||
--output-type tarball | ||
--output-file '$kegalign_output' | ||
--diagonal-partition | ||
--num-cpu \${GALAXY_SLOTS:-2} | ||
--tool_directory '$__tool_directory__' | ||
'$target' | ||
'$query' | ||
## Sequence Options ---------------------------------------------------- | ||
--strand '$sequence_options.strand_selector' | ||
## Scoring Options ----------------------------------------------------- | ||
#set $scoring_pathname = str($scoring_options.scoring) | ||
#if $scoring_pathname != "None": | ||
--scoring '$scoring_pathname' | ||
#end if | ||
#if str($scoring_options.ambiguous_selector) != "x" | ||
#if str($scoring_options.set_ambiguous_params_selector) == "true" | ||
#set $argument_value = ','.join($scoring_options.ambiguous_selector, $scoring_options.ambiguous_reward, $scoring_options.ambiguous_penalty) | ||
--ambiguous '$argument_value' | ||
#else | ||
--ambiguous '$ambiguous_selector' | ||
#end if | ||
#end if | ||
## Seeding Options ----------------------------------------------------- | ||
#if str($seeding_options.seed.seed_selector) == "custom" | ||
--seed '$seeding_options.seed.custom_seed' | ||
#else | ||
--seed '$seeding_options.seed.seed_selector' | ||
#end if | ||
--step '$seeding_options.step' | ||
#if str($seeding_options.notransition) == "true" | ||
--notransition | ||
#end if | ||
## Ungapped Extension Options ------------------------------------------ | ||
--xdrop '$ungapped_extension_options.xdrop' | ||
--hspthresh '$ungapped_extension_options.hspthresh' | ||
#if str($ungapped_extension_options.noentropy) == "true" | ||
--noentropy | ||
#end if | ||
## Gapped Extension Options -------------------------------------------- | ||
--ydrop '$gapped_extension_options.ydrop' | ||
#if str($gapped_extension_options.gappedthresh) != "" | ||
--gappedthresh '$gapped_extension_options.gappedthresh' | ||
#end if | ||
#if str($gapped_extension_options.notrivial) == "true" | ||
--notrivial | ||
#end if | ||
## Output Options ----------------------------------------------------- | ||
#if str($output_options.format.format_selector) == "bam" | ||
--format '$output_options.format.bam_options' | ||
#else if str($output_options.format.format_selector) == "general_def" | ||
--format general- | ||
#else if str($output_options.format.format_selector) == "general_full" | ||
--format 'general-:${output_options.format.fields}' | ||
#else if str($output_options.format.format_selector) == "maf" | ||
--format '$output_options.format.maf_type' | ||
#else if str($output_options.format.format_selector) == "blastn" | ||
--format=BLASTN- | ||
#else if str($output_options.format.format_selector) == "differences" | ||
--format=differences | ||
#end if | ||
## todo :: rplot, bam | ||
## --action:target=multiple | ||
## $output_format.rplot | ||
## .if str( $output_format.out.format ) == "bam": | ||
## | samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '${output}' | ||
## .else: | ||
## > '${output}' | ||
## .end if | ||
## .if $output_format.rplot: | ||
## && | ||
## Rscript $r_plot > /dev/null 2>&1 | ||
## .end if | ||
## System Options ----------------------------------------------------- | ||
--wga_chunk_size '$system_options.wga_chunk_size' | ||
--lastz_interval_size '$system_options.lastz_interval_size' | ||
--seq_block_size '$system_options.seq_block_size' | ||
--num_gpu '$system_options.num_gpu' | ||
#if str($system_options.debug) == "true" | ||
--debug | ||
#end if | ||
## Package Output ---------------------------------------------------- | ||
&& | ||
python '$__tool_directory__/package_output.py' | ||
--tool_directory '$__tool_directory__' | ||
--format_selector '$output_options.format.format_selector' | ||
#if str($system_options.debug) == "true" | ||
--debug | ||
#end if | ||
]]></command> | ||
<inputs> | ||
<param name="target" type="data" format="fasta,fasta.gz" label="Target sequence file in FASTA format"/> | ||
<param name="query" type="data" format="fasta,fasta.gz" label="Query sequence file in FASTA format"/> | ||
<expand macro="sequence_options"/> | ||
<expand macro="scoring_options"/> | ||
<expand macro="seeding_options"/> | ||
<expand macro="ungapped_extension_options"/> | ||
<expand macro="gapped_extension_options"/> | ||
<expand macro="output_options"/> | ||
<expand macro="system_options"/> | ||
</inputs> | ||
<outputs> | ||
<data name="kegalign_output" format="tgz" from_work_dir="data_package.tgz" label="KegAlign on ${on_string}"/> | ||
</outputs> | ||
<tests> | ||
<test expect_num_outputs="1" expect_test_failure="true"> | ||
<param name="target" value="hg38.chr20.chunk.fa.gz" ftype="fasta.gz"/> | ||
<param name="query" value="mm39.chr2.chunk.fa.gz" ftype="fasta.gz"/> | ||
<output name="kegalign_output" ftype="tgz"> | ||
<assert_contents> | ||
<has_archive_member path="galaxy/commands.json"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
KegAlign is a scalable, GPU-accelerated system for computing pairwise WGA. KegAlign is based on the standard seed-filter-extend heuristic, in which the filtering stage dominates the runtime (e.g. 98% for human-mouse WGA), and is accelerated using GPU(s). KegAlign was designed as a faster replacement for lastz pairwise aligner. | ||
**Using this tool** | ||
.. class:: warningmark | ||
This tool is the first part of a two-step process for generation of paiwrise alignments. The output of this tool is used as an input to **Batched LASTZ** tool. The *Batched LASTZ** can be found in the list of tool of this Galaxy instance. | ||
**What it does** | ||
KegAlign processes **Target** and **Query** sequences to identify highly similar regions where gapped extension will be performed to create actual alignments. The actual alignments are generated by **Batched LASTZ** that should be run on the output of this tool. | ||
.. class:: infomark | ||
Although this tool is only the first part of the alignment process all parameters related to generation of alignments are set **during this stage**. | ||
**Scoring Options** | ||
By default the HOXD70 substitution scores are used (from `Chiaromonte et al. 2002 <https://www.ncbi.nlm.nih.gov/pubmed/11928468>`_):: | ||
bad_score = X:-1000 # used for sub['X'][*] and sub[*]['X'] | ||
fill_score = -100 # used when sub[*][*] is not defined | ||
gap_open_penalty = 400 | ||
gap_extend_penalty = 30 | ||
A C G T | ||
A 91 -114 -31 -123 | ||
C -114 100 -125 -31 | ||
G -31 -125 100 -114 | ||
T -123 -31 -114 91 | ||
Matrix can be supplied as an input to **Read the substitution scores** parameter in *Scoring* section. Substitution matrix can be inferred from your data using another LASTZ-based tool (LASTZ_D: Infer substitution scores). | ||
**Output Options** | ||
.. class:: infomark | ||
The final format in which alignmnets will be generated by **Batched LASTZ** are set here. | ||
The default output is a MAF alignment file. Other formats can be configured in *Output Options* section. See LASTZ manual <https://lastz.github.io/lastz/#formats>`_ for description of possible formats. | ||
]]></help> | ||
<expand macro="citations"/> | ||
</tool> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
[arguments] | ||
flag_args = | ||
allgappedbounds | ||
anyornone | ||
gapped | ||
gfextend | ||
markend | ||
nocensus | ||
noentropy | ||
nofilter | ||
nogapped | ||
nogfextend | ||
nomirror | ||
norecoverseeds | ||
notransition | ||
notrivial | ||
notwins | ||
noytrim | ||
recoverseeds | ||
self | ||
version | ||
yasra75 | ||
yasra85 | ||
yasra85short | ||
yasra90 | ||
yasra95 | ||
yasra95short | ||
yasra98 | ||
|
||
str_args = | ||
action:query | ||
action:target | ||
ambiguous | ||
ball | ||
chores | ||
filter | ||
format | ||
gap | ||
hspthresh | ||
include | ||
match | ||
maxwordcount | ||
mismatch | ||
nochain | ||
output | ||
outputmasking | ||
outputmasking+ | ||
outputmasking+:soft | ||
outputmasking:soft | ||
querydepth | ||
queryhsplimit | ||
rdotplot | ||
readgroup | ||
scores | ||
seed | ||
segments | ||
show | ||
strand | ||
targetcapsule | ||
twins | ||
writecapsule | ||
writesegments | ||
|
||
int_args = | ||
allocate:query | ||
allocate:target | ||
allocate:traceback | ||
exact | ||
gappedthresh | ||
inner | ||
masking | ||
queryhspbest | ||
step | ||
word | ||
xdrop | ||
ydrop | ||
|
||
bool_str_args= | ||
census | ||
census16 | ||
census32 | ||
chain | ||
entropy | ||
help | ||
infer | ||
inferonly | ||
infscores | ||
tableonly | ||
|
||
bool_int_args= | ||
progress | ||
progress+masking | ||
transition | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
<macros> | ||
<xml name="requirements"> | ||
<requirements> | ||
<requirement type="package" version="@TOOL_VERSION@">kegalign-full</requirement> | ||
</requirements> | ||
</xml> | ||
<token name="@TOOL_VERSION@">0.1.2.7</token> | ||
<token name="@VERSION_SUFFIX@">1</token> | ||
<token name="@PROFILE@">21.05</token> | ||
<xml name="edam_ontology"> | ||
<edam_operations> | ||
<edam_operation>operation_0491</edam_operation> | ||
</edam_operations> | ||
</xml> | ||
<xml name="citations"> | ||
<citations> | ||
<citation type="doi">10.1109/SC41405.2020.00043</citation> | ||
</citations> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
<macros> | ||
<xml name="output_options"> | ||
<section name="output_options" expanded="false" title="Output Options"> | ||
<conditional name="format"> | ||
<param name="format_selector" type="select" display="radio" label="Specify the output format"> | ||
<option value="bam">BAM --format=sam)</option> | ||
<option value="general_def">General default (--format=general)</option> | ||
<option value="general_full">Customized general (--format=general[:fields])</option> | ||
<option value="maf" selected="true">MAF (--format=maf)</option> | ||
<option value="blastn">blastn (--format=BLASTN)</option> | ||
<option value="differences">Differences (--format=differences)</option> | ||
</param> | ||
<when value="bam"> | ||
<param name="bam_options" type="select" display="radio" argument="--format=sam, --format=softsam" label="Select a BAM flavor to output" help="Lastz actually outputs SAM data but Galaxy converts it into BAM to save space. For alignments that don't reach the end of a query, ‑‑format=sam uses 'hard clipping', while ‑‑format=softsam uses 'soft clipping'. See the section on 'clipped alignment' in the SAM specification for an explanation of what this means. The options ‑‑format=sam- and ‑‑format=softsam- suppress the SAM header lines. This makes them suitable for concatenating output from multiple runs. If you need to specify readgroup information: use AddOrEplaceReadGroups from Picard package"> | ||
<option value="sam">BAM</option> | ||
<option value="softsam">soft-clipped BAM</option> | ||
<option value="sam-">BAM without header</option> | ||
<option value="softsam-">soft-clipped BAM without header</option> | ||
</param> | ||
</when> | ||
<when value="general_def"> | ||
<!-- Do nothing --> | ||
</when> | ||
<when value="general_full"> | ||
<param name="fields" type="select" display="checkboxes" multiple="true" label="Select which fields to include" argument="--format=general-[:fields]"> | ||
<option value="score" selected="true">score: Score of the alignment block</option> | ||
<option value="name1" selected="true">name1: Name of the target sequence</option> | ||
<option value="number1">number1: Number of the target sequence within the target file</option> | ||
<option value="strand1" selected="true">strand1: Target sequence strand </option> | ||
<option value="size1" selected="true">size1: Size of the entire target sequence</option> | ||
<option value="start1">start1: Starting position of the alignment block in the target, origin-one</option> | ||
<option value="zstart1" selected="true">zstart1: Starting position of the alignment block in the target, origin-zero</option> | ||
<option value="end1" selected="true">end1: Ending position of the alignment block in the target</option> | ||
<option value="length1">length1: Length of the alignment block in the target (excluding gaps)</option> | ||
<option value="text1">text1: Aligned characters in the target, including gap characters</option> | ||
<option value="qalign1">qalign1: The target quality sequence (if there is one) correpsonding to aligned characters</option> | ||
<option value="nucs1">nucs1: The entire target sequence</option> | ||
<option value="name2" selected="true">name2: Name of the query sequence</option> | ||
<option value="number2">number2: Number of the query sequence within the query file</option> | ||
<option value="strand2" selected="true">strand2: Query sequence strand</option> | ||
<option value="size2" selected="true">size2: Size of the entire query sequence</option> | ||
<option value="start2">start2: Starting position of the alignment block in the query, origin-one</option> | ||
<option value="zstart2" selected="true">zstart2: Starting position of the alignment block in the query, origin-one</option> | ||
<option value="end2" selected="true">end2: Ending position of the alignment block in the query</option> | ||
<option value="length2">length2: Length of the alignment block in the query (excluding gaps)</option> | ||
<option value="text2">text2: Aligned characters in the query, including gap characters</option> | ||
<option value="qalign2">qalign2: The query quality sequence (if there is one) correpsonding to aligned characters</option> | ||
<option value="nucs2">nucs2: The entire query sequence</option> | ||
<option value="nmatch">nmatch: Match count</option> | ||
<option value="nmismatch">nmismatch: Mismatch count</option> | ||
<option value="ncolumn">ncolumn: Number of columns in the block. This includes matches, mismatches (substitutions), and gaps</option> | ||
<option value="npair">npair: Number of aligned bases in the block that are matches or mismatches (substitutions)</option> | ||
<option value="ngap">ngap: Gap count, the number of gaps in the block, counting each run of gapped columns as a single gap</option> | ||
<option value="cgap">cgap: Gap column count, the number of gaps in the block, counting each gapped column as a separate gap</option> | ||
<option value="diff">diff: Differences between what would be written for text1 and text2</option> | ||
<option value="cigar">cigar: A CIGAR-like representation of the alignment’s path</option> | ||
<option value="cigarx">cigarx: Same as cigar, but uses a newer syntax that distinguishes matches from substitutions</option> | ||
<option value="identity" selected="true">identity: Fraction of aligned bases in the block that are matches </option> | ||
<option value="idfrac">idfrac: Fraction of aligned bases in the block that are matches </option> | ||
<option value="id%" selected="true">id% Fraction of aligned bases in the block that are matches (as %)</option> | ||
<option value="blastid%">blastid%: Fraction of the alignment block that is matches, as would be reported by NCBI BLAST</option> | ||
<option value="continuity">continuity: Rate of non-gaps (non-indels) in the alignment block</option> | ||
<option value="confrac">confrac: Rate of non-gaps (non-indels) in the alignment block (as fraction)</option> | ||
<option value="con%">con%: Rate of non-gaps (non-indels) in the alignment block (as %)</option> | ||
<option value="coverage" selected="true">coverage: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block</option> | ||
<option value="covfrac">covfrac: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as fraction)</option> | ||
<option value="cov%" selected="true">cov%: Fraction of the entire input sequence (target or query, whichever is shorter) that is covered by the alignment block (as %)</option> | ||
<option value="diagonal">diagonal: The diagonal of the start of the alignment block in the DP matrix, expressed as an identifying number start1-start2</option> | ||
<option value="shingle">shingle: A measurement of the shingle overlap between the target and the query</option> | ||
<option value="number">number: The alignment number, counted as alignments are written to output (1-base)</option> | ||
<option value="znumber">znumber: The alignment number, counted as alignments are written to output (0-base)</option> | ||
<sanitizer invalid_char=""> | ||
<valid initial="string.letters,string.digits"> | ||
<add value="%"/> | ||
</valid> | ||
</sanitizer> | ||
</param> | ||
</when> | ||
<when value="maf"> | ||
<param name="maf_type" type="select" display="radio" argument="--format=maf" label="Select MAF flavor" help="MAF is a multiple alignment format developed at UCSC"> | ||
<option value="maf">MAF</option> | ||
<option value="maf+">MAF with additional stats</option> | ||
<option value="maf-" selected="true">MAF without header and comments</option> | ||
</param> | ||
</when> | ||
<when value="blastn"> | ||
<!-- Do nothing --> | ||
</when> | ||
<when value="differences"> | ||
<!-- Do nothing --> | ||
</when> | ||
</conditional> | ||
</section> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,337 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import configparser | ||
import json | ||
import os | ||
import resource | ||
import sys | ||
import tarfile | ||
import time | ||
import typing | ||
|
||
import bashlex | ||
|
||
RUSAGE_ATTRS: typing.Final = ["ru_utime", "ru_stime", "ru_maxrss", "ru_minflt", "ru_majflt", "ru_inblock", "ru_oublock", "ru_nvcsw", "ru_nivcsw"] | ||
|
||
|
||
class PackageFile: | ||
def __init__( | ||
self, | ||
pathname: str = "data_package.tgz", | ||
top_dir: str = "galaxy", | ||
data_dir: str = "files", | ||
config_file: str = "commands.json", | ||
format_file: str = "format.txt" | ||
) -> None: | ||
self.pathname: str = os.path.realpath(pathname) | ||
self.data_root: str = os.path.join(top_dir, data_dir) | ||
self.config_path: str = os.path.join(top_dir, config_file) | ||
self.config_file: str = config_file | ||
self.format_path: str = os.path.join(top_dir, format_file) | ||
self.format_file: str = format_file | ||
self.tarfile: typing.Optional[tarfile.TarFile] = None | ||
self.name_cache: typing.Dict[typing.Any, typing.Any] = {} | ||
self.working_dir: str = os.path.realpath(os.getcwd()) | ||
|
||
def _initialize(self) -> None: | ||
if self.tarfile is None: | ||
self.tarfile = tarfile.open( | ||
name=self.pathname, | ||
mode="w:gz", | ||
format=tarfile.GNU_FORMAT, | ||
compresslevel=6, | ||
) | ||
|
||
def add_config(self, pathname: str) -> None: | ||
if self.tarfile is None: | ||
self._initialize() | ||
|
||
source_path = os.path.realpath(pathname) | ||
|
||
if self.tarfile is not None: | ||
self.tarfile.add(source_path, arcname=self.config_path, recursive=False) | ||
|
||
def add_file(self, pathname: str, arcname: typing.Optional[str] = None) -> None: | ||
if self.tarfile is None: | ||
self._initialize() | ||
|
||
source_path = os.path.realpath(pathname) | ||
|
||
dest_path = None | ||
|
||
if arcname is None: | ||
dest_path = os.path.join(self.data_root, os.path.basename(source_path)) | ||
else: | ||
arc_path = os.path.realpath(arcname) | ||
rel_path = os.path.relpath(arc_path, self.working_dir) | ||
if not (os.path.isabs(rel_path) or rel_path.startswith("../")): | ||
dest_path = os.path.join(self.data_root, rel_path) | ||
else: | ||
sys.exit("path fail") | ||
|
||
if dest_path is not None: | ||
if self.tarfile is not None: | ||
if dest_path not in self.name_cache: | ||
try: | ||
self.tarfile.add( | ||
source_path, arcname=dest_path, recursive=False | ||
) | ||
except FileNotFoundError: | ||
sys.exit(f"missing source file {source_path}") | ||
|
||
self.name_cache[dest_path] = 1 | ||
# print(f"added: {dest_path}", flush=True) | ||
|
||
def add_format(self, pathname: str) -> None: | ||
if self.tarfile is None: | ||
self._initialize() | ||
|
||
source_path = os.path.realpath(pathname) | ||
|
||
if self.tarfile is not None: | ||
self.tarfile.add(source_path, arcname=self.format_path, recursive=False) | ||
|
||
def close(self) -> None: | ||
if self.tarfile is not None: | ||
self.tarfile.close() | ||
self.tarfile = None | ||
|
||
|
||
class bashCommandLineFile: | ||
def __init__( | ||
self, | ||
pathname: str, | ||
config: configparser.ConfigParser, | ||
args: argparse.Namespace, | ||
package_file: PackageFile, | ||
) -> None: | ||
self.pathname: str = pathname | ||
self.config = config | ||
self.args = args | ||
self.package_file = package_file | ||
self.executable: typing.Optional[str] = None | ||
self._parse_lines() | ||
self._write_format() | ||
|
||
def _parse_lines(self) -> None: | ||
with open("commands.json", "w") as ofh: | ||
with open(self.pathname) as f: | ||
line: str | ||
for line in f: | ||
line = line.rstrip("\n") | ||
command_dict = self._parse_line(line) | ||
# we may want to re-write args here | ||
new_args_list = [] | ||
|
||
args_list = command_dict.get("args", []) | ||
for arg in args_list: | ||
if arg.startswith("--target="): | ||
pathname = arg[9:] | ||
new_args_list.append(arg) | ||
if "[" in pathname: | ||
elems = pathname.split("[") | ||
sequence_file = elems.pop(0) | ||
self.package_file.add_file(sequence_file, sequence_file) | ||
for elem in elems: | ||
if elem.endswith("]"): | ||
elem = elem[:-1] | ||
if elem.startswith("subset="): | ||
subset_file = elem[7:] | ||
self.package_file.add_file(subset_file) | ||
|
||
elif arg.startswith("--query="): | ||
pathname = arg[8:] | ||
new_args_list.append(arg) | ||
if "[" in pathname: | ||
elems = pathname.split("[") | ||
sequence_file = elems.pop(0) | ||
self.package_file.add_file(sequence_file, sequence_file) | ||
for elem in elems: | ||
if elem.endswith("]"): | ||
elem = elem[:-1] | ||
if elem.startswith("subset="): | ||
subset_file = elem[7:] | ||
self.package_file.add_file(subset_file) | ||
elif arg.startswith("--segments="): | ||
pathname = arg[11:] | ||
new_args_list.append(arg) | ||
self.package_file.add_file(pathname) | ||
elif arg.startswith("--scores="): | ||
pathname = arg[9:] | ||
new_args_list.append("--scores=data/scores.txt") | ||
self.package_file.add_file(pathname, "data/scores.txt") | ||
else: | ||
new_args_list.append(arg) | ||
|
||
command_dict["args"] = new_args_list | ||
print(json.dumps(command_dict), file=ofh) | ||
|
||
self.package_file.add_config("commands.json") | ||
|
||
def _parse_line(self, line: str) -> typing.Dict[str, typing.Any]: | ||
# resolve shell redirects | ||
trees: typing.List[typing.Any] = bashlex.parse(line, strictmode=False) | ||
positions: typing.List[typing.Tuple[int, int]] = [] | ||
|
||
for tree in trees: | ||
visitor = nodevisitor(positions) | ||
visitor.visit(tree) | ||
|
||
# do replacements from the end so the indicies will be correct | ||
positions.reverse() | ||
|
||
processed = list(line) | ||
for start, end in positions: | ||
processed[start:end] = "" | ||
|
||
processed_line: str = "".join(processed) | ||
|
||
command_dict = self._parse_processed_line(processed_line) | ||
command_dict["stdin"] = visitor.stdin | ||
command_dict["stdout"] = visitor.stdout | ||
command_dict["stderr"] = visitor.stderr | ||
|
||
return command_dict | ||
|
||
def _parse_processed_line(self, line: str) -> typing.Dict[str, typing.Any]: | ||
argv: typing.List[str] = list(bashlex.split(line)) | ||
self.executable = argv.pop(0) | ||
|
||
parser: argparse.ArgumentParser = argparse.ArgumentParser(add_help=False) | ||
if "arguments" in self.config: | ||
arguments_section = self.config["arguments"] | ||
|
||
arg: str | ||
if "flag_args" in arguments_section: | ||
for arg in arguments_section["flag_args"].split(): | ||
parser.add_argument(f"--{arg}", action="store_true") | ||
|
||
if "str_args" in arguments_section: | ||
for arg in arguments_section["str_args"].split(): | ||
parser.add_argument(f"--{arg}", type=str) | ||
|
||
if "bool_str_args" in arguments_section: | ||
for arg in arguments_section["bool_str_args"].split(): | ||
parser.add_argument( | ||
f"--{arg}", nargs="?", const=True, default=False | ||
) | ||
|
||
if "int_args" in arguments_section: | ||
for arg in arguments_section["int_args"].split(): | ||
parser.add_argument(f"--{arg}", type=int) | ||
|
||
if "bool_int_args" in arguments_section: | ||
for arg in arguments_section["bool_int_args"].split(): | ||
parser.add_argument( | ||
f"--{arg}", nargs="?", const=True, default=False | ||
) | ||
|
||
namespace, rest = parser.parse_known_intermixed_args(argv) | ||
vars_dict = vars(namespace) | ||
|
||
command_dict: typing.Dict[str, typing.Any] = { | ||
"executable": self.executable, | ||
"args": [], | ||
} | ||
|
||
for var in vars_dict.keys(): | ||
value = vars_dict[var] | ||
if value is not None: | ||
if isinstance(value, bool): | ||
if value is True: | ||
command_dict["args"].append(f"--{var}") | ||
else: | ||
command_dict["args"].append(f"--{var}={value}") | ||
|
||
if len(rest) >= 0: | ||
value = rest.pop(0) | ||
command_dict["args"].append(f"--target={value}") | ||
|
||
if len(rest) >= 0: | ||
value = rest.pop(0) | ||
command_dict["args"].append(f"--query={value}") | ||
|
||
return command_dict | ||
|
||
def _write_format(self) -> None: | ||
if self.args.format_selector == "bam": | ||
format_name = "bam" | ||
elif self.args.format_selector == "maf": | ||
format_name = "maf" | ||
elif self.args.format_selector == "differences": | ||
format_name = "interval" | ||
else: | ||
format_name = "tabular" | ||
|
||
with open("format.txt", "w") as ofh: | ||
print(f"{format_name}", file=ofh) | ||
|
||
self.package_file.add_format("format.txt") | ||
|
||
|
||
class nodevisitor(bashlex.ast.nodevisitor): # type: ignore[misc] | ||
def __init__(self, positions: typing.List[typing.Tuple[int, int]]) -> None: | ||
self.positions = positions | ||
self.stdin = None | ||
self.stdout = None | ||
self.stderr = None | ||
|
||
def visitredirect( | ||
self, | ||
n: bashlex.ast.node, | ||
n_input: int, | ||
n_type: str, | ||
output: typing.Any, | ||
heredoc: typing.Any, | ||
) -> None: | ||
if isinstance(n_input, int) and 0 <= n_input <= 2: | ||
if isinstance(output, bashlex.ast.node) and output.kind == "word": | ||
self.positions.append(n.pos) | ||
if n_input == 0: | ||
self.stdin = output.word | ||
elif n_input == 1: | ||
self.stdout = output.word | ||
elif n_input == 2: | ||
self.stderr = output.word | ||
else: | ||
sys.exit(f"oops 1: {type(n_input)}") | ||
else: | ||
sys.exit(f"oops 2: {type(n_input)}") | ||
|
||
def visitheredoc(self, n: bashlex.ast.node, value: typing.Any) -> None: | ||
pass | ||
|
||
|
||
def main() -> None: | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument("--tool_directory", type=str, required=True, help="tool directory") | ||
parser.add_argument("--format_selector", type=str, required=True, help="format selector") | ||
parser.add_argument("--debug", action="store_true", help="enable debug messages") | ||
args = parser.parse_args() | ||
|
||
if args.debug: | ||
r_beg = resource.getrusage(resource.RUSAGE_SELF) | ||
beg: int = time.monotonic_ns() | ||
|
||
lastz_command_config_file: str = os.path.join(args.tool_directory, "lastz-cmd.ini") | ||
|
||
config: configparser.ConfigParser = configparser.ConfigParser() | ||
config.read(lastz_command_config_file) | ||
|
||
package_file = PackageFile() | ||
lastz_command_file = "lastz-commands.txt" | ||
bashCommandLineFile(lastz_command_file, config, args, package_file) | ||
package_file.close() | ||
|
||
if args.debug: | ||
ns: int = time.monotonic_ns() - beg | ||
r_end = resource.getrusage(resource.RUSAGE_SELF) | ||
print(f"package output clock time: {ns} ns", file=sys.stderr, flush=True) | ||
for rusage_attr in RUSAGE_ATTRS: | ||
value = getattr(r_end, rusage_attr) - getattr(r_beg, rusage_attr) | ||
print(f" package output {rusage_attr}: {value}", file=sys.stderr, flush=True) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
<macros> | ||
<xml name="scoring_options"> | ||
<section name="scoring_options" expanded="false" title="Scoring Options"> | ||
<param argument="--scoring" type="data" value="false" optional="true" format="txt" label="Scoring file in LASTZ format"/> | ||
<param name="ambiguous_selector" type="select" label="Ambiguous Nucleotides"> | ||
<option value="x" selected="true">None</option> | ||
<option value="n">N</option> | ||
<option value="iupac">IUPAC</option> | ||
</param> | ||
<conditional name="ambiguous_params"> | ||
<param name="set_ambiguous_params_selector" type="select" label="Set Ambiguous Nucleotide Scoring Parameters"> | ||
<option value="false" selected="true">No</option> | ||
<option value="true">Yes</option> | ||
</param> | ||
<when value="false"> | ||
<!-- Do nothing --> | ||
</when> | ||
<when value="true"> | ||
<param name="ambiguous_reward" type="integer" value="0" label="Ambiguous Nucleotide Reward"/> | ||
<param name="ambiguous_penalty" type="integer" value="0" label="Ambiguous Nucleotide Penalty"/> | ||
</when> | ||
</conditional> | ||
</section> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
<macros> | ||
<xml name="seeding_options"> | ||
<section name="seeding_options" expanded="false" title="Seeding Options"> | ||
<conditional name="seed"> | ||
<param name="seed_selector" type="select" label="Seed patern"> | ||
<option value="12of19" selected="true">12of19 (1110100110010101111)</option> | ||
<option value="14of22">14of22 (1110101100110010101111)</option> | ||
<option value="custom">custom</option> | ||
</param> | ||
<when value="12of19"> | ||
<!-- Do nothing --> | ||
</when> | ||
<when value="14of22"> | ||
<!-- Do nothing --> | ||
</when> | ||
<when value="custom"> | ||
<param name="custom_seed" type="text" value="" optional="false" label="Custom seed pattern"> | ||
<validator type="empty_field"/> | ||
<validator type="regex" message="Arbitrary pattern of 1s, 0s, and Ts">^[01T]+$</validator> | ||
<sanitizer invalid_char=""> | ||
<valid initial="none"> | ||
<add value="0"/> | ||
<add value="1"/> | ||
<add value="T"/> | ||
</valid> | ||
</sanitizer> | ||
</param> | ||
</when> | ||
</conditional> | ||
<param argument="--step" type="integer" value="1" label="Offset between the starting positions of successive target words considered for generating seed table"/> | ||
<param argument="--notransition" type="boolean" checked="false" label="Don't allow one transition in a seed hit"/> | ||
</section> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<macros> | ||
<xml name="sequence_options"> | ||
<section name="sequence_options" expanded="false" title="Sequence Options"> | ||
<param name="strand_selector" type="select" label="Strand to search"> | ||
<option value="plus">plus</option> | ||
<option value="minus">minus</option> | ||
<option value="both" selected="true">both</option> | ||
</param> | ||
<yield/> | ||
</section> | ||
</xml> | ||
</macros> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<macros> | ||
<xml name="system_options"> | ||
<section name="system_options" title="System Options"> | ||
<param argument="--wga_chunk_size" type="integer" value="250000" label="Chunk sizes for GPU calls for Xdrop - ⚠ change only if you are a developer"/> | ||
<param argument="--lastz_interval_size" type="integer" value="10000000" label="LASTZ interval for Ydrop - ⚠ change only if you are a developer"/> | ||
<param argument="--seq_block_size" type="integer" value="400000000" label="LASTZ interval for Ydrop - ⚠ change only if you are a developer"/> | ||
<param argument="--num_gpu" type="integer" value="-1" label="Specify number of GPUs to use - -1 if all the GPUs should be used"/> | ||
<param argument="--debug" type="boolean" value="false" label="Print debug messages"/> | ||
</section> | ||
</xml> | ||
</macros> |
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<macros> | ||
<xml name="ungapped_extension_options"> | ||
<section name="ungapped_extension_options" expanded="false" title="Ungapped Extension Options"> | ||
<param argument="--xdrop" type="integer" value="910" label="X-drop value for gap-free extension"/> | ||
<param argument="--hspthresh" type="integer" value="3000" label="Segment score threshold for high scoring pairs"/> | ||
<param argument="--noentropy" type="boolean" checked="false" label="Don't adjust low score segment pair scores using entropy factor after filtering stage"/> | ||
</section> | ||
</xml> | ||
</macros> |