Skip to content

Commit

Permalink
Merge branch 'feat/add-kmer-lenght' into feat/finalize-paper
Browse files Browse the repository at this point in the history
  • Loading branch information
mmolari committed Oct 12, 2022
2 parents 3ab6c00 + b67a49b commit 0857c36
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 9 deletions.
4 changes: 2 additions & 2 deletions docs/dev/docker_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ pangraph generate -s 100 -r 1e-1 -m 1e-3 -d 5e-2 -i 1e-2 -t 5 "$TESTDIR/randseqs

echo "Test pangraph build - minimap asm20 no energy"
export JULIA_NUM_THREADS=4
pangraph build -k minimap2 -s 20 -a 0 -b 0 "$TESTDIR/input.fa" > "$TESTDIR/test1.json"
pangraph build -c -k minimap2 -s 20 -a 0 -b 0 "$TESTDIR/input.fa" > "$TESTDIR/test1.json"

echo "Test pangraph build - mmseqs"
export JULIA_NUM_THREADS=1
pangraph build -k mmseqs "$TESTDIR/input.fa" > "$TESTDIR/test2.json"
pangraph build -c -k mmseqs -K 8 "$TESTDIR/input.fa" > "$TESTDIR/test2.json"

echo "Test pangraph polish"
pangraph polish -c -l 10000 "$TESTDIR/test1.json" > "$TESTDIR/polished.json"
Expand Down
1 change: 1 addition & 0 deletions docs/src/cli/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Build a multiple sequence alignment pangraph.
| enforce uppercase | Boolean | u | upper-case | toggle to force genomes to uppercase characters |
| distance calculator | String | d | distance-backend | only accepts "native" or "mash" |
| alignment kernel | String | k | alignment-kernel | only accepts "minimap2" or "mmseqs" |
| k-mer length | Integer | K | kmer-length | kmer length, only used for the mmseqs2 alignment kernel. If not specified, the mmseqs default is used. |

## Arguments
Expects one or more fasta files.
Expand Down
11 changes: 9 additions & 2 deletions src/build.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,13 @@ Build = Command(
(short="-k", long="--alignment-kernel"),
"backend to use for pairwise genome alignment\n\trecognized options: [minimap2, mmseqs]",
"minimap2",
),
Arg(
Int,
"k-mer length",
(short="-K", long="--kmer-length"),
"kmer length, only used for mmseqs2 alignment kernel. If not specified will use mmseqs default.",
0,
)
],

Expand Down Expand Up @@ -136,8 +143,8 @@ Build = Command(
if !Shell.havecommand("mmseqs")
panic("external command mmseqs not found. please install before running build step with mmseqs backend\n")
end

MMseqs.align(contigs₁, contigs₂)
kmer_len = arg(Build, "-K")
MMseqs.align(contigs₁, contigs₂, kmer_len)
end
_ => error("unrecognized alignment kernel")
end
Expand Down
15 changes: 11 additions & 4 deletions src/mmseqs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ export align


"""
align(ref::PanContigs, qry::PanContigs)
Align homologous regions of `qry` and `ref`.
align(ref::PanContigs, qry::PanContigs, klen::Int64)
Align homologous regions of `qry` and `ref` using mmseqs easy-search.
Klen tunes the kmer length. If klen=0 then mmseqs default is used.
Returns the list of intervals between pancontigs.
"""
function align(ref::PanContigs, qry::PanContigs)
function align(ref::PanContigs, qry::PanContigs, klen::Int64)

hits = mktempdir() do dir
# hits = let dir = mktempdir(; cleanup = false)
Expand All @@ -39,6 +40,11 @@ function align(ref::PanContigs, qry::PanContigs)
reffa = qryfa
end

# kmer length
klen_opt = klen == 0 ? [] : ["-k", "$klen"]
# format of output file
fmat_outp = "query,qlen,qstart,qend,empty,target,tlen,tstart,tend,nident,alnlen,bits,cigar,fident,raw"

run(
pipeline(
`mmseqs easy-search
Expand All @@ -47,7 +53,8 @@ function align(ref::PanContigs, qry::PanContigs)
--max-seq-len 10000
-a
--search-type 3
--format-output query,qlen,qstart,qend,empty,target,tlen,tstart,tend,nident,alnlen,bits,cigar,fident,raw`,
--format-output $fmat_outp
$klen_opt`,
stdout = devnull,
stderr = devnull,
),
Expand Down
2 changes: 1 addition & 1 deletion trace.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ PanGraph.main(["help", "help"]) # help usage

# build
PanGraph.main(["build", "-c", "-u", "-b", "0", "-a", "0", "$root/test.fa"])
PanGraph.main(["build", "-c", "-u", "-b", "0", "-a", "0", "-k", "mmseqs", "$root/test.fa"])
PanGraph.main(["build", "-c", "-u", "-b", "0", "-a", "0", "-k", "mmseqs", "-K", "8", "$root/test.fa"])

# export
PanGraph.main(["export", "-o", "$root/export", "$root/test.json"])
Expand Down

0 comments on commit 0857c36

Please sign in to comment.