From af500c97b988ae90be173d44198173502200ccc2 Mon Sep 17 00:00:00 2001 From: Adam Novak Date: Fri, 28 Jun 2024 11:45:21 -0400 Subject: [PATCH] Make vg inject use AlignmentEmitter and support GAF --- src/subcommand/inject_main.cpp | 36 +++++++++++++++++++++++++--------- test/t/39_vg_inject.t | 4 +++- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/subcommand/inject_main.cpp b/src/subcommand/inject_main.cpp index a9ed8cbcb8f..99b1c8fe234 100644 --- a/src/subcommand/inject_main.cpp +++ b/src/subcommand/inject_main.cpp @@ -14,6 +14,7 @@ #include "../alignment.hpp" #include "../vg.hpp" #include "../xg.hpp" +#include "../hts_alignment_emitter.hpp" #include #include #include @@ -23,10 +24,11 @@ using namespace vg; using namespace vg::subcommand; void help_inject(char** argv) { - cerr << "usage: " << argv[0] << " inject [options] input.[bam|sam|cram] >output.gam" << endl + cerr << "usage: " << argv[0] << " inject -x graph.xg [options] input.[bam|sam|cram] >output.gam" << endl << endl << "options:" << endl << " -x, --xg-name FILE use this graph or xg index (required, non-XG formats also accepted)" << endl + << " -o, --output-format NAME output the alignments in NAME format (gam / gaf / json) [gam]" << endl << " -t, --threads N number of threads to use" << endl; } @@ -37,6 +39,8 @@ int main_inject(int argc, char** argv) { } string xg_name; + string output_format = "GAM"; + std::set output_formats = { "GAM", "GAF", "JSON" }; int threads = get_thread_count(); int c; @@ -46,12 +50,13 @@ int main_inject(int argc, char** argv) { { {"help", no_argument, 0, 'h'}, {"xg-name", required_argument, 0, 'x'}, + {"output-format", required_argument, 0, 'o'}, {"threads", required_argument, 0, 't'}, {0, 0, 0, 0} }; int option_index = 0; - c = getopt_long (argc, argv, "hx:t:", + c = getopt_long (argc, argv, "hx:o:t:", long_options, &option_index); // Detect the end of the options. @@ -63,6 +68,19 @@ int main_inject(int argc, char** argv) { case 'x': xg_name = optarg; break; + + case 'o': + { + output_format = optarg; + for (char& c : output_format) { + c = std::toupper(c); + } + if (output_formats.find(output_format) == output_formats.end()) { + std::cerr << "error: [vg inject] Invalid output format: " << optarg << std::endl; + std::exit(1); + } + } + break; case 't': threads = parse(optarg); @@ -90,14 +108,14 @@ int main_inject(int argc, char** argv) { } unique_ptr path_handle_graph = vg::io::VPKG::load_one(xg_name); bdsg::PathPositionOverlayHelper overlay_helper; - PathPositionHandleGraph* xgidx = overlay_helper.apply(path_handle_graph.get()); + PathPositionHandleGraph* xgidx = overlay_helper.apply(path_handle_graph.get()); - vg::io::ProtobufEmitter buf(cout); - function lambda = [&buf](Alignment& aln) { -#pragma omp critical (buf) - { - buf.write(std::move(aln)); - } + // We don't do HTS output formats but we do need an empty paths collection to make an alignment emitter + vector> paths; + unique_ptr alignment_emitter = get_alignment_emitter("-", output_format, paths, threads, xgidx); + + function lambda = [&](Alignment& aln) { + alignment_emitter->emit_mapped_single({std::move(aln)}); }; if (threads > 1) { hts_for_each_parallel(file_name, lambda, xgidx); diff --git a/test/t/39_vg_inject.t b/test/t/39_vg_inject.t index 541c2e9cbf6..f1aa362cee1 100644 --- a/test/t/39_vg_inject.t +++ b/test/t/39_vg_inject.t @@ -6,7 +6,7 @@ BASH_TAP_ROOT=../deps/bash-tap PATH=../bin:$PATH # for vg -plan tests 12 +plan tests 13 vg construct -r small/x.fa > j.vg vg index -x j.xg j.vg @@ -52,5 +52,7 @@ cat <(samtools view -H small/x.bam) <(printf "name\t4\t*\t0\t0\t*\t*\t0\t0\tACGT is "$(vg inject -x x.xg unmapped.sam | vg view -aj - | grep "path" | wc -l)" 0 "vg inject does not make an alignment for an umapped read" is "$(echo $?)" 0 "vg inject does not crash on an unmapped read" +is $(vg inject -x x.xg small/x.bam -o GAF | wc -l) \ + 1000 "vg inject supports GAF output" rm j.vg j.xg x.vg x.gcsa x.gcsa.lcp x.xg unmapped.sam