From 6f4e427d73399f855693ce141e1859b04824c25b Mon Sep 17 00:00:00 2001 From: Guillaume Marcais Date: Thu, 3 Jun 2021 06:54:10 +0200 Subject: [PATCH 1/2] Refactoring * argument parsing * allow early load of the index --- include/ReadExperiment.hpp | 31 +--- include/SalmonIndex.hpp | 5 + src/Alevin.cpp | 51 +++-- src/BuildSalmonIndex.cpp | 24 ++- src/Salmon.cpp | 128 ++++++------- src/SalmonAlevin.cpp | 310 ++++++++++++++----------------- src/SalmonQuantMerge.cpp | 3 +- src/SalmonQuantify.cpp | 12 +- src/SalmonQuantifyAlignments.cpp | 2 +- 9 files changed, 279 insertions(+), 287 deletions(-) diff --git a/include/ReadExperiment.hpp b/include/ReadExperiment.hpp index c84f923ec..40b2f7882 100644 --- a/include/ReadExperiment.hpp +++ b/include/ReadExperiment.hpp @@ -25,6 +25,7 @@ // Boost includes #include +#include #include // Cereal includes @@ -48,10 +49,12 @@ class ReadExperiment { public: ReadExperiment(std::vector& readLibraries, // const boost::filesystem::path& transcriptFile, - const boost::filesystem::path& indexDirectory, + SalmonIndex* salmonIndex, + // const boost::filesystem::path& indexDirectory, SalmonOpts& sopt) : readLibraries_(readLibraries), // transcriptFile_(transcriptFile), + salmonIndex_(salmonIndex), transcripts_(std::vector()), totalAssignedFragments_(0), fragStartDists_(5), posBiasFW_(5), posBiasRC_(5), posBiasExpectFW_(5), posBiasExpectRC_(5), /*seqBiasModel_(1.0),*/ eqBuilder_(sopt.jointLog, sopt.maxHashResizeThreads), @@ -115,24 +118,6 @@ class ReadExperiment { } */ - // ==== Figure out the index type - boost::filesystem::path versionPath = indexDirectory / "versionInfo.json"; - SalmonIndexVersionInfo versionInfo; - versionInfo.load(versionPath); - if (versionInfo.indexVersion() == 0) { - fmt::MemoryWriter infostr; - infostr << "Error: The index version file " << versionPath.string() - << " doesn't seem to exist. Please try re-building the salmon " - "index."; - throw std::invalid_argument(infostr.str()); - } - // Check index version compatibility here - auto indexType = versionInfo.indexType(); - // ==== Figure out the index type - - salmonIndex_.reset(new SalmonIndex(sopt.jointLog, indexType)); - salmonIndex_->load(indexDirectory); - // Now we'll have either an FMD-based index or a QUASI index // dispatch on the correct type. fmt::MemoryWriter infostr; @@ -159,7 +144,7 @@ class ReadExperiment { // Create the cluster forest for this set of transcripts clusters_.reset(new ClusterForest(transcripts_.size(), transcripts_)); } - + EQBuilderT& equivalenceClassBuilder() { return eqBuilder_; } std::string getIndexSeqHash256() const { return salmonIndex_->seqHash256(); } @@ -262,7 +247,7 @@ class ReadExperiment { } } - SalmonIndex* getIndex() { return salmonIndex_.get(); } + SalmonIndex* getIndex() { return salmonIndex_; } template void loadTranscriptsFromPuff(PuffIndexT* idx_, const SalmonOpts& sopt) { @@ -416,7 +401,7 @@ class ReadExperiment { std::atomic burnedIn{ totalAssignedFragments_ + numAssignedFragments_ >= sopt.numBurninFrags}; for (auto& rl : readLibraries_) { - processReadLibrary(rl, salmonIndex_.get(), transcripts_, clusterForest(), + processReadLibrary(rl, salmonIndex_, transcripts_, clusterForest(), *(fragLengthDist_.get()), numAssignedFragments_, numThreads, burnedIn); } @@ -806,7 +791,7 @@ class ReadExperiment { /** * The index we've built on the set of transcripts. */ - std::unique_ptr salmonIndex_{nullptr}; + SalmonIndex* salmonIndex_{nullptr}; /** * The cluster forest maintains the dynamic relationship * defined by transcripts and reads --- if two transcripts diff --git a/include/SalmonIndex.hpp b/include/SalmonIndex.hpp index 8bf38368c..5ef757553 100644 --- a/include/SalmonIndex.hpp +++ b/include/SalmonIndex.hpp @@ -246,4 +246,9 @@ class SalmonIndex { std::string decoyNameHash256_; }; +// Convenience function to load an index +std::unique_ptr +checkLoadIndex(const boost::filesystem::path& indexDirectory, + std::shared_ptr& logger); + #endif //__SALMON_INDEX_HPP diff --git a/src/Alevin.cpp b/src/Alevin.cpp index 9debe68fa..e4d2f020c 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -19,6 +19,7 @@
#include #include #include @@ -63,11 +64,11 @@ // salmon includes #include "FastxParser.hpp" +#include "ProgramOptionsGenerator.hpp" #include "SalmonConfig.hpp" #include "SalmonDefaults.hpp" #include "SalmonOpts.hpp" #include "SalmonUtils.hpp" -#include "ProgramOptionsGenerator.hpp" using paired_parser_qual = fastx_parser::FastxParser; using single_parser = fastx_parser::FastxParser; @@ -78,20 +79,18 @@ namespace apt = alevin::protocols; namespace aut = alevin::utils; template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); +int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr& salmonIndex); template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, +int alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, spp::sparse_hash_map& txpToGeneMap, spp::sparse_hash_map& geneIdxMap, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); //colors for progress monitoring const char RESET_COLOR[] = "\x1b[0m"; @@ -835,7 +834,8 @@ void initiatePipeline(AlevinOpts& aopt, boost::program_options::variables_map& vm, std::string commentString, bool noTgMap, std::vector barcodeFiles, - std::vector readFiles){ + std::vector readFiles, + std::unique_ptr& salmonIndex){ bool isOptionsOk = aut::processAlevinOpts(aopt, sopt, noTgMap, vm); if (!isOptionsOk){ aopt.jointLog->flush(); @@ -900,8 +900,7 @@ void initiatePipeline(AlevinOpts& aopt, } // do the actual mapping - auto rc = alevin_sc_align(aopt, sopt, orderedOptions); - + auto rc = alevin_sc_align(aopt, sopt, orderedOptions, salmonIndex); if (rc == 0) { aopt.jointLog->info("sc-align successful."); } else { @@ -949,7 +948,7 @@ void initiatePipeline(AlevinOpts& aopt, aopt.jointLog->info("Done with Barcode Processing; Moving to Quantify\n"); alevinQuant(aopt, sopt, barcodeSoftMap, trueBarcodes, txpToGeneMap, geneIdxMap, orderedOptions, - freqCounter, numLowConfidentBarcode); + freqCounter, numLowConfidentBarcode, salmonIndex); } else{ boost::filesystem::path cmdInfoPath = vm["output"].as(); @@ -962,7 +961,7 @@ void initiatePipeline(AlevinOpts& aopt, } } -int salmonBarcoding(int argc, const char* argv[]) { +int salmonBarcoding(int argc, const char* argv[], std::unique_ptr& salmonIndex) { namespace bfs = boost::filesystem; namespace po = boost::program_options; @@ -1077,7 +1076,7 @@ salmon-based processing of single-cell RNA-seq data. //aopt.jointLog->warn("Using DropSeq Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if(indrop){ std::cout<<"Indrop get neighbors removed, please use other protocols"; @@ -1089,7 +1088,7 @@ salmon-based processing of single-cell RNA-seq data. //aopt.jointLog->warn("Using InDrop Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else{ fmt::print(stderr, "ERROR: indrop needs w1 flag too.\n Exiting Now"); @@ -1105,7 +1104,7 @@ salmon-based processing of single-cell RNA-seq data. //aopt.jointLog->warn("Using InDrop Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else{ fmt::print(stderr, "ERROR: citeseq needs featureStart and featureLength flag too.\n Exiting Now"); @@ -1117,54 +1116,54 @@ salmon-based processing of single-cell RNA-seq data. //aopt.jointLog->warn("Using 10x v3 Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if(chrom){ AlevinOpts aopt; //aopt.jointLog->warn("Using 10x v2 Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if(gemcode){ AlevinOpts aopt; //aopt.jointLog->warn("Using 10x v1 Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - unmateFiles, readFiles); + unmateFiles, readFiles, salmonIndex); } else if(celseq){ AlevinOpts aopt; //aopt.jointLog->warn("Using CEL-Seq Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if(celseq2){ AlevinOpts aopt; //aopt.jointLog->warn("Using CEL-Seq2 Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if(quartzseq2){ AlevinOpts aopt; //aopt.jointLog->warn("Using Quartz-Seq2 Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if (custom_old) { AlevinOpts aopt; //aopt.jointLog->warn("Using Custom Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } else if (custom_new) { AlevinOpts aopt; //aopt.jointLog->warn("Using Custom Setting for Alevin"); initiatePipeline(aopt, sopt, orderedOptions, vm, commentString, noTgMap, - barcodeFiles, readFiles); + barcodeFiles, readFiles, salmonIndex); } } catch (po::error& e) { diff --git a/src/BuildSalmonIndex.cpp b/src/BuildSalmonIndex.cpp index 315feee3f..a9fc758fb 100644 --- a/src/BuildSalmonIndex.cpp +++ b/src/BuildSalmonIndex.cpp @@ -43,7 +43,7 @@ // http://stackoverflow.com/questions/108318/whats-the-simplest-way-to-test-whether-a-number-is-a-power-of-2-in-c bool isPowerOfTwo(uint32_t n) { return (n > 0 and (n & (n - 1)) == 0); } -int salmonIndex(int argc, const char* argv[]) { +int salmonIndex(int argc, const char* argv[], std::unique_ptr& /* salmonIndex */) { using std::string; namespace bfs = boost::filesystem; @@ -256,3 +256,25 @@ Creates a salmon index. } return ret; } + +std::unique_ptr checkLoadIndex(const boost::filesystem::path& indexDirectory, std::shared_ptr& logger) { + // ==== Figure out the index type + boost::filesystem::path versionPath = + indexDirectory / "versionInfo.json"; + SalmonIndexVersionInfo versionInfo; + versionInfo.load(versionPath); + if (versionInfo.indexVersion() == 0) { + fmt::MemoryWriter infostr; + infostr + << "Error: The index version file " << versionPath.string() + << " doesn't seem to exist. Please try re-building the salmon " + "index."; + throw std::invalid_argument(infostr.str()); + } + // Check index version compatibility here + auto indexType = versionInfo.indexType(); + // ==== Figure out the index type + std::unique_ptr res(new SalmonIndex(logger, indexType)); + res->load(indexDirectory); + return res; +} diff --git a/src/Salmon.cpp b/src/Salmon.cpp index b6e46d88d..bcb674d02 100644 --- a/src/Salmon.cpp +++ b/src/Salmon.cpp @@ -46,8 +46,9 @@ #include "GenomicFeature.hpp" #include "SalmonConfig.hpp" #include "VersionChecker.hpp" +#include "SalmonIndex.hpp" -int help(const std::vector& /*opts*/) { +int help(const std::vector& /*opts*/) { fmt::MemoryWriter helpMsg; helpMsg.write("salmon v{}\n\n", salmon::version); helpMsg.write( @@ -90,10 +91,12 @@ int dualModeMessage() { return 0; } +typedef std::function& index)> SubCmdType; + /** * Bonus! */ -int salmonSwim(int /*argc*/, const char* /*argv*/[]) { +int salmonSwim(int /*argc*/, const char* /*argv*/[], std::unique_ptr& /*index*/) { std::cout << R"( _____ __ @@ -144,17 +147,18 @@ Nature Methods. 2017;14(4):417-419. doi: 10.1038/nmeth.4197 )"; } -int salmonIndex(int argc, const char* argv[]); -int salmonQuantify(int argc, const char* argv[]); -int salmonAlignmentQuantify(int argc, const char* argv[]); +int salmonIndex(int argc, const char* argv[], std::unique_ptr& index); +int salmonQuantify(int argc, const char* argv[], std::unique_ptr& index); +int salmonAlignmentQuantify(int argc, const char* argv[], std::unique_ptr& index); +int salmonAlignmentDualMode(int argc, const char* argv[], std::unique_ptr& index); // TODO : PF_INTEGRATION -int salmonBarcoding(int argc, const char* argv[]); -int salmonQuantMerge(int argc, const char* argv[]); +int salmonBarcoding(int argc, const char* argv[], std::unique_ptr& index); +int salmonQuantMerge(int argc, const char* argv[], + std::unique_ptr& index); bool verbose = false; -int main(int argc, char* argv[]) { - show_backtrace(); +int main(int argc, const char* argv[]) { using std::string; namespace po = boost::program_options; std::setlocale(LC_ALL, "en_US.UTF-8"); @@ -235,9 +239,9 @@ int main(int argc, char* argv[]) { opts.insert(opts.begin(), "--help"); } - std::unordered_map> cmds( + std::unordered_map cmds( {{"index", salmonIndex}, - {"quant", salmonQuantify}, + {"quant", salmonAlignmentDualMode}, {"quantmerge", salmonQuantMerge}, // TODO : PF_INTEGRATION {"alevin", salmonBarcoding}, @@ -251,64 +255,24 @@ int main(int argc, char* argv[]) { std::copy_n( &argv[topLevelArgc], argc-topLevelArgc, &argv2[1] ); */ - int32_t subCommandArgc = opts.size() + 1; - std::unique_ptr argv2(new const char*[subCommandArgc]); + std::unique_ptr preloadedIndex; + + int32_t nargc = opts.size() + 1; + std::unique_ptr argv2(new const char*[nargc]); argv2[0] = argv[0]; - for (int32_t i = 0; i < subCommandArgc - 1; ++i) { + for (int32_t i = 0; i < nargc - 1; ++i) { argv2[i + 1] = opts[i].c_str(); } + const char** nargv = argv2.get(); - auto cmdMain = cmds.find(cmd); - if (cmdMain == cmds.end()) { - // help(subCommandArgc, argv2); - return help(opts); - } else { - // If the command is quant; determine whether - // we're quantifying with raw sequences or alignments - if (cmdMain->first == "quant") { - - if (subCommandArgc < 2) { - return dualModeMessage(); - } - // detect mode-specific help request - if (strncmp(argv2[1], "--help-alignment", 16) == 0) { - std::vector helpStr{'-', '-', 'h', 'e', 'l', 'p', '\0'}; - const char* helpArgv[] = {argv[0], &helpStr[0]}; - return salmonAlignmentQuantify(2, helpArgv); - } else if (strncmp(argv2[1], "--help-reads", 12) == 0) { - std::vector helpStr{'-', '-', 'h', 'e', 'l', 'p', '\0'}; - const char* helpArgv[] = {argv[0], &helpStr[0]}; - return salmonQuantify(2, helpArgv); - } - - // detect general help request - if (strncmp(argv2[1], "--help", 6) == 0 or - strncmp(argv2[1], "-h", 2) == 0) { - return dualModeMessage(); - } - - // otherwise, detect and dispatch the correct mode - bool useSalmonAlign{false}; - for (int32_t i = 0; i < subCommandArgc; ++i) { - if (strncmp(argv2[i], "-a", 2) == 0 or - strncmp(argv2[i], "-e", 2) == 0 or - strncmp(argv2[i], "--alignments", 12) == 0 or - strncmp(argv2[i], "--eqclasses", 11) == 0 or - strcmp(argv2[i], "--ont") == 0) { - useSalmonAlign = true; - break; - } - } - if (useSalmonAlign) { - return salmonAlignmentQuantify(subCommandArgc, argv2.get()); - } else { - return salmonQuantify(subCommandArgc, argv2.get()); - } - } else { - return cmdMain->second(subCommandArgc, argv2.get()); + while(true) { + auto cmdMain = cmds.find(cmd); + if (cmdMain == cmds.end()) { + // help(subCommandArgc, argv2); + return help(opts); } + return cmdMain->second(nargc, nargv, preloadedIndex); } - } catch (po::error& e) { std::cerr << "Program Option Error (main) : [" << e.what() << "].\n Exiting.\n"; @@ -321,3 +285,41 @@ int main(int argc, char* argv[]) { return 0; } + +int salmonAlignmentDualMode(int argc, const char* argv[], std::unique_ptr& index) { + // If the command is quant; determine whether + // we're quantifying with raw sequences or alignments + if (argc < 2) { + return dualModeMessage(); + } + // detect mode-specific help request + if (strncmp(argv[1], "--help-alignment", 16) == 0) { + const char* helpArgv[] = {argv[0], "--help", nullptr}; + return salmonAlignmentQuantify(2, helpArgv, index); + } else if (strncmp(argv[1], "--help-reads", 12) == 0) { + const char* helpArgv[] = {argv[0], "--help", nullptr}; + return salmonQuantify(2, helpArgv, index); + } + + // detect general help request + if (strncmp(argv[1], "--help", 6) == 0 or strncmp(argv[1], "-h", 2) == 0) { + return dualModeMessage(); + } + + // otherwise, detect and dispatch the correct mode + bool useSalmonAlign{false}; + for (int i = 0; i < argc; ++i) { + if (strncmp(argv[i], "-a", 2) == 0 or + strncmp(argv[i], "-e", 2) == 0 or + strncmp(argv[i], "--alignments", 12) == 0 or + strncmp(argv[i], "--eqclasses", 11) == 0) { + useSalmonAlign = true; + break; + } + } + if (useSalmonAlign) { + return salmonAlignmentQuantify(argc, argv, index); + } else { + return salmonQuantify(argc, argv, index); + } +} diff --git a/src/SalmonAlevin.cpp b/src/SalmonAlevin.cpp index 535876f10..6b4078968 100644 --- a/src/SalmonAlevin.cpp +++ b/src/SalmonAlevin.cpp @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -2662,7 +2663,8 @@ void alevinOptimize( std::vector& trueBarcodesVec, template int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions){ + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr& salmonIndex){ using std::cerr; using std::vector; using std::string; @@ -2689,13 +2691,12 @@ int alevin_sc_align(AlevinOpts& aopt, } // ==== END: Library format processing === - SalmonIndexVersionInfo versionInfo; - boost::filesystem::path versionPath = indexDirectory / "versionInfo.json"; - versionInfo.load(versionPath); - auto idxType = versionInfo.indexType(); + if(!salmonIndex) + salmonIndex = checkLoadIndex(indexDirectory, sopt.jointLog); + auto idxType = salmonIndex->indexType(); MappingStatistics mstats; - ReadExperimentT experiment(readLibraries, indexDirectory, sopt); + ReadExperimentT experiment(readLibraries, salmonIndex.get(), sopt); // We currently do not support decoy sequence in the // --justAlign or --sketch modes, so check that the @@ -2778,7 +2779,8 @@ int alevinQuant(AlevinOpts& aopt, spp::sparse_hash_map& txpToGeneMap, spp::sparse_hash_map& geneIdxMap, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, size_t numLowConfidentBarcode){ + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex){ using std::cerr; using std::vector; using std::string; @@ -2804,14 +2806,12 @@ int alevinQuant(AlevinOpts& aopt, std::exit(1); } // ==== END: Library format processing === - - SalmonIndexVersionInfo versionInfo; - boost::filesystem::path versionPath = indexDirectory / "versionInfo.json"; - versionInfo.load(versionPath); - auto idxType = versionInfo.indexType(); + if(!salmonIndex) + salmonIndex = checkLoadIndex(indexDirectory, sopt.jointLog); + auto idxType = salmonIndex->indexType(); MappingStatistics mstats; - ReadExperimentT experiment(readLibraries, indexDirectory, sopt); + ReadExperimentT experiment(readLibraries, salmonIndex.get(), sopt); //experiment.computePolyAPositions(); // This will be the class in charge of maintaining our @@ -2996,168 +2996,146 @@ int alevinQuant(AlevinOpts& aopt, namespace apt = alevin::protocols; -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); - -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, - boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, - boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); +int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr& salmonIndex); + +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, +int alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, + boost::program_options::parsed_options& orderedOptions, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); - -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); -template -int alevin_sc_align(AlevinOpts& aopt, - SalmonOpts& sopt, - boost::program_options::parsed_options& orderedOptions); - -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); + +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); -template -int alevinQuant(AlevinOpts& aopt, - SalmonOpts& sopt, - SoftMapT& barcodeMap, - TrueBcsT& trueBarcodes, - spp::sparse_hash_map& txpToGeneMap, - spp::sparse_hash_map& geneIdxMap, + std::unique_ptr& salmonIndex); +template int +alevin_sc_align(AlevinOpts& aopt, SalmonOpts& sopt, boost::program_options::parsed_options& orderedOptions, - CFreqMapT& freqCounter, - size_t numLowConfidentBarcode); + std::unique_ptr& salmonIndex); + +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); +template int +alevinQuant(AlevinOpts& aopt, SalmonOpts& sopt, + SoftMapT& barcodeMap, TrueBcsT& trueBarcodes, + spp::sparse_hash_map& txpToGeneMap, + spp::sparse_hash_map& geneIdxMap, + boost::program_options::parsed_options& orderedOptions, + CFreqMapT& freqCounter, size_t numLowConfidentBarcode, + std::unique_ptr& salmonIndex); diff --git a/src/SalmonQuantMerge.cpp b/src/SalmonQuantMerge.cpp index 4178b35f0..fc563cfe1 100644 --- a/src/SalmonQuantMerge.cpp +++ b/src/SalmonQuantMerge.cpp @@ -27,6 +27,7 @@ // C++ string formatting library #include "spdlog/fmt/fmt.h" // logger includes #include "spdlog/spdlog.h" +#include "SalmonIndex.hpp" enum class TargetColumn { LEN, ELEN, TPM, NREADS }; @@ -213,7 +214,7 @@ bool doMerge(QuantMergeOptions& qmOpts) { return true; } -int salmonQuantMerge(int argc, const char* argv[]) { +int salmonQuantMerge(int argc, const char* argv[], std::unique_ptr& /* salmonIndex */) { using std::cerr; using std::vector; using std::string; diff --git a/src/SalmonQuantify.cpp b/src/SalmonQuantify.cpp index cacc459aa..be7adaa6f 100644 --- a/src/SalmonQuantify.cpp +++ b/src/SalmonQuantify.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -2385,7 +2386,7 @@ void quantifyLibrary(ReadExperimentT& experiment, jointLog->info("finished quantifyLibrary()"); } -int salmonQuantify(int argc, const char* argv[]) { +int salmonQuantify(int argc, const char* argv[], std::unique_ptr& salmonIndex) { using std::cerr; using std::vector; using std::string; @@ -2489,13 +2490,12 @@ transcript abundance from RNA-seq reads } // ==== END: Library format processing === - SalmonIndexVersionInfo versionInfo; - boost::filesystem::path versionPath = indexDirectory / "versionInfo.json"; - versionInfo.load(versionPath); - auto idxType = versionInfo.indexType(); + if(!salmonIndex) { + salmonIndex = checkLoadIndex(indexDirectory, sopt.jointLog); + } MappingStatistics mstats; - ReadExperimentT experiment(readLibraries, indexDirectory, sopt); + ReadExperimentT experiment(readLibraries, salmonIndex.get(), sopt); // This will be the class in charge of maintaining our // rich equivalence classes diff --git a/src/SalmonQuantifyAlignments.cpp b/src/SalmonQuantifyAlignments.cpp index aad99c824..747d1ee35 100644 --- a/src/SalmonQuantifyAlignments.cpp +++ b/src/SalmonQuantifyAlignments.cpp @@ -1573,7 +1573,7 @@ bool runSingleEndSample(std::vector& alignmentFiles, bfs::path& trans return processSample(alnLib, requiredObservations, sopt, sopt.outputDirectory); } -int salmonAlignmentQuantify(int argc, const char* argv[]) { +int salmonAlignmentQuantify(int argc, const char* argv[], std::unique_ptr& /* salmon_index */) { using std::cerr; using std::vector; using std::string; From 373ff32fb55ab1851d80132a914198df4cbfb2b6 Mon Sep 17 00:00:00 2001 From: Rob Patro Date: Thu, 16 Sep 2021 13:11:31 -0400 Subject: [PATCH 2/2] Update Alevin.cpp spacing -- re-trigger CI --- src/Alevin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Alevin.cpp b/src/Alevin.cpp index e4d2f020c..8f58db16a 100644 --- a/src/Alevin.cpp +++ b/src/Alevin.cpp @@ -894,6 +894,7 @@ void initiatePipeline(AlevinOpts& aopt, // write out the cmd_info.json to make sure we have that boost::filesystem::path outputDirectory = vm["output"].as(); bool isWriteOk = aut::writeCmdInfo(outputDirectory / "cmd_info.json", orderedOptions); + if(!isWriteOk){ fmt::print(stderr, "Writing cmd_info.json in output directory failed.\nExiting now."); exit(1);