diff --git a/CMakeLists.txt b/CMakeLists.txt index d342ea4..246a537 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -36,8 +36,8 @@ set (CMAKE_CXX_STANDARD 11) set (CMAKE_CXX_STANDARD_REQUIRED ON) ### MPI -find_package(MPI) -if (MPI_FOUND) +if (CMAKE_ENABLE_MPI_SUPPORT) + find_package(MPI REQUIRED) set(MSWEEP_MPI_SUPPORT 1) include_directories(MPI_C_INCLUDE_DIRS) if (CMAKE_MPI_MAX_PROCESSES) @@ -209,8 +209,9 @@ else() set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1) target_link_libraries(mSWEEP libtelescope) set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/include) + set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src) endif() -include_directories(${CMAKE_TELESCOPE_HEADERS}) +include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS}) ## rcgpar if (DEFINED CMAKE_RCGPAR_LIBRARY AND DEFINED CMAKE_RCGPAR_HEADERS) @@ -255,6 +256,11 @@ endif() add_library(msweeptools ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/matchfasta.cpp) add_executable(matchfasta ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/main.cpp) +if(MPI_FOUND) + add_dependencies(mSWEEP rcgmpi bigmpi) + target_link_libraries(mSWEEP rcgmpi ${LIBRARY_OUTPUT_PATH}/libbigmpi.a) +endif() + # Link libraries target_link_libraries(mSWEEP ${ZLIB} msweeptools) target_link_libraries(matchfasta msweeptools) diff --git a/README.md b/README.md index 3749fc2..04b68b1 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ mSWEEP with the following commands (example case for Open MPI): > mkdir build > cd build > module load mpi/openmpi -> cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx .. +> cmake -DCMAKE_ENABLE_MPI_SUPPORT=1 -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx .. > make ``` diff --git a/config/CMakeLists-rcgpar.txt.in b/config/CMakeLists-rcgpar.txt.in index d323f6a..17a0759 100644 --- a/config/CMakeLists-rcgpar.txt.in +++ b/config/CMakeLists-rcgpar.txt.in @@ -5,10 +5,11 @@ include(ExternalProject) ExternalProject_Add(rcgpar-download GIT_REPOSITORY https://github.com/tmaklin/rcgpar - GIT_TAG v1.0.0 + GIT_TAG v1.0.2 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar" BUILD_IN_SOURCE 0 BUILD_COMMAND "" + CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT} INSTALL_COMMAND "" TEST_COMMAND "" UPDATE_COMMAND "" diff --git a/config/CMakeLists-telescope.txt.in b/config/CMakeLists-telescope.txt.in index f1ed0bc..9815ccd 100644 --- a/config/CMakeLists-telescope.txt.in +++ b/config/CMakeLists-telescope.txt.in @@ -5,12 +5,14 @@ include(ExternalProject) ExternalProject_Add(telescope-download GIT_REPOSITORY https://github.com/tmaklin/telescope.git - GIT_TAG v0.2.1 + GIT_TAG v0.4.0 SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope" BUILD_IN_SOURCE 0 BUILD_COMMAND "" CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS} -D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS} + -D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS} + -D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src INSTALL_COMMAND "" TEST_COMMAND "" UPDATE_COMMAND "" diff --git a/include/Sample.hpp b/include/Sample.hpp index e73f8d3..db8970b 100644 --- a/include/Sample.hpp +++ b/include/Sample.hpp @@ -27,7 +27,7 @@ class Sample { std::vector relative_abundances; // Alignments class from telescope - KallistoAlignment pseudos; + telescope::KallistoAlignment pseudos; // Calculate log_ec_counts and counts_total. void process_aln(const bool bootstrap_mode); diff --git a/include/log.hpp b/include/log.hpp index 68584e2..8ffd3ca 100644 --- a/include/log.hpp +++ b/include/log.hpp @@ -2,8 +2,8 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ -#ifndef CXXIO_LOG_HPP -#define CXXIO_LOG_HPP +#ifndef MSWEEP_LOG_HPP +#define MSWEEP_LOG_HPP #include #include diff --git a/include/msweep_log.hpp b/include/msweep_log.hpp new file mode 100644 index 0000000..8ffd3ca --- /dev/null +++ b/include/msweep_log.hpp @@ -0,0 +1,62 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ + +#ifndef MSWEEP_LOG_HPP +#define MSWEEP_LOG_HPP + +#include +#include +#include +#include + +#include "cxxio.hpp" +#include "mpi_config.hpp" + +class Log : public cxxio::Out { + public: + bool verbose; + bool log_time; + std::chrono::time_point start_time; + + Log(std::ostream &_stream, bool _verbose = true, bool _log_time = true) : cxxio::Out(_stream), verbose(_verbose), log_time(_log_time) { + if (this->log_time) { + start_time = std::chrono::system_clock::now(); + } + } + void flush() { + if (verbose && log_time) { + std::chrono::time_point end_time = std::chrono::system_clock::now(); + std::chrono::duration elapsed_seconds = end_time - start_time; + std::time_t end = std::chrono::system_clock::to_time_t(end_time); + std::string end_s(std::ctime(&end)); + end_s.pop_back(); + *this << end_s << " elapsed_time: " << elapsed_seconds.count() << "s\n"; + } + } +}; + +template +Log& operator<<(Log &os, T t) { + int rank = 0; +#if defined(MSWEEP_MPI_SUPPORT) && (MSWEEP_MPI_SUPPORT) == 1 + // Only root will log + MPI_Comm_rank(MPI_COMM_WORLD,&rank); +#endif + if (os.verbose && rank == 0) { + if (os.log_time){ + std::chrono::time_point now = std::chrono::system_clock::now(); + std::time_t now_t = std::chrono::system_clock::to_time_t(now); + std::string now_s(std::ctime(&now_t)); + now_s.pop_back(); + os.stream() << now_s << ' '; + } + os.stream () << t; + if (!os.stream().good() && !os.stream().eof()) { + throw std::runtime_error("Error writing type: " + std::string(typeid(T).name()) + " to file " + os.filename()); + } + } + return os; +} + +#endif diff --git a/src/BootstrapSample.cpp b/src/BootstrapSample.cpp index bb53113..076f061 100644 --- a/src/BootstrapSample.cpp +++ b/src/BootstrapSample.cpp @@ -32,7 +32,7 @@ void BootstrapSample::init_bootstrap() { this->bootstrap_results = std::vector>(); // Initialize ec_distribution for bootstrapping - ec_distribution = std::discrete_distribution(pseudos.ec_counts.begin(), pseudos.ec_counts.end()); + ec_distribution = std::discrete_distribution(pseudos.ec_counts_begin(), pseudos.ec_counts_end()); } void BootstrapSample::write_bootstrap(const std::vector &cluster_indicators_to_string, diff --git a/src/Sample.cpp b/src/Sample.cpp index d29ba87..d51f168 100644 --- a/src/Sample.cpp +++ b/src/Sample.cpp @@ -9,20 +9,19 @@ void Sample::process_aln(const bool bootstrap_mode) { cell_id = ""; - m_num_ecs = pseudos.ec_counts.size(); + m_num_ecs = pseudos.compressed_size(); log_ec_counts.resize(m_num_ecs, 0.0); uint32_t aln_counts_total = 0; #pragma omp parallel for schedule(static) reduction(+:aln_counts_total) for (uint32_t i = 0; i < m_num_ecs; ++i) { - log_ec_counts[i] = std::log(pseudos.ec_counts[i]); - aln_counts_total += pseudos.ec_counts[i]; + log_ec_counts[i] = std::log(pseudos.reads_in_ec(i)); + aln_counts_total += pseudos.reads_in_ec(i); } counts_total = aln_counts_total; if (!bootstrap_mode) { // EC counts aren't needed when not bootstrapping. - this->pseudos.ec_counts.clear(); - this->pseudos.ec_counts.shrink_to_fit(); + this->pseudos.clear_counts(); } } @@ -31,7 +30,7 @@ std::vector Sample::group_counts(const std::vector indicator std::vector read_hitcounts(n_groups); uint32_t m_num_refs = this->pseudos.n_targets(); for (uint32_t j = 0; j < m_num_refs; ++j) { - read_hitcounts[indicators[j]] += pseudos.ec_configs[ec_id][j]; + read_hitcounts[indicators[j]] += pseudos(ec_id, j); } return read_hitcounts; } @@ -125,7 +124,7 @@ void Sample::read_likelihood(const Grouping &grouping, std::istream &infile) { uint32_t n_groups = grouping.get_n_groups(); std::vector> likelihoods(n_groups, std::vector()); - this->pseudos = KallistoAlignment(); + this->pseudos = telescope::KallistoAlignment(); if (infile.good()) { std::string newline; @@ -140,7 +139,7 @@ void Sample::read_likelihood(const Grouping &grouping, std::istream &infile) { while (std::getline(partition, part, '\t')) { if (ec_count_col) { uint32_t ec_count = std::stol(part); - this->pseudos.ec_counts.emplace_back(ec_count); + this->pseudos.add_counts(ec_count); ec_count_col = false; } else { likelihoods[group_id].emplace_back(std::stod(part)); diff --git a/src/mSWEEP.cpp b/src/mSWEEP.cpp index 1a548e8..826303f 100644 --- a/src/mSWEEP.cpp +++ b/src/mSWEEP.cpp @@ -28,7 +28,8 @@ void ReadInput(const Arguments &args, std::vector> *samp if (!args.themisto_mode && !args.read_likelihood_mode) { // Check that the number of reference sequences matches in the grouping and the alignment. reference->verify_kallisto_alignment(*args.infiles.run_info); - ReadKallisto(reference->get_n_refs(), *args.infiles.ec, *args.infiles.tsv, &samples->back()->pseudos); + samples->back()->pseudos = telescope::KallistoAlignment(reference->get_n_refs()); + telescope::read::Kallisto(*args.infiles.ec, *args.infiles.tsv, &samples->back()->pseudos); } else if (!args.read_likelihood_mode) { if (!args.themisto_index_path.empty()) { try { @@ -43,7 +44,8 @@ void ReadInput(const Arguments &args, std::vector> *samp cxxio::In forward_strand(args.tinfile1); cxxio::In reverse_strand(args.tinfile2); std::vector strands = { &forward_strand.stream(), &reverse_strand.stream() }; - ReadThemisto(get_mode(args.themisto_merge_mode), reference->get_n_refs(), strands, &samples->back()->pseudos); + samples->back()->pseudos = telescope::KallistoAlignment(reference->get_n_refs()); + telescope::read::ThemistoToKallisto(telescope::get_mode(args.themisto_merge_mode), strands, &samples->back()->pseudos); } else { if (reference->get_n_groupings() > 1) { throw std::runtime_error("Using more than one grouping with --read-likelihood is not yet implemented."); @@ -63,8 +65,7 @@ void ConstructLikelihood(const Arguments &args, const Grouping &grouping, const } if (free_ec_counts) { // Free memory used by the configs after all likelihood matrices are built. - sample->pseudos.ec_configs.clear(); - sample->pseudos.ec_configs.shrink_to_fit(); + sample->pseudos.clear_configs(); } } @@ -98,24 +99,26 @@ void WriteResults(const Arguments &args, const std::unique_ptr &sample, } // Relative abundances - std::string abundances_outfile(outfile); - abundances_outfile = (args.outfile.empty() || !args.batch_mode ? abundances_outfile : abundances_outfile + '/' + sample->cell_name()); - if (!args.outfile.empty()) { - abundances_outfile += "_abundances.txt"; - of.open(abundances_outfile); - } - if (args.bootstrap_mode) { - BootstrapSample* bs = static_cast(&(*sample)); - bs->write_bootstrap(grouping.get_names(), args.iters, (args.outfile.empty() ? std::cout : of.stream())); - } else { - sample->write_abundances(grouping.get_names(), (args.outfile.empty() ? std::cout : of.stream())); + if (!args.optimizer.no_fit_model) { + std::string abundances_outfile(outfile); + abundances_outfile = (args.outfile.empty() || !args.batch_mode ? abundances_outfile : abundances_outfile + '/' + sample->cell_name()); + if (!args.outfile.empty()) { + abundances_outfile += "_abundances.txt"; + of.open(abundances_outfile); + } + if (args.bootstrap_mode) { + BootstrapSample* bs = static_cast(&(*sample)); + bs->write_bootstrap(grouping.get_names(), args.iters, (args.outfile.empty() ? std::cout : of.stream())); + } else { + sample->write_abundances(grouping.get_names(), (args.outfile.empty() ? std::cout : of.stream())); + } } // Probability matrix - if (args.optimizer.print_probs) { + if (args.optimizer.print_probs && !args.optimizer.no_fit_model) { sample->write_probabilities(grouping.get_names(), std::cout); } - if (args.optimizer.write_probs) { + if (args.optimizer.write_probs && !args.optimizer.no_fit_model) { std::string probs_outfile(outfile); probs_outfile += "_probs.csv"; if (args.optimizer.gzip_probs) { diff --git a/src/main.cpp b/src/main.cpp index 1760958..051df2f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,6 +4,7 @@ #include #include "rcgpar.hpp" +#include "msweep_log.hpp" #include "mSWEEP.hpp" #include "parse_arguments.hpp" @@ -13,7 +14,6 @@ #include "version.h" #include "openmp_config.hpp" #include "mpi_config.hpp" -#include "log.hpp" void finalize(const std::string &msg, Log &log, bool abort = false) { log << msg; @@ -165,11 +165,10 @@ int main (int argc, char *argv[]) { bs->bootstrap_results.emplace_back(rcgpar::mixture_components(bootstrapped_ec_probs, resampled_log_ec_counts)); } } - - // Write results to file from the root process - if (rank == 0) - WriteResults(args, samples[j], reference.get_grouping(i), n_groupings, i); } + // Write results to file from the root process + if (rank == 0) + WriteResults(args, samples[j], reference.get_grouping(i), n_groupings, i); } } finalize("", log);