Skip to content

Commit

Permalink
Merge pull request #18 from PROBIC/v1.6.2
Browse files Browse the repository at this point in the history
mSWEEP-v1.6.2 (12 August 2022)
  • Loading branch information
tmaklin authored Aug 12, 2022
2 parents 52446f8 + c5ef9a9 commit 11b3eca
Show file tree
Hide file tree
Showing 11 changed files with 112 additions and 40 deletions.
12 changes: 9 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ set (CMAKE_CXX_STANDARD 11)
set (CMAKE_CXX_STANDARD_REQUIRED ON)

### MPI
find_package(MPI)
if (MPI_FOUND)
if (CMAKE_ENABLE_MPI_SUPPORT)
find_package(MPI REQUIRED)
set(MSWEEP_MPI_SUPPORT 1)
include_directories(MPI_C_INCLUDE_DIRS)
if (CMAKE_MPI_MAX_PROCESSES)
Expand Down Expand Up @@ -209,8 +209,9 @@ else()
set_target_properties(telescope PROPERTIES EXCLUDE_FROM_ALL 1)
target_link_libraries(mSWEEP libtelescope)
set(CMAKE_TELESCOPE_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/include)
set(CMAKE_BITMAGIC_HEADERS ${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src)
endif()
include_directories(${CMAKE_TELESCOPE_HEADERS})
include_directories(${CMAKE_TELESCOPE_HEADERS} ${CMAKE_BITMAGIC_HEADERS})

## rcgpar
if (DEFINED CMAKE_RCGPAR_LIBRARY AND DEFINED CMAKE_RCGPAR_HEADERS)
Expand Down Expand Up @@ -255,6 +256,11 @@ endif()
add_library(msweeptools ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/matchfasta.cpp)
add_executable(matchfasta ${CMAKE_CURRENT_SOURCE_DIR}/src/tools/main.cpp)

if(MPI_FOUND)
add_dependencies(mSWEEP rcgmpi bigmpi)
target_link_libraries(mSWEEP rcgmpi ${LIBRARY_OUTPUT_PATH}/libbigmpi.a)
endif()

# Link libraries
target_link_libraries(mSWEEP ${ZLIB} msweeptools)
target_link_libraries(matchfasta msweeptools)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ mSWEEP with the following commands (example case for Open MPI):
> mkdir build
> cd build
> module load mpi/openmpi
> cmake -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx ..
> cmake -DCMAKE_ENABLE_MPI_SUPPORT=1 -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx ..
> make
```

Expand Down
3 changes: 2 additions & 1 deletion config/CMakeLists-rcgpar.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ include(ExternalProject)

ExternalProject_Add(rcgpar-download
GIT_REPOSITORY https://github.com/tmaklin/rcgpar
GIT_TAG v1.0.0
GIT_TAG v1.0.2
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/rcgpar"
BUILD_IN_SOURCE 0
BUILD_COMMAND ""
CMAKE_ARGS -D CMAKE_ENABLE_MPI_SUPPORT=${MSWEEP_MPI_SUPPORT}
INSTALL_COMMAND ""
TEST_COMMAND ""
UPDATE_COMMAND ""
Expand Down
4 changes: 3 additions & 1 deletion config/CMakeLists-telescope.txt.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ include(ExternalProject)

ExternalProject_Add(telescope-download
GIT_REPOSITORY https://github.com/tmaklin/telescope.git
GIT_TAG v0.2.1
GIT_TAG v0.4.0
SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/external/telescope"
BUILD_IN_SOURCE 0
BUILD_COMMAND ""
CMAKE_ARGS -D CMAKE_BXZSTR_HEADERS=${CMAKE_BXZSTR_HEADERS}
-D CMAKE_CXXARGS_HEADERS=${CMAKE_CXXARGS_HEADERS}
-D CMAKE_CXXIO_HEADERS=${CMAKE_CXXIO_HEADERS}
-D CMAKE_BITMAGIC_HEADERS=${CMAKE_CURRENT_BINARY_DIR}/external/telescope/external/BitMagic-7.12.3/src
INSTALL_COMMAND ""
TEST_COMMAND ""
UPDATE_COMMAND ""
Expand Down
2 changes: 1 addition & 1 deletion include/Sample.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class Sample {
std::vector<double> relative_abundances;

// Alignments class from telescope
KallistoAlignment pseudos;
telescope::KallistoAlignment pseudos;

// Calculate log_ec_counts and counts_total.
void process_aln(const bool bootstrap_mode);
Expand Down
4 changes: 2 additions & 2 deletions include/log.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

#ifndef CXXIO_LOG_HPP
#define CXXIO_LOG_HPP
#ifndef MSWEEP_LOG_HPP
#define MSWEEP_LOG_HPP

#include <chrono>
#include <fstream>
Expand Down
62 changes: 62 additions & 0 deletions include/msweep_log.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

#ifndef MSWEEP_LOG_HPP
#define MSWEEP_LOG_HPP

#include <chrono>
#include <fstream>
#include <exception>
#include <string>

#include "cxxio.hpp"
#include "mpi_config.hpp"

class Log : public cxxio::Out {
public:
bool verbose;
bool log_time;
std::chrono::time_point<std::chrono::system_clock> start_time;

Log(std::ostream &_stream, bool _verbose = true, bool _log_time = true) : cxxio::Out(_stream), verbose(_verbose), log_time(_log_time) {
if (this->log_time) {
start_time = std::chrono::system_clock::now();
}
}
void flush() {
if (verbose && log_time) {
std::chrono::time_point<std::chrono::system_clock> end_time = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end_time - start_time;
std::time_t end = std::chrono::system_clock::to_time_t(end_time);
std::string end_s(std::ctime(&end));
end_s.pop_back();
*this << end_s << " elapsed_time: " << elapsed_seconds.count() << "s\n";
}
}
};

template <typename T>
Log& operator<<(Log &os, T t) {
int rank = 0;
#if defined(MSWEEP_MPI_SUPPORT) && (MSWEEP_MPI_SUPPORT) == 1
// Only root will log
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
#endif
if (os.verbose && rank == 0) {
if (os.log_time){
std::chrono::time_point<std::chrono::system_clock> now = std::chrono::system_clock::now();
std::time_t now_t = std::chrono::system_clock::to_time_t(now);
std::string now_s(std::ctime(&now_t));
now_s.pop_back();
os.stream() << now_s << ' ';
}
os.stream () << t;
if (!os.stream().good() && !os.stream().eof()) {
throw std::runtime_error("Error writing type: " + std::string(typeid(T).name()) + " to file " + os.filename());
}
}
return os;
}

#endif
2 changes: 1 addition & 1 deletion src/BootstrapSample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void BootstrapSample::init_bootstrap() {
this->bootstrap_results = std::vector<std::vector<double>>();

// Initialize ec_distribution for bootstrapping
ec_distribution = std::discrete_distribution<uint32_t>(pseudos.ec_counts.begin(), pseudos.ec_counts.end());
ec_distribution = std::discrete_distribution<uint32_t>(pseudos.ec_counts_begin(), pseudos.ec_counts_end());
}

void BootstrapSample::write_bootstrap(const std::vector<std::string> &cluster_indicators_to_string,
Expand Down
15 changes: 7 additions & 8 deletions src/Sample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,19 @@

void Sample::process_aln(const bool bootstrap_mode) {
cell_id = "";
m_num_ecs = pseudos.ec_counts.size();
m_num_ecs = pseudos.compressed_size();
log_ec_counts.resize(m_num_ecs, 0.0);
uint32_t aln_counts_total = 0;
#pragma omp parallel for schedule(static) reduction(+:aln_counts_total)
for (uint32_t i = 0; i < m_num_ecs; ++i) {
log_ec_counts[i] = std::log(pseudos.ec_counts[i]);
aln_counts_total += pseudos.ec_counts[i];
log_ec_counts[i] = std::log(pseudos.reads_in_ec(i));
aln_counts_total += pseudos.reads_in_ec(i);
}
counts_total = aln_counts_total;

if (!bootstrap_mode) {
// EC counts aren't needed when not bootstrapping.
this->pseudos.ec_counts.clear();
this->pseudos.ec_counts.shrink_to_fit();
this->pseudos.clear_counts();
}
}

Expand All @@ -31,7 +30,7 @@ std::vector<uint16_t> Sample::group_counts(const std::vector<uint32_t> indicator
std::vector<uint16_t> read_hitcounts(n_groups);
uint32_t m_num_refs = this->pseudos.n_targets();
for (uint32_t j = 0; j < m_num_refs; ++j) {
read_hitcounts[indicators[j]] += pseudos.ec_configs[ec_id][j];
read_hitcounts[indicators[j]] += pseudos(ec_id, j);
}
return read_hitcounts;
}
Expand Down Expand Up @@ -125,7 +124,7 @@ void Sample::read_likelihood(const Grouping &grouping, std::istream &infile) {
uint32_t n_groups = grouping.get_n_groups();

std::vector<std::vector<double>> likelihoods(n_groups, std::vector<double>());
this->pseudos = KallistoAlignment();
this->pseudos = telescope::KallistoAlignment();

if (infile.good()) {
std::string newline;
Expand All @@ -140,7 +139,7 @@ void Sample::read_likelihood(const Grouping &grouping, std::istream &infile) {
while (std::getline(partition, part, '\t')) {
if (ec_count_col) {
uint32_t ec_count = std::stol(part);
this->pseudos.ec_counts.emplace_back(ec_count);
this->pseudos.add_counts(ec_count);
ec_count_col = false;
} else {
likelihoods[group_id].emplace_back(std::stod(part));
Expand Down
37 changes: 20 additions & 17 deletions src/mSWEEP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ void ReadInput(const Arguments &args, std::vector<std::unique_ptr<Sample>> *samp
if (!args.themisto_mode && !args.read_likelihood_mode) {
// Check that the number of reference sequences matches in the grouping and the alignment.
reference->verify_kallisto_alignment(*args.infiles.run_info);
ReadKallisto(reference->get_n_refs(), *args.infiles.ec, *args.infiles.tsv, &samples->back()->pseudos);
samples->back()->pseudos = telescope::KallistoAlignment(reference->get_n_refs());
telescope::read::Kallisto(*args.infiles.ec, *args.infiles.tsv, &samples->back()->pseudos);
} else if (!args.read_likelihood_mode) {
if (!args.themisto_index_path.empty()) {
try {
Expand All @@ -43,7 +44,8 @@ void ReadInput(const Arguments &args, std::vector<std::unique_ptr<Sample>> *samp
cxxio::In forward_strand(args.tinfile1);
cxxio::In reverse_strand(args.tinfile2);
std::vector<std::istream*> strands = { &forward_strand.stream(), &reverse_strand.stream() };
ReadThemisto(get_mode(args.themisto_merge_mode), reference->get_n_refs(), strands, &samples->back()->pseudos);
samples->back()->pseudos = telescope::KallistoAlignment(reference->get_n_refs());
telescope::read::ThemistoToKallisto(telescope::get_mode(args.themisto_merge_mode), strands, &samples->back()->pseudos);
} else {
if (reference->get_n_groupings() > 1) {
throw std::runtime_error("Using more than one grouping with --read-likelihood is not yet implemented.");
Expand All @@ -63,8 +65,7 @@ void ConstructLikelihood(const Arguments &args, const Grouping &grouping, const
}
if (free_ec_counts) {
// Free memory used by the configs after all likelihood matrices are built.
sample->pseudos.ec_configs.clear();
sample->pseudos.ec_configs.shrink_to_fit();
sample->pseudos.clear_configs();
}
}

Expand Down Expand Up @@ -98,24 +99,26 @@ void WriteResults(const Arguments &args, const std::unique_ptr<Sample> &sample,
}

// Relative abundances
std::string abundances_outfile(outfile);
abundances_outfile = (args.outfile.empty() || !args.batch_mode ? abundances_outfile : abundances_outfile + '/' + sample->cell_name());
if (!args.outfile.empty()) {
abundances_outfile += "_abundances.txt";
of.open(abundances_outfile);
}
if (args.bootstrap_mode) {
BootstrapSample* bs = static_cast<BootstrapSample*>(&(*sample));
bs->write_bootstrap(grouping.get_names(), args.iters, (args.outfile.empty() ? std::cout : of.stream()));
} else {
sample->write_abundances(grouping.get_names(), (args.outfile.empty() ? std::cout : of.stream()));
if (!args.optimizer.no_fit_model) {
std::string abundances_outfile(outfile);
abundances_outfile = (args.outfile.empty() || !args.batch_mode ? abundances_outfile : abundances_outfile + '/' + sample->cell_name());
if (!args.outfile.empty()) {
abundances_outfile += "_abundances.txt";
of.open(abundances_outfile);
}
if (args.bootstrap_mode) {
BootstrapSample* bs = static_cast<BootstrapSample*>(&(*sample));
bs->write_bootstrap(grouping.get_names(), args.iters, (args.outfile.empty() ? std::cout : of.stream()));
} else {
sample->write_abundances(grouping.get_names(), (args.outfile.empty() ? std::cout : of.stream()));
}
}

// Probability matrix
if (args.optimizer.print_probs) {
if (args.optimizer.print_probs && !args.optimizer.no_fit_model) {
sample->write_probabilities(grouping.get_names(), std::cout);
}
if (args.optimizer.write_probs) {
if (args.optimizer.write_probs && !args.optimizer.no_fit_model) {
std::string probs_outfile(outfile);
probs_outfile += "_probs.csv";
if (args.optimizer.gzip_probs) {
Expand Down
9 changes: 4 additions & 5 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <fstream>

#include "rcgpar.hpp"
#include "msweep_log.hpp"

#include "mSWEEP.hpp"
#include "parse_arguments.hpp"
Expand All @@ -13,7 +14,6 @@
#include "version.h"
#include "openmp_config.hpp"
#include "mpi_config.hpp"
#include "log.hpp"

void finalize(const std::string &msg, Log &log, bool abort = false) {
log << msg;
Expand Down Expand Up @@ -165,11 +165,10 @@ int main (int argc, char *argv[]) {
bs->bootstrap_results.emplace_back(rcgpar::mixture_components(bootstrapped_ec_probs, resampled_log_ec_counts));
}
}

// Write results to file from the root process
if (rank == 0)
WriteResults(args, samples[j], reference.get_grouping(i), n_groupings, i);
}
// Write results to file from the root process
if (rank == 0)
WriteResults(args, samples[j], reference.get_grouping(i), n_groupings, i);
}
}
finalize("", log);
Expand Down

0 comments on commit 11b3eca

Please sign in to comment.