Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Jun 7, 2018
2 parents b2c3c9b + ff9b782 commit c4b3c57
Show file tree
Hide file tree
Showing 18 changed files with 1,056 additions and 482 deletions.
39 changes: 15 additions & 24 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ enable_testing()

project (Salmon)

set(CPACK_PACKAGE_VERSION "0.10.1")
set(CPACK_PACKAGE_VERSION "0.10.2")
set(CPACK_PACKAGE_VERSION_MAJOR "0")
set(CPACK_PACKAGE_VERSION_MINOR "10")
set(CPACK_PACKAGE_VERSION_PATCH "1")
set(CPACK_PACKAGE_VERSION_PATCH "2")
set(PROJECT_VERSION ${CPACK_PACKAGE_VERSION})
set(CPACK_GENERATOR "TGZ")
set(CPACK_SOURCE_GENERATOR "TGZ")
Expand Down Expand Up @@ -37,7 +37,7 @@ if(CONDA_BUILD)
set (CXXSTDFLAG "-std=c++11")
set (GCCVERSION "4.8.5")
else ()
set (CXXSTDFLAG "-std=c++14")
set (CXXSTDFLAG "-std=c++11")
set (GCCVERSION "5.2")
endif()

Expand Down Expand Up @@ -652,20 +652,16 @@ set (HAVE_FAST_MALLOC FALSE)
# See if we have Jemalloc
find_package(Jemalloc)
if (Jemalloc_FOUND)
##
# Don't be so stringent about the version yet
##
#if (NOT (${JEMALLOC_VERSION} VERSION_LESS 5.1.0))
message("Found Jemalloc library --- using this memory allocator")
set (FAST_MALLOC_LIB ${JEMALLOC_LIBRARIES})
set (HAVE_FAST_MALLOC TRUE)
endif()

if (NOT HAVE_FAST_MALLOC)
# See if we have Tcmalloc
find_package(Tcmalloc)
if (Tcmalloc_FOUND)
message("Fount TCMalloc library --- using this memory allocator")
set (TCMALLOC_LIB ${Tcmalloc_LIBRARIES})
set (FAST_MALLOC_LIB ${TCMALLOC_LIB})
set (HAVE_FAST_MALLOC TRUE)
endif()
#else()
# message("Found Jemalloc version ${JEMALLOC_VERSION}, but require >= 5.1.0. Downloading newer version")
#endif()
endif()

if(CONDA_BUILD)
Expand All @@ -679,16 +675,11 @@ if (NOT HAVE_FAST_MALLOC)
message("==================================================================")
ExternalProject_Add(libjemalloc
DOWNLOAD_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/jemalloc/archive/4.5.0.tar.gz -o jemalloc-4.5.0.tar.gz &&
${SHASUM} e885b65b95426945655ee91a30f563c9679770c92946bcdd0795f6b78c06c221 jemalloc-4.5.0.tar.gz &&
tar -xzf jemalloc-4.5.0.tar.gz
##
#URL https://github.com/COMBINE-lab/jemalloc/archive/4.5.0.tar.gz
#DOWNLOAD_NAME jemalloc-4.5.0.tar.gz
#URL_HASH SHA1=ad3a10866ad34b446f8f133e5edcd028be2f3003
#TLS_VERIFY FALSE
##
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-4.5.0
DOWNLOAD_COMMAND curl -k -L https://github.com/COMBINE-lab/jemalloc/archive/5.1.0.tar.gz -o jemalloc-5.1.0.tar.gz &&
${SHASUM} ff28aef89df724bd7b6bd6fde8597695514e0e3404d1afad2f1eb8b55ef378d3 jemalloc-5.1.0.tar.gz &&
tar -xzf jemalloc-5.1.0.tar.gz

SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/jemalloc-5.1.0
BUILD_IN_SOURCE TRUE
INSTALL_DIR ${CMAKE_CURRENT_SOURCE_DIR}/external/install
CONFIGURE_COMMAND sh -c "${JEMALLOC_FLAGS} ./autogen.sh --disable-debug --prefix=<INSTALL_DIR>"
Expand Down
33 changes: 20 additions & 13 deletions cmake/Modules/FindJemalloc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,26 @@ find_library(JEMALLOC_LIBRARY NAMES jemalloc libjemalloc
${PC_JEMALLOC_LIBRARY_DIRS}
PATH_SUFFIXES lib lib64)

set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY})
set(JEMALLOC_INCLUDE_DIRS ${JEMALLOC_INCLUDE_DIR})

find_package_handle_standard_args(Jemalloc DEFAULT_MSG
JEMALLOC_LIBRARY JEMALLOC_INCLUDE_DIR)

get_property(_type CACHE JEMALLOC_ROOT PROPERTY TYPE)
if(_type)
set_property(CACHE JEMALLOC_ROOT PROPERTY ADVANCED 1)
if("x${_type}" STREQUAL "xUNINITIALIZED")
set_property(CACHE JEMALLOC_ROOT PROPERTY TYPE PATH)
endif()
# Read the jemalloc version string out of the installed header so that
# find_package_handle_standard_args() can be given a VERSION_VAR.
#
# The EXISTS test protects against a stale cached or user-supplied
# JEMALLOC_INCLUDE_DIR: file(STRINGS) fails the configure step when the
# file cannot be read, whereas a missing header should simply leave
# JEMALLOC_VERSION unset.
if(JEMALLOC_INCLUDE_DIR AND EXISTS "${JEMALLOC_INCLUDE_DIR}/jemalloc/jemalloc.h")
  # Matches a line of the form:  #define JEMALLOC_VERSION "<version>"
  set(_version_regex "^#define[ \t]+JEMALLOC_VERSION[ \t]+\"([^\"]+)\".*")
  # Keep only the header line(s) that define JEMALLOC_VERSION ...
  file(STRINGS "${JEMALLOC_INCLUDE_DIR}/jemalloc/jemalloc.h"
       JEMALLOC_VERSION REGEX "${_version_regex}")
  # ... and reduce them to the quoted value captured by the first group.
  string(REGEX REPLACE "${_version_regex}" "\\1"
         JEMALLOC_VERSION "${JEMALLOC_VERSION}")
  unset(_version_regex)
endif()

mark_as_advanced(JEMALLOC_ROOT JEMALLOC_LIBRARY JEMALLOC_INCLUDE_DIR)
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set JEMALLOC_FOUND to TRUE
# if all listed variables are TRUE and the requested version matches.
find_package_handle_standard_args(Jemalloc REQUIRED_VARS
JEMALLOC_LIBRARY JEMALLOC_INCLUDE_DIR
VERSION_VAR JEMALLOC_VERSION)


# Publish the plural result variables only when jemalloc was located.
# find_package_handle_standard_args(Jemalloc ...) reports its result under
# the package name as given (Jemalloc_FOUND), which is also the spelling the
# top-level CMakeLists.txt checks; the upper-case JEMALLOC_FOUND originally
# tested here is still accepted so existing behaviour is preserved.
if(Jemalloc_FOUND OR JEMALLOC_FOUND)
  set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY})
  set(JEMALLOC_INCLUDE_DIRS ${JEMALLOC_INCLUDE_DIR})
endif()

mark_as_advanced(JEMALLOC_INCLUDE_DIR JEMALLOC_LIBRARY)
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ MAINTAINER [email protected]

ENV PACKAGES git gcc make g++ cmake libboost-all-dev liblzma-dev libbz2-dev \
ca-certificates zlib1g-dev curl unzip autoconf
ENV SALMON_VERSION 0.10.1
ENV SALMON_VERSION 0.10.2

# salmon binary will be installed in /home/salmon/bin/salmon

Expand Down
2 changes: 1 addition & 1 deletion docker/build_test.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
#! /bin/bash
docker build -t combinelab/salmon:0.10.1 .
docker build -t combinelab/salmon:0.10.2 .
3 changes: 3 additions & 0 deletions include/AtomicMatrix.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ template <typename T> class AtomicMatrix {
}
}

// Read-only accessor: returns the current value of the nRow_ member.
size_t nRow() const {
  return nRow_;
}
// Read-only accessor: returns the current value of the nCol_ member.
size_t nCol() const {
  return nCol_;
}

private:
std::vector<tbb::atomic<T>> storage_;
std::vector<tbb::atomic<T>> rowsums_;
Expand Down
4 changes: 2 additions & 2 deletions include/SalmonConfig.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
namespace salmon {
constexpr char majorVersion[] = "0";
constexpr char minorVersion[] = "10";
constexpr char patchVersion[] = "1";
constexpr char version[] = "0.10.1";
constexpr char patchVersion[] = "2";
constexpr char version[] = "0.10.2";
constexpr uint32_t indexVersion = 2;
constexpr char requiredQuasiIndexVersion[] = "q5";
} // namespace salmon
Expand Down
3 changes: 2 additions & 1 deletion include/SalmonStringUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <cstddef>
#include <cstdint>
#include <vector>

namespace salmon {

Expand Down Expand Up @@ -128,7 +129,7 @@ constexpr uint8_t charToSamEncode[] = {
15, 15, 15, 15, 15, 15};
*/

uint8_t* encodeSequenceInSAM(const char* src, size_t len);
std::vector<uint8_t> encodeSequenceInSAM(const char* src, size_t len);

/**
Incomplete: currently only rev for 'ATCG'
Expand Down
41 changes: 20 additions & 21 deletions include/Transcript.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "SequenceBiasModel.hpp"
#include "tbb/atomic.h"
#include "stx/string_view.hpp"
#include "IOUtils.hpp"
#include <atomic>
#include <cmath>
#include <limits>
Expand Down Expand Up @@ -145,7 +146,11 @@ class Transcript {
using salmon::stringtools::encodedRevComp;
size_t byte = idx >> 1;
size_t nibble = idx & 0x1;
uint8_t* sseq = SAMSequence_.get();

// NOTE 10.2
auto& sseq = SAMSequence_;
//if (byte >= sseq.size()) { std::cerr << "requested index " << byte << " for vector of size " << sseq.size() << " for reference " << RefName << std::endl; return 0;}


switch (dir) {
case strand::forward:
Expand Down Expand Up @@ -473,33 +478,27 @@ class Transcript {
}
}

// Will *not* delete seq on destruction
void setSAMSequenceBorrowed(uint8_t* seq, bool needGC = false,
bool reduceGCMemory = false) {
SAMSequence_ = std::unique_ptr<uint8_t, void (*)(uint8_t*)>(
seq, // store seq
[](uint8_t* p) {} // do nothing deleter
);
if (needGC) {
computeGCContent_(reduceGCMemory);
}
}

// Will delete seq on destruction
void setSAMSequenceOwned(uint8_t* seq, bool needGC = false,
void setSAMSequenceOwned(std::vector<uint8_t>&& seq, bool needGC = false,
bool reduceGCMemory = false) {
SAMSequence_ = std::unique_ptr<uint8_t, void (*)(uint8_t*)>(
seq, // store seq
[](uint8_t* p) { delete[] p; } // do nothing deleter
);

if ((2*seq.size() < RefLength) or (2*seq.size() > RefLength + 1)) {
std::stringstream errstream;
errstream << "\n\nSAM file says target " << RefName << " has length " << RefLength
<< ", but the FASTA file contains a sequence of length [" << seq.size() * 2 << " or " << seq.size() * 2 - 1 << "]\n\n";
std::cerr << ioutils::SET_RED << errstream.str();
std::exit(1);
}

SAMSequence_ = std::move(seq);
if (needGC) {
computeGCContent_(reduceGCMemory);
}
}

const char* Sequence() const { return Sequence_.get(); }

uint8_t* SAMSequence() const { return SAMSequence_.get(); }
uint8_t* SAMSequence() const { return const_cast<uint8_t*>(SAMSequence_.data()); }

void setCompleteLength(uint32_t completeLengthIn) {
CompleteLength = completeLengthIn;
Expand Down Expand Up @@ -679,8 +678,8 @@ class Transcript {
*/
}

std::unique_ptr<uint8_t, void (*)(uint8_t*)> SAMSequence_ =
std::unique_ptr<uint8_t, void (*)(uint8_t*)>(nullptr, [](uint8_t*) {});
// NOTE 10.2
std::vector<uint8_t> SAMSequence_;

std::unique_ptr<const char, void (*)(const char*)> Sequence_ =
std::unique_ptr<const char, void (*)(const char*)>(nullptr,
Expand Down
12 changes: 9 additions & 3 deletions scripts/fetchRapMap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,11 @@ if [ -d ${INSTALL_DIR}/src/rapmap ] ; then
rm -fr ${INSTALL_DIR}/src/rapmap
fi

SVER=salmon-v0.10.1
SVER=salmon-v0.10.2
#SVER=develop-salmon
#SVER=pe-chaining

EXPECTED_SHA256=c4ca27de299bee395b404d117d9d98ad9f0d6ee256ea1deb2890ea402893e688

mkdir -p ${EXTERNAL_DIR}
curl -k -L https://github.com/COMBINE-lab/RapMap/archive/${SVER}.zip -o ${EXTERNAL_DIR}/rapmap.zip
Expand All @@ -40,8 +43,11 @@ fi
if [ -z "${hashcheck-}" ]; then
echo "Couldn't find shasum command; can't verify contents of downloaded RapMap";
else
echo "fca1323154b884a81bd61e3bdda06502fd3593f1622ad3b2dc98ef14dc4e6b6a ${EXTERNAL_DIR}/rapmap.zip" | ${hashcheck} -c - || { echo "rapmap.zip did not match expected SHA1! Exiting."; exit 1; }
#echo "not testing sha in develop branch"
# Verify the downloaded archive against EXPECTED_SHA256, aborting the script
# on a mismatch. The check is skipped when SVER is develop-salmon
# (presumably because that branch's archive has no fixed checksum -- confirm).
# The failure message names SHA256, which is the digest actually compared.
if [[ $SVER != develop-salmon ]]; then
echo "${EXPECTED_SHA256} ${EXTERNAL_DIR}/rapmap.zip" | ${hashcheck} -c - || { echo "rapmap.zip did not match expected SHA256! Exiting."; exit 1; }
else
echo "not testing sha since pulling from develop-salmon"
fi
fi


Expand Down
1 change: 1 addition & 0 deletions scripts/make-release.sh
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ cp ${DIR}/../sample_data.tgz ${DIR}/../RELEASES/${betaname}/
echo -e "Copying over other shared library dependencies\n"
bash ${DIR}/../scripts/cpld.bash ${DIR}/../bin/salmon ${DIR}/../RELEASES/${betaname}/lib/
echo -e "Removing dangerous dependencies\n"
rm ${DIR}/../RELEASES/${betaname}/lib/librt.so.1
rm ${DIR}/../RELEASES/${betaname}/lib/libc.so.6
rm ${DIR}/../RELEASES/${betaname}/lib/ld-linux-x86-64.so.2
rm ${DIR}/../RELEASES/${betaname}/lib/libdl.so.2
Expand Down
42 changes: 12 additions & 30 deletions src/AlignmentModel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ inline void AlignmentModel::setBasesFromCIGAROp_(enum cigar_op op,
size_t& curReadBase) {
switch (op) {
case BAM_UNKNOWN:
std::cerr << "ENCOUNTERED UNKNOWN SYMBOL IN CIGAR STRING!\n";
std::cerr << "ENCOUNTERED UNKNOWN SYMBOL IN CIGAR STRING!" << std::endl;
break;
case BAM_CMATCH:
// do nothing
Expand Down Expand Up @@ -164,6 +164,9 @@ inline void AlignmentModel::setBasesFromCIGAROp_(enum cigar_op op,
curRefBase = ALN_PAD;
curReadBase = ALN_PAD;
break;
default:
std::cerr << "ENCOUNTERED UNKNOWN (non -1) CIGAR OP : (" << op << ")!" << std::endl;
break;
}
}

Expand Down Expand Up @@ -258,8 +261,8 @@ double AlignmentModel::logLikelihood(
uint32_t opLen = cigar[cigarIdx] >> BAM_CIGAR_SHIFT;
enum cigar_op op =
static_cast<enum cigar_op>(cigar[cigarIdx] & BAM_CIGAR_MASK);
size_t curReadBase = samToTwoBit[bam_seqi(qseq, readIdx)];
size_t curRefBase = samToTwoBit[ref.baseAt(uTranscriptIdx, readStrand)];
size_t curReadBase = (BAM_CONSUME_SEQ(op)) ? samToTwoBit[bam_seqi(qseq, readIdx)] : 0;
size_t curRefBase = (BAM_CONSUME_REF(op)) ? samToTwoBit[ref.baseAt(uTranscriptIdx, readStrand)] : 0;
advanceInRead = false;
advanceInReference = false;

Expand Down Expand Up @@ -418,6 +421,7 @@ void AlignmentModel::update(
int32_t readIdx{0};
auto transcriptIdx = bam_pos(read);
size_t transcriptLen = ref.RefLength;

// if the read starts before the beginning of the transcript,
// only consider the part overlapping the transcript
if (transcriptIdx < 0) {
Expand Down Expand Up @@ -450,9 +454,9 @@ void AlignmentModel::update(
uint32_t opLen = cigar[cigarIdx] >> BAM_CIGAR_SHIFT;
enum cigar_op op =
static_cast<enum cigar_op>(cigar[cigarIdx] & BAM_CIGAR_MASK);
size_t curReadBase = samToTwoBit[bam_seqi(qseq, readIdx)];
size_t curRefBase = samToTwoBit[ref.baseAt(uTranscriptIdx, readStrand)];
advanceInRead = false;

size_t curReadBase = (BAM_CONSUME_SEQ(op)) ? samToTwoBit[bam_seqi(qseq, readIdx)] : 0;
size_t curRefBase = (BAM_CONSUME_REF(op)) ? samToTwoBit[ref.baseAt(uTranscriptIdx, readStrand)] : 0;
advanceInReference = false;

for (size_t i = 0; i < opLen; ++i) {
Expand All @@ -475,6 +479,7 @@ void AlignmentModel::update(
}
return;
}

curReadBase = samToTwoBit[bam_seqi(qseq, readIdx)];
readPosBin = static_cast<uint32_t>((readIdx * invLen));
advanceInRead = false;
Expand All @@ -493,6 +498,7 @@ void AlignmentModel::update(
}
return;
}

curRefBase = samToTwoBit[ref.baseAt(uTranscriptIdx, readStrand)];
advanceInReference = false;
}
Expand All @@ -510,30 +516,6 @@ void AlignmentModel::update(
if (BAM_CONSUME_SEQ(op)) {
++readIdx;
advanceInRead = true;
/* DEBUG -- print what happened
std::cerr << "read name = " << bam_name(read) << "\n";
std::cerr << "curReadBase = " << readIdx << "\n";
std::cerr << "readLen = " << bam_seq_len(read) << "\n";
std::cerr << "ref = ";
for (size_t j = 0; j <
std::min(static_cast<size_t>(bam_seq_len(read)),
static_cast<size_t>(transcriptLen - transcriptIdx)); ++j) {
std::cerr << salmon::stringtools::samCodeToChar[ref.baseAt(j,
readStrand)];
}
std::cerr << "\n";
std::cerr << "read = ";
for (size_t j = 0; j < bam_seq_len(read); ++j) {
std::cerr << salmon::stringtools::samCodeToChar[bam_seqi(qseq, j)];
}
std::cerr << "\nCIGAR = ";
for (size_t j = 0; j < cigarLen; ++j) {
uint32_t opLen = cigar[j] >> BAM_CIGAR_SHIFT;
enum cigar_op op = static_cast<enum cigar_op>(cigar[j] &
BAM_CIGAR_MASK); std::cerr << opLen << opToChr(op);
}
std::cerr << "\n";
*/
}
if (BAM_CONSUME_REF(op)) {
++uTranscriptIdx;
Expand Down
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ target_link_libraries(salmon
${FAST_MALLOC_LIB}
${LIBRT}
ksw2pp
${CMAKE_DL_LIBS}
)

# Link the executable
Expand All @@ -213,6 +214,7 @@ target_link_libraries(unitTests
${NON_APPLECLANG_LIBS}
${FAST_MALLOC_LIB}
${LIBRT}
${CMAKE_DL_LIBS}
)

add_dependencies(salmon unitTests)
Expand Down
Loading

0 comments on commit c4b3c57

Please sign in to comment.