Skip to content

Commit

Permalink
Merge pull request #99 from BUStools/devel
Browse files Browse the repository at this point in the history
merge devel
  • Loading branch information
Yenaled authored Jan 4, 2024
2 parents a7b3e81 + cd6bf09 commit 6705b7e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 17 deletions.
2 changes: 1 addition & 1 deletion src/Common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
#include "roaring.hh"
#include "hash.hpp"

#define BUSTOOLS_VERSION "0.43.1"
#define BUSTOOLS_VERSION "0.43.2"

#define u_map_ std::unordered_map
enum CAPTURE_TYPE : char
Expand Down
6 changes: 2 additions & 4 deletions src/bustools_count.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ void bustools_count(Bustools_opt &opt) {
std::vector<int32_t> tx_split; // Store transcript names for split
std::vector<int32_t> tx_split_lookup; // Map transcript IDs to mtx status
bool count_split = !opt.count_split.empty();
int count_mtx_priority = !opt.count_gene_multimapping && count_split ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons]
int count_mtx_priority = (!opt.count_gene_multimapping || opt.count_collapse) && count_split ? 1 : 0; // 1 = when something in tx_split overlaps something not in tx_split, prioritize the latter (useful for dealing in cases when introns of one gene overlap exons of another gene [we prioritize the exons]
parseTranscripts(opt.count_txp, txnames);
std::vector<int32_t> genemap(txnames.size(), -1);
u_map_<std::string, int32_t> genenames;
Expand Down Expand Up @@ -89,9 +89,7 @@ void bustools_count(Bustools_opt &opt) {
// Essentially, we are removing all tx's that belong to (or not belong to) tx_split in the equivalence classes
// This handles instances in which a read maps to exon of one gene but intron of another (likely overlapping) gene to avoid discarding the record
// This is done at read-level (not UMI-level) so if one UMI maps to one gene's exon but another UMI maps to the other gene's intron, we still discard it
if (count_mtx_priority == 1 && !found)
new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end());
else if (count_mtx_priority == 2 && found)
if (count_mtx_priority == 1 && found)
new_ec.erase(std::remove(new_ec.begin(), new_ec.end(), tx), new_ec.end());
}
}
Expand Down
25 changes: 13 additions & 12 deletions src/bustools_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2661,17 +2661,18 @@ void Bustools_Usage()
<< "count Generate count matrices from a BUS file" << std::endl
<< "inspect Produce a report summarizing a BUS file" << std::endl
<< "allowlist Generate an on-list from a BUS file" << std::endl
<< "project Project a BUS file to gene sets" << std::endl
//<< "project Project a BUS file to gene sets" << std::endl
<< "capture Capture records from a BUS file" << std::endl
<< "merge Merge bus files from same experiment" << std::endl
//<< "merge Merge bus files from same experiment" << std::endl
<< "text Convert a binary BUS file to a tab-delimited text file" << std::endl
<< "fromtext Convert a tab-delimited text file to a binary BUS file" << std::endl
<< "extract Extract FASTQ reads correspnding to reads in BUS file" << std::endl
<< "predict Correct the count matrix using prediction of unseen species" << std::endl
<< "collapse Turn BUS files into a BUG file" << std::endl
<< "clusterhist Create UMI histograms per cluster" << std::endl
<< "linker Remove section of barcodes in BUS files" << std::endl
//<< "predict Correct the count matrix using prediction of unseen species" << std::endl
//<< "collapse Turn BUS files into a BUG file" << std::endl
//<< "clusterhist Create UMI histograms per cluster" << std::endl
//<< "linker Remove section of barcodes in BUS files" << std::endl
<< "compress Compress a BUS file" << std::endl
<< "inflate Decompress a BUSZ (compressed BUS) file" << std::endl
<< "decompress Decompress a BUSZ (compressed BUS) file" << std::endl
<< "version Prints version number" << std::endl
<< "cite Prints citation information" << std::endl
<< std::endl
Expand Down Expand Up @@ -2787,13 +2788,13 @@ void Bustools_count_Usage()
<< "-t, --txnames File with names of transcripts" << std::endl
<< " --genecounts Aggregate counts to genes only" << std::endl
<< " --umi-gene Perform gene-level collapsing of UMIs" << std::endl
<< " --em Estimate gene abundances using EM algorithm" << std::endl
//<< " --em Estimate gene abundances using EM algorithm" << std::endl
<< " --cm Count multiplicities instead of UMIs" << std::endl
<< "-s, --split Split output matrix in two (plus ambiguous) based on transcripts supplied in this file" << std::endl
<< "-m, --multimapping Include bus records that pseudoalign to multiple genes" << std::endl
<< " --hist Output copy per UMI histograms for all genes" << std::endl
<< "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl
<< " --rawcounts The count matrix will contain raw counts instead of UMI counts" << std::endl
//<< " --hist Output copy per UMI histograms for all genes" << std::endl
//<< "-d --downsample Specify a factor between 0 and 1 specifying how much to downsample" << std::endl
//<< " --rawcounts The count matrix will contain raw counts instead of UMI counts" << std::endl
<< std::endl;
}

Expand Down Expand Up @@ -3164,7 +3165,7 @@ int main(int argc, char **argv)
exit(1);
}
}
else if (cmd == "whitelist" || cmd == "allowlist")
else if (cmd == "whitelist" || cmd == "allowlist" || cmd == "onlist")
{
if (disp_help)
{
Expand Down

0 comments on commit 6705b7e

Please sign in to comment.