Skip to content

Commit

Permalink
Merge pull request #219 from LadnerLab/deconv-ISS218
Browse files Browse the repository at this point in the history
Deconv iss218
  • Loading branch information
SeanGolez authored Jul 22, 2024
2 parents 52ae226 + e9043e3 commit 1eb103e
Show file tree
Hide file tree
Showing 8 changed files with 518 additions and 297 deletions.
66 changes: 40 additions & 26 deletions include/modules/deconv/module_deconv.h
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#ifndef MODULE_DECONV_HH_INCLUDED
#define MODULE_DECONV_HH_INCLUDED
#include <string>
#include <vector>
#include <algorithm>
#include <set>
#include <boost/algorithm/string.hpp>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <unordered_map>
#include <map>
#include <unordered_set>

#include "overlap_data.h"
Expand Down Expand Up @@ -107,25 +108,36 @@ class module_deconv : public module
* @note id is only used for summation scoring
* @returns The score the id contributes to the species, as defined by
* score_strat
**/
double score_peptide_for_species( const peptide& peptide,
std::unordered_map
<std::string,
std::vector<std::pair<std::string,double>
>>&
spec_count_map,
std::string id,
evaluation_strategy::score_strategy score_strat
);
/**
* Parse a map that will provide name->tax id mappings. This map should be formatted
* in the same manner as that of 'lineage.dmp' from NCBI.
* @param fname The name of the file to parse
* @param name_map the destination map that will store the mappings of id->name
**/
void
parse_name_map( std::string fname, std::map<std::string,std::string>& name_map );

*/
double score_peptide_for_species(
const peptide& peptide,
std::unordered_map<
std::string, std::vector<std::pair<std::string,double>>
>& spec_count_map,
std::string id,
evaluation_strategy::score_strategy score_strat
);

/**
* Parse a map which provides name->taxID mappings. This map should be
* formatted in the same manner as that of 'lineage.dmp' from NCBI.
* @param fname Name of the file to parse
* @param name_map Destination map which stores mappings of ID->name
*/
void parse_ncbi_name_map(
std::string fname, std::map<std::string, std::string>& name_map
);

/**
* Parse a map which provides name->tax ID mappings. This map should be
* tab-delimited.
* @param tup Tuple of three strings describing the custom name map to parse.
*        NOTE(review): presumably laid out like
*        options_deconv::custom_id_name_map_info (0 = ID name map,
*        1 = Taxon ID column name, 2 = Sequence column name) - confirm
*        against the caller.
* @param name_map Destination map which stores mappings of ID->name
*/
void parse_custom_name_map(
std::tuple<std::string, std::string, std::string>& tup,
std::map<std::string, std::string>& name_map
);

/**
* Write output to a file that will be named 'out_name'
Expand All @@ -137,7 +149,7 @@ class module_deconv : public module
* used for outputting what the original count/score of each enriched species is.
* This is useful for seeing how the scores and counts have changed for each
* of the enriched species.
**/
*/
void write_outputs( std::string out_name,
std::map<std::string,std::string>*
id_name_map,
Expand All @@ -146,7 +158,8 @@ class module_deconv : public module
>&
out_counts,
std::unordered_map<std::string,std::pair<double,double>>&
original_scores
original_scores,
std::string custom_id_header
);

/**
Expand Down Expand Up @@ -895,7 +908,8 @@ class module_deconv : public module
* @param filename filepath to tab-delimited file
* @returns map of taxID and threshold
**/
void thresh_file_to_map( std::unordered_map<std::string, std::size_t>& thresh_map, std::string filename );
void thresh_file_to_map( std::unordered_map<std::string, std::size_t>& thresh_map, std::string filename,
const std::vector<std::pair<std::string, std::vector<std::pair<std::string, double>>>> pep_species_vec );
};

#endif // MODULE_DECONV_HH_INCLUDED
39 changes: 35 additions & 4 deletions include/modules/deconv/options_deconv.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#ifndef OPTIONS_DECONV_HH_INCLUDED
#define OPTIONS_DECONV_HH_INCLUDED

#include "logger.h"
#include "options.h"

#include <string>
#include <tuple>
#include <vector>
#include <boost/algorithm/string.hpp>

class options_deconv : public options
{
Expand Down Expand Up @@ -120,9 +123,14 @@ class options_deconv : public options
std::size_t k;

/**
* The name of the file to
* write the id name map to.
**/
* Tuple containing information for parsing user-defined ID name map
*/
std::tuple<std::string, std::string, std::string>
custom_id_name_map_info; //!< 0 = ID name map, 1 = Taxon ID column name, 2 = Sequence column name

/**
* Name of file containing NCBI taxID to taxon name mappings
*/
std::string id_name_map_fname;

/**
Expand All @@ -131,6 +139,29 @@ class options_deconv : public options
**/
std::string enriched_file_ending;

/**
* Translates information from string 'info' into a tuple of three
* strings.
* Note: is probably only going to be used to move custom ID name map
* information.
* @param tup Reference to tuple which will be filled
* @param info Comma-delimited string of information
* @param opt_name Name of option for error output
*/
// TODO: make into a generic function in options class (e.g. accepting
// element types other than std::string), or a virtual method - whichever
// is more practical for the future
void set_info(
std::tuple<std::string, std::string, std::string>& tup,
std::string info,
std::string opt_name
);

/**
* Converts a tuple of three strings into a single string.
* NOTE(review): the output format is not visible from this header -
* presumably comma-delimited, mirroring the input accepted by set_info;
* confirm against the implementation.
* @param tup Tuple of three strings to convert
* @returns String built from the contents of 'tup'
*/
// TODO: make into a generic function in options class,
// or a virtual method - whichever is more practical for the future
std::string tuple_to_string(std::tuple<std::string, std::string, std::string>& tup);
};

#endif // OPTIONS_DECONV_HH_INCLUDED
#endif /* OPTIONS_DECONV_HH_INCLUDED */

Loading

0 comments on commit 1eb103e

Please sign in to comment.