Skip to content

Commit

Permalink
feat: insertions action
Browse files Browse the repository at this point in the history
  • Loading branch information
Taepper committed Aug 22, 2023
1 parent 3cc8fee commit e067062
Show file tree
Hide file tree
Showing 16 changed files with 478 additions and 11 deletions.
17 changes: 17 additions & 0 deletions endToEndTests/test/invalidQueries/insertionsInvalidColumn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"testCaseName": "The insertions action with an invalid column",
"query": {
"action": {
"type": "Insertions",
"column": "insertionsThatAreNotThere",
"sequenceName": "anything"
},
"filterExpression": {
"type": "True"
}
},
"expectedError": {
"error": "Bad request",
"message": "The column 'insertionsThatAreNotThere' does not exist."
}
}
17 changes: 17 additions & 0 deletions endToEndTests/test/invalidQueries/insertionsInvalidSequence.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"testCaseName": "The insertions action with an invalid sequence",
"query": {
"action": {
"type": "Insertions",
"column": "insertions",
"sequenceName": "S"
},
"filterExpression": {
"type": "True"
}
},
"expectedError": {
"error": "Bad request",
"message": "The column 'insertions' does not contain the sequence 'S'"
}
}
23 changes: 23 additions & 0 deletions endToEndTests/test/queries/insertionsActionAndFilter.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{
"testCaseName": "The insertions action and insertions contains filter",
"query": {
"action": {
"type": "Insertions",
"column": "insertions"
},
"filterExpression": {
"type": "InsertionContains",
"column": "insertions",
"position": 22339,
"value": ".*C.*G.*"
}
},
"expectedQueryResult": [
{
"count": 1,
"insertions": "GCTGGT",
"position": "22340",
"sequenceName": ""
}
]
}
38 changes: 38 additions & 0 deletions endToEndTests/test/queries/insertionsColumn.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"testCaseName": "The insertions action",
"query": {
"action": {
"type": "Insertions",
"column": "insertions"
},
"filterExpression": {
"type": "True"
}
},
"expectedQueryResult": [
{
"count": 1,
"insertions": "TAT",
"position": "5960",
"sequenceName": ""
},
{
"count": 1,
"insertions": "CAGAA",
"position": "22205",
"sequenceName": ""
},
{
"count": 1,
"insertions": "GCTGGT",
"position": "22340",
"sequenceName": ""
},
{
"count": 17,
"insertions": "CCC",
"position": "25702",
"sequenceName": ""
}
]
}
2 changes: 1 addition & 1 deletion include/silo/query_engine/actions/action.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Action {
void applySort(QueryResult& result) const;
void applyOffsetAndLimit(QueryResult& result) const;

[[nodiscard]] virtual void validateOrderByFields(const Database& database) const = 0;
virtual void validateOrderByFields(const Database& database) const = 0;

[[nodiscard]] virtual QueryResult execute(
const Database& database,
Expand Down
60 changes: 60 additions & 0 deletions include/silo/query_engine/actions/insertions.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,64 @@
#ifndef SILO_INSERTIONS_H
#define SILO_INSERTIONS_H

#include "silo/query_engine/actions/action.h"
#include "silo/storage/column/insertion_index.h"

namespace silo::query_engine {

struct QueryResultEntry;

namespace actions {

/// Query action that aggregates insertions stored in an insertion column.
///
/// The action is parameterised on the symbol alphabet: the JSON action type
/// "Insertions" is parsed into `InsertionAggregation<NUCLEOTIDE_SYMBOL>` and
/// "AminoAcidInsertions" into `InsertionAggregation<AA_SYMBOL>` (see the
/// dispatch in action.cpp).
///
/// Only declarations live here; the member definitions are in the matching
/// .cpp file.
template <typename Symbol>
class InsertionAggregation : public Action {
// Field names of a result entry. They match the keys seen in the expected
// results of the end-to-end tests ("position", "insertions", "sequenceName",
// "count").
static constexpr std::string_view POSITION_FIELD_NAME = "position";
static constexpr std::string_view INSERTION_FIELD_NAME = "insertions";
static constexpr std::string_view SEQUENCE_FIELD_NAME = "sequenceName";
static constexpr std::string_view COUNT_FIELD_NAME = "count";

// Name of the insertion column to aggregate (the "column" field of the query).
std::string column_name;
// Sequences to restrict the aggregation to.
// NOTE(review): presumably an empty list means "all sequences of the column"
// - confirm against the implementation.
std::vector<std::string> sequence_names;

// Pairs of (filter result, insertion index) collected for one sequence.
//
// Both pair members are references, not copies: the referenced
// OperatorResult and InsertionIndex objects must outlive this struct.
//
// NOTE(review): the split into `bitmaps` and `full_bitmaps` presumably
// separates partially filtered partitions from partitions where the filter
// matches everything - confirm against the implementation.
struct PrefilteredBitmaps {
std::vector<std::pair<
const OperatorResult&,
const silo::storage::column::insertion::InsertionIndex<Symbol>&>>
bitmaps;
std::vector<std::pair<
const OperatorResult&,
const silo::storage::column::insertion::InsertionIndex<Symbol>&>>
full_bitmaps;
};

// Aggregates the insertions reachable through `prefiltered_bitmaps` for
// `sequence_name` and writes the resulting entries into `output`.
// NOTE(review): presumably appends to `output` rather than replacing its
// contents - confirm against the implementation.
void addAggregatedInsertionsToInsertionCounts(
std::vector<QueryResultEntry>& output,
const std::string& sequence_name,
const PrefilteredBitmaps& prefiltered_bitmaps
) const;

// Validates the requested column / sequences against `database` and builds
// the PrefilteredBitmaps, returned in a map keyed by a string
// (presumably the sequence name - TODO confirm).
//
// `bitmap_filter` is taken by non-const reference; the returned structs hold
// references, so the objects they point to must stay alive while the
// result is in use.
// NOTE(review): the "column does not exist" / "does not contain the
// sequence" errors expected by the invalid-query tests are presumably raised
// here - confirm.
std::unordered_map<std::string, InsertionAggregation<Symbol>::PrefilteredBitmaps>
validateFieldsAndPreFilterBitmaps(
const Database& database,
std::vector<OperatorResult>& bitmap_filter
) const;

public:
/// @param column name of the insertion column to aggregate
/// @param sequence_names sequences to restrict the aggregation to; taken by
///        rvalue reference, so callers must pass a temporary or std::move
InsertionAggregation(std::string column, std::vector<std::string>&& sequence_names);

/// Overrides Action::validateOrderByFields for this action's result fields.
void validateOrderByFields(const Database& database) const override;

/// Runs the aggregation.
///
/// @param database the database to read insertion columns from
/// @param bitmap_filter filter results, taken by value
///        (presumably one entry per database partition - TODO confirm)
/// @return the aggregated insertions as a QueryResult
[[nodiscard]] QueryResult execute(
const Database& database,
std::vector<OperatorResult> bitmap_filter
) const override;
};

/// JSON deserialization hook for the insertions action.
///
/// Declared so that `json.get<std::unique_ptr<InsertionAggregation<Symbol>>>()`
/// (as used by the action dispatch in action.cpp) can construct the action.
/// The snake_case name is what the JSON library looks up, hence the lint
/// suppression on the declaration below.
///
/// @param json   the JSON object describing the action
/// @param action output parameter that receives the parsed action
template <typename Symbol>
// NOLINTNEXTLINE(readability-identifier-naming)
void from_json(const nlohmann::json& json, std::unique_ptr<InsertionAggregation<Symbol>>& action);

} // namespace actions
} // namespace silo::query_engine

#endif // SILO_INSERTIONS_H
5 changes: 4 additions & 1 deletion include/silo/storage/column/insertion_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ class InsertionColumnPartition {

void insert(const std::string& value);

void buildInsertionIndex();
void buildInsertionIndexes();

const std::unordered_map<std::string, insertion::InsertionIndex<Symbol>>& getInsertionIndexes(
) const;

[[nodiscard]] std::unique_ptr<roaring::Roaring> search(
const std::string& sequence_name,
Expand Down
2 changes: 2 additions & 0 deletions include/silo/storage/column/insertion_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ class InsertionIndex {

void buildIndex();

const std::unordered_map<uint32_t, InsertionPosition<Symbol>>& getInsertionPositions() const;

std::unique_ptr<roaring::Roaring> search(uint32_t position, const std::string& search_pattern)
const;
};
Expand Down
4 changes: 2 additions & 2 deletions src/silo/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -762,10 +762,10 @@ void Database::initializeAASequences(
void Database::finalizeInsertionIndexes() {
tbb::parallel_for_each(partitions.begin(), partitions.end(), [](auto& partition) {
for (auto& insertion_column : partition.columns.nuc_insertion_columns) {
insertion_column.second.buildInsertionIndex();
insertion_column.second.buildInsertionIndexes();
}
for (auto& insertion_column : partition.columns.aa_insertion_columns) {
insertion_column.second.buildInsertionIndex();
insertion_column.second.buildInsertionIndexes();
}
});
}
Expand Down
2 changes: 0 additions & 2 deletions src/silo/query_engine/actions/aa_mutations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,8 +178,6 @@ QueryResult AAMutations::execute(
const Database& database,
std::vector<OperatorResult> bitmap_filter
) const {
using roaring::Roaring;

std::vector<std::string> aa_sequence_names_to_evaluate;
for (const auto& aa_sequence_name : aa_sequence_names) {
CHECK_SILO_QUERY(
Expand Down
15 changes: 14 additions & 1 deletion src/silo/query_engine/actions/action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "silo/query_engine/actions/details.h"
#include "silo/query_engine/actions/fasta.h"
#include "silo/query_engine/actions/fasta_aligned.h"
#include "silo/query_engine/actions/insertions.h"
#include "silo/query_engine/actions/nuc_mutations.h"
#include "silo/query_engine/operator_result.h"
#include "silo/query_engine/query_parse_exception.h"
Expand Down Expand Up @@ -125,7 +126,15 @@ void from_json(const nlohmann::json& json, OrderByField& field) {
"' must be either a string or an object containing the fields 'field':string and "
"'order':string, where the value of order is 'ascending' or 'descending'"
)
field = {json["field"].get<std::string>(), json["order"].get<std::string>() == "ascending"};
const std::string field_name = json["field"].get<std::string>();
const std::string order_string = json["order"].get<std::string>();
CHECK_SILO_QUERY(
order_string == "ascending" || order_string == "descending",
"The orderByField '" + json.dump() +
"' must be either a string or an object containing the fields 'field':string and "
"'order':string, where the value of order is 'ascending' or 'descending'"
)
field = {field_name, json["order"].get<std::string>() == "ascending"};
}

// NOLINTNEXTLINE(readability-identifier-naming)
Expand All @@ -148,6 +157,10 @@ void from_json(const nlohmann::json& json, std::unique_ptr<Action>& action) {
action = json.get<std::unique_ptr<Fasta>>();
} else if (expression_type == "FastaAligned") {
action = json.get<std::unique_ptr<FastaAligned>>();
} else if (expression_type == "Insertions") {
action = json.get<std::unique_ptr<InsertionAggregation<NUCLEOTIDE_SYMBOL>>>();
} else if (expression_type == "AminoAcidInsertions") {
action = json.get<std::unique_ptr<InsertionAggregation<AA_SYMBOL>>>();
} else {
throw QueryParseException(expression_type + " is not a valid action");
}
Expand Down
Loading

0 comments on commit e067062

Please sign in to comment.