Skip to content

Commit

Permalink
fix: nucleotide symbol equals with dot
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasKellerer committed Mar 5, 2024
1 parent 585f646 commit 6ad623e
Show file tree
Hide file tree
Showing 6 changed files with 112 additions and 13 deletions.
3 changes: 3 additions & 0 deletions src/silo/common/aa_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ char AminoAcid::symbolToChar(AminoAcid::Symbol symbol) {

std::optional<AminoAcid::Symbol> AminoAcid::charToSymbol(char character) {
switch (character) {
case '.':
return std::nullopt;
case '-':
return AminoAcid::Symbol::GAP;
case 'A':
Expand Down Expand Up @@ -114,6 +116,7 @@ std::optional<AminoAcid::Symbol> AminoAcid::charToSymbol(char character) {
case '*':
return AminoAcid::Symbol::STOP;
default:
// TODO(#342): Revisit charToSymbol, so that illegal characters are not the same as '.'.
return std::nullopt;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/silo/common/aa_symbols.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ TEST(AminoAcidSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) {

TEST(AminoAcidSymbol, conversionFromCharacter) {
EXPECT_EQ(silo::AminoAcid::charToSymbol('-'), silo::AminoAcid::Symbol::GAP);
EXPECT_EQ(silo::AminoAcid::charToSymbol('.'), std::nullopt);
EXPECT_EQ(silo::AminoAcid::charToSymbol('A'), silo::AminoAcid::Symbol::A);
EXPECT_EQ(silo::AminoAcid::charToSymbol('N'), silo::AminoAcid::Symbol::N);
EXPECT_EQ(silo::AminoAcid::charToSymbol('J'), std::nullopt);
Expand Down
2 changes: 2 additions & 0 deletions src/silo/common/nucleotide_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ char Nucleotide::symbolToChar(Nucleotide::Symbol symbol) {
std::optional<Nucleotide::Symbol> Nucleotide::charToSymbol(char character) {
switch (character) {
case '.':
return std::nullopt;
case '-':
return Symbol::GAP;
case 'A':
Expand Down Expand Up @@ -80,6 +81,7 @@ std::optional<Nucleotide::Symbol> Nucleotide::charToSymbol(char character) {
case 'N':
return Symbol::N;
default:
// TODO(#342): Revisit charToSymbol, so that illegal characters are not the same as '.'.
return std::nullopt;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/silo/common/nucleotide_symbols.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ TEST(NucleotideSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) {
}

TEST(NucleotideSymbol, conversionFromCharacter) {
EXPECT_EQ(silo::Nucleotide::charToSymbol('.'), silo::Nucleotide::Symbol::GAP);
EXPECT_EQ(silo::Nucleotide::charToSymbol('.'), std::nullopt);
EXPECT_EQ(silo::Nucleotide::charToSymbol('-'), silo::Nucleotide::Symbol::GAP);
EXPECT_EQ(silo::Nucleotide::charToSymbol('A'), silo::Nucleotide::Symbol::A);
EXPECT_EQ(silo::Nucleotide::charToSymbol('N'), silo::Nucleotide::Symbol::N);
Expand Down
45 changes: 33 additions & 12 deletions src/silo/test/amino_acid_symbol_equals.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,52 @@ nlohmann::json createDataWithAminoAcidSequence(const std::string& aminoAcidSeque
};
}
const nlohmann::json DATA_WITH_D = createDataWithAminoAcidSequence("D*");
const nlohmann::json DATA_WITH_M = createDataWithAminoAcidSequence("M*");
const nlohmann::json DATA_SAME_AS_REFERENCE = createDataWithAminoAcidSequence("M*");
const nlohmann::json DATA_WITH_B = createDataWithAminoAcidSequence("B*");

const auto DATABASE_CONFIG =
DatabaseConfig{"segmenet1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};
DatabaseConfig{"segment1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};

const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "A"}},
{{"gene1", "M*"}},
{{GENE, "M*"}},
};

const QueryTestData TEST_DATA{
{DATA_WITH_D, DATA_WITH_M, DATA_WITH_B},
{DATA_WITH_D, DATA_SAME_AS_REFERENCE, DATA_SAME_AS_REFERENCE, DATA_WITH_B},
DATABASE_CONFIG,
REFERENCE_GENOMES
};

const nlohmann::json QUERY = {
{"action", {{"type", "Aggregated"}}},
{"filterExpression",
{{"type", "AminoAcidEquals"}, {"position", 1}, {"symbol", "D"}, {"sequenceName", GENE}}}
};
/// Builds an "Aggregated" query whose filter is a single "AminoAcidEquals" expression.
///
/// @param symbol   value stored under the filter's "symbol" key
/// @param position value stored under the filter's "position" key
/// @param gene     value stored under the filter's "sequenceName" key
/// @return JSON object with the two top-level keys "action" and "filterExpression"
nlohmann::json createAminoAcidSymbolEqualsQuery(
   const std::string& symbol,
   int position,
   const std::string& gene
) {
   const nlohmann::json aggregated_action = {{"type", "Aggregated"}};
   const nlohmann::json symbol_filter = {
      {"type", "AminoAcidEquals"},
      {"position", position},
      {"symbol", symbol},
      {"sequenceName", gene}
   };
   return {{"action", aggregated_action}, {"filterExpression", symbol_filter}};
}

const nlohmann::json EXPECTED = {{{"count", 1}}};
// Scenario: filter for symbol "D" at position 1 of GENE and expect an aggregated count of 1.
// Of the records in TEST_DATA, only DATA_WITH_D ("D*") has a D as its first symbol.
const QueryTestScenario AMINO_ACID_EQUALS_D = {
"aminoAcidEqualsD",
createAminoAcidSymbolEqualsQuery("D", 1, GENE),
{{{"count", 1}}}
};

const QueryTestScenario AMINO_ACID_EQUALS_D = {"aminoAcidEqualsD", QUERY, EXPECTED};
// Scenario: filter with the symbol "." at position 1 of GENE and expect a count of 2.
// The expectation matches the two DATA_SAME_AS_REFERENCE ("M*") entries in TEST_DATA, i.e. "." is
// expected to behave like the reference symbol at that position (the reference for GENE is "M*").
// NOTE(review): the scenario label is "aminoAcidEqualsM" although the query sends "." —
// presumably because M is the reference symbol; confirm the label is intended.
const QueryTestScenario AMINO_ACID_EQUALS_WITH_DOT_RETURNS_AS_IF_REFERENCE = {
"aminoAcidEqualsM",
createAminoAcidSymbolEqualsQuery(".", 1, GENE),
{{{"count", 2}}}
};

QUERY_TEST(AminoAcidSymbolEquals, TEST_DATA, ::testing::Values(AMINO_ACID_EQUALS_D));
// Registers the AminoAcidSymbolEquals suite: both scenarios are passed to QUERY_TEST together
// with TEST_DATA. QUERY_TEST is not defined in this file (presumably it comes from the query test
// fixture header — confirm).
QUERY_TEST(
AminoAcidSymbolEquals,
TEST_DATA,
::testing::Values(AMINO_ACID_EQUALS_D, AMINO_ACID_EQUALS_WITH_DOT_RETURNS_AS_IF_REFERENCE)
);
72 changes: 72 additions & 0 deletions src/silo/test/nucleotide_symbol_equals.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <nlohmann/json.hpp>

#include "silo/test/query_fixture.test.h"

using silo::ReferenceGenomes;
using silo::config::DatabaseConfig;
using silo::config::ValueType;
using silo::test::QueryTestData;
using silo::test::QueryTestScenario;

#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

using boost::uuids::random_generator;

/// Builds one input record carrying the given aligned nucleotide sequence for "segment1".
///
/// The record's "primaryKey" is "id_" followed by a freshly generated random UUID, so every call
/// yields a different key. The unaligned nucleotide sequence for "segment1" and the aligned amino
/// acid sequence for "gene1" are both set to JSON null.
///
/// @param nucleotideSequence value stored under alignedNucleotideSequences.segment1
/// @return JSON object with the keys "metadata", "alignedNucleotideSequences",
///         "unalignedNucleotideSequences" and "alignedAminoAcidSequences"
nlohmann::json createDataWithNucleotideSequence(const std::string& nucleotideSequence) {
   random_generator uuid_source;
   const std::string primary_key = "id_" + to_string(uuid_source());

   const nlohmann::json metadata = {{"primaryKey", primary_key}};
   const nlohmann::json aligned_nucleotides = {{"segment1", nucleotideSequence}};
   const nlohmann::json unaligned_nucleotides = {{"segment1", nullptr}};
   const nlohmann::json aligned_amino_acids = {{"gene1", nullptr}};

   return {
      {"metadata", metadata},
      {"alignedNucleotideSequences", aligned_nucleotides},
      {"unalignedNucleotideSequences", unaligned_nucleotides},
      {"alignedAminoAcidSequences", aligned_amino_acids}
   };
}
// Input records for the suite. The names describe each sequence relative to the "segment1"
// reference "ATGCN" declared in REFERENCE_GENOMES below: identical, all 'N', and a sequence that
// differs from the reference at every position.
const nlohmann::json DATA_SAME_AS_REFERENCE = createDataWithNucleotideSequence("ATGCN");
const nlohmann::json DATA_WITH_ALL_N = createDataWithNucleotideSequence("NNNNN");
const nlohmann::json DATA_WITH_ALL_MUTATED = createDataWithNucleotideSequence("CATTT");

// Database configuration with a single STRING metadata column "primaryKey", which is also named
// as the primary key. NOTE(review): the leading "segment1" is presumably the default nucleotide
// sequence name — confirm against DatabaseConfig.
const auto DATABASE_CONFIG =
DatabaseConfig{"segment1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};

// Reference sequences: "ATGCN" for "segment1" and "M*" for "gene1".
const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "ATGCN"}},
{{"gene1", "M*"}},
};

// Four records in total; DATA_SAME_AS_REFERENCE is deliberately listed twice so that a filter
// matching the reference yields a count of 2.
// NOTE(review): both copies share the one primaryKey generated when DATA_SAME_AS_REFERENCE was
// created — confirm that duplicate primary keys are acceptable for the database under test.
const QueryTestData TEST_DATA{
{DATA_SAME_AS_REFERENCE, DATA_SAME_AS_REFERENCE, DATA_WITH_ALL_N, DATA_WITH_ALL_MUTATED},
DATABASE_CONFIG,
REFERENCE_GENOMES
};

/// Builds an "Aggregated" query whose filter is a single "NucleotideEquals" expression on the
/// sequence named "segment1".
///
/// @param symbol   value stored under the filter's "symbol" key
/// @param position value stored under the filter's "position" key
/// @return JSON object with the two top-level keys "action" and "filterExpression"
nlohmann::json createNucleotideSymbolEqualsQuery(const std::string& symbol, int position) {
   const nlohmann::json aggregated_action = {{"type", "Aggregated"}};
   const nlohmann::json symbol_filter = {
      {"type", "NucleotideEquals"},
      {"position", position},
      {"symbol", symbol},
      {"sequenceName", "segment1"}
   };
   return {{"action", aggregated_action}, {"filterExpression", symbol_filter}};
}

// Scenario: filter for the concrete symbol "C" at position 1 and expect a count of 1.
// Only DATA_WITH_ALL_MUTATED ("CATTT") begins with 'C'; this expectation is consistent with
// positions being counted from 1.
const QueryTestScenario NUCLEOTIDE_EQUALS_WITH_SYMBOL = {
"nucleotideEqualsWithSymbol",
createNucleotideSymbolEqualsQuery("C", 1),
{{{"count", 1}}}
};

// Scenario: filter with the symbol "." at position 1 and expect a count of 2.
// The expectation matches the two DATA_SAME_AS_REFERENCE entries in TEST_DATA, i.e. "." is
// expected to select records that carry the reference symbol at that position.
const QueryTestScenario NUCLEOTIDE_EQUALS_WITH_DOT_RETURNS_REFERENCE = {
"nucleotideEqualsWithDot",
createNucleotideSymbolEqualsQuery(".", 1),
{{{"count", 2}}}
};

// Registers the NucleotideSymbolEquals suite: both scenarios are passed to QUERY_TEST together
// with TEST_DATA. QUERY_TEST is not defined in this file (presumably it comes from
// "silo/test/query_fixture.test.h" — confirm).
QUERY_TEST(
NucleotideSymbolEquals,
TEST_DATA,
::testing::Values(NUCLEOTIDE_EQUALS_WITH_SYMBOL, NUCLEOTIDE_EQUALS_WITH_DOT_RETURNS_REFERENCE)
);

0 comments on commit 6ad623e

Please sign in to comment.