Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: nucleotide symbol equals with dot #341

Merged
merged 1 commit into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/silo/common/aa_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ char AminoAcid::symbolToChar(AminoAcid::Symbol symbol) {

std::optional<AminoAcid::Symbol> AminoAcid::charToSymbol(char character) {
switch (character) {
case '.':
return std::nullopt;
case '-':
return AminoAcid::Symbol::GAP;
case 'A':
Expand Down Expand Up @@ -114,6 +116,7 @@ std::optional<AminoAcid::Symbol> AminoAcid::charToSymbol(char character) {
case '*':
return AminoAcid::Symbol::STOP;
default:
// TODO(#342): Revisit charToSymbol, so that illegal characters are not the same as '.'.
return std::nullopt;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/silo/common/aa_symbols.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ TEST(AminoAcidSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) {

TEST(AminoAcidSymbol, conversionFromCharacter) {
EXPECT_EQ(silo::AminoAcid::charToSymbol('-'), silo::AminoAcid::Symbol::GAP);
EXPECT_EQ(silo::AminoAcid::charToSymbol('.'), std::nullopt);
EXPECT_EQ(silo::AminoAcid::charToSymbol('A'), silo::AminoAcid::Symbol::A);
EXPECT_EQ(silo::AminoAcid::charToSymbol('N'), silo::AminoAcid::Symbol::N);
EXPECT_EQ(silo::AminoAcid::charToSymbol('J'), std::nullopt);
Expand Down
2 changes: 2 additions & 0 deletions src/silo/common/nucleotide_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ char Nucleotide::symbolToChar(Nucleotide::Symbol symbol) {
std::optional<Nucleotide::Symbol> Nucleotide::charToSymbol(char character) {
switch (character) {
case '.':
return std::nullopt;
case '-':
return Symbol::GAP;
case 'A':
Expand Down Expand Up @@ -80,6 +81,7 @@ std::optional<Nucleotide::Symbol> Nucleotide::charToSymbol(char character) {
case 'N':
return Symbol::N;
default:
// TODO(#342): Revisit charToSymbol, so that illegal characters are not the same as '.'.
return std::nullopt;
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/silo/common/nucleotide_symbols.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ TEST(NucleotideSymbol, enumShouldHaveSameLengthAsArrayOfSymbols) {
}

TEST(NucleotideSymbol, conversionFromCharacter) {
EXPECT_EQ(silo::Nucleotide::charToSymbol('.'), silo::Nucleotide::Symbol::GAP);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see, it worked as the tests suggested, just the test was wrong 😄

EXPECT_EQ(silo::Nucleotide::charToSymbol('.'), std::nullopt);
EXPECT_EQ(silo::Nucleotide::charToSymbol('-'), silo::Nucleotide::Symbol::GAP);
EXPECT_EQ(silo::Nucleotide::charToSymbol('A'), silo::Nucleotide::Symbol::A);
EXPECT_EQ(silo::Nucleotide::charToSymbol('N'), silo::Nucleotide::Symbol::N);
Expand Down
45 changes: 33 additions & 12 deletions src/silo/test/amino_acid_symbol_equals.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,31 +19,52 @@ nlohmann::json createDataWithAminoAcidSequence(const std::string& aminoAcidSeque
};
}
const nlohmann::json DATA_WITH_D = createDataWithAminoAcidSequence("D*");
const nlohmann::json DATA_WITH_M = createDataWithAminoAcidSequence("M*");
const nlohmann::json DATA_SAME_AS_REFERENCE = createDataWithAminoAcidSequence("M*");
const nlohmann::json DATA_WITH_B = createDataWithAminoAcidSequence("B*");

const auto DATABASE_CONFIG =
DatabaseConfig{"segmenet1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};
DatabaseConfig{"segment1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};

const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "A"}},
{{"gene1", "M*"}},
{{GENE, "M*"}},
};

const QueryTestData TEST_DATA{
{DATA_WITH_D, DATA_WITH_M, DATA_WITH_B},
{DATA_WITH_D, DATA_SAME_AS_REFERENCE, DATA_SAME_AS_REFERENCE, DATA_WITH_B},
DATABASE_CONFIG,
REFERENCE_GENOMES
};

const nlohmann::json QUERY = {
{"action", {{"type", "Aggregated"}}},
{"filterExpression",
{{"type", "AminoAcidEquals"}, {"position", 1}, {"symbol", "D"}, {"sequenceName", GENE}}}
};
// Assembles an "Aggregated" query whose filter is a single AminoAcidEquals
// expression.
//
// symbol   - value stored under the filter's "symbol" key.
// position - value stored under the filter's "position" key.
// gene     - value stored under the filter's "sequenceName" key.
//
// Returns the query as a JSON object with the two top-level keys "action"
// and "filterExpression".
nlohmann::json createAminoAcidSymbolEqualsQuery(
   const std::string& symbol,
   int position,
   const std::string& gene
) {
   nlohmann::json query;
   query["action"]["type"] = "Aggregated";

   // Fill the filter through a reference so every key lands in the same nested object.
   nlohmann::json& filter = query["filterExpression"];
   filter["type"] = "AminoAcidEquals";
   filter["position"] = position;
   filter["symbol"] = symbol;
   filter["sequenceName"] = gene;

   return query;
}

const nlohmann::json EXPECTED = {{{"count", 1}}};
const QueryTestScenario AMINO_ACID_EQUALS_D = {
"aminoAcidEqualsD",
createAminoAcidSymbolEqualsQuery("D", 1, GENE),
{{{"count", 1}}}
};

const QueryTestScenario AMINO_ACID_EQUALS_D = {"aminoAcidEqualsD", QUERY, EXPECTED};
const QueryTestScenario AMINO_ACID_EQUALS_WITH_DOT_RETURNS_AS_IF_REFERENCE = {
"aminoAcidEqualsM",
createAminoAcidSymbolEqualsQuery(".", 1, GENE),
{{{"count", 2}}}
};

QUERY_TEST(AminoAcidSymbolEquals, TEST_DATA, ::testing::Values(AMINO_ACID_EQUALS_D));
QUERY_TEST(
AminoAcidSymbolEquals,
TEST_DATA,
::testing::Values(AMINO_ACID_EQUALS_D, AMINO_ACID_EQUALS_WITH_DOT_RETURNS_AS_IF_REFERENCE)
);
72 changes: 72 additions & 0 deletions src/silo/test/nucleotide_symbol_equals.test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#include <nlohmann/json.hpp>

#include "silo/test/query_fixture.test.h"

using silo::ReferenceGenomes;
using silo::config::DatabaseConfig;
using silo::config::ValueType;
using silo::test::QueryTestData;
using silo::test::QueryTestScenario;

#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

using boost::uuids::random_generator;

// Creates one input record for the query fixture.
//
// nucleotideSequence - stored as the aligned nucleotide sequence of "segment1".
//
// Returns a JSON object with a "metadata.primaryKey" of the form "id_<uuid>",
// the given aligned sequence for "segment1", and JSON null for the unaligned
// "segment1" sequence and the "gene1" amino acid sequence.
nlohmann::json createDataWithNucleotideSequence(const std::string& nucleotideSequence) {
   // The primary key is derived from a randomly generated UUID
   // (presumably so that records built from the same sequence do not collide).
   random_generator uuid_source;
   const auto uuid = uuid_source();
   const std::string primary_key = "id_" + to_string(uuid);

   nlohmann::json record;
   record["metadata"]["primaryKey"] = primary_key;
   record["alignedNucleotideSequences"]["segment1"] = nucleotideSequence;
   record["unalignedNucleotideSequences"]["segment1"] = nullptr;
   record["alignedAminoAcidSequences"]["gene1"] = nullptr;
   return record;
}
// Input records used by the scenarios in this file. Each is five symbols long,
// the same length as the "segment1" reference sequence declared below.
// Identical to the reference sequence "ATGCN".
const nlohmann::json DATA_SAME_AS_REFERENCE = createDataWithNucleotideSequence("ATGCN");
// Every position is 'N'.
const nlohmann::json DATA_WITH_ALL_N = createDataWithNucleotideSequence("NNNNN");
// Differs from the reference sequence "ATGCN" at every position.
const nlohmann::json DATA_WITH_ALL_MUTATED = createDataWithNucleotideSequence("CATTT");

// Database configuration with a single string metadata column, "primaryKey",
// which is also named as the primary key.
// NOTE(review): the leading "segment1" is presumably the default nucleotide
// sequence name — confirm against DatabaseConfig.
const auto DATABASE_CONFIG =
DatabaseConfig{"segment1", {"dummy name", {{"primaryKey", ValueType::STRING}}, "primaryKey"}};

// Reference sequences: "ATGCN" for nucleotide segment "segment1" and "M*" for "gene1".
const auto REFERENCE_GENOMES = ReferenceGenomes{
{{"segment1", "ATGCN"}},
{{"gene1", "M*"}},
};

// Four records in total: the reference-identical record is listed twice,
// followed by the all-N record and the fully mutated record.
const QueryTestData TEST_DATA{
{DATA_SAME_AS_REFERENCE, DATA_SAME_AS_REFERENCE, DATA_WITH_ALL_N, DATA_WITH_ALL_MUTATED},
DATABASE_CONFIG,
REFERENCE_GENOMES
};

// Assembles an "Aggregated" query whose filter is a single NucleotideEquals
// expression on the sequence named "segment1".
//
// symbol   - value stored under the filter's "symbol" key.
// position - value stored under the filter's "position" key.
//
// Returns the query as a JSON object with the two top-level keys "action"
// and "filterExpression".
nlohmann::json createNucleotideSymbolEqualsQuery(const std::string& symbol, int position) {
   nlohmann::json query;
   query["action"]["type"] = "Aggregated";

   // Fill the filter through a reference so every key lands in the same nested object.
   nlohmann::json& filter = query["filterExpression"];
   filter["type"] = "NucleotideEquals";
   filter["position"] = position;
   filter["symbol"] = symbol;
   filter["sequenceName"] = "segment1";

   return query;
}

// Scenario: filter on symbol "C" at position 1 and expect a count of 1.
// Of the sequences in TEST_DATA ("ATGCN" twice, "NNNNN", "CATTT") only "CATTT"
// starts with 'C', so the expectation is consistent with 1-based positions.
const QueryTestScenario NUCLEOTIDE_EQUALS_WITH_SYMBOL = {
"nucleotideEqualsWithSymbol",
createNucleotideSymbolEqualsQuery("C", 1),
{{{"count", 1}}}
};

// Scenario: filter on symbol "." at position 1 and expect a count of 2.
// This matches the two records identical to the reference "ATGCN", i.e. the
// expectation treats "." as the reference symbol at that position ('A').
const QueryTestScenario NUCLEOTIDE_EQUALS_WITH_DOT_RETURNS_REFERENCE = {
"nucleotideEqualsWithDot",
createNucleotideSymbolEqualsQuery(".", 1),
{{{"count", 2}}}
};

// Registers both scenarios against TEST_DATA under the suite name
// NucleotideSymbolEquals.
// NOTE(review): QUERY_TEST is assumed to come from the query_fixture.test.h
// include — confirm there how the scenarios are executed.
QUERY_TEST(
NucleotideSymbolEquals,
TEST_DATA,
::testing::Values(NUCLEOTIDE_EQUALS_WITH_SYMBOL, NUCLEOTIDE_EQUALS_WITH_DOT_RETURNS_REFERENCE)
);
Loading