Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

default nucleotide and amino acid sequence #510

Merged
2 changes: 1 addition & 1 deletion endToEndTests/test/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ if (!siloUrl) {

export const server = supertest.agent(siloUrl);

export function headerToHaveDataVersion(response) {
export function expectHeaderToHaveDataVersion(response) {
const headers = response.headers;
expect(headers).to.have.property('data-version');
const dataVersion = headers['data-version'];
Expand Down
6 changes: 3 additions & 3 deletions endToEndTests/test/info.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { expect } from 'chai';
import { headerToHaveDataVersion, server } from './common.js';
import { expectHeaderToHaveDataVersion, server } from './common.js';
import { describe, it } from 'node:test';

describe('The /info endpoint', () => {
Expand All @@ -8,7 +8,7 @@ describe('The /info endpoint', () => {
.get('/info')
.expect(200)
.expect('Content-Type', 'application/json')
.expect(headerToHaveDataVersion)
.expect(expectHeaderToHaveDataVersion)
.expect({ nBitmapsSize: 3898, sequenceCount: 100, totalSize: 26589464, numberOfPartitions: 11 });
});

Expand Down Expand Up @@ -79,6 +79,6 @@ describe('The /info endpoint', () => {
'Y': 2631494,
});
})
.expect(headerToHaveDataVersion);
.expect(expectHeaderToHaveDataVersion);
});
});
1 change: 0 additions & 1 deletion endToEndTests/test/queries/aaInsertionsAction.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"query": {
"action": {
"type": "AminoAcidInsertions",
"column": "aminoAcidInsertions",
"orderByFields": ["insertion", "position"]
},
"filterExpression": {
Expand Down
2 changes: 0 additions & 2 deletions endToEndTests/test/queries/aaInsertionsActionAndFilter.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@
"query": {
"action": {
"type": "AminoAcidInsertions",
"column": "aminoAcidInsertions",
"orderByFields": ["insertedSymbols", "position"]
},
"filterExpression": {
"type": "AminoAcidInsertionContains",
"sequenceName": "S",
"value": "E.*",
"column": "aminoAcidInsertions",
"position": 214
}
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"query": {
"action": {
"type": "AminoAcidInsertions",
"column": "aminoAcidInsertions",
"orderByFields": ["insertedSymbols", "position"],
"sequenceName": "S"
},
Expand Down
1 change: 0 additions & 1 deletion endToEndTests/test/queries/aaInsertionsContains.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"type": "AminoAcidInsertionContains",
"sequenceName": "S",
"value": "E.*E",
"column": "aminoAcidInsertions",
"position": 214
}
},
Expand Down
15 changes: 6 additions & 9 deletions endToEndTests/test/query.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { headerToHaveDataVersion, server } from './common.js';
import { expectHeaderToHaveDataVersion, server } from './common.js';
import { expect } from 'chai';
import { describe, it } from 'node:test';
import fs from 'fs';
Expand Down Expand Up @@ -40,19 +40,16 @@ describe('The /query endpoint', () => {

testCases.forEach(testCase =>
it('should return data for the test case ' + testCase.testCaseName, async () => {
const response = await server
.post('/query')
.send(testCase.query)
.expect(200)
.expect('Content-Type', 'application/x-ndjson')
.expect(headerToHaveDataVersion);
const response = await server.post('/query').send(testCase.query);

const errorMessage = 'Actual result is:\n' + response.text + '\n';
expect(response.status, errorMessage).to.equal(200);
expect(response.header['content-type'], errorMessage).to.equal('application/x-ndjson');
expectHeaderToHaveDataVersion(response);
let responseLines = response.text
.split(/\n/)
.filter(it => it !== '')
.map(it => JSON.parse(it));

const errorMessage = 'Actual result is:\n' + response.text + '\n';
expect(responseLines, errorMessage).to.deep.equal(testCase.expectedQueryResult);
})
);
Expand Down
2 changes: 1 addition & 1 deletion endToEndTests/test/requestId.test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { expect } from 'chai';
import { headerToHaveDataVersion, server } from './common.js';
import { server } from './common.js';
import { describe, it } from 'node:test';

const X_REQUEST_ID = 'x-request-id';
Expand Down
5 changes: 4 additions & 1 deletion include/silo/config/database_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class DatabaseSchema {

class DatabaseConfig {
public:
std::string default_nucleotide_sequence;
std::optional<std::string> default_nucleotide_sequence;
std::optional<std::string> default_amino_acid_sequence;
DatabaseSchema schema;

[[nodiscard]] std::optional<DatabaseMetadata> getMetadata(const std::string& name) const;
Expand All @@ -47,6 +48,8 @@ class DatabaseConfig {
class DatabaseConfigReader {
public:
[[nodiscard]] virtual DatabaseConfig readConfig(const std::filesystem::path& config_path) const;

[[nodiscard]] virtual DatabaseConfig parseYaml(const std::string& yaml) const;
};

} // namespace silo::config
Expand Down
3 changes: 3 additions & 0 deletions include/silo/preprocessing/preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class Preprocessor {
Database preprocess();

private:
void finalizeConfig();
void validateConfig();

static std::string makeNonNullKey(const std::string& field);
std::string getPartitionKeySelect() const;

Expand Down
49 changes: 40 additions & 9 deletions src/silo/config/database_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ ValueType silo::config::toDatabaseValueType(std::string_view type) {

namespace {

/// Top-level YAML keys under which the optional default sequence names are
/// read from and written to the database config.
const std::string DEFAULT_NUCLEOTIDE_SEQUENCE_KEY{"defaultNucleotideSequence"};
const std::string DEFAULT_AMINO_ACID_SEQUENCE_KEY{"defaultAminoAcidSequence"};

std::string toString(ValueType type) {
switch (type) {
case ValueType::STRING:
Expand All @@ -63,10 +66,15 @@ struct convert<silo::config::DatabaseConfig> {
static bool decode(const Node& node, silo::config::DatabaseConfig& config) {
config.schema = node["schema"].as<silo::config::DatabaseSchema>();

if (node["defaultNucleotideSequence"].IsDefined()) {
config.default_nucleotide_sequence = node["defaultNucleotideSequence"].as<std::string>();
} else {
config.default_nucleotide_sequence = "main";
if (node[DEFAULT_NUCLEOTIDE_SEQUENCE_KEY].IsDefined() &&
!node[DEFAULT_NUCLEOTIDE_SEQUENCE_KEY].IsNull()) {
config.default_nucleotide_sequence =
node[DEFAULT_NUCLEOTIDE_SEQUENCE_KEY].as<std::string>();
}
if (node[DEFAULT_AMINO_ACID_SEQUENCE_KEY].IsDefined() &&
!node[DEFAULT_AMINO_ACID_SEQUENCE_KEY].IsNull()) {
config.default_amino_acid_sequence =
node[DEFAULT_AMINO_ACID_SEQUENCE_KEY].as<std::string>();
}

SPDLOG_TRACE("Resulting database config: {}", config);
Expand All @@ -77,8 +85,11 @@ struct convert<silo::config::DatabaseConfig> {
Node node;
node["schema"] = config.schema;

if (config.default_nucleotide_sequence != "main") {
node["defaultNucleotideSequence"] = config.default_nucleotide_sequence;
if (config.default_nucleotide_sequence.has_value()) {
node[DEFAULT_NUCLEOTIDE_SEQUENCE_KEY] = *config.default_nucleotide_sequence;
}
if (config.default_amino_acid_sequence.has_value()) {
node[DEFAULT_AMINO_ACID_SEQUENCE_KEY] = *config.default_amino_acid_sequence;
}
return node;
}
Expand Down Expand Up @@ -200,8 +211,19 @@ void DatabaseConfig::writeConfig(const std::filesystem::path& config_path) const

DatabaseConfig DatabaseConfigReader::readConfig(const std::filesystem::path& config_path) const {
SPDLOG_INFO("Reading database config from {}", config_path.string());
std::stringstream yaml;

std::ifstream file(config_path);
if (!file.is_open()) {
throw std::runtime_error(
"Failed to read database config: Could not open file " + config_path.string()
);
}

yaml << file.rdbuf();

try {
return YAML::LoadFile(config_path.string()).as<DatabaseConfig>();
return parseYaml(yaml.str());
} catch (const YAML::Exception& e) {
throw std::runtime_error(
"Failed to read database config from " + config_path.string() + ": " +
Expand All @@ -210,6 +232,10 @@ DatabaseConfig DatabaseConfigReader::readConfig(const std::filesystem::path& con
}
}

/// Builds a DatabaseConfig from a YAML document held in memory.
///
/// @param yaml the complete YAML text of the database config
/// @return the config obtained by converting the document's root node
///
/// Errors raised while loading or converting the document are not handled
/// here and propagate to the caller.
DatabaseConfig DatabaseConfigReader::parseYaml(const std::string& yaml) const {
   const YAML::Node root = YAML::Load(yaml);
   return root.as<DatabaseConfig>();
}

} // namespace silo::config

[[maybe_unused]] auto fmt::formatter<silo::config::DatabaseConfig>::format(
Expand All @@ -218,8 +244,13 @@ DatabaseConfig DatabaseConfigReader::readConfig(const std::filesystem::path& con
) -> decltype(ctx.out()) {
return fmt::format_to(
ctx.out(),
"{{ default_nucleotide_sequence: '{}', schema: {} }}",
database_config.default_nucleotide_sequence,
"{{ default_nucleotide_sequence: {}, default_amino_acid_sequence: {}, schema: {} }}",
database_config.default_nucleotide_sequence.has_value()
? "'" + *database_config.default_nucleotide_sequence + "'"
: "null",
// Print the amino acid default itself. Dereferencing the nucleotide optional
// here would show the wrong value and is undefined behaviour when only the
// amino acid default is configured.
database_config.default_amino_acid_sequence.has_value()
? "'" + *database_config.default_amino_acid_sequence + "'"
: "null",
database_config.schema
);
}
Expand Down
111 changes: 91 additions & 20 deletions src/silo/config/database_config.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ TEST(DatabaseConfigReader, shouldReadConfigWithCorrectParameters) {
ASSERT_EQ(config.schema.metadata[8].name, "qc_value");
ASSERT_EQ(config.schema.metadata[8].type, ValueType::FLOAT);
ASSERT_EQ(config.schema.metadata[8].generate_index, false);
ASSERT_EQ(config.default_nucleotide_sequence, std::nullopt);
ASSERT_EQ(config.default_amino_acid_sequence, std::nullopt);
}

TEST(DatabaseConfigReader, shouldThrowExceptionWhenConfigFileDoesNotExist) {
Expand All @@ -148,47 +150,116 @@ TEST(DatabaseConfigReader, shouldThrowExceptionWhenConfigFileDoesNotExist) {
}

TEST(DatabaseConfigReader, shouldThrowErrorForInvalidMetadataType) {
ASSERT_THROW(
(void)DatabaseConfigReader().readConfig(
"testBaseData/test_database_config_with_invalid_metadata_type.yaml"
),
ConfigException
);
const auto* yaml = R"-(
schema:
instanceName: dummy name
metadata:
- name: wrongType
type: wrong_type
primaryKey: gisaid_epi_isl
)-";

ASSERT_THROW((void)DatabaseConfigReader().parseYaml(yaml), ConfigException);
}

TEST(DatabaseConfigReader, shouldNotThrowIfThereAreAdditionalEntries) {
ASSERT_NO_THROW((void)DatabaseConfigReader().readConfig(
"testBaseData/test_database_config_with_additional_entries.yaml"
));
const auto* yaml = R"-(
schema:
instanceName: dummy name
metadata:
- name: key
type: string
primaryKey: key
features:
- name: this is unknown to SILO
)-";

ASSERT_NO_THROW((void)DatabaseConfigReader().parseYaml(yaml));
}

TEST(DatabaseConfigReader, shouldThrowIfTheConfigHasAnInvalidStructure) {
const auto* yaml = R"-(
schema:
instanceName: dummy name
primaryKey: missing metadata
)-";

EXPECT_THAT(
[]() {
(void)DatabaseConfigReader().readConfig(
"testBaseData/test_database_config_with_invalid_structure.yaml"
);
},
[yaml]() { (void)DatabaseConfigReader().parseYaml(yaml); },
ThrowsMessage<std::runtime_error>(
::testing::HasSubstr("invalid node; first invalid key: \"metadata\"")
)
);
}

TEST(DatabaseConfigReader, shouldReadConfigWithoutDateToSortBy) {
const DatabaseConfig& config = DatabaseConfigReader().readConfig(
"testBaseData/test_database_config_without_date_to_sort_by.yaml"
);
const auto* yaml = R"-(
schema:
instanceName: Having no dateToSortBy is valid
metadata:
- name: primaryKey
type: string
primaryKey: primaryKey
partitionBy: pango_lineage
)-";

const DatabaseConfig& config = DatabaseConfigReader().parseYaml(yaml);

ASSERT_EQ(config.schema.date_to_sort_by, std::nullopt);
}

TEST(DatabaseConfigReader, shouldReadConfigWithoutPartitionBy) {
const DatabaseConfig& config = DatabaseConfigReader().readConfig(
"testBaseData/test_database_config_without_partition_by.yaml"
);
const auto* yaml = R"-(
schema:
instanceName: dummy without partitionBy
metadata:
- name: primaryKey
type: string
- name: date
type: date
primaryKey: primaryKey
dateToSortBy: date
)-";

const DatabaseConfig& config = DatabaseConfigReader().parseYaml(yaml);

ASSERT_EQ(config.schema.partition_by, std::nullopt);
}

// Both default-sequence keys are given as plain strings at the top level of
// the YAML; each must be parsed into its corresponding optional config field.
TEST(DatabaseConfigReader, shouldReadConfigWithDefaultSequencesSet) {
const auto* yaml = R"-(
schema:
instanceName: dummy without partitionBy
metadata:
- name: primaryKey
type: string
primaryKey: primaryKey
defaultNucleotideSequence: defaultNuc
defaultAminoAcidSequence: defaultAA
)-";

const DatabaseConfig& config = DatabaseConfigReader().parseYaml(yaml);

ASSERT_EQ(config.default_nucleotide_sequence, "defaultNuc");
ASSERT_EQ(config.default_amino_acid_sequence, "defaultAA");
}

// Both default-sequence keys are present but explicitly set to YAML `null`;
// parsing must succeed and leave both optional config fields empty.
TEST(DatabaseConfigReader, shouldReadConfigWithDefaultSequencesSetButNull) {
const auto* yaml = R"-(
schema:
instanceName: dummy without partitionBy
metadata:
- name: primaryKey
type: string
primaryKey: primaryKey
defaultNucleotideSequence: null
defaultAminoAcidSequence: null
)-";

const DatabaseConfig& config = DatabaseConfigReader().parseYaml(yaml);

ASSERT_EQ(config.default_nucleotide_sequence, std::nullopt);
ASSERT_EQ(config.default_amino_acid_sequence, std::nullopt);
}

} // namespace
2 changes: 1 addition & 1 deletion src/silo/database.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ std::optional<std::string> Database::getDefaultSequenceName<Nucleotide>() const

template <>
std::optional<std::string> Database::getDefaultSequenceName<AminoAcid>() const {
return std::nullopt;
return database_config.default_amino_acid_sequence;
}

template <>
Expand Down
Loading