Skip to content

Commit

Permalink
fix: add workaround so insertions are read correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
JonasKellerer committed Jan 16, 2024
1 parent 88a2dc0 commit 8a2bfa8
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 26 deletions.
10 changes: 0 additions & 10 deletions .run/silo--preprocessing.run.xml

This file was deleted.

2 changes: 1 addition & 1 deletion conanfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class SiloRecipe(ConanFile):
"duckdb/0.8.1",
"poco/1.12.4",
"hwloc/2.9.3",
"onetbb/2021.9.0",
"onetbb/2021.10.0",
"nlohmann_json/3.11.2",
"gtest/cci.20210126",
"roaring/1.0.0",
Expand Down
2 changes: 2 additions & 0 deletions include/silo/common/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ namespace silo {

std::vector<std::string> splitBy(const std::string& value, const std::string_view delimiter);

/// Returns a copy of `value` in which every occurrence of `symbol` has been
/// removed. The input string itself is left untouched.
std::string removeSymbol(const std::string& value, char symbol);

} // namespace silo
6 changes: 6 additions & 0 deletions src/silo/common/string_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "silo/common/string_utils.h"
#include <algorithm>

namespace silo {

Expand All @@ -19,4 +20,9 @@ std::vector<std::string> splitBy(const std::string& value, const std::string_vie
return splits;
}

/// Builds a copy of `value` that omits every character equal to `symbol`.
/// Characters that are kept appear in the same order as in the input.
std::string removeSymbol(const std::string& value, char symbol) {
   std::string kept;
   // The result can never be longer than the input, so one allocation suffices.
   kept.reserve(value.size());
   for (const char current : value) {
      if (current != symbol) {
         kept.push_back(current);
      }
   }
   return kept;
}
} // namespace silo
38 changes: 37 additions & 1 deletion src/silo/common/string_utils.test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <gtest/gtest.h>

using silo::removeSymbol;
using silo::splitBy;

TEST(splitBy, correctSplit) {
Expand Down Expand Up @@ -34,4 +35,39 @@ TEST(splitBy, correctWithEmptyString) {

const auto result = splitBy(input, delimiter);
EXPECT_EQ(result, std::vector<std::string>{""});
}
}

// Quotes in the middle and at the end of the input must all be stripped.
TEST(removeSymbol, removesAllOccurences) {
   const std::string quoted_text = "ABC\"DEF\"ADS\"";
   const std::string expected = "ABCDEFADS";

   EXPECT_EQ(removeSymbol(quoted_text, '"'), expected);
}

// A single leading quote is dropped and the rest is kept unchanged.
TEST(removeSymbol, removesAtBeginning) {
   const std::string quoted_text = "\"ABC";
   const std::string expected = "ABC";

   EXPECT_EQ(removeSymbol(quoted_text, '"'), expected);
}

// A single trailing quote is dropped and the rest is kept unchanged.
TEST(removeSymbol, removesAtEnd) {
   const std::string quoted_text = "ABC\"";
   const std::string expected = "ABC";

   EXPECT_EQ(removeSymbol(quoted_text, '"'), expected);
}

// A value wrapped in quotes on both sides loses both of them.
TEST(removeSymbol, removesAtBeginningAndEnd) {
   const std::string quoted_text = "\"ABC\"";
   const std::string expected = "ABC";

   EXPECT_EQ(removeSymbol(quoted_text, '"'), expected);
}

// Input without the symbol must come back unchanged.
TEST(removeSymbol, doesNotRemoveIfNotContained) {
   const std::string plain_text = "ABCDEFADS";
   const std::string expected = "ABCDEFADS";

   EXPECT_EQ(removeSymbol(plain_text, '"'), expected);
}
40 changes: 26 additions & 14 deletions src/silo/storage/column/insertion_column.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,34 @@ InsertionEntry parseInsertion(
const std::string& value,
const std::optional<std::string>& default_sequence_name
) {
const auto position_and_insertion = splitBy(value, DELIMITER_INSERTION);
if (position_and_insertion.size() == 2) {
if (default_sequence_name == std::nullopt) {
const std::string message = "Failed to parse insertion due to invalid format: " + value;
throw preprocessing::PreprocessingException(message);
auto position_and_insertion = splitBy(value, DELIMITER_INSERTION);
std::transform(
position_and_insertion.begin(),
position_and_insertion.end(),
position_and_insertion.begin(),
[](const std::string& value) { return silo::removeSymbol(value, '\"'); }
);
try {
if (position_and_insertion.size() == 2) {
if (default_sequence_name == std::nullopt) {
const std::string message = "Failed to parse insertion due to invalid format: " + value;
throw preprocessing::PreprocessingException(message);
}
const auto position = boost::lexical_cast<uint32_t>(position_and_insertion[0]);
const auto& insertion = position_and_insertion[1];
return {*default_sequence_name, position, insertion};
}
const auto position = boost::lexical_cast<uint32_t>(position_and_insertion[0]);
const auto& insertion = position_and_insertion[1];
return {*default_sequence_name, position, insertion};
}
if (position_and_insertion.size() == 3) {
const auto& sequence_name = position_and_insertion[0];
const auto position = boost::lexical_cast<uint32_t>(position_and_insertion[1]);
const auto& insertion = position_and_insertion[2];
return {sequence_name, position, insertion};
if (position_and_insertion.size() == 3) {
const auto& sequence_name = position_and_insertion[0];
const auto position = boost::lexical_cast<uint32_t>(position_and_insertion[1]);
const auto& insertion = position_and_insertion[2];
return {sequence_name, position, insertion};
}
} catch (const boost::bad_lexical_cast& error) {
const std::string message = "Failed to parse insertion due to invalid format: " + value;
throw preprocessing::PreprocessingException(message + ". Error: " + error.what());
}

const std::string message = "Failed to parse insertion due to invalid format: " + value;
throw preprocessing::PreprocessingException(message);
}
Expand Down

0 comments on commit 8a2bfa8

Please sign in to comment.