-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: support SAM files as sequence input and allow partial sequence input with an offset
- Loading branch information
Showing
66 changed files
with
880 additions
and
1,002 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#pragma once | ||
|
||
#include <filesystem> | ||
#include <optional> | ||
#include <string> | ||
|
||
#include "silo/sequence_file_reader/sequence_file_reader.h" | ||
|
||
namespace silo::sequence_file_reader { | ||
|
||
class FastaReader : public SequenceFileReader { | ||
public: | ||
explicit FastaReader(const std::filesystem::path& in_file_name) | ||
: SequenceFileReader(in_file_name) {} | ||
std::optional<ReadSequence> nextEntry() override; | ||
}; | ||
} // namespace silo::sequence_file_reader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#pragma once | ||
|
||
#include <stdexcept> | ||
#include <string> | ||
|
||
namespace silo::sequence_file_reader { | ||
|
||
/// Exception type carrying a human-readable message about a SAM format problem.
///
/// Derives from std::runtime_error, so handlers written for std::runtime_error
/// or std::exception catch it as well and can read the text through what().
/// NOTE(review): presumably thrown by the SAM reader on malformed input —
/// confirm at the throw sites, which are not visible in this header.
class SamFormatException : public std::runtime_error {
  public:
   /// @param error_message text later returned by what().
   explicit SamFormatException(const std::string& error_message)
       // Use the std::string overload directly; going through c_str() only
       // adds a strlen and a second copy.
       : std::runtime_error(error_message) {}
};
|
||
} // namespace silo::sequence_file_reader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#pragma once | ||
|
||
#include <filesystem> | ||
#include <optional> | ||
#include <string> | ||
|
||
#include "silo/sequence_file_reader/sequence_file_reader.h" | ||
|
||
namespace silo::sequence_file_reader { | ||
|
||
class SamReader : public SequenceFileReader { | ||
public: | ||
explicit SamReader(const std::filesystem::path& in_file_name) | ||
: SequenceFileReader(in_file_name) {} | ||
explicit SamReader(const std::string& file_content) | ||
: SequenceFileReader(file_content) {} | ||
std::optional<ReadSequence> nextEntry() override; | ||
}; | ||
} // namespace silo::sequence_file_reader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#pragma once | ||
|
||
#include <optional> | ||
#include <string> | ||
|
||
#include "silo/common/input_stream_wrapper.h" | ||
|
||
namespace silo::sequence_file_reader { | ||
class SequenceFileReader { | ||
protected: | ||
explicit SequenceFileReader(const std::filesystem::path& in_file_name) | ||
: in_file(in_file_name){}; | ||
explicit SequenceFileReader(const std::string& file_content) | ||
: in_file(file_content){}; | ||
|
||
silo::InputStreamWrapper in_file; | ||
|
||
public: | ||
struct ReadSequence { | ||
std::string key; | ||
uint32_t offset; | ||
std::string sequence; | ||
}; | ||
|
||
virtual std::optional<ReadSequence> nextEntry() = 0; | ||
|
||
virtual ~SequenceFileReader(){}; | ||
}; | ||
} // namespace silo::sequence_file_reader |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
#pragma once | ||
|
||
#include <string> | ||
|
||
#include "silo/sequence_file_reader/sequence_file_reader.h" | ||
|
||
namespace duckdb {
// Forward declaration so this header does not need to pull in the DuckDB
// headers. DuckDB declares Connection with the `class` key; using the same key
// here avoids -Wmismatched-tags / MSVC C4099 and MSVC name-mangling mismatches.
class Connection;
}  // namespace duckdb
|
||
namespace silo { | ||
|
||
class ZstdTable { | ||
duckdb::Connection& connection; | ||
std::string table_name; | ||
|
||
ZstdTable(duckdb::Connection& connection, std::string table_name) | ||
: connection(connection), | ||
table_name(std::move(table_name)){}; | ||
|
||
public: | ||
static ZstdTable generate( | ||
duckdb::Connection& connection, | ||
const std::string& table_name, | ||
sequence_file_reader::SequenceFileReader& file_reader, | ||
std::string_view reference_sequence | ||
); | ||
}; | ||
|
||
} // namespace silo |
Oops, something went wrong.