-
Notifications
You must be signed in to change notification settings - Fork 411
/
StorageLog.h
138 lines (107 loc) · 4.66 KB
/
StorageLog.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Common/FileChecker.h>
#include <Common/escapeForFileName.h>
#include <Poco/File.h>
#include <Storages/IStorage.h>
#include <ext/shared_ptr_helper.h>
#include <map>
#include <shared_mutex>
namespace DB
{
/** Implements a simple table engine without support for indices.
  * The data is stored in compressed form.
  *
  * This header only declares the storage; apart from the small inline accessors below,
  * the member functions are not defined here.
  */
class StorageLog
    : public ext::SharedPtrHelper<StorageLog>
    , public IStorage
{
    /// The block streams are friends, so they can reach the private state declared below.
    friend class LogBlockInputStream;
    friend class LogBlockOutputStream;

public:
    /// Returns the constant string "Log".
    std::string getName() const override { return "Log"; }

    /// Returns the table name held in `name`.
    std::string getTableName() const override { return name; }

    /// IStorage override; not defined in this header.
    BlockInputStreams read(
        const Names & column_names,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum & processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    /// IStorage override; not defined in this header.
    BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override;

    /// IStorage override; not defined in this header.
    void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name)
        override;

    /// IStorage override; not defined in this header.
    bool checkData() const override;

    /// Table directory: `path`, then the table name escaped for use as a file name, then a trailing '/'.
    /// This is the single place where that path is composed.
    std::string full_path() const { return path + escapeForFileName(name) + '/'; }

    /// Returns the same value as full_path().
    String getDataPath() const override { return full_path(); }

protected:
    /** Attach the table with the appropriate name, along the appropriate path (with / at the end),
      *  (the correctness of names and paths is not verified)
      *  consisting of the specified columns; create files if they do not exist.
      */
    StorageLog(
        const std::string & path_,
        const std::string & name_,
        const ColumnsDescription & columns_,
        size_t max_compress_block_size_);

private:
    String path;
    String name;

    /// Declared `mutable`, so it can also be locked from const member functions.
    mutable std::shared_mutex rwlock;

    /** Offsets to some row number in a file for a column in the table.
      * They are needed so that the data can be read in several threads.
      */
    struct Mark
    {
        size_t rows; /// How many rows are before this offset including the block at this offset.
        size_t offset; /// The offset in compressed file.
    };

    using Marks = std::vector<Mark>;

    /// Column data
    struct ColumnData
    {
        /// Specifies the column number in the marks file.
        /// Does not necessarily match the column number among the columns of the table: columns with lengths of arrays are also numbered here.
        size_t column_index;

        Poco::File data_file;
        Marks marks;
    };

    using Files_t = std::map<String, ColumnData>;

    Files_t files; /// name -> data
    Names column_names; /// column_index -> name

    Poco::File marks_file;

    /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
    /// NOTE(review): `addFile` further down carries the same comment; confirm both declarations are still defined and used.
    void addFiles(const String & column_name, const IDataType & type);

    /// NOTE(review): presumably flipped once loadMarks() has read the marks - confirm in the implementation file.
    bool loaded_marks = false;

    size_t max_compress_block_size;
    size_t file_count = 0;

    FileChecker file_checker;

    /// Read marks files if they are not already read.
    /// It is done lazily, so that with a large number of tables, the server starts quickly.
    /// You can not call with a write locked `rwlock`.
    void loadMarks();

    /// The order of adding files should not change: it corresponds to the order of the columns in the marks file.
    void addFile(const String & column_name, const IDataType & type, size_t level = 0);

    /** For normal columns, the number of rows in the block is specified in the marks.
      * For array columns and nested structures, there are more than one group of marks that correspond to different files
      *  - for elements (file name.bin) - the total number of array elements in the block is specified,
      *  - for array sizes (file name.size0.bin) - the number of rows (the whole arrays themselves) in the block is specified.
      *
      * Return the first group of marks that contain the number of rows, but not the internals of the arrays.
      */
    const Marks & getMarksWithRealRowCount() const;

    /// Private alias of full_path(). It delegates instead of repeating the expression,
    /// so the two accessors cannot drift apart.
    std::string getFullPath() const { return full_path(); }
};
} // namespace DB