diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp deleted file mode 100644 index 4dec4fd5ea0..00000000000 --- a/dbms/src/Storages/StorageFile.cpp +++ /dev/null @@ -1,348 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include - -#include -#include - -#include -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -#include - -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int CANNOT_WRITE_TO_FILE_DESCRIPTOR; - extern const int CANNOT_SEEK_THROUGH_FILE; - extern const int DATABASE_ACCESS_DENIED; - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int UNKNOWN_IDENTIFIER; - extern const int INCORRECT_FILE_NAME; - extern const int FILE_DOESNT_EXIST; - extern const int EMPTY_LIST_OF_COLUMNS_PASSED; -}; - - -static std::string getTablePath(const std::string & db_dir_path, const std::string & table_name, const std::string & format_name) -{ - return db_dir_path + escapeForFileName(table_name) + "/data." + escapeForFileName(format_name); -} - -/// Both db_dir_path and table_path must be converted to absolute paths (in particular, path cannot contain '..'). -static void checkCreationIsAllowed(Context & context_global, const std::string & db_dir_path, const std::string & table_path, int table_fd) -{ - if (context_global.getApplicationType() != Context::ApplicationType::SERVER) - return; - - if (table_fd >= 0) - throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); - else if (!startsWith(table_path, db_dir_path)) - throw Exception("Part path " + table_path + " is not inside " + db_dir_path, ErrorCodes::DATABASE_ACCESS_DENIED); - - Poco::File table_path_poco_file = Poco::File(table_path); - if (!table_path_poco_file.exists()) - throw Exception("File " + table_path + " is not exist", ErrorCodes::FILE_DOESNT_EXIST); - else if (table_path_poco_file.isDirectory()) - throw Exception("File " + table_path + " must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); -} - - -StorageFile::StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_) - : IStorage(columns_), - table_name(table_name_), format_name(format_name_), context_global(context_), table_fd(table_fd_) -{ - if (table_fd < 0) /// Will use file - { - use_table_fd = false; - - if (!table_path_.empty()) /// Is user's file - { - Poco::Path poco_path = Poco::Path(table_path_); - if (poco_path.isRelative()) - poco_path = Poco::Path(db_dir_path, poco_path); - - path = poco_path.absolute().toString(); - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - is_db_table = false; - } - else /// Is DB's file - { - if (db_dir_path.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); - - path = getTablePath(db_dir_path, table_name, format_name); - is_db_table = true; - Poco::File(Poco::Path(path).parent()).createDirectories(); - } - } - else /// Will use FD - { - checkCreationIsAllowed(context_global, db_dir_path, path, table_fd); - - is_db_table = false; - use_table_fd = true; - - /// Save initial offset, it will be used for repeating SELECTs - /// If FD isn't seekable (lseek returns -1), then the second and subsequent SELECTs will fail. - table_fd_init_offset = lseek(table_fd, 0, SEEK_CUR); - } -} - - -class StorageFileBlockInputStream : public IProfilingBlockInputStream -{ -public: - StorageFileBlockInputStream(StorageFile & storage_, const Context & context, size_t max_block_size) - : storage(storage_) - { - if (storage.use_table_fd) - { - storage.rwlock.lock(); - - /// We could use common ReadBuffer and WriteBuffer in storage to leverage cache - /// and add ability to seek unseekable files, but cache sync isn't supported. - - if (storage.table_fd_was_used) /// We need seek to initial position - { - if (storage.table_fd_init_offset < 0) - throw Exception("File descriptor isn't seekable, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - - /// ReadBuffer's seek() doesn't make sence, since cache is empty - if (lseek(storage.table_fd, storage.table_fd_init_offset, SEEK_SET) < 0) - throwFromErrno("Cannot seek file descriptor, inside " + storage.getName(), ErrorCodes::CANNOT_SEEK_THROUGH_FILE); - } - - storage.table_fd_was_used = true; - read_buf = std::make_unique(storage.table_fd); - } - else - { - storage.rwlock.lock_shared(); - - read_buf = std::make_unique(storage.path); - } - - reader = FormatFactory().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); - } - - ~StorageFileBlockInputStream() override - { - if (storage.use_table_fd) - storage.rwlock.unlock(); - else - storage.rwlock.unlock_shared(); - } - - String getName() const override - { - return storage.getName(); - } - - Block readImpl() override - { - return reader->read(); - } - - Block getHeader() const override { return reader->getHeader(); }; - - void readPrefixImpl() override - { - reader->readPrefix(); - } - - void readSuffixImpl() override - { - reader->readSuffix(); - } - -private: - StorageFile & storage; - Block sample_block; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; -}; - - -BlockInputStreams StorageFile::read( - const Names & /*column_names*/, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum & /*processed_stage*/, - size_t max_block_size, - unsigned /*num_streams*/) -{ - return BlockInputStreams(1, std::make_shared(*this, context, max_block_size)); -} - - -class StorageFileBlockOutputStream : public IBlockOutputStream -{ -public: - explicit StorageFileBlockOutputStream(StorageFile & storage_) - : storage(storage_), lock(storage.rwlock) - { - if (storage.use_table_fd) - { - /** NOTE: Using real file binded to FD may be misleading: - * SELECT *; INSERT insert_data; SELECT *; last SELECT returns initil_fd_data + insert_data - * INSERT data; SELECT *; last SELECT returns only insert_data - */ - storage.table_fd_was_used = true; - write_buf = std::make_unique(storage.table_fd); - } - else - { - write_buf = std::make_unique(storage.path, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT); - } - - writer = FormatFactory().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), storage.context_global); - } - - Block getHeader() const override { return storage.getSampleBlock(); } - - void write(const Block & block) override - { - writer->write(block); - } - - void writePrefix() override - { - writer->writePrefix(); - } - - void writeSuffix() override - { - writer->writeSuffix(); - } - - void flush() override - { - writer->flush(); - } - -private: - StorageFile & storage; - std::unique_lock lock; - std::unique_ptr write_buf; - BlockOutputStreamPtr writer; -}; - -BlockOutputStreamPtr StorageFile::write( - const ASTPtr & /*query*/, - const Settings & /*settings*/) -{ - return std::make_shared(*this); -} - - -void StorageFile::drop() -{ - /// Extra actions are not required. -} - - -void StorageFile::rename(const String & new_path_to_db, const String & /*new_database_name*/, const String & new_table_name) -{ - if (!is_db_table) - throw Exception("Can't rename table '" + table_name + "' binded to user-defined file (or FD)", ErrorCodes::DATABASE_ACCESS_DENIED); - - std::unique_lock lock(rwlock); - - std::string path_new = getTablePath(new_path_to_db, new_table_name, format_name); - Poco::File(Poco::Path(path_new).parent()).createDirectories(); - Poco::File(path).renameTo(path_new); - - path = std::move(path_new); -} - - -void registerStorageFile(StorageFactory & factory) -{ - factory.registerStorage("File", [](const StorageFactory::Arguments & args) - { - ASTs & engine_args = args.engine_args; - - if (!(engine_args.size() == 1 || engine_args.size() == 2)) - throw Exception( - "Storage File requires 1 or 2 arguments: name of used format and source.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); - String format_name = static_cast(*engine_args[0]).value.safeGet(); - - int source_fd = -1; - String source_path; - if (engine_args.size() >= 2) - { - /// Will use FD if engine_args[1] is int literal or identifier with std* name - - if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) - { - if (identifier->name == "stdin") - source_fd = STDIN_FILENO; - else if (identifier->name == "stdout") - source_fd = STDOUT_FILENO; - else if (identifier->name == "stderr") - source_fd = STDERR_FILENO; - else - throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", - ErrorCodes::UNKNOWN_IDENTIFIER); - } - else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) - { - auto type = literal->value.getType(); - if (type == Field::Types::Int64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::UInt64) - source_fd = static_cast(literal->value.get()); - else if (type == Field::Types::String) - source_path = literal->value.get(); - } - } - - return StorageFile::create( - source_path, source_fd, - args.data_path, - args.table_name, format_name, args.columns, - args.context); - }); -} - -} diff --git a/dbms/src/Storages/StorageFile.h b/dbms/src/Storages/StorageFile.h deleted file mode 100644 index ca46f7f366e..00000000000 --- a/dbms/src/Storages/StorageFile.h +++ /dev/null @@ -1,100 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include -#include - -#include -#include -#include - - -namespace DB -{ -class StorageFileBlockInputStream; -class StorageFileBlockOutputStream; - -class StorageFile : public ext::SharedPtrHelper - , public IStorage -{ -public: - std::string getName() const override - { - return "File"; - } - - std::string getTableName() const override - { - return table_name; - } - - BlockInputStreams read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size, - unsigned num_streams) override; - - BlockOutputStreamPtr write( - const ASTPtr & query, - const Settings & settings) override; - - void drop() override; - - void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; - - String getDataPath() const override { return path; } - -protected: - friend class StorageFileBlockInputStream; - friend class StorageFileBlockOutputStream; - - /** there are three options (ordered by priority): - - use specified file descriptor if (fd >= 0) - - use specified table_path if it isn't empty - - create own table inside data/db/table/ - */ - StorageFile( - const std::string & table_path_, - int table_fd_, - const std::string & db_dir_path, - const std::string & table_name_, - const std::string & format_name_, - const ColumnsDescription & columns_, - Context & context_); - -private: - std::string table_name; - std::string format_name; - Context & context_global; - - std::string path; - int table_fd = -1; - - bool is_db_table = true; /// Table is stored in real database, not user's file - bool use_table_fd = false; /// Use table_fd insted of path - std::atomic table_fd_was_used{false}; /// To detect repeating reads from stdin - off_t table_fd_init_offset = -1; /// Initial position of fd, used for repeating reads - - mutable std::shared_mutex rwlock; - - Poco::Logger * log = &Poco::Logger::get("StorageFile"); -}; - -} // namespace DB diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index a709be0b017..ddf815316ab 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -27,7 +27,6 @@ void registerStorageNull(StorageFactory & factory); void registerStorageMerge(StorageFactory & factory); void registerStorageBuffer(StorageFactory & factory); void registerStorageMemory(StorageFactory & factory); -void registerStorageFile(StorageFactory & factory); void registerStorageDictionary(StorageFactory & factory); void registerStorageSet(StorageFactory & factory); void registerStorageJoin(StorageFactory & factory); @@ -47,7 +46,6 @@ void registerStorages() registerStorageMerge(factory); registerStorageBuffer(factory); registerStorageMemory(factory); - registerStorageFile(factory); registerStorageDictionary(factory); registerStorageSet(factory); registerStorageJoin(factory); diff --git a/dbms/src/TableFunctions/TableFunctionFile.cpp b/dbms/src/TableFunctions/TableFunctionFile.cpp deleted file mode 100644 index 0ff1a5b443f..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.cpp +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -namespace DB -{ -namespace ErrorCodes -{ -extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; -extern const int DATABASE_ACCESS_DENIED; -} // namespace ErrorCodes - -StoragePtr TableFunctionFile::executeImpl(const ASTPtr & ast_function, const Context & context) const -{ - // Parse args - ASTs & args_func = typeid_cast(*ast_function).children; - - if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); - - ASTs & args = typeid_cast(*args_func.at(0)).children; - - if (args.size() != 3) - throw Exception("Table function '" + getName() + "' requires exactly 3 arguments: path, format and structure.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - - for (size_t i = 0; i < 3; ++i) - args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(args[i], context); - - std::string path = static_cast(*args[0]).value.safeGet(); - std::string format = static_cast(*args[1]).value.safeGet(); - std::string structure = static_cast(*args[2]).value.safeGet(); - - // Create sample block - std::vector structure_vals; - boost::split(structure_vals, structure, boost::algorithm::is_any_of(" ,"), boost::algorithm::token_compress_on); - - if (structure_vals.size() % 2 != 0) - throw Exception("Odd number of elements in section structure: must be a list of name type pairs", ErrorCodes::LOGICAL_ERROR); - - Block sample_block; - const DataTypeFactory & data_type_factory = DataTypeFactory::instance(); - - for (size_t i = 0, size = structure_vals.size(); i < size; i += 2) - { - ColumnWithTypeAndName column; - column.name = structure_vals[i]; - column.type = data_type_factory.get(structure_vals[i + 1]); - column.column = column.type->createColumn(); - sample_block.insert(std::move(column)); - } - - // Create table - StoragePtr storage = StorageFile::create( - path, - -1, - context.getUserFilesPath(), - getName(), - format, - ColumnsDescription{sample_block.getNamesAndTypesList()}, - const_cast(context)); - - storage->startup(); - - return storage; -} - - -void registerTableFunctionFile(TableFunctionFactory & factory) -{ - factory.registerFunction(); -} - -} // namespace DB diff --git a/dbms/src/TableFunctions/TableFunctionFile.h b/dbms/src/TableFunctions/TableFunctionFile.h deleted file mode 100644 index dda367c2679..00000000000 --- a/dbms/src/TableFunctions/TableFunctionFile.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2022 PingCAP, Ltd. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - - -namespace DB -{ -/* file(path, format, structure) - creates a temporary storage from file - * - * - * The file must be in the clickhouse data directory. - * The relative path begins with the clickhouse data directory. - */ -class TableFunctionFile : public ITableFunction -{ -public: - static constexpr auto name = "file"; - std::string getName() const override { return name; } - -private: - StoragePtr executeImpl(const ASTPtr & ast_function, const Context & context) const override; -}; - - -} // namespace DB diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index 2eac0ce0548..bfe219eec62 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -22,7 +22,6 @@ namespace DB void registerTableFunctionMerge(TableFunctionFactory & factory); void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); -void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctions() { auto & factory = TableFunctionFactory::instance(); @@ -30,7 +29,6 @@ void registerTableFunctions() registerTableFunctionMerge(factory); registerTableFunctionNumbers(factory); registerTableFunctionCatBoostPool(factory); - registerTableFunctionFile(factory); } } // namespace DB