diff --git a/include/dsn/utility/filesystem.h b/include/dsn/utility/filesystem.h
index 66f735e80a..b2cb0cc6c8 100644
--- a/include/dsn/utility/filesystem.h
+++ b/include/dsn/utility/filesystem.h
@@ -136,6 +136,21 @@ bool verify_file(const std::string &fname,
                  const std::string &expected_md5,
                  const int64_t &expected_fsize);
 
+// create directory `path` and get its absolute path
+// on failure, return false and describe the error in `err_msg`
+bool create_directory(const std::string &path,
+                      /*out*/ std::string &absolute_path,
+                      /*out*/ std::string &err_msg);
+
+// overwrite the existing file `fname` with the content of `buf`
+// return false if `fname` doesn't exist
+bool write_file(const std::string &fname, std::string &buf);
+
+// check if directory is readable and writable
+// call `create_directory` before to make `path` exist
+// on failure, return false and describe the error in `err_msg`
+bool check_dir_rw(const std::string &path, /*out*/ std::string &err_msg);
+
 } // namespace filesystem
 } // namespace utils
 } // namespace dsn
diff --git a/src/common/fs_manager.cpp b/src/common/fs_manager.cpp
index 0e311c117c..9f1b4b7796 100644
--- a/src/common/fs_manager.cpp
+++ b/src/common/fs_manager.cpp
@@ -187,6 +187,7 @@ dsn::error_code fs_manager::initialize(const std::vector &data_dirs
                norm_path.c_str(),
                tags[i].c_str());
     }
+    _available_data_dirs = data_dirs;
 
     if (!for_test) {
         update_disk_stat();
diff --git a/src/common/fs_manager.h b/src/common/fs_manager.h
index 2c401afb7c..cfd4c414ec 100644
--- a/src/common/fs_manager.h
+++ b/src/common/fs_manager.h
@@ -88,6 +88,14 @@ class fs_manager
     bool for_each_dir_node(const std::function &func) const;
     void update_disk_stat();
 
+    // Returns a snapshot of the data dirs that were passed to initialize().
+    // Returned by value: a reference would outlive the read lock taken here.
+    std::vector<std::string> get_available_data_dirs() const
+    {
+        zauto_read_lock l(_lock);
+        return _available_data_dirs;
+    }
+
 private:
     void reset_disk_stat()
     {
@@ -112,6 +120,8 @@ class fs_manager
     int _max_available_ratio = 0;
     std::vector> _dir_nodes;
 
+    std::vector<std::string> _available_data_dirs; // data dirs passed to initialize()
+
     // Used for disk available space check
     // disk status will be updated periodically, this vector record nodes whose disk_status changed
     // in this round
diff --git a/src/replica/replica_stub.cpp b/src/replica/replica_stub.cpp
index ac9a4813c0..5cac5549f5 100644
--- 
a/src/replica/replica_stub.cpp
+++ b/src/replica/replica_stub.cpp
@@ -64,6 +64,12 @@
 namespace dsn {
 namespace replication {
 
+DSN_DEFINE_bool("replication",
+                ignore_broken_disk,
+                true,
+                "true means ignore broken data disk when initialize");
+DSN_TAG_VARIABLE(ignore_broken_disk, FT_MUTABLE);
+
 bool replica_stub::s_not_exit_on_log_failure = false;
 
 replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
@@ -504,33 +510,13 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
     }
 
     // init dirs
-    if (!dsn::utils::filesystem::create_directory(_options.slog_dir)) {
-        dassert(false, "Fail to create directory %s.", _options.slog_dir.c_str());
-    }
     std::string cdir;
-    if (!dsn::utils::filesystem::get_absolute_path(_options.slog_dir, cdir)) {
-        dassert(false, "Fail to get absolute path from %s.", _options.slog_dir.c_str());
+    std::string err_msg;
+    if (!dsn::utils::filesystem::create_directory(_options.slog_dir, cdir, err_msg)) {
+        dassert_f(false, "{}", err_msg); // fmt-style macro; plain dassert is printf-style
     }
     _options.slog_dir = cdir;
-    int count = 0;
-    for (auto &dir : _options.data_dirs) {
-        if (!dsn::utils::filesystem::create_directory(dir)) {
-            dassert(false, "Fail to create directory %s.", dir.c_str());
-        }
-        std::string cdir;
-        if (!dsn::utils::filesystem::get_absolute_path(dir, cdir)) {
-            dassert(false, "Fail to get absolute path from %s.", dir.c_str());
-        }
-        dir = cdir;
-        ddebug("data_dirs[%d] = %s", count, dir.c_str());
-        count++;
-    }
-
-    {
-        dsn::error_code err;
-        err = _fs_manager.initialize(_options.data_dirs, _options.data_dir_tags, false);
-        dassert(err == dsn::ERR_OK, "initialize fs manager failed, err(%s)", err.to_string());
-    }
+    initialize_fs_manager(_options.data_dirs, _options.data_dir_tags);
 
     _log = new mutation_log_shared(_options.slog_dir,
                                    _options.log_shared_file_size_mb,
@@ -542,7 +528,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
     ddebug("start to load replicas");
 
     std::vector dir_list;
-    for (auto &dir : 
_options.data_dirs) {
+    for (auto &dir : _fs_manager.get_available_data_dirs()) {
         std::vector tmp_list;
         if (!dsn::utils::filesystem::get_subdirectories(dir, tmp_list, false)) {
             dassert(false, "Fail to get subdirectories in %s.", dir.c_str());
@@ -790,6 +776,37 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
     }
 }
 
+// Create and probe every configured data dir; only usable ones are handed to _fs_manager.
+// Broken dirs are skipped when FLAGS_ignore_broken_disk is set, otherwise they abort startup.
+void replica_stub::initialize_fs_manager(std::vector<std::string> &data_dirs,
+                                         std::vector<std::string> &data_dir_tags)
+{
+    dassert_f(data_dirs.size() == data_dir_tags.size(),
+              "data_dirs and data_dir_tags must have the same size");
+    std::string cdir;
+    std::string err_msg;
+    std::vector<std::string> available_dirs;
+    std::vector<std::string> available_dir_tags;
+    for (size_t i = 0; i < data_dirs.size(); ++i) {
+        const std::string &dir = data_dirs[i];
+        if (dsn_unlikely(!utils::filesystem::create_directory(dir, cdir, err_msg) ||
+                         !utils::filesystem::check_dir_rw(dir, err_msg))) {
+            // a broken dir is fatal unless the operator opted into skipping it
+            dassert_f(FLAGS_ignore_broken_disk, "{}", err_msg);
+            dwarn_f("data dir[{}] is broken, ignore it, error:{}", dir, err_msg);
+            continue;
+        }
+        ddebug_f("data_dirs[{}] = {}", available_dirs.size(), cdir);
+        available_dirs.emplace_back(cdir);
+        available_dir_tags.emplace_back(data_dir_tags[i]);
+    }
+
+    dassert_f(available_dirs.size() > 0,
+              "initialize fs manager failed, no available data directory");
+    error_code err = _fs_manager.initialize(available_dirs, available_dir_tags, false);
+    dassert_f(err == dsn::ERR_OK, "initialize fs manager failed, err({})", err);
+}
+
 void replica_stub::initialize_start()
 {
     // start timer for configuration sync
@@ -1861,7 +1878,7 @@ void replica_stub::on_disk_stat()
     uint64_t start = dsn_now_ns();
     disk_cleaning_report report{};
 
-    dsn::replication::disk_remove_useless_dirs(_options.data_dirs, report);
+    dsn::replication::disk_remove_useless_dirs(_fs_manager.get_available_data_dirs(), report);
     _fs_manager.update_disk_stat();
     update_disk_holding_replicas();
     update_disks_status();
@@ -2569,7 +2586,7 @@ std::string replica_stub::get_replica_dir(const char *app_type, gpid id, bool cr
     std::string gpid_str = fmt::format("{}.{}", id, 
app_type);
     std::string replica_dir;
     bool is_dir_exist = false;
-    for (const std::string &data_dir : _options.data_dirs) {
+    for (const std::string &data_dir : _fs_manager.get_available_data_dirs()) {
         std::string dir = utils::filesystem::path_combine(data_dir, gpid_str);
         if (utils::filesystem::directory_exists(dir)) {
             if (is_dir_exist) {
@@ -2591,7 +2608,7 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str
 {
     std::string gpid_str = fmt::format("{}.{}", child_pid.to_string(), app_type);
     std::string child_dir;
-    for (const std::string &data_dir : _options.data_dirs) {
+    for (const std::string &data_dir : _fs_manager.get_available_data_dirs()) {
         std::string dir = utils::filesystem::path_combine(data_dir, gpid_str);
         // = /.
         // check if 's is equal to
diff --git a/src/replica/replica_stub.h b/src/replica/replica_stub.h
index f3a956355e..2645a7762a 100644
--- a/src/replica/replica_stub.h
+++ b/src/replica/replica_stub.h
@@ -97,6 +97,8 @@ class replica_stub : public serverlet, public ref_counter
     //
     void initialize(const replication_options &opts, bool clear = false);
     void initialize(bool clear = false);
+    void initialize_fs_manager(std::vector &data_dirs,
+                               std::vector &data_dir_tags);
     void set_options(const replication_options &opts) { _options = opts; }
     void open_service();
     void close();
diff --git a/src/replica/test/replica_disk_test.cpp b/src/replica/test/replica_disk_test.cpp
index 3a1ad4c15c..288fcea6bf 100644
--- a/src/replica/test/replica_disk_test.cpp
+++ b/src/replica/test/replica_disk_test.cpp
@@ -224,5 +224,23 @@ TEST_F(replica_disk_test, disk_status_test)
     mock_node_status(node_index, disk_status::NORMAL, disk_status::NORMAL);
 }
 
+TEST_F(replica_disk_test, broken_disk_test)
+{
+    // Each case mocks the results of create_directory / check_dir_rw for "disk1" only:
+    // create: true, check_rw: true   -> all 3 dirs are available
+    // create: true, check_rw: false  -> "disk1" is skipped, 2 dirs left
+    // create: false                  -> "disk1" is skipped, 2 dirs left
+    struct broken_disk_test
+    {
+        std::string mock_create_dir;
+        std::string mock_rw_flag;
+        int32_t data_dir_size;
+    } tests[]{{"true", "true", 3}, {"true", "false", 2}, {"false", "false", 2}};
+    for (const auto &test : tests) {
+        ASSERT_EQ(test.data_dir_size,
+                  ignore_broken_disk_test(test.mock_create_dir, test.mock_rw_flag));
+    }
+}
+
 } // namespace replication
 } // namespace dsn
diff --git a/src/replica/test/replica_disk_test_base.h b/src/replica/test/replica_disk_test_base.h
index b5a1d68900..b53be2a6a7 100644
--- a/src/replica/test/replica_disk_test_base.h
+++ b/src/replica/test/replica_disk_test_base.h
@@ -123,6 +123,21 @@ class replica_disk_test_base : public replica_test_base
         return ERR_OK;
     }
 
+    int32_t ignore_broken_disk_test(const std::string &mock_create_directory,
+                                    const std::string &mock_check_rw)
+    {
+        std::vector data_dirs = {"disk1", "disk2", "disk3"};
+        std::vector data_dir_tags = {"tag1", "tag2", "tag3"};
+        auto test_stub = make_unique(); // NOTE(review): template argument missing here
+        fail::cfg("filesystem_create_directory", "return(" + mock_create_directory + ")");
+        fail::cfg("filesystem_check_dir_rw", "return(" + mock_check_rw + ")");
+        fail::cfg("update_disk_stat", "return()");
+        test_stub->initialize_fs_manager(data_dirs, data_dir_tags);
+        int32_t dir_size = test_stub->_fs_manager.get_available_data_dirs().size();
+        test_stub.reset();
+        return dir_size; // number of dirs that survived initialize_fs_manager
+    }
+
 public:
     int empty_dir_nodes_count = 1;
     int dir_nodes_count = 5;
@@ -159,7 +174,7 @@ class replica_disk_test_base : public replica_test_base
             dir_node *node_disk = new dir_node(fmt::format("tag_empty_{}", num),
                                                fmt::format("./tag_empty_{}", num));
             stub->_fs_manager._dir_nodes.emplace_back(node_disk);
-            stub->_options.data_dirs.push_back(node_disk->full_dir);
+            stub->_fs_manager._available_data_dirs.emplace_back(node_disk->full_dir);
             utils::filesystem::create_directory(node_disk->full_dir);
             num--;
         }
@@ -188,7 +203,7 @@ class replica_disk_test_base : public replica_test_base
                                               disk_available_mb,
                                               disk_available_ratio);
 
-            stub->_options.data_dirs.push_back(
+            stub->_fs_manager._available_data_dirs.emplace_back(
                 node_disk->full_dir); // open replica need the options
utils::filesystem::create_directory(node_disk->full_dir);
 
diff --git a/src/utils/filesystem.cpp b/src/utils/filesystem.cpp
index 14c9c5c87a..f4252f90b7 100644
--- a/src/utils/filesystem.cpp
+++ b/src/utils/filesystem.cpp
@@ -37,6 +37,8 @@
 
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -826,6 +828,90 @@ bool verify_file(const std::string &fname,
     return true;
 }
 
+// Create `path` (via the single-argument create_directory) and resolve it into
+// `absolute_path`. On failure returns false and stores the reason in `err_msg`.
+bool create_directory(const std::string &path, std::string &absolute_path, std::string &err_msg)
+{
+    FAIL_POINT_INJECT_F("filesystem_create_directory", [path](string_view str) {
+        // mock a create failure (return false) only when str contains 'false' and path
+        // contains broken_disk_dir
+        std::string broken_disk_dir = "disk1";
+        return str.find("false") == string_view::npos ||
+               path.find(broken_disk_dir) == std::string::npos;
+    });
+
+    if (!create_directory(path)) {
+        err_msg = fmt::format("Fail to create directory {}.", path);
+        return false;
+    }
+    if (!get_absolute_path(path, absolute_path)) {
+        err_msg = fmt::format("Fail to get absolute path from {}.", path);
+        return false;
+    }
+    return true;
+}
+
+// Overwrite the existing file `fname` with the content of `buf`.
+// Returns false if `fname` doesn't exist, can't be opened, or the write fails;
+// the failure is logged.
+bool write_file(const std::string &fname, std::string &buf)
+{
+    if (!file_exists(fname)) {
+        derror_f("file({}) doesn't exist", fname);
+        return false;
+    }
+
+    std::ofstream out(fname.c_str());
+    if (!out.is_open()) {
+        derror_f("open file({}) for write failed", fname);
+        return false;
+    }
+    out << buf;
+    out.close();
+    // close() flushes, so a failed write or flush is visible in the stream state here
+    if (out.fail()) {
+        derror_f("write file({}) failed", fname);
+        return false;
+    }
+    return true;
+}
+
+// Probe whether directory `path` is usable: create a scratch file in it, write a known
+// value, read it back and compare. Once created, the scratch file is removed on return.
+// On failure returns false and stores the reason in `err_msg`.
+bool check_dir_rw(const std::string &path, std::string &err_msg)
+{
+    FAIL_POINT_INJECT_F("filesystem_check_dir_rw", [path](string_view str) {
+        // mock a check failure (return false) only when str contains 'false' and path
+        // contains broken_disk_dir
+        std::string broken_disk_dir = "disk1";
+        return str.find("false") == string_view::npos ||
+               path.find(broken_disk_dir) == std::string::npos;
+    });
+
+    std::string fname = "read_write_test_file";
+    std::string fpath = path_combine(path, fname);
+    if (!create_file(fpath)) {
+        err_msg = fmt::format("Fail to create test file {}.", fpath);
+        return false;
+    }
+
+    auto cleanup = defer([&fpath]() { remove_path(fpath); });
+    std::string value = "test_value";
+    if (!write_file(fpath, value)) {
+        err_msg = fmt::format("Fail to write file {}.", fpath);
+        return false;
+    }
+
+    std::string buf;
+    if (read_file(fpath, buf) != ERR_OK || buf != value) {
+        err_msg = fmt::format("Fail to read file {} or get wrong value({}).", fpath, buf);
+        return false;
+    }
+
+    return true;
+}
+
 } // namespace filesystem
 } // namespace utils
 } // namespace dsn