Skip to content

Commit

Permalink
fix(file_scanner): fix hardlink processing in presence of errors
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Feb 9, 2024
1 parent 9ce4605 commit e16cce2
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 18 deletions.
29 changes: 16 additions & 13 deletions src/dwarfs/file_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,20 +234,23 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
uint64_t size = p->size();
uint64_t start_hash{0};

if (size >= kLargeFileThreshold && !p->is_invalid()) {
try {
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
checksum cs(checksum::algorithm::XXH3_64);
cs.update(mm->addr(), kLargeFileStartHashSize);
cs.finalize(&start_hash);
file_start_hash_.emplace(p, start_hash);
} catch (...) {
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
<< folly::exceptionStr(std::current_exception())
<< ", creating empty file";
++prog_.errors;
p->set_invalid();
if (size >= kLargeFileThreshold) {
if (!p->is_invalid()) {
try {
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
checksum cs(checksum::algorithm::XXH3_64);
cs.update(mm->addr(), kLargeFileStartHashSize);
cs.finalize(&start_hash);
} catch (...) {
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
<< folly::exceptionStr(std::current_exception())
<< ", creating empty file";
++prog_.errors;
p->set_invalid();
}
}

file_start_hash_.emplace(p, start_hash);
}

auto [it, is_new] = unique_size_.emplace(std::make_pair(size, start_hash),
Expand Down
47 changes: 42 additions & 5 deletions test/tool_main_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,13 @@ class mkdwarfs_tester : public tester_common {
return filesystem_v2(*lgr, *os, mm, opt);
}

filesystem_v2 fs_from_file(std::string path) {
filesystem_v2
fs_from_file(std::string path, filesystem_options const& opt = {}) {
auto fsimage = fa->get_file(path);
if (!fsimage) {
throw std::runtime_error("file not found: " + path);
}
return fs_from_data(std::move(fsimage.value()));
return fs_from_data(std::move(fsimage.value()), opt);
}

filesystem_v2 fs_from_stdout(filesystem_options const& opt = {}) {
Expand Down Expand Up @@ -2404,8 +2405,6 @@ class map_file_error_test : public testing::TestWithParam<char const*> {};
TEST_P(map_file_error_test, delayed) {
std::string extra_args{GetParam()};

// TODO: we must also simulate hardlinks here...

auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
Expand All @@ -2414,6 +2413,25 @@ TEST_P(map_file_error_test, delayed) {
.max_name_len = 8,
.with_errors = true});

// Sizes used for the "small" and "large" link entries added below.
static constexpr size_t const kSizeSmall{1 << 10};
static constexpr size_t const kSizeLarge{1 << 20};
// Content generators for those entries. Each one must request the same
// kSize* value that is passed alongside it to t.os->add(); gen_small
// previously asked for kSizeLarge, which did not match the small entries.
auto gen_small = [] { return test::loremipsum(kSizeSmall); };
auto gen_large = [] { return test::loremipsum(kSizeLarge); };
t.os->add("large_link1", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
gen_large);
t.os->add("large_link2", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
gen_large);
t.os->add("small_link1", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
gen_small);
t.os->add("small_link2", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
gen_small);
for (auto const& link :
{"large_link1", "large_link2", "small_link1", "small_link2"}) {
t.os->set_map_file_error(
fs::path{"/"} / link,
std::make_exception_ptr(std::runtime_error("map_file_error")), 0);
}

{
std::mt19937_64 rng{42};

Expand Down Expand Up @@ -2444,9 +2462,28 @@ TEST_P(map_file_error_test, delayed) {

EXPECT_EQ(2, t.run(args)) << t.err();

auto fs = t.fs_from_file("test.dwarfs");
auto fs = t.fs_from_file("test.dwarfs", {.metadata = {.enable_nlink = true}});
// fs.dump(std::cout, 2);

{
// Look up the four link entries that were added to the input tree above.
auto large_link1 = fs.find("/large_link1");
auto large_link2 = fs.find("/large_link2");
auto small_link1 = fs.find("/small_link1");
auto small_link2 = fs.find("/small_link2");

// All four entries must be present in the image even though map_file was
// configured to fail for each of them; ASSERT so the dereferences below are
// never reached with an empty result.
ASSERT_TRUE(large_link1);
ASSERT_TRUE(large_link2);
ASSERT_TRUE(small_link1);
ASSERT_TRUE(small_link2);
// Each link1/link2 pair is expected to resolve to a single shared inode.
EXPECT_EQ(large_link1->inode_num(), large_link2->inode_num());
EXPECT_EQ(small_link1->inode_num(), small_link2->inode_num());
// Because mapping failed for these paths, the test expects both the large
// and the small entry to report a size of zero.
file_stat st;
ASSERT_EQ(0, fs.getattr(*large_link1, &st));
EXPECT_EQ(0, st.size);
ASSERT_EQ(0, fs.getattr(*small_link1, &st));
EXPECT_EQ(0, st.size);
}

std::unordered_map<fs::path, std::string, fs_path_hash> actual_files;
fs.walk([&](auto const& dev) {
auto iv = dev.inode();
Expand Down

0 comments on commit e16cce2

Please sign in to comment.