From b11725b495809f1566bf45010f449f5f2a0a0829 Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Tue, 16 Jan 2024 18:06:21 +0100 Subject: [PATCH] fix(mkdwarfs): don't store inodes with inconsistent fragments --- TODO | 10 ---------- include/dwarfs/inode.h | 2 +- include/dwarfs/inode_fragments.h | 2 ++ src/dwarfs/inode_fragments.cpp | 13 +++++++++++++ src/dwarfs/inode_manager.cpp | 8 +++++++- src/dwarfs/scanner.cpp | 9 ++++++++- test/tool_main_test.cpp | 24 +++++++++++++++++++++++- 7 files changed, 54 insertions(+), 14 deletions(-) diff --git a/TODO b/TODO index 60c0f5a5d..a7b1a0bbe 100644 --- a/TODO +++ b/TODO @@ -1,16 +1,6 @@ - Fragment counts don't necessarily match up in presence of errors. -- It is possible, though unlikely, that memory mapping - fails only of a subset of fragments, in which case the - representation of the file in the target image would - be wrong. I don't know yet what the best solution to - this would be - the data for the other fragments is - potentially already stored. Might be best to just mark - the fragments as invalid and later patch the metadata - to drop all references to these fragments (i.e. just - build a zero-size file after the fact). - - When opening a file again, check that its timestamp, size and potentially checksum did not change from when we first saw it. diff --git a/include/dwarfs/inode.h b/include/dwarfs/inode.h index 3cc2ae45f..bfbf849f0 100644 --- a/include/dwarfs/inode.h +++ b/include/dwarfs/inode.h @@ -65,7 +65,7 @@ class inode : public object { virtual size_t size() const = 0; virtual file const* any() const = 0; virtual files_vector const& all() const = 0; - virtual void + virtual bool append_chunks_to(std::vector& vec) const = 0; virtual inode_fragments& fragments() = 0; virtual void dump(std::ostream& os, inode_options const& options) const = 0; diff --git a/include/dwarfs/inode_fragments.h b/include/dwarfs/inode_fragments.h index 583a3ad88..30aa5322a 100644 --- a/include/dwarfs/inode_fragments.h +++ b/include/dwarfs/inode_fragments.h @@ -52,6 +52,8 @@ class single_inode_fragment { void extend(file_off_t length) { length_ += length; } + bool chunks_are_consistent() const; + private: fragment_category category_; file_off_t length_; diff --git a/src/dwarfs/inode_fragments.cpp b/src/dwarfs/inode_fragments.cpp index ebf04e12c..133a204e5 100644 --- a/src/dwarfs/inode_fragments.cpp +++ b/src/dwarfs/inode_fragments.cpp @@ -19,6 +19,7 @@ * along with dwarfs. If not, see . */ +#include #include #include @@ -45,6 +46,18 @@ void single_inode_fragment::add_chunk(size_t block, size_t offset, chunks_.push_back(std::move(c)); } +bool single_inode_fragment::chunks_are_consistent() const { + if (length_ > 0 && chunks_.empty()) { + return false; + } + + auto total_chunks_len = std::accumulate( + chunks_.begin(), chunks_.end(), file_off_t{0}, + [](auto acc, auto const& c) { return acc + c.get_size(); }); + + return total_chunks_len == length_; +} + std::ostream& inode_fragments::to_stream(std::ostream& os, mapper_function_type const& mapper) const { diff --git a/src/dwarfs/inode_manager.cpp b/src/dwarfs/inode_manager.cpp index 3174a62d3..83ede7d09 100644 --- a/src/dwarfs/inode_manager.cpp +++ b/src/dwarfs/inode_manager.cpp @@ -215,13 +215,19 @@ class inode_ : public inode { files_vector const& all() const override { return files_; } - void append_chunks_to(std::vector& vec) const override { + bool append_chunks_to(std::vector& vec) const override { + for (auto const& frag : fragments_) { + if (!frag.chunks_are_consistent()) { + return false; + } + } for (auto const& frag : fragments_) { auto chks = frag.chunks(); if (!chks.empty()) { vec.insert(vec.end(), chks.begin(), chks.end()); } } + return true; } inode_fragments& fragments() override { return fragments_; } diff --git a/src/dwarfs/scanner.cpp b/src/dwarfs/scanner.cpp index d777181b8..4012437b6 100644 --- a/src/dwarfs/scanner.cpp +++ b/src/dwarfs/scanner.cpp @@ -820,7 +820,14 @@ void scanner_::scan( mv2.chunks().value().reserve(prog.chunk_count); im.for_each_inode_in_order([&](std::shared_ptr const& ino) { DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size(); - ino->append_chunks_to(mv2.chunks().value()); + if (!ino->append_chunks_to(mv2.chunks().value())) { + std::ostringstream oss; + for (auto fp : ino->all()) { + oss << "\n " << fp->path_as_string(); + } + LOG_ERROR << "inconsistent fragments in inode " << ino->num() + << ", the following files will be empty:" << oss.str(); + } }); blockmgr->map_logical_blocks(mv2.chunks().value()); diff --git a/test/tool_main_test.cpp b/test/tool_main_test.cpp index 26b328637..4f5d4a445 100644 --- a/test/tool_main_test.cpp +++ b/test/tool_main_test.cpp @@ -33,6 +33,7 @@ #include +#include #include #include #include @@ -2262,11 +2263,32 @@ TEST_P(map_file_error_test, delayed) { auto t = mkdwarfs_tester::create_empty(); t.add_root_dir(); + t.os->add_local_files(audio_data_dir); auto files = t.add_random_file_tree({.avg_size = 64.0, .dimension = 20, .max_name_len = 8, .with_errors = true}); + { + std::mt19937_64 rng{42}; + + for (auto const& p : fs::recursive_directory_iterator(audio_data_dir)) { + if (p.is_regular_file()) { + auto fp = fs::relative(p.path(), audio_data_dir); + std::string contents; + ASSERT_TRUE(folly::readFile(p.path().string().c_str(), contents)); + files.emplace_back(fp, std::move(contents)); + + if (rng() % 2 == 0) { + t.os->set_map_file_error( + fs::path{"/"} / fp, + std::make_exception_ptr(std::runtime_error("map_file_error")), + rng() % 4); + } + } + } + } + t.os->setenv("DWARFS_DUMP_INODES", "inodes.dump"); // t.iol->use_real_terminal(true); @@ -2317,7 +2339,7 @@ TEST_P(map_file_error_test, delayed) { EXPECT_LE(failed_actual.size(), failed_expected.size()); - EXPECT_EQ(8000, files.size()); + EXPECT_GT(files.size(), 8000); EXPECT_GT(num_non_empty, 4000); // Ensure that files which never had any errors are all present