Skip to content

Commit

Permalink
fix(mkdwarfs): don't store inodes with inconsistent fragments
Browse files Browse the repository at this point in the history
  • Loading branch information
mhx committed Jan 16, 2024
1 parent 6804aa0 commit b11725b
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 14 deletions.
10 changes: 0 additions & 10 deletions TODO
Original file line number Diff line number Diff line change
@@ -1,16 +1,6 @@
- Fragment counts don't necessarily match up in presence
of errors.

- It is possible, though unlikely, that memory mapping
fails only of a subset of fragments, in which case the
representation of the file in the target image would
be wrong. I don't know yet what the best solution to
this would be - the data for the other fragments is
potentially already stored. Might be best to just mark
the fragments as invalid and later patch the metadata
to drop all references to these fragments (i.e. just
build a zero-size file after the fact).

- When opening a file again, check that its timestamp,
size and potentially checksum did not change from
when we first saw it.
Expand Down
2 changes: 1 addition & 1 deletion include/dwarfs/inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class inode : public object {
virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& all() const = 0;
virtual void
virtual bool
append_chunks_to(std::vector<thrift::metadata::chunk>& vec) const = 0;
virtual inode_fragments& fragments() = 0;
virtual void dump(std::ostream& os, inode_options const& options) const = 0;
Expand Down
2 changes: 2 additions & 0 deletions include/dwarfs/inode_fragments.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ class single_inode_fragment {

void extend(file_off_t length) { length_ += length; }

bool chunks_are_consistent() const;

private:
fragment_category category_;
file_off_t length_;
Expand Down
13 changes: 13 additions & 0 deletions src/dwarfs/inode_fragments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/

#include <numeric>
#include <ostream>
#include <sstream>

Expand All @@ -45,6 +46,18 @@ void single_inode_fragment::add_chunk(size_t block, size_t offset,
chunks_.push_back(std::move(c));
}

bool single_inode_fragment::chunks_are_consistent() const {
if (length_ > 0 && chunks_.empty()) {
return false;
}

auto total_chunks_len = std::accumulate(
chunks_.begin(), chunks_.end(), file_off_t{0},
[](auto acc, auto const& c) { return acc + c.get_size(); });

return total_chunks_len == length_;
}

std::ostream&
inode_fragments::to_stream(std::ostream& os,
mapper_function_type const& mapper) const {
Expand Down
8 changes: 7 additions & 1 deletion src/dwarfs/inode_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,13 +215,19 @@ class inode_ : public inode {

files_vector const& all() const override { return files_; }

void append_chunks_to(std::vector<chunk_type>& vec) const override {
bool append_chunks_to(std::vector<chunk_type>& vec) const override {
for (auto const& frag : fragments_) {
if (!frag.chunks_are_consistent()) {
return false;
}
}
for (auto const& frag : fragments_) {
auto chks = frag.chunks();
if (!chks.empty()) {
vec.insert(vec.end(), chks.begin(), chks.end());
}
}
return true;
}

inode_fragments& fragments() override { return fragments_; }
Expand Down
9 changes: 8 additions & 1 deletion src/dwarfs/scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -820,7 +820,14 @@ void scanner_<LoggerPolicy>::scan(
mv2.chunks().value().reserve(prog.chunk_count);
im.for_each_inode_in_order([&](std::shared_ptr<inode> const& ino) {
DWARFS_NOTHROW(mv2.chunk_table()->at(ino->num())) = mv2.chunks()->size();
ino->append_chunks_to(mv2.chunks().value());
if (!ino->append_chunks_to(mv2.chunks().value())) {
std::ostringstream oss;
for (auto fp : ino->all()) {
oss << "\n " << fp->path_as_string();
}
LOG_ERROR << "inconsistent fragments in inode " << ino->num()
<< ", the following files will be empty:" << oss.str();
}
});

blockmgr->map_logical_blocks(mv2.chunks().value());
Expand Down
24 changes: 23 additions & 1 deletion test/tool_main_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

#include <fmt/format.h>

#include <folly/FileUtil.h>
#include <folly/String.h>
#include <folly/container/Enumerate.h>
#include <folly/json.h>
Expand Down Expand Up @@ -2262,11 +2263,32 @@ TEST_P(map_file_error_test, delayed) {

auto t = mkdwarfs_tester::create_empty();
t.add_root_dir();
t.os->add_local_files(audio_data_dir);
auto files = t.add_random_file_tree({.avg_size = 64.0,
.dimension = 20,
.max_name_len = 8,
.with_errors = true});

{
std::mt19937_64 rng{42};

for (auto const& p : fs::recursive_directory_iterator(audio_data_dir)) {
if (p.is_regular_file()) {
auto fp = fs::relative(p.path(), audio_data_dir);
std::string contents;
ASSERT_TRUE(folly::readFile(p.path().string().c_str(), contents));
files.emplace_back(fp, std::move(contents));

if (rng() % 2 == 0) {
t.os->set_map_file_error(
fs::path{"/"} / fp,
std::make_exception_ptr(std::runtime_error("map_file_error")),
rng() % 4);
}
}
}
}

t.os->setenv("DWARFS_DUMP_INODES", "inodes.dump");
// t.iol->use_real_terminal(true);

Expand Down Expand Up @@ -2317,7 +2339,7 @@ TEST_P(map_file_error_test, delayed) {

EXPECT_LE(failed_actual.size(), failed_expected.size());

EXPECT_EQ(8000, files.size());
EXPECT_GT(files.size(), 8000);
EXPECT_GT(num_non_empty, 4000);

// Ensure that files which never had any errors are all present
Expand Down

0 comments on commit b11725b

Please sign in to comment.