Skip to content

Commit

Permalink
Add a class for measuring the amount of garbage generated during compaction (facebook#8426)
Browse files Browse the repository at this point in the history

Summary:
This is part of an alternative approach to facebook#8316.
Unlike that approach, this one relies on key-values getting processed one by one
during compaction, and does not involve persistence.

Specifically, the patch adds a class `BlobGarbageMeter` that can track the number
and total size of blobs in a (sub)compaction's input and output on a per-blob file
basis. This information can then be used to compute the amount of additional
garbage generated by the compaction for any given blob file by subtracting the
"outflow" from the "inflow."

Note: this patch only adds `BlobGarbageMeter` and associated unit tests. I plan to
hook up this class to the input and output of `CompactionIterator` in a subsequent PR.

Pull Request resolved: facebook#8426

Test Plan: `make check`

Reviewed By: jay-zhuang

Differential Revision: D29242250

Pulled By: ltamasi

fbshipit-source-id: 597e50ad556540e413a50e804ba15bc044d809bb
  • Loading branch information
ltamasi authored and facebook-github-bot committed Jun 22, 2021
1 parent a50da40 commit 065bea1
Show file tree
Hide file tree
Showing 8 changed files with 416 additions and 1 deletion.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -635,6 +635,7 @@ set(SOURCES
db/blob/blob_file_garbage.cc
db/blob/blob_file_meta.cc
db/blob/blob_file_reader.cc
db/blob/blob_garbage_meter.cc
db/blob/blob_log_format.cc
db/blob/blob_log_sequential_reader.cc
db/blob/blob_log_writer.cc
Expand Down Expand Up @@ -1128,6 +1129,7 @@ if(WITH_TESTS)
db/blob/blob_file_cache_test.cc
db/blob/blob_file_garbage_test.cc
db/blob/blob_file_reader_test.cc
db/blob/blob_garbage_meter_test.cc
db/blob/db_blob_basic_test.cc
db/blob/db_blob_compaction_test.cc
db/blob/db_blob_corruption_test.cc
Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,9 @@ blob_file_garbage_test: $(OBJ_DIR)/db/blob/blob_file_garbage_test.o $(TEST_LIBRA
# Rule for the blob_file_reader_test target. Prerequisites: the test's own
# object file plus $(TEST_LIBRARY) and $(LIBRARY). The recipe is $(AM_LINK),
# which is defined elsewhere in the Makefile (presumably the link step).
blob_file_reader_test: $(OBJ_DIR)/db/blob/blob_file_reader_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)

# Rule for the blob_garbage_meter_test target. Prerequisites: the test's own
# object file plus $(TEST_LIBRARY) and $(LIBRARY). The recipe is $(AM_LINK),
# which is defined elsewhere in the Makefile (presumably the link step).
blob_garbage_meter_test: $(OBJ_DIR)/db/blob/blob_garbage_meter_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)

# Rule for the timer_test target. Prerequisites: the test's own object file
# plus $(TEST_LIBRARY) and $(LIBRARY). The recipe is $(AM_LINK), which is
# defined elsewhere in the Makefile (presumably the link step).
timer_test: $(OBJ_DIR)/util/timer_test.o $(TEST_LIBRARY) $(LIBRARY)
$(AM_LINK)

Expand Down
9 changes: 9 additions & 0 deletions TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ cpp_library(
"db/blob/blob_file_garbage.cc",
"db/blob/blob_file_meta.cc",
"db/blob/blob_file_reader.cc",
"db/blob/blob_garbage_meter.cc",
"db/blob/blob_log_format.cc",
"db/blob/blob_log_sequential_reader.cc",
"db/blob/blob_log_writer.cc",
Expand Down Expand Up @@ -456,6 +457,7 @@ cpp_library(
"db/blob/blob_file_garbage.cc",
"db/blob/blob_file_meta.cc",
"db/blob/blob_file_reader.cc",
"db/blob/blob_garbage_meter.cc",
"db/blob/blob_log_format.cc",
"db/blob/blob_log_sequential_reader.cc",
"db/blob/blob_log_writer.cc",
Expand Down Expand Up @@ -948,6 +950,13 @@ ROCKS_TESTS = [
[],
[],
],
[
"blob_garbage_meter_test",
"db/blob/blob_garbage_meter_test.cc",
"parallel",
[],
[],
],
[
"block_based_filter_block_test",
"table/block_based/block_based_filter_block_test.cc",
Expand Down
100 changes: 100 additions & 0 deletions db/blob/blob_garbage_meter.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "db/blob/blob_garbage_meter.h"

#include "db/blob/blob_index.h"
#include "db/blob/blob_log_format.h"
#include "db/dbformat.h"

namespace ROCKSDB_NAMESPACE {

// Accounts for one key-value pair read from the compaction input.
//
// The pair is handed to Parse(); if parsing fails, that status is returned
// unchanged. If Parse() reports a blob file number, the record's byte count
// is added to that file's inflow (an entry in flows_ is created on first use).
// Pairs for which Parse() leaves the file number at kInvalidBlobFileNumber
// are ignored.
//
// Returns Status::OK() on success, otherwise the error produced by Parse().
Status BlobGarbageMeter::ProcessInFlow(const Slice& key, const Slice& value) {
  uint64_t file_number = kInvalidBlobFileNumber;
  uint64_t record_bytes = 0;

  const Status parse_status = Parse(key, value, &file_number, &record_bytes);

  if (parse_status.ok()) {
    // Only pairs that reference a blob file contribute to the inflow.
    if (file_number != kInvalidBlobFileNumber) {
      flows_[file_number].AddInFlow(record_bytes);
    }

    return Status::OK();
  }

  return parse_status;
}

// Accounts for one key-value pair written to the compaction output.
//
// The pair is handed to Parse(); if parsing fails, that status is returned
// unchanged. Outflow is recorded only for blob files that already have an
// entry in flows_, i.e. files that were seen on the input side. Measuring
// additional garbage only requires tracking preexisting files; newly written
// files would have outflow but no inflow, so they are skipped.
//
// Returns Status::OK() on success, otherwise the error produced by Parse().
Status BlobGarbageMeter::ProcessOutFlow(const Slice& key, const Slice& value) {
  uint64_t file_number = kInvalidBlobFileNumber;
  uint64_t record_bytes = 0;

  const Status parse_status = Parse(key, value, &file_number, &record_bytes);
  if (!parse_status.ok()) {
    return parse_status;
  }

  if (file_number != kInvalidBlobFileNumber) {
    // Use find() rather than operator[] so that files without inflow never
    // get an entry.
    const auto flow_it = flows_.find(file_number);
    if (flow_it != flows_.end()) {
      flow_it->second.AddOutFlow(record_bytes);
    }
  }

  return Status::OK();
}

// Extracts the blob file number and the byte count to account for from a
// single key-value pair.
//
// Preconditions (checked with assert): both output pointers are non-null,
// *blob_file_number is kInvalidBlobFileNumber and *bytes is 0 on entry.
//
// Behavior:
//  - If `key` cannot be parsed as an internal key, the parse error is
//    returned.
//  - If the key's type is not kTypeBlobIndex, OK is returned and the outputs
//    are left untouched.
//  - If `value` cannot be decoded as a BlobIndex, the decode error is
//    returned.
//  - If the decoded blob index is inlined or has a TTL, Corruption is
//    returned.
//  - Otherwise *blob_file_number is set to the referenced file number and
//    *bytes to the blob size plus the adjustment computed by
//    BlobLogRecord::CalculateAdjustmentForRecordHeader() for the user key's
//    size, and OK is returned.
Status BlobGarbageMeter::Parse(const Slice& key, const Slice& value,
                               uint64_t* blob_file_number, uint64_t* bytes) {
  assert(blob_file_number);
  assert(*blob_file_number == kInvalidBlobFileNumber);
  assert(bytes);
  assert(*bytes == 0);

  ParsedInternalKey parsed_key;
  constexpr bool log_err_key = false;

  const Status key_status = ParseInternalKey(key, &parsed_key, log_err_key);
  if (!key_status.ok()) {
    return key_status;
  }

  // Anything other than a blob reference is not accounted for.
  const bool is_blob_reference = (parsed_key.type == kTypeBlobIndex);
  if (!is_blob_reference) {
    return Status::OK();
  }

  BlobIndex decoded_index;

  const Status decode_status = decoded_index.DecodeFrom(value);
  if (!decode_status.ok()) {
    return decode_status;
  }

  // Only plain (non-inlined, non-TTL) blob references are accepted here.
  const bool is_plain_reference =
      !decoded_index.IsInlined() && !decoded_index.HasTTL();
  if (!is_plain_reference) {
    return Status::Corruption("Unexpected TTL/inlined blob index");
  }

  *blob_file_number = decoded_index.file_number();

  const auto header_adjustment =
      BlobLogRecord::CalculateAdjustmentForRecordHeader(
          parsed_key.user_key.size());
  *bytes = decoded_index.size() + header_adjustment;

  return Status::OK();
}

} // namespace ROCKSDB_NAMESPACE
102 changes: 102 additions & 0 deletions db/blob/blob_garbage_meter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <cassert>
#include <cstdint>
#include <unordered_map>

#include "db/blob/blob_constants.h"
#include "rocksdb/rocksdb_namespace.h"
#include "rocksdb/status.h"

namespace ROCKSDB_NAMESPACE {

class Slice;

// A class that can be used to compute the amount of additional garbage
// generated by a compaction. It parses the keys and blob references in the
// input and output of a compaction, and aggregates the "inflow" and "outflow"
// on a per-blob file basis. The amount of additional garbage for any given blob
// file can then be computed by subtracting the outflow from the inflow.
// Measures how much additional garbage a compaction produces in each blob
// file. Keys and blob references from the compaction's input and output are
// fed to ProcessInFlow() and ProcessOutFlow() respectively; the meter
// aggregates them into an "inflow" and an "outflow" per blob file. The
// additional garbage for a blob file is its inflow minus its outflow.
class BlobGarbageMeter {
 public:
  // Running tally of the number of blobs and their cumulative size in bytes
  // for one blob file.
  class BlobStats {
   public:
    // Records a single blob of `bytes` bytes.
    void Add(uint64_t bytes) { Add(uint64_t{1}, bytes); }

    // Records `count` blobs totalling `bytes` bytes.
    void Add(uint64_t count, uint64_t bytes) {
      count_ += count;
      bytes_ += bytes;
    }

    uint64_t GetCount() const { return count_; }
    uint64_t GetBytes() const { return bytes_; }

   private:
    uint64_t count_ = 0;
    uint64_t bytes_ = 0;
  };

  // Pairs the inflow and outflow tallies of one blob file and derives the
  // amount of additional garbage from them.
  class BlobInOutFlow {
   public:
    // Adds one blob of `bytes` bytes to the inflow.
    void AddInFlow(uint64_t bytes) {
      in_flow_.Add(bytes);
      assert(IsValid());
    }

    // Adds one blob of `bytes` bytes to the outflow.
    void AddOutFlow(uint64_t bytes) {
      out_flow_.Add(bytes);
      assert(IsValid());
    }

    const BlobStats& GetInFlow() const { return in_flow_; }
    const BlobStats& GetOutFlow() const { return out_flow_; }

    // True when the outflow exceeds the inflow in neither count nor bytes.
    bool IsValid() const {
      const bool count_exceeded = out_flow_.GetCount() > in_flow_.GetCount();
      const bool bytes_exceeded = out_flow_.GetBytes() > in_flow_.GetBytes();

      return !(count_exceeded || bytes_exceeded);
    }

    // True when fewer blobs flowed out than flowed in.
    bool HasGarbage() const {
      assert(IsValid());
      return out_flow_.GetCount() < in_flow_.GetCount();
    }

    // Number of blobs that flowed in but not out. Requires HasGarbage().
    uint64_t GetGarbageCount() const {
      assert(IsValid());
      assert(HasGarbage());

      const uint64_t blobs_in = in_flow_.GetCount();
      const uint64_t blobs_out = out_flow_.GetCount();
      return blobs_in - blobs_out;
    }

    // Number of bytes that flowed in but not out. Requires HasGarbage().
    uint64_t GetGarbageBytes() const {
      assert(IsValid());
      assert(HasGarbage());

      const uint64_t bytes_in = in_flow_.GetBytes();
      const uint64_t bytes_out = out_flow_.GetBytes();
      return bytes_in - bytes_out;
    }

   private:
    BlobStats in_flow_;
    BlobStats out_flow_;
  };

  // Account for one key-value pair from the compaction input / output.
  Status ProcessInFlow(const Slice& key, const Slice& value);
  Status ProcessOutFlow(const Slice& key, const Slice& value);

  // Per-blob-file flow records, keyed by blob file number.
  const std::unordered_map<uint64_t, BlobInOutFlow>& flows() const {
    return flows_;
  }

 private:
  // Shared parsing helper for ProcessInFlow() and ProcessOutFlow(); writes
  // its results through the two output pointers.
  static Status Parse(const Slice& key, const Slice& value,
                      uint64_t* blob_file_number, uint64_t* bytes);

  std::unordered_map<uint64_t, BlobInOutFlow> flows_;
};

} // namespace ROCKSDB_NAMESPACE
Loading

0 comments on commit 065bea1

Please sign in to comment.