Skip to content

Commit

Permalink
os/bluestore: rework vselector calls
Browse files Browse the repository at this point in the history
We can now provide the fnode delta to vselector, which is a bit more
efficient.

Signed-off-by: Igor Fedotov <[email protected]>
  • Loading branch information
ifed01 committed Nov 14, 2023
1 parent 070b690 commit 3e04ac4
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 92 deletions.
67 changes: 35 additions & 32 deletions src/os/bluestore/BlueFS.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1579,11 +1579,9 @@ int BlueFS::_replay(bool noop, bool to_stdout)
}
if (fnode.ino != 1) {
vselector->sub_usage(f->vselector_hint, f->fnode);
}
vselector->add_usage(f->vselector_hint, fnode);
}
f->fnode = fnode;
if (fnode.ino != 1) {
vselector->add_usage(f->vselector_hint, f->fnode);
}

if (fnode.ino > ino_last) {
ino_last = fnode.ino;
Expand Down Expand Up @@ -1825,7 +1823,7 @@ int BlueFS::device_migrate_to_existing(

// write entire file
auto l = _allocate(dev_target, bl.length(), 0,
&file_ref->fnode, 0, false);
&file_ref->fnode, nullptr, 0, false);
if (l < 0) {
derr << __func__ << " unable to allocate len 0x" << std::hex
<< bl.length() << std::dec << " from " << (int)dev_target
Expand Down Expand Up @@ -1965,7 +1963,7 @@ int BlueFS::device_migrate_to_new(

// write entire file
auto l = _allocate(dev_target, bl.length(), 0,
&file_ref->fnode, 0, false);
&file_ref->fnode, nullptr, 0, false);
if (l < 0) {
derr << __func__ << " unable to allocate len 0x" << std::hex
<< bl.length() << std::dec << " from " << (int)dev_target
Expand Down Expand Up @@ -2559,7 +2557,6 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,
auto t0 = mono_clock::now();

File *log_file = log.writer->file.get();
bluefs_fnode_t fnode_tail;
// log.t.seq is always set to current live seq
ceph_assert(log.t.seq == log.seq_live);
// Capturing entire state. Dump anything that has been stored there.
Expand Down Expand Up @@ -2614,7 +2611,8 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,

dout(20) << __func__ << " compacted_meta_need " << compacted_meta_need << dendl;

int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, 0,
bluefs_fnode_t fnode_tail;
int r = _allocate(log_dev, compacted_meta_need, 0, &fnode_tail, nullptr, 0,
permit_dev_fallback);
ceph_assert(r == 0);

Expand All @@ -2625,7 +2623,7 @@ void BlueFS::_rewrite_log_and_layout_sync_LNF_LD(bool permit_dev_fallback,
uint64_t starter_need = _make_initial_transaction(starter_seq, fnode_tail, 0, nullptr);

bluefs_fnode_t fnode_starter(log_file->fnode.ino, 0, mtime);
r = _allocate(log_dev, starter_need, 0, &fnode_starter, 0,
r = _allocate(log_dev, starter_need, 0, &fnode_starter, nullptr, 0,
permit_dev_fallback);
ceph_assert(r == 0);

Expand Down Expand Up @@ -3126,16 +3124,17 @@ void BlueFS::_extend_log(uint64_t amount) {
}
ll.release();
uint64_t allocated_before_extension = log.writer->file->fnode.get_allocated();
vselector->sub_usage(log.writer->file->vselector_hint, log.writer->file->fnode);
amount = round_up_to(amount, super.block_size);
int r = _allocate(
vselector->select_prefer_bdev(log.writer->file->vselector_hint),
amount,
0,
&log.writer->file->fnode);
&log.writer->file->fnode,
[&](const bluefs_extent_t& e) {
vselector->add_usage(log.writer->file->vselector_hint, e);
});
ceph_assert(r == 0);
dout(10) << "extended log by 0x" << std::hex << amount << " bytes " << dendl;
vselector->add_usage(log.writer->file->vselector_hint, log.writer->file->fnode);

bluefs_transaction_t log_extend_transaction;
log_extend_transaction.seq = log.t.seq;
Expand Down Expand Up @@ -3405,7 +3404,9 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)

dout(10) << __func__ << " " << h << " pos 0x" << std::hex << h->pos
<< " 0x" << offset << "~" << length << std::dec
<< " to " << h->file->fnode << dendl;
<< " to " << h->file->fnode
<< " hint " << h->file->vselector_hint
<< dendl;
if (h->file->deleted) {
dout(10) << __func__ << " deleted, no-op" << dendl;
return 0;
Expand All @@ -3426,7 +3427,6 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
ceph_assert(offset <= h->file->fnode.size);

uint64_t allocated = h->file->fnode.get_allocated();
vselector->sub_usage(h->file->vselector_hint, h->file->fnode);
// do not bother to dirty the file if we are overwriting
// previously allocated extents.
if (allocated < offset + length) {
Expand All @@ -3435,25 +3435,26 @@ int BlueFS::_flush_range_F(FileWriter *h, uint64_t offset, uint64_t length)
int r = _allocate(vselector->select_prefer_bdev(h->file->vselector_hint),
offset + length - allocated,
0,
&h->file->fnode);
&h->file->fnode,
[&](const bluefs_extent_t& e) {
vselector->add_usage(h->file->vselector_hint, e);
});
if (r < 0) {
derr << __func__ << " allocated: 0x" << std::hex << allocated
<< " offset: 0x" << offset << " length: 0x" << length << std::dec
<< dendl;
vselector->add_usage(h->file->vselector_hint, h->file->fnode); // undo
ceph_abort_msg("bluefs enospc");
return r;
}
h->file->is_dirty = true;
}
if (h->file->fnode.size < offset + length) {
vselector->add_usage(h->file->vselector_hint, offset + length - h->file->fnode.size);
h->file->fnode.size = offset + length;
h->file->is_dirty = true;
}

dout(20) << __func__ << " file now, unflushed " << h->file->fnode << dendl;
int res = _flush_data(h, offset, length, buffered);
vselector->add_usage(h->file->vselector_hint, h->file->fnode);
logger->tinc(l_bluefs_flush_lat, mono_clock::now() - t0);
return res;
}
Expand Down Expand Up @@ -3710,10 +3711,9 @@ int BlueFS::truncate(FileWriter *h, uint64_t offset)/*_WF_L*/
_flush_bdev(h);

std::lock_guard ll(log.lock);
vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size);
vselector->sub_usage(h->file->vselector_hint, h->file->fnode.size - offset);
h->file->fnode.size = offset;
h->file->is_dirty = true;
vselector->add_usage(h->file->vselector_hint, h->file->fnode.size);
log.t.op_file_update_inc(h->file->fnode);
logger->tinc(l_bluefs_truncate_lat, mono_clock::now() - t0);
return 0;
Expand Down Expand Up @@ -3809,6 +3809,7 @@ const char* BlueFS::get_device_name(unsigned id)
int BlueFS::_allocate(uint8_t id, uint64_t len,
uint64_t alloc_unit,
bluefs_fnode_t* node,
update_fn_t cb,
size_t alloc_attempts,
bool permit_dev_fallback)
{
Expand Down Expand Up @@ -3892,6 +3893,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
len,
alloc_unit,
node,
cb,
alloc_attempts,
permit_dev_fallback);
} else if (permit_dev_fallback && id != BDEV_SLOW && alloc[id + 1]) {
Expand All @@ -3905,6 +3907,7 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
len,
0, // back to default alloc unit
node,
cb,
alloc_attempts,
permit_dev_fallback);
} else {
Expand All @@ -3924,9 +3927,12 @@ int BlueFS::_allocate(uint8_t id, uint64_t len,
}

for (auto& p : extents) {
node->append_extent(bluefs_extent_t(id, p.offset, p.length));
bluefs_extent_t e(id, p.offset, p.length);
node->append_extent(e);
if (cb) {
cb(e);
}
}

return 0;
}

Expand All @@ -3945,12 +3951,13 @@ int BlueFS::preallocate(FileRef f, uint64_t off, uint64_t len)/*_LF*/
if (off + len > allocated) {
uint64_t want = off + len - allocated;

vselector->sub_usage(f->vselector_hint, f->fnode);
int r = _allocate(vselector->select_prefer_bdev(f->vselector_hint),
want,
0,
&f->fnode);
vselector->add_usage(f->vselector_hint, f->fnode);
&f->fnode,
[&](const bluefs_extent_t& e) {
vselector->add_usage(f->vselector_hint, e);
});
if (r < 0)
return r;

Expand Down Expand Up @@ -4007,7 +4014,6 @@ int BlueFS::open_for_write(
_maybe_check_vselector_LNF();
FileRef file;
bool create = false;
bool truncate = false;
mempool::bluefs::vector<bluefs_extent_t> pending_release_extents;
{
std::lock_guard ll(log.lock);
Expand All @@ -4034,10 +4040,12 @@ int BlueFS::open_for_write(
}
file = ceph::make_ref<File>();
file->fnode.ino = ++ino_last;
file->vselector_hint = vselector->get_hint_by_dir(dirname);
nodes.file_map[ino_last] = file;
dir->file_map.emplace_hint(q, string{filename}, file);
++file->refs;
create = true;
vselector->add_usage(file->vselector_hint, file->fnode.size, true); // update file count
logger->set(l_bluefs_num_files, nodes.file_map.size());
} else {
// overwrite existing file?
Expand All @@ -4052,20 +4060,15 @@ int BlueFS::open_for_write(
<< " already exists, truncate + overwrite" << dendl;
vselector->sub_usage(file->vselector_hint, file->fnode);
file->fnode.size = 0;
vselector->add_usage(file->vselector_hint, file->fnode.size, true); // restore file count
pending_release_extents.swap(file->fnode.extents);
truncate = true;

file->fnode.clear_extents();
}
}
ceph_assert(file->fnode.ino > 1);

file->fnode.mtime = ceph_clock_now();
file->vselector_hint = vselector->get_hint_by_dir(dirname);
if (create || truncate) {
vselector->add_usage(file->vselector_hint, file->fnode); // update file count
}

dout(20) << __func__ << " mapping " << dirname << "/" << filename
<< " vsel_hint " << file->vselector_hint
<< dendl;
Expand Down
31 changes: 23 additions & 8 deletions src/os/bluestore/BlueFS.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,22 @@ class BlueFSVolumeSelector {
virtual void* get_hint_for_log() const = 0;
virtual void* get_hint_by_dir(std::string_view dirname) const = 0;

virtual void add_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
virtual void sub_usage(void* file_hint, const bluefs_fnode_t& fnode) = 0;
virtual void add_usage(void* file_hint, uint64_t fsize) = 0;
virtual void sub_usage(void* file_hint, uint64_t fsize) = 0;
// Convenience overload: accounts a whole fnode against `hint`.
// Forwards every extent in fnode.extents to the per-extent add_usage(),
// then forwards fnode.size to the size overload with upd_files = true.
// NOTE(review): call sites annotate upd_files = true as "update file
// count"; the exact accounting is defined by the concrete selector.
void add_usage(void* hint, const bluefs_fnode_t& fnode) {
for (auto& e : fnode.extents) {
add_usage(hint, e);
}
add_usage(hint, fnode.size, true);
}
// Convenience overload: removes a whole fnode's accounting from `hint`.
// Forwards every extent in fnode.extents to the per-extent sub_usage(),
// then forwards fnode.size to the size overload with upd_files = true.
// NOTE(review): presumably the exact inverse of the fnode add_usage()
// overload; confirm against the concrete selector implementations.
void sub_usage(void* hint, const bluefs_fnode_t& fnode) {
for (auto& e : fnode.extents) {
sub_usage(hint, e);
}
sub_usage(hint, fnode.size, true);
}
virtual void add_usage(void* hint, const bluefs_extent_t& extent) = 0;
virtual void sub_usage(void* hint, const bluefs_extent_t& extent) = 0;
virtual void add_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0;
virtual void sub_usage(void* hint, uint64_t fsize, bool upd_files = false) = 0;
virtual uint8_t select_prefer_bdev(void* hint) = 0;
virtual void get_paths(const std::string& base, paths& res) const = 0;
virtual void dump(std::ostream& sout) = 0;
Expand Down Expand Up @@ -439,9 +451,12 @@ class BlueFS {
return bdev[BDEV_SLOW] ? BDEV_SLOW : BDEV_DB;
}
const char* get_device_name(unsigned id);

// Callback type accepted by _allocate(); called with each extent that
// _allocate() appends to the target fnode.
// The extent is passed by const reference: a top-level `const` on a by-value
// parameter is dropped from the function type, so the previous spelling
// copied the extent on every call.
typedef std::function<void(const bluefs_extent_t&)> update_fn_t;

// Allocates `len` bytes on device `bdev` and appends the resulting extents
// to `node`.
//   alloc_unit          - allocation unit to use; 0 selects the default unit.
//   node                - fnode that receives the newly allocated extents.
//   cb                  - optional; when set, called once per appended extent.
//                         It is passed through unchanged when the allocation
//                         is retried or falls back to another device.
//   alloc_attempts      - NOTE(review): looks like a retry counter threaded
//                         through the recursive calls; confirm in BlueFS.cc.
//   permit_dev_fallback - when true, a failed allocation may be retried on
//                         the next device (bdev + 1) if one is available.
// Returns 0 on success; callers treat a negative value as failure.
int _allocate(uint8_t bdev, uint64_t len,
              uint64_t alloc_unit,
              bluefs_fnode_t* node,
              update_fn_t cb = nullptr,
              size_t alloc_attempts = 0,
              bool permit_dev_fallback = true);

Expand Down Expand Up @@ -716,19 +731,19 @@ class OriginalVolumeSelector : public BlueFSVolumeSelector {
void* get_hint_for_log() const override;
void* get_hint_by_dir(std::string_view dirname) const override;

void add_usage(void* hint, const bluefs_fnode_t& fnode) override {
void add_usage(void* hint, const bluefs_extent_t& extent) override {
// do nothing
return;
}
void sub_usage(void* hint, const bluefs_fnode_t& fnode) override {
void sub_usage(void* hint, const bluefs_extent_t& extent) override {
// do nothing
return;
}
void add_usage(void* hint, uint64_t fsize) override {
void add_usage(void*, uint64_t, bool) override {
// do nothing
return;
}
void sub_usage(void* hint, uint64_t fsize) override {
void sub_usage(void*, uint64_t, bool) override {
// do nothing
return;
}
Expand Down
10 changes: 8 additions & 2 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7396,7 +7396,7 @@ int BlueStore::_open_bluefs(bool create, bool read_only)
bluefs->get_block_device_size(BlueFS::BDEV_WAL) * 95 / 100,
bluefs->get_block_device_size(BlueFS::BDEV_DB) * 95 / 100,
bluefs->get_block_device_size(BlueFS::BDEV_SLOW) * 95 / 100,
1024 * 1024 * 1024, //FIXME: set expected l0 size here
rocks_opts.write_buffer_size * rocks_opts.max_write_buffer_number,
rocks_opts.max_bytes_for_level_base,
rocks_opts.max_bytes_for_level_multiplier,
reserved_factor,
Expand Down Expand Up @@ -19266,7 +19266,13 @@ void RocksDBBlueFSVolumeSelector::dump(ostream& sout) {
auto max_x = per_level_per_dev_usage.get_max_x();
auto max_y = per_level_per_dev_usage.get_max_y();

sout << "RocksDBBlueFSVolumeSelector Usage Matrix:" << std::endl;
sout << "RocksDBBlueFSVolumeSelector " << std::endl;
sout << ">>Settings<<"
<< " extra=" << byte_u_t(db_avail4slow)
<< ", l0_size=" << byte_u_t(level0_size)
<< ", l_base=" << byte_u_t(level_base)
<< ", l_multi=" << byte_u_t(level_multiplier)
<< std::endl;
constexpr std::array<const char*, 8> names{ {
"DEV/LEV",
"WAL",
Expand Down
Loading

0 comments on commit 3e04ac4

Please sign in to comment.