From 867f0a249e136aa7caea7c6432229fb0f27f6e2c Mon Sep 17 00:00:00 2001 From: Adam Kupczyk Date: Mon, 25 Oct 2021 13:30:26 +0200 Subject: [PATCH] os/bluestore: Disable compaction when no-column-b is storing allocations to bluefs file During BlueStore umount we store the current allocation state to disk, in the form of a bluefs file. If RocksDB was performing compaction during capture of allocator state, it could cause corruption. The solution is to delete db (stop RocksDB) before state capture. Fixes: https://tracker.ceph.com/issues/52399 Signed-off-by: Adam Kupczyk --- src/os/bluestore/BlueStore.cc | 19 ++++++++++++++++++- src/os/bluestore/BlueStore.h | 1 + 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/os/bluestore/BlueStore.cc b/src/os/bluestore/BlueStore.cc index 4f59ebde508a5..16129c86c3db9 100644 --- a/src/os/bluestore/BlueStore.cc +++ b/src/os/bluestore/BlueStore.cc @@ -6089,7 +6089,12 @@ int BlueStore::_open_db_and_around(bool read_only, bool to_repair) void BlueStore::_close_db_and_around(bool read_only) { - _close_db(read_only); + if (db) { + _close_db_leave_bluefs(); + } + if (bluefs) { + _close_bluefs(read_only); + } _close_fm(); _close_alloc(); _close_bdev(); @@ -6327,6 +6332,13 @@ void BlueStore::_close_db(bool cold_close) } } +void BlueStore::_close_db_leave_bluefs() +{ + ceph_assert(db); + delete db; + db = nullptr; +} + void BlueStore::_dump_alloc_on_failure() { auto dump_interval = @@ -7274,6 +7286,7 @@ int BlueStore::umount() dout(20) << __func__ << " closing" << dendl; } + _close_db_leave_bluefs(); // GBH - Vault the allocation state dout(5) << "NCB::BlueStore::umount->store_allocation_state_on_bluestore() " << dendl; if (was_mounted && fm->is_null_manager()) { @@ -17210,6 +17223,9 @@ const unsigned MAX_EXTENTS_IN_BUFFER = 4 * 1024; // 4K extents = 64KB of data //----------------------------------------------------------------------------------- int BlueStore::store_allocator(Allocator* src_allocator) { + // when storing allocations to file 
we must be sure there is no background compactions + // the easiest way to achieve it is to make sure db is closed + ceph_assert(db == nullptr); utime_t start_time = ceph_clock_now(); int ret = 0; @@ -17989,6 +18005,7 @@ int BlueStore::read_allocation_from_drive_for_bluestore_tool(bool test_store_and } if (test_store_and_restore) { + _close_db_leave_bluefs(); dout(5) << "calling store_allocator(shared_alloc.a)" << dendl; store_allocator(shared_alloc.a); Allocator* alloc2 = create_bitmap_allocator(bdev_size); diff --git a/src/os/bluestore/BlueStore.h b/src/os/bluestore/BlueStore.h index 29ff87d5ff13e..517ec18a1e582 100644 --- a/src/os/bluestore/BlueStore.h +++ b/src/os/bluestore/BlueStore.h @@ -2403,6 +2403,7 @@ class BlueStore : public ObjectStore, bool to_repair_db=false, bool read_only = false); void _close_db(bool read_only); + void _close_db_leave_bluefs(); int _open_fm(KeyValueDB::Transaction t, bool read_only, bool fm_restore = false); void _close_fm(); int _write_out_fm_meta(uint64_t target_size);