Skip to content

Commit

Permalink
Merge pull request ceph#10294 from xiexingguo/xxg-wip-bluestore-2016-07-13
Browse files Browse the repository at this point in the history

os/bluestore: use BE for gifting and reclaiming from bluefs

Mark's Comments:
This passed "ceph_test_objectstore --gtest_filter=*/2".
This PR did not appear to have a significant impact on performance tests.

Reviewed-by: Mark Nelson <[email protected]>
  • Loading branch information
markhpc authored Jul 19, 2016
2 parents 81c1021 + 4b5e2d6 commit dd4de0f
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 57 deletions.
105 changes: 52 additions & 53 deletions src/os/bluestore/BlueStore.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1018,24 +1018,32 @@ void BlueStore::BufferSpace::read(
void BlueStore::BufferSpace::finish_write(uint64_t seq)
{
std::lock_guard<std::mutex> l(cache->lock);
auto i = writing.begin();
while (i != writing.end()) {
Buffer *b = &*i;
dout(20) << __func__ << " " << *b << dendl;
assert(b->is_writing());
if (b->seq <= seq) {

auto i = writing_map.begin();
while (i != writing_map.end()) {
if (i->first > seq)
break;

auto l = i->second.begin();
while (l != i->second.end()) {
Buffer *b = &*l;
dout(20) << __func__ << " " << *b << dendl;
assert(b->is_writing());

if (b->flags & Buffer::FLAG_NOCACHE) {
++i;
_rm_buffer(b);
i->second.erase(l++);
buffer_map.erase(b->offset);
} else {
b->state = Buffer::STATE_CLEAN;
writing.erase(i++);
cache->_add_buffer(b, 1, nullptr);
b->state = Buffer::STATE_CLEAN;
i->second.erase(l++);
cache->_add_buffer(b, 1, nullptr);
}
} else {
++i;
}

assert(i->second.empty());
writing_map.erase(i++);
}

cache->_audit("finish_write end");
}

Expand Down Expand Up @@ -2333,20 +2341,24 @@ int BlueStore::_balance_bluefs_freespace(vector<bluestore_pextent_t> *extents,
int r = alloc->reserve(gift);
assert(r == 0);

uint64_t eoffset;
uint32_t elength;
r = alloc->allocate(gift, min_alloc_size, 0, &eoffset, &elength);
if (r < 0) {
assert(0 == "allocate failed, wtf");
return r;
}
if (elength < gift) {
alloc->unreserve(gift - elength);
uint64_t hint = 0;
while (gift > 0) {
uint64_t eoffset;
uint32_t elength;
r = alloc->allocate(gift, min_alloc_size, hint, &eoffset, &elength);
if (r < 0) {
assert(0 == "allocate failed, wtf");
return r;
}

bluestore_pextent_t e(eoffset, elength);
dout(1) << __func__ << " gifting " << e << " to bluefs" << dendl;
extents->push_back(e);
gift -= e.length;
hint = e.end();
}
assert(gift == 0); // otherwise there is a reservation leak

bluestore_pextent_t e(eoffset, elength);
dout(1) << __func__ << " gifting " << e << " to bluefs" << dendl;
extents->push_back(e);
ret = 1;
}

Expand All @@ -2360,18 +2372,23 @@ int BlueStore::_balance_bluefs_freespace(vector<bluestore_pextent_t> *extents,
dout(10) << __func__ << " reclaiming " << reclaim
<< " (" << pretty_si_t(reclaim) << ")" << dendl;

uint64_t offset = 0;
uint32_t length = 0;
while (reclaim > 0) {
uint64_t offset = 0;
uint32_t length = 0;

// NOTE: this will block and do IO.
int r = bluefs->reclaim_blocks(bluefs_shared_bdev, reclaim,
// NOTE: this will block and do IO.
int r = bluefs->reclaim_blocks(bluefs_shared_bdev, reclaim,
&offset, &length);
assert(r >= 0);
assert(r >= 0);

bluefs_extents.erase(offset, length);
bluefs_extents.erase(offset, length);

fm->release(offset, length, t);
alloc->release(offset, length);

reclaim -= length;
}

fm->release(offset, length, t);
alloc->release(offset, length);
ret = 1;
}

Expand Down Expand Up @@ -3088,18 +3105,12 @@ int BlueStore::fsck()
it = db->get_iterator(PREFIX_OBJ);
if (it) {
CollectionRef c;
bool expecting_objects = false;
shard_id_t expecting_shard;
int64_t expecting_pool;
uint32_t expecting_hash;
for (it->lower_bound(string()); it->valid(); it->next()) {
ghobject_t oid;
if (is_bnode_key(it->key())) {
if (expecting_objects) {
dout(30) << __func__ << " had bnode but no objects for 0x"
<< std::hex << expecting_hash << std::dec << dendl;
++errors;
}
int r = get_key_bnode(it->key(), &expecting_shard, &expecting_pool,
&expecting_hash);
if (r < 0) {
Expand All @@ -3109,21 +3120,15 @@ int BlueStore::fsck()
}
continue;
}

int r = get_key_object(it->key(), &oid);
if (r < 0) {
dout(30) << __func__ << " bad object key "
<< pretty_binary_string(it->key()) << dendl;
++errors;
continue;
}
if (expecting_objects) {
if (oid.hobj.get_bitwise_key_u32() != expecting_hash) {
dout(30) << __func__ << " had bnode but no objects for 0x"
<< std::hex << expecting_hash << std::dec << dendl;
++errors;
}
expecting_objects = false;
}

if (!c || !c->contains(oid)) {
c = NULL;
for (ceph::unordered_map<coll_t, CollectionRef>::iterator p =
Expand All @@ -3143,12 +3148,6 @@ int BlueStore::fsck()
}
}
}
if (expecting_objects) {
dout(30) << __func__ << " had bnode but no objects for 0x"
<< std::hex << expecting_hash << std::dec << dendl;
++errors;
expecting_objects = false;
}
}

dout(1) << __func__ << " checking for stray omap data" << dendl;
Expand Down
13 changes: 9 additions & 4 deletions src/os/bluestore/BlueStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,19 +199,19 @@ class BlueStore : public ObjectStore,

map<uint64_t,std::unique_ptr<Buffer>> buffer_map;
Cache *cache;
state_list_t writing;
map<uint64_t, state_list_t> writing_map;

BufferSpace(Cache *c) : cache(c) {}
~BufferSpace() {
assert(buffer_map.empty());
assert(writing.empty());
assert(writing_map.empty());
}

void _add_buffer(Buffer *b, int level, Buffer *near) {
cache->_audit("_add_buffer start");
buffer_map[b->offset].reset(b);
if (b->is_writing()) {
writing.push_back(*b);
writing_map[b->seq].push_back(*b);
} else {
cache->_add_buffer(b, level, near);
}
Expand All @@ -223,7 +223,12 @@ class BlueStore : public ObjectStore,
void _rm_buffer(map<uint64_t,std::unique_ptr<Buffer>>::iterator p) {
cache->_audit("_rm_buffer start");
if (p->second->is_writing()) {
writing.erase(writing.iterator_to(*p->second));
uint64_t seq = (*p->second.get()).seq;
auto it = writing_map.find(seq);
assert(it != writing_map.end());
it->second.erase(it->second.iterator_to(*p->second));
if (it->second.empty())
writing_map.erase(it);
} else {
cache->_rm_buffer(p->second.get());
}
Expand Down

0 comments on commit dd4de0f

Please sign in to comment.