Skip to content

Commit

Permalink
osd/PGBackend::be_scan_list: only call stat, getattrs once per object
Browse files Browse the repository at this point in the history
We call back into be_scan_list repeatedly for the same object in deep
scrub if the object is too large/has too many omap entries to be done
in a single call.  This causes us to wastefully call
ObjectStore::getattrs and stat multiple times.

This has been a long standing, but mostly harmless bug (aside from the
extra metadata traffic).  However, 1a4d3f0, between reef and squid,
switched ScrubMap::object::attrs to a map<string, bufferlist> from a
map<string, bufferptr>.  This should have been harmless, except that
the ObjectStore::getattrs overload for that type uses

      aset[i->first].append(i->second);

rather than

      aset.emplace(i.first.c_str(), i.second);

to populate the map.  Combined with this bug, that means that if we
need 7 calls into be_scan_list to deep scrub an object, we'll end up
with attributes concatenated 7 times each.  This didn't cause visible
problems with squid/main testing since all of the replicas would end
up doing the same thing and they'd still decode ok, but it did become
visible during reef->squid upgrade testing as the older osds sent
maps with the correct contents.

The next commit will fix ObjectStore::getattrs to clear the map.

Fixes: https://tracker.ceph.com/issues/65185
Signed-off-by: Samuel Just <[email protected]>
  • Loading branch information
athanatos committed Apr 19, 2024
1 parent 0ed764e commit 5671e85
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 28 deletions.
75 changes: 47 additions & 28 deletions src/osd/PGBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -615,42 +615,61 @@ int PGBackend::be_scan_list(
ceph_assert(pos.pos < pos.ls.size());
hobject_t& poid = pos.ls[pos.pos];

struct stat st;
int r = store->stat(
ch,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
&st,
true);
if (r == 0) {
ScrubMap::object &o = map.objects[poid];
o.size = st.st_size;
ceph_assert(!o.negative);
store->getattrs(
int r = 0;
ScrubMap::object &o = map.objects[poid];
if (!pos.metadata_done) {
struct stat st;
r = store->stat(
ch,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
o.attrs);
&st,
true);

if (r == 0) {
o.size = st.st_size;
ceph_assert(!o.negative);
r = store->getattrs(
ch,
ghobject_t(
poid, ghobject_t::NO_GEN, get_parent()->whoami_shard().shard),
o.attrs);
}

if (pos.deep) {
r = be_deep_scrub(poid, map, pos, o);
if (r == -ENOENT) {
dout(25) << __func__ << " " << poid << " got " << r
<< ", removing from map" << dendl;
map.objects.erase(poid);
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got " << r
<< ", stat_error" << dendl;
o.stat_error = true;
} else if (r != 0) {
derr << __func__ << " got: " << cpp_strerror(r) << dendl;
ceph_abort();
}

if (r != 0) {
dout(25) << __func__ << " " << poid << " got " << r
<< ", skipping" << dendl;
pos.next_object();
return 0;
}

dout(25) << __func__ << " " << poid << dendl;
} else if (r == -ENOENT) {
dout(25) << __func__ << " " << poid << " got " << r
<< ", skipping" << dendl;
} else if (r == -EIO) {
dout(25) << __func__ << " " << poid << " got " << r
<< ", stat_error" << dendl;
ScrubMap::object &o = map.objects[poid];
o.stat_error = true;
} else {
derr << __func__ << " got: " << cpp_strerror(r) << dendl;
ceph_abort();
pos.metadata_done = true;
}
if (r == -EINPROGRESS) {
return -EINPROGRESS;

if (pos.deep) {
r = be_deep_scrub(poid, map, pos, o);
if (r == -EINPROGRESS) {
return -EINPROGRESS;
} else if (r != 0) {
derr << __func__ << " be_deep_scrub got: " << cpp_strerror(r) << dendl;
ceph_abort();
}
}

pos.next_object();
return 0;
}
3 changes: 3 additions & 0 deletions src/osd/osd_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -6298,6 +6298,7 @@ WRITE_CLASS_ENCODER(ScrubMap)
struct ScrubMapBuilder {
bool deep = false;
std::vector<hobject_t> ls;
bool metadata_done = false;
size_t pos = 0;
int64_t data_pos = 0;
std::string omap_pos;
Expand All @@ -6322,6 +6323,7 @@ struct ScrubMapBuilder {

void next_object() {
++pos;
metadata_done = false;
data_pos = 0;
omap_pos.clear();
omap_keys = 0;
Expand All @@ -6333,6 +6335,7 @@ struct ScrubMapBuilder {
if (pos.pos < pos.ls.size()) {
out << " " << pos.ls[pos.pos];
}
out << " metadata_done " << pos.metadata_done;
if (pos.data_pos < 0) {
out << " byte " << pos.data_pos;
}
Expand Down

0 comments on commit 5671e85

Please sign in to comment.