Skip to content

Commit

Permalink
Merge pull request ceph#4016 from ceph/wip-11123
Browse files Browse the repository at this point in the history
osd: fix whiteout handling for delete+create compound ops

Reviewed-by: Samuel Just <[email protected]>
  • Loading branch information
Samuel Just committed Mar 18, 2015
2 parents c93ab86 + 1c20417 commit cce74c7
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 55 deletions.
88 changes: 33 additions & 55 deletions src/osd/ReplicatedPG.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3505,6 +3505,22 @@ static string list_entries(const T& m) {
return s;
}

bool ReplicatedPG::maybe_create_new_object(OpContext *ctx)
{
ObjectState& obs = ctx->new_obs;
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.new_object();
return true;
} else if (obs.oi.is_whiteout()) {
dout(10) << __func__ << " clearing whiteout on " << obs.oi.soid << dendl;
ctx->new_obs.oi.clear_flag(object_info_t::FLAG_WHITEOUT);
--ctx->delta_stats.num_whiteouts;
}
return false;
}

int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
{
int result = 0;
Expand Down Expand Up @@ -4275,13 +4291,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -EOPNOTSUPP;
break;
}
if (!obs.exists) {
if (maybe_create_new_object(ctx)) {
ctx->mod_desc.create();
t->touch(soid);
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.new_object();
}
}
t->set_alloc_hint(soid, op.alloc_hint.expected_object_size,
op.alloc_hint.expected_write_size);
ctx->delta_stats.num_wr++;
Expand Down Expand Up @@ -4365,11 +4378,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
write_update_size_and_usage(ctx->delta_stats, oi, ctx->modified_ranges,
op.extent.offset, op.extent.length, true);
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.set_omap_digest(-1);
}
maybe_create_new_object(ctx);
if (op.extent.offset == 0 && op.extent.length == oi.size)
obs.oi.set_data_digest(osd_op.indata.crc32c(-1));
else
Expand Down Expand Up @@ -4426,11 +4435,7 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
}
t->write(soid, op.extent.offset, op.extent.length, osd_op.indata, op.flags);
}
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.set_omap_digest(-1); // no omap yet
}
maybe_create_new_object(ctx);
obs.oi.set_data_digest(osd_op.indata.crc32c(-1));

interval_set<uint64_t> ch;
Expand Down Expand Up @@ -4500,14 +4505,10 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
// category is no longer implemented.
}
if (result >= 0) {
if (!obs.exists)
if (maybe_create_new_object(ctx)) {
ctx->mod_desc.create();
}
t->touch(soid);
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.new_object();
}
}
}
}
Expand Down Expand Up @@ -4584,11 +4585,8 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
++ctx->num_read;
++ctx->num_write;
{
if (!obs.exists) {
if (maybe_create_new_object(ctx)) {
t->touch(obs.oi.soid);
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.new_object();
}
if (op.clonerange.src_offset + op.clonerange.length > src_obc->obs.oi.size) {
dout(10) << " clonerange source " << osd_op.soid << " "
Expand Down Expand Up @@ -4699,12 +4697,9 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
result = -ENAMETOOLONG;
break;
}
if (!obs.exists) {
if (maybe_create_new_object(ctx)) {
ctx->mod_desc.create();
t->touch(soid);
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.new_object();
}
string aname;
bp.copy(op.xattr.name_len, aname);
Expand Down Expand Up @@ -5073,13 +5068,11 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->mod_desc.mark_unrollbackable();
++ctx->num_write;
{
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.set_data_digest(-1);
if (maybe_create_new_object(ctx)) {
t->touch(soid);
} else {
obs.oi.clear_omap_digest();
}
obs.oi.clear_omap_digest();
t->touch(soid);
map<string, bufferlist> to_set;
try {
::decode(to_set, bp);
Expand Down Expand Up @@ -5111,13 +5104,11 @@ int ReplicatedPG::do_osd_ops(OpContext *ctx, vector<OSDOp>& ops)
ctx->mod_desc.mark_unrollbackable();
++ctx->num_write;
{
if (!obs.exists) {
ctx->delta_stats.num_objects++;
obs.exists = true;
obs.oi.set_data_digest(-1);
if (maybe_create_new_object(ctx)) {
t->touch(soid);
} else {
obs.oi.clear_omap_digest();
}
obs.oi.clear_omap_digest();
t->touch(soid);
t->omap_setheader(soid, osd_op.indata);
ctx->delta_stats.num_wr++;
}
Expand Down Expand Up @@ -5502,10 +5493,7 @@ int ReplicatedPG::_rollback_to(OpContext *ctx, ceph_osd_op& op)
}

// Adjust the cached objectcontext
if (!obs.exists) {
obs.exists = true; //we're about to recreate it
ctx->delta_stats.num_objects++;
}
maybe_create_new_object(ctx);
ctx->delta_stats.num_bytes -= obs.oi.size;
ctx->delta_stats.num_bytes += rollback_to->obs.oi.size;
obs.oi.size = rollback_to->obs.oi.size;
Expand Down Expand Up @@ -5859,16 +5847,6 @@ int ReplicatedPG::prepare_transaction(OpContext *ctx)
return result;
}

// cache: clear whiteout?
if (pool.info.cache_mode != pg_pool_t::CACHEMODE_NONE) {
if (ctx->user_modify &&
ctx->obc->obs.oi.is_whiteout()) {
dout(10) << __func__ << " clearing whiteout on " << soid << dendl;
ctx->new_obs.oi.clear_flag(object_info_t::FLAG_WHITEOUT);
--ctx->delta_stats.num_whiteouts;
}
}

// clone, if necessary
if (soid.snap == CEPH_NOSNAP)
make_writeable(ctx);
Expand Down
3 changes: 3 additions & 0 deletions src/osd/ReplicatedPG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1499,6 +1499,9 @@ class ReplicatedPG : public PG, public PGBackend::Listener {

int _verify_no_head_clones(const hobject_t& soid,
const SnapSet& ss);
// return true if we're creating a local object, false for a
// whiteout or no change.
bool maybe_create_new_object(OpContext *ctx);
int _delete_oid(OpContext *ctx, bool no_whiteout);
int _rollback_to(OpContext *ctx, ceph_osd_op& op);
public:
Expand Down
45 changes: 45 additions & 0 deletions src/test/librados/tier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,51 @@ TEST_F(LibRadosTwoPoolsPP, Whiteout) {
}
}

TEST_F(LibRadosTwoPoolsPP, WhiteoutDeleteCreate) {
// configure cache
bufferlist inbl;
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
"\", \"tierpool\": \"" + cache_pool_name +
"\", \"force_nonempty\": \"--force-nonempty\" }",
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
"\", \"overlaypool\": \"" + cache_pool_name + "\"}",
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
"\", \"mode\": \"writeback\"}",
inbl, NULL, NULL));

// wait for maps to settle
cluster.wait_for_latest_osdmap();

// create an object
{
bufferlist bl;
bl.append("foo");
ASSERT_EQ(0, ioctx.write_full("foo", bl));
}

// do delete + create operation
{
ObjectWriteOperation op;
op.remove();
bufferlist bl;
bl.append("bar");
op.write_full(bl);
ASSERT_EQ(0, ioctx.operate("foo", &op));
}

// verify it still "exists" (w/ new content)
{
bufferlist bl;
ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
ASSERT_EQ('b', bl[0]);
}
}

TEST_F(LibRadosTwoPoolsPP, Evict) {
// create object
{
Expand Down

0 comments on commit cce74c7

Please sign in to comment.