osd: avoid two copy ops with the same src cancelling each other
In a cache tier, a head object can have two snaps that share the same clone
object. If that clone has been flushed/evicted from the cache pool, a rollback
request and a snap read request hitting the two snaps at the same time each
trigger a promote of the same clone, producing two copy ops with the same src.
The second copy op cancels the first by calling cancel_copy() and
kick_object_context_blocked(), but the kick requeues the promote request that
owned the first copy op; that promote restarts, issues a new copy op, and
cancels the second one in turn. The two promote requests keep cancelling each
other's copy ops and the OSD ends up in an endless loop.

Fixes: https://tracker.ceph.com/issues/49409

Signed-off-by: YuanXin <[email protected]>
mychoxin authored and myoungwon committed Apr 8, 2021
1 parent 94a0af2 commit db13b9a
Showing 3 changed files with 117 additions and 5 deletions.
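
To make the failure mode easier to follow, below is a small self-contained toy model of the behaviour described in the commit message. It is not Ceph code: start_promote(), kick_on_cancel, and the containers are invented stand-ins for start_copy()/cancel_copy()/kick_object_context_blocked(), and it only sketches why kicking inside the per-op cancel path ping-pongs forever, while dropping that kick lets the later copy op survive.

// Toy model (NOT Ceph code) of the livelock fixed by this commit: two promote
// requests for the same missing clone cancel each other's copy op forever when
// cancelling a copy also "kicks" (requeues) the request that owned it.
#include <iostream>
#include <map>
#include <queue>
#include <string>

struct CopyOp { int owner; };                 // which promote request issued it

std::map<std::string, CopyOp> copy_ops;       // in-flight copy ops, keyed by object
std::queue<int> requeued;                     // promote requests woken by a "kick"

void start_promote(const std::string& oid, int requester, bool kick_on_cancel) {
  auto it = copy_ops.find(oid);
  if (it != copy_ops.end()) {
    int cancelled_owner = it->second.owner;   // the promote that loses its copy op
    copy_ops.erase(it);                       // stands in for cancel_copy()
    if (kick_on_cancel)
      requeued.push(cancelled_owner);         // pre-fix: kick_object_context_blocked()
  }
  copy_ops[oid] = CopyOp{requester};          // issue a fresh copy op for this promote
}

int main() {
  // Pre-fix: requests 1 and 2 keep restarting each other (capped at 10 rounds here).
  start_promote("clone", 1, true);
  start_promote("clone", 2, true);
  int rounds = 0;
  while (!requeued.empty() && rounds < 10) {
    int r = requeued.front(); requeued.pop();
    start_promote("clone", r, true);          // restarted promote cancels the other again
    ++rounds;
  }
  std::cout << "pre-fix:  still ping-ponging after " << rounds << " rounds\n";

  // Post-fix: cancelling a duplicate copy no longer kicks, so the later promote's
  // copy op simply replaces the earlier one; blocked requests are woken only when
  // that surviving copy completes.
  copy_ops.clear();
  requeued = {};
  start_promote("clone", 1, false);
  start_promote("clone", 2, false);
  std::cout << "post-fix: " << copy_ops.size() << " surviving copy op, "
            << requeued.size() << " requeues\n";
  return 0;
}

In the actual change below, the kick is kept only on the bulk teardown path, which is why cancel_copy_ops() is renamed to cancel_and_kick_copy_ops() and now kicks each blocked object context itself.
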
9 changes: 5 additions & 4 deletions src/osd/PrimaryLogPG.cc
@@ -10142,7 +10142,6 @@ void PrimaryLogPG::cancel_copy(CopyOpRef cop, bool requeue,
copy_ops.erase(cop->obc->obs.oi.soid);
cop->obc->stop_block();

- kick_object_context_blocked(cop->obc);
cop->results.should_requeue = requeue;
CopyCallbackResults result(-ECANCELED, &cop->results);
cop->cb->complete(result);
@@ -10153,13 +10152,15 @@ void PrimaryLogPG::cancel_copy(CopyOpRef cop, bool requeue,
cop->obc = ObjectContextRef();
}

- void PrimaryLogPG::cancel_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
+ void PrimaryLogPG::cancel_and_kick_copy_ops(bool requeue, vector<ceph_tid_t> *tids)
{
dout(10) << __func__ << dendl;
map<hobject_t,CopyOpRef>::iterator p = copy_ops.begin();
while (p != copy_ops.end()) {
+ ObjectContextRef obc = p->second->obc;
// requeue this op? can I queue up all of them?
cancel_copy((p++)->second, requeue, tids);
+ kick_object_context_blocked(obc);
}
}

@@ -12563,7 +12564,7 @@ void PrimaryLogPG::on_shutdown()
m_scrubber->unreg_next_scrub();

vector<ceph_tid_t> tids;
- cancel_copy_ops(false, &tids);
+ cancel_and_kick_copy_ops(false, &tids);
cancel_flush_ops(false, &tids);
cancel_proxy_ops(false, &tids);
cancel_manifest_ops(false, &tids);
@@ -12680,7 +12681,7 @@ void PrimaryLogPG::on_change(ObjectStore::Transaction &t)
requeue_ops(waiting_for_readable);

vector<ceph_tid_t> tids;
- cancel_copy_ops(is_primary(), &tids);
+ cancel_and_kick_copy_ops(is_primary(), &tids);
cancel_flush_ops(is_primary(), &tids);
cancel_proxy_ops(is_primary(), &tids);
cancel_manifest_ops(is_primary(), &tids);
2 changes: 1 addition & 1 deletion src/osd/PrimaryLogPG.h
@@ -1329,7 +1329,7 @@ class PrimaryLogPG : public PG, public PGBackend::Listener {
void finish_copyfrom(CopyFromCallback *cb);
void finish_promote(int r, CopyResults *results, ObjectContextRef obc);
void cancel_copy(CopyOpRef cop, bool requeue, std::vector<ceph_tid_t> *tids);
- void cancel_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);
+ void cancel_and_kick_copy_ops(bool requeue, std::vector<ceph_tid_t> *tids);

friend struct C_Copyfrom;

111 changes: 111 additions & 0 deletions src/test/librados/tier_cxx.cc
@@ -1506,6 +1506,117 @@ TEST_F(LibRadosTwoPoolsPP, ListSnap){
ioctx.selfmanaged_snap_remove(my_snaps[0]);
}

// This test case reproduces https://tracker.ceph.com/issues/49409
TEST_F(LibRadosTwoPoolsPP, EvictSnapRollbackReadRace) {
// create object
{
bufferlist bl;
int len = string("hi there").length() * 2;
// append enough chunk data to make sure the second promote
// op arrives before the first promote op has finished
for (int i=0; i<4*1024*1024/len; ++i)
bl.append("hi therehi there");
ObjectWriteOperation op;
op.write_full(bl);
ASSERT_EQ(0, ioctx.operate("foo", &op));
}

// create two snapshots and a clone
vector<uint64_t> my_snaps(2);
ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[1]));
ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
my_snaps));
{
bufferlist bl;
bl.append("ciao!");
ObjectWriteOperation op;
op.write_full(bl);
ASSERT_EQ(0, ioctx.operate("foo", &op));
}

// configure cache
bufferlist inbl;
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
"\", \"tierpool\": \"" + cache_pool_name +
"\", \"force_nonempty\": \"--force-nonempty\" }",
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
"\", \"overlaypool\": \"" + cache_pool_name + "\"}",
inbl, NULL, NULL));
ASSERT_EQ(0, cluster.mon_command(
"{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
"\", \"mode\": \"writeback\"}",
inbl, NULL, NULL));

// wait for maps to settle
cluster.wait_for_latest_osdmap();

// read, trigger a promote on the head
{
bufferlist bl;
ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
ASSERT_EQ('c', bl[0]);
}

// try multiple times to make the race more likely to trigger
int retries = 50;
for (int i=0; i<retries; ++i)
{
{
librados::AioCompletion * completion = cluster.aio_create_completion();
librados::AioCompletion * completion1 = cluster.aio_create_completion();

// send a snap rollback op and a snap read op in parallel to
// trigger two promotes (copies) of the same snap clone object;
// the snap read op is read-ordered, which makes sure the
// op does not wait on objects_blocked_on_snap_promotion
ObjectWriteOperation op;
op.selfmanaged_snap_rollback(my_snaps[0]);
ASSERT_EQ(0, ioctx.aio_operate(
"foo", completion, &op));

ioctx.snap_set_read(my_snaps[1]);
std::map<uint64_t, uint64_t> extents;
bufferlist read_bl;
int rval = -1;
ObjectReadOperation op1;
op1.sparse_read(0, 8, &extents, &read_bl, &rval);
ASSERT_EQ(0, ioctx.aio_operate("foo", completion1, &op1, &read_bl));
ioctx.snap_set_read(librados::SNAP_HEAD);

completion->wait_for_safe();
ASSERT_EQ(0, completion->get_return_value());
completion->release();

completion1->wait_for_safe();
ASSERT_EQ(0, completion1->get_return_value());
completion1->release();
}

// evict foo snap
ioctx.snap_set_read(my_snaps[0]);
{
ObjectReadOperation op;
op.cache_evict();
librados::AioCompletion *completion = cluster.aio_create_completion();
ASSERT_EQ(0, ioctx.aio_operate(
"foo", completion, &op,
librados::OPERATION_IGNORE_CACHE, NULL));
completion->wait_for_safe();
ASSERT_EQ(0, completion->get_return_value());
completion->release();
}
ioctx.snap_set_read(librados::SNAP_HEAD);
}

// cleanup
ioctx.selfmanaged_snap_remove(my_snaps[0]);
ioctx.selfmanaged_snap_remove(my_snaps[1]);
}

TEST_F(LibRadosTwoPoolsPP, TryFlush) {
// configure cache
bufferlist inbl;
