Skip to content

Commit

Permalink
Merge pull request ceph#15660 from fangyuxiangGL/meta_sync_crash
Browse files Browse the repository at this point in the history
rgw: meta sync thread crash at RGWMetaSyncShardCR

Reviewed-by: Casey Bodley <cbodley@redhat.com>
  • Loading branch information
cbodley authored Jun 19, 2017
2 parents fdf9824 + 45877d3 commit 8557916
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 55 deletions.
29 changes: 13 additions & 16 deletions src/rgw/rgw_data_sync.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1084,8 +1084,8 @@ class RGWDataSyncShardCR : public RGWCoroutine {

set<string> spawned_keys;

RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
string status_oid;


Expand Down Expand Up @@ -1126,7 +1126,6 @@ class RGWDataSyncShardCR : public RGWCoroutine {
delete marker_tracker;
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
if (error_repo) {
error_repo->put();
Expand Down Expand Up @@ -1174,14 +1173,12 @@ class RGWDataSyncShardCR : public RGWCoroutine {
string lock_name = "sync_lock";
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, status_oid),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, status_oid),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
}

int full_sync() {
Expand Down Expand Up @@ -1358,7 +1355,7 @@ class RGWDataSyncShardCR : public RGWCoroutine {
set_status() << "num_spawned() > spawn_window";
yield wait_for_child();
int ret;
while (collect(&ret, lease_stack)) {
while (collect(&ret, lease_stack.get())) {
if (ret < 0) {
ldout(sync_env->cct, 0) << "ERROR: a sync operation returned error" << dendl;
/* we have reported this error */
Expand Down Expand Up @@ -2701,12 +2698,12 @@ int RGWRunBucketSyncCoroutine::operate()
yield {
set_status("acquiring sync lock");
auto store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, status_oid),
"sync_lock",
cct->_conf->rgw_sync_lease_period,
this);
lease_stack = spawn(lease_cr.get(), false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, status_oid),
"sync_lock",
cct->_conf->rgw_sync_lease_period,
this));
lease_stack.reset(spawn(lease_cr.get(), false));
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down
66 changes: 27 additions & 39 deletions src/rgw/rgw_sync.cc
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,8 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {

rgw_meta_sync_info status;
vector<RGWMetadataLogInfo> shards_info;
RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
public:
RGWInitSyncStatusCoroutine(RGWMetaSyncEnv *_sync_env,
const rgw_meta_sync_info &status)
Expand All @@ -605,7 +605,6 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
~RGWInitSyncStatusCoroutine() override {
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
}

Expand All @@ -617,11 +616,10 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, sync_env->status_oid()),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(store->get_zone_params().log_pool, sync_env->status_oid()),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down Expand Up @@ -655,7 +653,7 @@ class RGWInitSyncStatusCoroutine : public RGWCoroutine {
}
}

drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */

yield {
set_status("updating sync status");
Expand Down Expand Up @@ -782,8 +780,8 @@ class RGWFetchAllMetaCR : public RGWCoroutine {

std::unique_ptr<RGWShardedOmapCRManager> entries_index;

RGWContinuousLeaseCR *lease_cr;
RGWCoroutinesStack *lease_stack;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;
bool lost_lock;
bool failed;

Expand All @@ -798,9 +796,6 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
}

~RGWFetchAllMetaCR() override {
if (lease_cr) {
lease_cr->put();
}
}

void append_section_from_set(set<string>& all_sections, const string& name) {
Expand Down Expand Up @@ -836,12 +831,11 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
set_status(string("acquiring lock (") + sync_env->status_oid() + ")");
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados,
sync_env->store,
rgw_raw_obj(sync_env->store->get_zone_params().log_pool, sync_env->status_oid()),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados,
sync_env->store,
rgw_raw_obj(sync_env->store->get_zone_params().log_pool, sync_env->status_oid()),
lock_name, lock_duration, this));
lease_stack = spawn(lease_cr.get(), false);
}
while (!lease_cr->is_locked()) {
if (lease_cr->is_done()) {
Expand Down Expand Up @@ -921,7 +915,7 @@ class RGWFetchAllMetaCR : public RGWCoroutine {
}
}

drain_all_but_stack(lease_stack); /* the lease cr still needs to run */
drain_all_but_stack(lease_stack.get()); /* the lease cr still needs to run */

yield lease_cr->go_down();

Expand Down Expand Up @@ -1316,8 +1310,9 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
boost::asio::coroutine incremental_cr;
boost::asio::coroutine full_cr;

RGWContinuousLeaseCR *lease_cr = nullptr;
RGWCoroutinesStack *lease_stack = nullptr;
boost::intrusive_ptr<RGWContinuousLeaseCR> lease_cr;
boost::intrusive_ptr<RGWCoroutinesStack> lease_stack;

bool lost_lock = false;

bool *reset_backoff;
Expand Down Expand Up @@ -1350,7 +1345,6 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
delete marker_tracker;
if (lease_cr) {
lease_cr->abort();
lease_cr->put();
}
}

Expand Down Expand Up @@ -1442,15 +1436,11 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
yield {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
if (lease_cr) {
lease_cr->put();
}
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset(new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
while (!lease_cr->is_locked()) {
Expand Down Expand Up @@ -1545,8 +1535,7 @@ class RGWMetaSyncShardCR : public RGWCoroutine {

yield lease_cr->go_down();

lease_cr->put();
lease_cr = NULL;
lease_cr.reset();

drain_all();

Expand Down Expand Up @@ -1578,11 +1567,10 @@ class RGWMetaSyncShardCR : public RGWCoroutine {
uint32_t lock_duration = cct->_conf->rgw_sync_lease_period;
string lock_name = "sync_lock";
RGWRados *store = sync_env->store;
lease_cr = new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
lock_name, lock_duration, this);
lease_cr->get();
lease_stack = spawn(lease_cr, false);
lease_cr.reset( new RGWContinuousLeaseCR(sync_env->async_rados, store,
rgw_raw_obj(pool, sync_env->shard_obj_name(shard_id)),
lock_name, lock_duration, this));
lease_stack.reset(spawn(lease_cr.get(), false));
lost_lock = false;
}
while (!lease_cr->is_locked()) {
Expand Down

0 comments on commit 8557916

Please sign in to comment.