Skip to content

Commit

Permalink
mds: abort/pause/resume scrubs in multiple mds
Browse files Browse the repository at this point in the history
Limit the scrub abort/pause/resume commands to mds.0. mds.0 then sends
messages to the other MDS ranks, asking them to abort/pause/resume their scrubs.

Signed-off-by: "Yan, Zheng" <[email protected]>
  • Loading branch information
ukernel committed Nov 16, 2020
1 parent ad5471f commit ff10bdb
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 19 deletions.
17 changes: 17 additions & 0 deletions src/mds/MDSRank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2639,6 +2639,12 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub abort") {
if (whoami != 0) {
*css << "Not rank 0";
r = -EXDEV;
goto out;
}

finisher->queue(
new LambdaContext(
[this, on_finish, f](int r) {
Expand All @@ -2655,6 +2661,12 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub pause") {
if (whoami != 0) {
*css << "Not rank 0";
r = -EXDEV;
goto out;
}

finisher->queue(
new LambdaContext(
[this, on_finish, f](int r) {
Expand All @@ -2671,6 +2683,11 @@ void MDSRankDispatcher::handle_asok_command(
}));
return;
} else if (command == "scrub resume") {
if (whoami != 0) {
*css << "Not rank 0";
r = -EXDEV;
goto out;
}
command_scrub_resume(f);
} else if (command == "scrub status") {
command_scrub_status(f);
Expand Down
73 changes: 61 additions & 12 deletions src/mds/ScrubStack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -691,56 +691,83 @@ void ScrubStack::abort_pending_scrubs() {
clear_stack = false;
}

void ScrubStack::send_state_message(int op) {
MDSRank *mds = mdcache->mds;
set<mds_rank_t> up_mds;
mds->get_mds_map()->get_up_mds_set(up_mds);
for (auto& r : up_mds) {
if (r == 0)
continue;
auto m = make_message<MMDSScrub>(op);
mds->send_message_mds(m, r);
}
}

/**
 * Abort all scrubs on this rank.
 *
 * On rank 0 this also records the current scrub epoch as the epoch of the
 * last abort, marks that peers may be aborting, and asks the other ranks to
 * abort via send_state_message(MMDSScrub::OP_ABORT).
 *
 * @param on_finish  Optional completion. May be nullptr: the OP_ABORT
 *                   message handler and handle_mds_failure() call this
 *                   function without a context. When non-null it is either
 *                   queued on control_ctxs (scrub still in a transition
 *                   state) or completed with 0, never both.
 *
 * Caller must hold mds_lock.
 */
void ScrubStack::scrub_abort(Context *on_finish) {
  ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));

  dout(10) << __func__ << ": aborting with " << scrubs_in_progress
           << " scrubs in progress and " << stack_size << " in the"
           << " stack" << dendl;

  if (mdcache->mds->get_nodeid() == 0) {
    scrub_epoch_last_abort = scrub_epoch;
    scrub_any_peer_aborting = true;
    send_state_message(MMDSScrub::OP_ABORT);
  }

  clear_stack = true;
  if (scrub_in_transition_state()) {
    // Cannot finish now; defer the completion (if any) exactly once.
    if (on_finish)
      control_ctxs.push_back(on_finish);
    return;
  }

  abort_pending_scrubs();
  // A paused stack stays paused; anything else drops back to idle.
  if (state != STATE_PAUSED)
    set_state(STATE_IDLE);

  if (on_finish)
    on_finish->complete(0);
}

/**
 * Pause scrubbing on this rank.
 *
 * On rank 0 this also asks the other ranks to pause via
 * send_state_message(MMDSScrub::OP_PAUSE).
 *
 * @param on_finish  Optional completion. May be nullptr: the OP_PAUSE
 *                   message handler calls this function without a context.
 *                   When non-null it is completed with -EINVAL if an abort
 *                   is in progress, queued on control_ctxs if the scrub is
 *                   in a transition state, or completed with 0 otherwise.
 *                   It is handled exactly once.
 *
 * Caller must hold mds_lock.
 */
void ScrubStack::scrub_pause(Context *on_finish) {
  ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));

  dout(10) << __func__ << ": pausing with " << scrubs_in_progress
           << " scrubs in progress and " << stack_size << " in the"
           << " stack" << dendl;

  // NOTE(review): peers are notified before the clear_stack check below, so
  // they are asked to pause even when this rank answers -EINVAL. Confirm
  // that this ordering is intended.
  if (mdcache->mds->get_nodeid() == 0)
    send_state_message(MMDSScrub::OP_PAUSE);

  // abort is in progress
  if (clear_stack) {
    if (on_finish)
      on_finish->complete(-EINVAL);
    return;
  }

  const bool in_transition = scrub_in_transition_state();
  if (in_transition) {
    // Pause takes effect later; defer the completion (if any) exactly once.
    set_state(STATE_PAUSING);
    if (on_finish)
      control_ctxs.push_back(on_finish);
    return;
  }

  set_state(STATE_PAUSED);
  if (on_finish)
    on_finish->complete(0);
}

bool ScrubStack::scrub_resume() {
ceph_assert(ceph_mutex_is_locked_by_me(mdcache->mds->mds_lock));
dout(20) << __func__ << ": state=" << state << dendl;

if (mdcache->mds->get_nodeid() == 0)
send_state_message(MMDSScrub::OP_RESUME);

int r = 0;

if (clear_stack) {
Expand Down Expand Up @@ -925,6 +952,15 @@ void ScrubStack::handle_scrub(const cref_t<MMDSScrub> &m)
}
}
break;
case MMDSScrub::OP_ABORT:
scrub_abort(nullptr);
break;
case MMDSScrub::OP_PAUSE:
scrub_pause(nullptr);
break;
case MMDSScrub::OP_RESUME:
scrub_resume();
break;
default:
derr << " scrub stack unknown scrub operation " << m->get_op() << dendl_impl;
ceph_abort_msg("scrub stack unknown scrub operation");
Expand Down Expand Up @@ -965,7 +1001,8 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)

scrub_epoch = m->get_epoch();

auto ack = make_message<MMDSScrubStats>(scrub_epoch, std::move(scrubbing_tags));
auto ack = make_message<MMDSScrubStats>(scrub_epoch,
std::move(scrubbing_tags), clear_stack);
mdcache->mds->send_message_mds(ack, 0);

if (any_finished)
Expand All @@ -978,13 +1015,14 @@ void ScrubStack::handle_scrub_stats(const cref_t<MMDSScrubStats> &m)
auto& stat = mds_scrub_stats[from];
stat.epoch_acked = m->get_epoch();
stat.scrubbing_tags = m->get_scrubbing_tags();
stat.aborting = m->is_aborting();
}
}
}

void ScrubStack::advance_scrub_status()
{
if (scrubbing_map.empty())
if (!scrub_any_peer_aborting && scrubbing_map.empty())
return;

MDSRank *mds = mdcache->mds;
Expand All @@ -998,16 +1036,22 @@ void ScrubStack::advance_scrub_status()

if (up_max == 0) {
update_scrubbing = true;
scrub_any_peer_aborting = false;
} else if (mds_scrub_stats.size() > (size_t)(up_max)) {
bool any_aborting = false;
bool fully_acked = true;
for (const auto& stat : mds_scrub_stats) {
if (stat.aborting || stat.epoch_acked <= scrub_epoch_last_abort)
any_aborting = true;
if (stat.epoch_acked != scrub_epoch) {
fully_acked = false;
break;
continue;
}
scrubbing_tags.insert(stat.scrubbing_tags.begin(),
stat.scrubbing_tags.end());
}
if (!any_aborting)
scrub_any_peer_aborting = false;
if (fully_acked) {
// handle_scrub_stats() reports scrub is still in-progress if it has
// forwarded any object to other mds since previous epoch. Let's assume,
Expand Down Expand Up @@ -1064,6 +1108,11 @@ void ScrubStack::advance_scrub_status()

void ScrubStack::handle_mds_failure(mds_rank_t mds)
{
if (mds == 0) {
scrub_abort(nullptr);
return;
}

bool kick = false;
for (auto it = remote_scrubs.begin(); it != remote_scrubs.end(); ) {
if (it->second.gather_set.erase(mds) &&
Expand Down
9 changes: 9 additions & 0 deletions src/mds/ScrubStack.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,14 @@ class ScrubStack {

unsigned scrub_epoch = 2;
unsigned scrub_epoch_fully_acked = 0;
unsigned scrub_epoch_last_abort = 2;
// check if any mds is aborting scrub after mds.0 starts
bool scrub_any_peer_aborting = true;

// Scrub status record kept per entry of mds_scrub_stats. The fields are
// filled in handle_scrub_stats() from a received MMDSScrubStats message.
struct scrub_stat_t {
  // epoch reported by the message (m->get_epoch())
  unsigned epoch_acked{0};
  // tags reported by the message (m->get_scrubbing_tags())
  std::set<std::string> scrubbing_tags;
  // abort flag reported by the message (m->is_aborting())
  bool aborting{false};
};
std::vector<scrub_stat_t> mds_scrub_stats;

Expand Down Expand Up @@ -231,6 +235,11 @@ class ScrubStack {
*/
void complete_control_contexts(int r);

/**
* ask peer mds (rank > 0) to abort/pause/resume scrubs
*/
void send_state_message(int op);

/**
* Abort pending scrubs for inodes waiting in the inode stack.
* Completion context is complete with -ECANCELED.
Expand Down
8 changes: 8 additions & 0 deletions src/messages/MMDSScrub.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,19 @@ class MMDSScrub : public MMDSOp {
static constexpr int OP_QUEUEDIR_ACK = -1;
static constexpr int OP_QUEUEINO = 2;
static constexpr int OP_QUEUEINO_ACK = -2;
static constexpr int OP_ABORT = 3;
static constexpr int OP_PAUSE = 4;
static constexpr int OP_RESUME = 5;

// Translate a scrub opcode into its printable name. An opcode that matches
// none of the known OP_* values ends in ceph_abort().
static const char *get_opname(int o) {
  const char *name = nullptr;
  switch (o) {
  case OP_QUEUEDIR:
    name = "queue_dir";
    break;
  case OP_QUEUEDIR_ACK:
    name = "queue_dir_ack";
    break;
  case OP_QUEUEINO:
    name = "queue_ino";
    break;
  case OP_QUEUEINO_ACK:
    name = "queue_ino_ack";
    break;
  case OP_ABORT:
    name = "abort";
    break;
  case OP_PAUSE:
    name = "pause";
    break;
  case OP_RESUME:
    name = "resume";
    break;
  default:
    ceph_abort();
    break;
  }
  return name;
}
Expand Down Expand Up @@ -99,6 +105,8 @@ class MMDSScrub : public MMDSOp {
static constexpr int COMPAT_VERSION = 1;

MMDSScrub() : MMDSOp(MSG_MDS_SCRUB, HEAD_VERSION, COMPAT_VERSION) {}
// Build a message that carries only an opcode; all other fields keep their
// defaults. ScrubStack::send_state_message() constructs messages this way.
MMDSScrub(int o)
: MMDSOp(MSG_MDS_SCRUB, HEAD_VERSION, COMPAT_VERSION), op(o) {}
MMDSScrub(int o, inodeno_t i, fragset_t&& _frags, std::string_view _tag,
inodeno_t _origin=inodeno_t(), bool internal_tag=false,
bool force=false, bool recursive=false, bool repair=false)
Expand Down
19 changes: 12 additions & 7 deletions src/messages/MMDSScrubStats.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@ class MMDSScrubStats : public MMDSOp {
// Write a one-line summary of the message to `o`:
//   mds_scrub_stats(e<epoch>[ [<tags>]][ aborting])
// The tag list is printed only when update_scrubbing is set, and the
// " aborting" marker only when the aborting flag is set.
void print(ostream& o) const override {
  o << "mds_scrub_stats(e" << epoch;
  if (update_scrubbing)
    o << " [" << scrubbing_tags << "]";
  if (aborting)
    o << " aborting";
  o << ")";
}

// Read-only accessors for the message fields.
// Scrub epoch carried by this message.
unsigned get_epoch() const { return epoch; }
// Tag set carried by this message, returned by const reference.
const auto& get_scrubbing_tags() const { return scrubbing_tags; }
// Value of the aborting flag carried by this message.
bool is_aborting() const { return aborting; }
// True only when this message carries a tag update (update_scrubbing) and
// `tag` is absent from the carried tag set.
bool is_finished(const std::string& tag) const {
  if (!update_scrubbing)
    return false;
  return scrubbing_tags.count(tag) == 0;
}
Expand All @@ -42,31 +44,34 @@ class MMDSScrubStats : public MMDSOp {
encode(epoch, payload);
encode(scrubbing_tags, payload);
encode(update_scrubbing, payload);
encode(aborting, payload);
}
// Decode the message fields from `payload`. The read order below matches
// the order in which encode_payload() writes them: epoch, scrubbing_tags,
// update_scrubbing, aborting.
void decode_payload() override {
using ceph::decode;
auto p = payload.cbegin();
decode(epoch, p);
decode(scrubbing_tags, p);
decode(update_scrubbing, p);
// NOTE(review): `aborting` is read unconditionally; no version check is
// visible here. Confirm HEAD_VERSION/COMPAT_VERSION handling for senders
// that do not encode this field.
decode(aborting, p);
}

protected:
// Epoch-only message: no tag update is carried (update_scrubbing and
// aborting keep their in-class defaults of false).
MMDSScrubStats(unsigned e=0) :
  MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
  epoch(e) {}
// Message carrying a tag update; takes ownership of `tags` by move.
// `abrt` sets the aborting flag and defaults to false so two-argument
// callers keep working.
MMDSScrubStats(unsigned e, std::set<std::string>&& tags, bool abrt=false) :
  MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
  epoch(e), scrubbing_tags(std::move(tags)), update_scrubbing(true), aborting(abrt) {}
// Same as above, but copies `tags`.
MMDSScrubStats(unsigned e, const std::set<std::string>& tags, bool abrt=false) :
  MMDSOp(MSG_MDS_SCRUB_STATS, HEAD_VERSION, COMPAT_VERSION),
  epoch(e), scrubbing_tags(tags), update_scrubbing(true), aborting(abrt) {}
~MMDSScrubStats() override {}

private:
unsigned epoch;
std::set<std::string> scrubbing_tags;
bool update_scrubbing = false;
bool aborting = false;

template<class T, typename... Args>
friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);
Expand Down

0 comments on commit ff10bdb

Please sign in to comment.