Skip to content

Commit

Permalink
fs: obsolete standby_for config options
Browse files Browse the repository at this point in the history
The operator can no longer configure which rank/fscid/name an MDS wants to
follow or stand by for. This was an unfortunately confusing set of config
options, as ultimately the MDSMonitor (by default) would set a standby to follow
any fscid/rank if no standby was available that explicitly followed the failed
rank. It is suggested that operators instead use the `standby_count_wanted`
setting on each fs to ensure that sufficient standbys are available.

A temporary side effect of this commit is that the MDSMonitor no longer assigns
any standby to standby-replay; this will be fixed in the following commits.

Signed-off-by: Patrick Donnelly <[email protected]>
  • Loading branch information
batrick committed Feb 28, 2019
1 parent 61fdf55 commit d463883
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 397 deletions.
5 changes: 0 additions & 5 deletions src/common/legacy_config_opts.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,6 @@ OPTION(mon_allow_pool_delete, OPT_BOOL) // allow pool deletion
OPTION(mon_fake_pool_delete, OPT_BOOL) // fake pool deletion (add _DELETED suffix)
OPTION(mon_globalid_prealloc, OPT_U32) // how many globalids to prealloc
OPTION(mon_osd_report_timeout, OPT_INT) // grace period before declaring unresponsive OSDs dead
OPTION(mon_force_standby_active, OPT_BOOL) // should mons force standby-replay mds to be active
OPTION(mon_warn_on_legacy_crush_tunables, OPT_BOOL) // warn if crush tunables are too old (older than mon_min_crush_required_version)
OPTION(mon_crush_min_required_version, OPT_STR)
OPTION(mon_warn_on_crush_straw_calc_version_zero, OPT_BOOL) // warn if crush straw_calc_version==0
Expand Down Expand Up @@ -475,10 +474,6 @@ OPTION(mds_inject_traceless_reply_probability, OPT_DOUBLE) /* percentage
OPTION(mds_wipe_sessions, OPT_BOOL)
OPTION(mds_wipe_ino_prealloc, OPT_BOOL)
OPTION(mds_skip_ino, OPT_INT)
OPTION(mds_standby_for_name, OPT_STR)
OPTION(mds_standby_for_rank, OPT_INT)
OPTION(mds_standby_for_fscid, OPT_INT)
OPTION(mds_standby_replay, OPT_BOOL)
OPTION(mds_enable_op_tracker, OPT_BOOL) // enable/disable MDS op tracking
OPTION(mds_op_history_size, OPT_U32) // Max number of completed ops to track
OPTION(mds_op_history_duration, OPT_U32) // Oldest completed op to track
Expand Down
21 changes: 0 additions & 21 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1593,11 +1593,6 @@ std::vector<Option> get_global_options() {
.add_service("mon")
.set_description("time before OSDs who do not report to the mons are marked down (seconds)"),

Option("mon_force_standby_active", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.add_service("mon")
.set_description("allow use of MDS daemons in standby-replay as replacements"),

Option("mon_warn_on_msgr2_not_enabled", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.add_service("mon")
Expand Down Expand Up @@ -7725,22 +7720,6 @@ std::vector<Option> get_mds_options() {
.set_default(0)
.set_description(""),

Option("mds_standby_for_name", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_default("")
.set_description("standby for named MDS daemon when not active"),

Option("mds_standby_for_rank", Option::TYPE_INT, Option::LEVEL_BASIC)
.set_default(-1)
.set_description("allow MDS to become a standby:replay daemon"),

Option("mds_standby_for_fscid", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(-1)
.set_description("standby only for the file system with the given fscid"),

Option("mds_standby_replay", Option::TYPE_BOOL, Option::LEVEL_BASIC)
.set_default(false)
.set_description("allow MDS to standby replay for an active MDS"),

Option("mds_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description("track remote operation progression and statistics"),
Expand Down
8 changes: 0 additions & 8 deletions src/mds/Beacon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,6 @@ void Beacon::init(const MDSMap &mdsmap)
std::unique_lock lock(mutex);

_notify_mdsmap(mdsmap);
standby_for_rank = mds_rank_t(g_conf()->mds_standby_for_rank);
standby_for_name = g_conf()->mds_standby_for_name;
standby_for_fscid = fs_cluster_id_t(g_conf()->mds_standby_for_fscid);
standby_replay = g_conf()->mds_standby_replay;

sender = std::thread([this]() {
std::unique_lock<std::mutex> lock(mutex);
Expand Down Expand Up @@ -208,10 +204,6 @@ bool Beacon::_send()
last_seq,
CEPH_FEATURES_SUPPORTED_DEFAULT);

beacon->set_standby_for_rank(standby_for_rank);
beacon->set_standby_for_name(standby_for_name);
beacon->set_standby_for_fscid(standby_for_fscid);
beacon->set_standby_replay(standby_replay);
beacon->set_health(health);
beacon->set_compat(compat);
// piggyback the sys info on beacon msg
Expand Down
4 changes: 0 additions & 4 deletions src/mds/Beacon.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ class Beacon : public Dispatcher
std::string name;
version_t epoch = 0;
CompatSet compat;
mds_rank_t standby_for_rank = MDS_RANK_NONE;
std::string standby_for_name;
fs_cluster_id_t standby_for_fscid = FS_CLUSTER_ID_NONE;
bool standby_replay = false;
MDSMap::DaemonState want_state = MDSMap::STATE_BOOT;

// Internal beacon state
Expand Down
101 changes: 20 additions & 81 deletions src/mds/FSMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -383,8 +383,7 @@ void FSMap::get_health_checks(health_check_map_t *checks) const
std::set<mds_rank_t> stuck_failed;

for (const auto &rank : fs->mds_map.failed) {
const mds_gid_t replacement = find_replacement_for(
{fs->fscid, rank}, {}, g_conf()->mon_force_standby_active);
auto&& replacement = find_replacement_for({fs->fscid, rank}, {});
if (replacement == MDS_GID_NONE) {
stuck_failed.insert(rank);
}
Expand Down Expand Up @@ -598,22 +597,20 @@ void FSMap::decode(bufferlist::const_iterator& p)

// Construct mds_roles, standby_daemons, and remove
// standbys from the MDSMap in the Filesystem.
for (auto &p : migrate_fs->mds_map.mds_info) {
if (p.second.state == MDSMap::STATE_STANDBY_REPLAY) {
// In legacy MDSMap, standby replay daemons don't have
// rank set, but since FSMap they do.
p.second.rank = p.second.standby_for_rank;
}
if (p.second.rank == MDS_RANK_NONE) {
if (p.second.state != MDSMap::STATE_STANDBY) {
for (const auto& [gid, info] : migrate_fs->mds_map.mds_info) {
if (info.state == MDSMap::STATE_STANDBY_REPLAY) {
/* drop any legacy standby-replay daemons */
drop_gids.insert(gid);
} else if (info.rank == MDS_RANK_NONE) {
if (info.state != MDSMap::STATE_STANDBY) {
// Old MDSMaps can have down:dne here, which
// is invalid in an FSMap (#17837)
drop_gids.insert(p.first);
drop_gids.insert(gid);
} else {
insert(p.second); // into standby_daemons
insert(info); // into standby_daemons
}
} else {
mds_roles[p.first] = migrate_fs->fscid;
mds_roles[gid] = migrate_fs->fscid;
}
}
for (const auto &p : standby_daemons) {
Expand Down Expand Up @@ -714,90 +711,32 @@ void Filesystem::print(std::ostream &out) const
mds_map.print(out);
}

mds_gid_t FSMap::find_standby_for(mds_role_t role, std::string_view name) const
mds_gid_t FSMap::find_replacement_for(mds_role_t role, std::string_view name) const
{
mds_gid_t result = MDS_GID_NONE;
auto&& fs = get_filesystem(role.fscid);

// First see if we have a STANDBY_REPLAY
auto fs = get_filesystem(role.fscid);
for (const auto &i : fs->mds_map.mds_info) {
const auto &info = i.second;
for (const auto& [gid, info] : fs->mds_map.mds_info) {
if (info.rank == role.rank && info.state == MDSMap::STATE_STANDBY_REPLAY) {
return info.global_id;
return gid;
}
}

// See if there are any STANDBY daemons available
for (const auto &i : standby_daemons) {
const auto &gid = i.first;
const auto &info = i.second;
ceph_assert(info.state == MDSMap::STATE_STANDBY);
for (const auto& [gid, info] : standby_daemons) {
ceph_assert(info.rank == MDS_RANK_NONE);
ceph_assert(info.state == MDSMap::STATE_STANDBY);

if (info.laggy()) {
continue;
}

// The mds_info_t may or may not tell us exactly which filesystem
// the standby_for_rank refers to: lookup via legacy_client_fscid
mds_role_t target_role = {
info.standby_for_fscid == FS_CLUSTER_ID_NONE ?
legacy_client_fscid : info.standby_for_fscid,
info.standby_for_rank};

if ((target_role.rank == role.rank && target_role.fscid == role.fscid)
|| (name.length() && info.standby_for_name == name)) {
// It's a named standby for *me*, use it.
return gid;
} else if (
info.standby_for_rank < 0 && info.standby_for_name.length() == 0 &&
(info.standby_for_fscid == FS_CLUSTER_ID_NONE ||
info.standby_for_fscid == role.fscid)) {
// It's not a named standby for anyone, use it if we don't find
// a named standby for me later, unless it targets another FSCID.
result = gid;
}
return gid;
}

return result;
}

mds_gid_t FSMap::find_unused_for(mds_role_t role,
bool force_standby_active) const {
for (const auto &i : standby_daemons) {
const auto &gid = i.first;
const auto &info = i.second;
ceph_assert(info.state == MDSMap::STATE_STANDBY);

if (info.laggy() || info.rank >= 0)
continue;

if (info.standby_for_fscid != FS_CLUSTER_ID_NONE &&
info.standby_for_fscid != role.fscid)
continue;
if (info.standby_for_rank != MDS_RANK_NONE &&
info.standby_for_rank != role.rank)
continue;

// To be considered 'unused' a daemon must either not
// be selected for standby-replay or the force_standby_active
// setting must be enabled to use replay daemons anyway.
if (!info.standby_replay || force_standby_active) {
return gid;
}
}
return MDS_GID_NONE;
}

mds_gid_t FSMap::find_replacement_for(mds_role_t role, std::string_view name,
bool force_standby_active) const {
const mds_gid_t standby = find_standby_for(role, name);
if (standby)
return standby;
else
return find_unused_for(role, force_standby_active);
}

void FSMap::sanity() const
{
if (legacy_client_fscid != FS_CLUSTER_ID_NONE) {
Expand Down Expand Up @@ -854,7 +793,7 @@ void FSMap::sanity() const

void FSMap::promote(
mds_gid_t standby_gid,
const Filesystem::ref& filesystem,
Filesystem& filesystem,
mds_rank_t assigned_rank)
{
ceph_assert(gid_exists(standby_gid));
Expand All @@ -864,7 +803,7 @@ void FSMap::promote(
ceph_assert(standby_daemons.at(standby_gid).state == MDSMap::STATE_STANDBY);
}

MDSMap &mds_map = filesystem->mds_map;
MDSMap &mds_map = filesystem.mds_map;

// Insert daemon state to Filesystem
if (!is_standby_replay) {
Expand All @@ -889,7 +828,7 @@ void FSMap::promote(
}
info.rank = assigned_rank;
info.inc = epoch;
mds_roles[standby_gid] = filesystem->fscid;
mds_roles[standby_gid] = filesystem.fscid;

// Update the rank state in Filesystem
mds_map.in.insert(assigned_rank);
Expand Down
35 changes: 13 additions & 22 deletions src/mds/FSMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ class FSMap {
*/
void promote(
mds_gid_t standby_gid,
const Filesystem::ref& filesystem,
Filesystem& filesystem,
mds_rank_t assigned_rank);

/**
Expand Down Expand Up @@ -325,11 +325,9 @@ class FSMap {
* Mutator helper for Filesystem objects: expose a non-const
* Filesystem pointer to `fn` and update epochs appropriately.
*/
void modify_filesystem(
const fs_cluster_id_t fscid,
std::function<void(Filesystem::ref)> fn)
void modify_filesystem(fs_cluster_id_t fscid, auto&& fn)
{
auto fs = filesystems.at(fscid);
auto& fs = filesystems.at(fscid);
fn(fs);
fs->mds_map.epoch = epoch;
}
Expand All @@ -338,20 +336,18 @@ class FSMap {
* Apply a mutation to the mds_info_t structure for a particular
* daemon (identified by GID), and make appropriate updates to epochs.
*/
void modify_daemon(
mds_gid_t who,
std::function<void(MDSMap::mds_info_t *info)> fn)
void modify_daemon(mds_gid_t who, auto&& fn)
{
if (mds_roles.at(who) == FS_CLUSTER_ID_NONE) {
auto &info = standby_daemons.at(who);
fn(&info);
const auto& fscid = mds_roles.at(who);
if (fscid == FS_CLUSTER_ID_NONE) {
auto& info = standby_daemons.at(who);
fn(info);
ceph_assert(info.state == MDSMap::STATE_STANDBY);
standby_epochs[who] = epoch;
} else {
const auto &fs = filesystems[mds_roles.at(who)];
auto &info = fs->mds_map.mds_info.at(who);
fn(&info);

auto& fs = filesystems.at(fscid);
auto& info = fs->mds_map.mds_info.at(who);
fn(info);
fs->mds_map.epoch = epoch;
}
}
Expand Down Expand Up @@ -404,7 +400,7 @@ class FSMap {
void update_export_targets(mds_gid_t who, const std::set<mds_rank_t> &targets)
{
auto fscid = mds_roles.at(who);
modify_filesystem(fscid, [who, &targets](auto fs) {
modify_filesystem(fscid, [who, &targets](auto&& fs) {
fs->mds_map.mds_info.at(who).export_targets = targets;
});
}
Expand Down Expand Up @@ -458,12 +454,7 @@ class FSMap {
return false;
}

mds_gid_t find_standby_for(mds_role_t mds, std::string_view name) const;

mds_gid_t find_unused_for(mds_role_t mds, bool force_standby_active) const;

mds_gid_t find_replacement_for(mds_role_t mds, std::string_view name,
bool force_standby_active) const;
mds_gid_t find_replacement_for(mds_role_t mds, std::string_view name) const;

void get_health(list<pair<health_status_t,std::string> >& summary,
list<pair<health_status_t,std::string> > *detail) const;
Expand Down
Loading

0 comments on commit d463883

Please sign in to comment.