Skip to content

Commit

Permalink
mds: save the metadata pool id MDSRank class's private member
Browse files Browse the repository at this point in the history
There is one rare case that when mds daemon received a new mdsmap
and during decoding it, the metadata_pool will be reset to -1, if
other threads try to get it from old mdsmap it will get a invalid
pool id.

This can also help get rid of the mds_lock else where.

Fixes: https://tracker.ceph.com/issues/50389
Signed-off-by: Xiubo Li <[email protected]>

Signed-off-by: Xiubo Li <[email protected]>
  • Loading branch information
lxbsz committed May 12, 2021
1 parent dc7df7c commit 5cc44b6
Show file tree
Hide file tree
Showing 11 changed files with 43 additions and 36 deletions.
2 changes: 1 addition & 1 deletion src/mds/CDir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2193,7 +2193,7 @@ class C_IO_Dir_Commit_Ops : public Context {
vector<string> &&r,
mempool::mds_co::compact_set<mempool::mds_co::string> &&stales) :
dir(d), op_prio(pr) {
metapool = dir->mdcache->mds->mdsmap->get_metadata_pool();
metapool = dir->mdcache->mds->get_metadata_pool();
version = dir->get_version();
is_new = dir->is_new();
to_set.swap(s);
Expand Down
6 changes: 3 additions & 3 deletions src/mds/CInode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ void CInode::store(MDSContext *fin)
m.write_full(bl);

object_t oid = CInode::get_object_name(ino(), frag_t(), ".inode");
object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mdcache->mds->get_metadata_pool());

Context *newfin =
new C_OnFinisher(new C_IO_Inode_Stored(this, get_version(), fin),
Expand Down Expand Up @@ -1224,7 +1224,7 @@ void CInode::fetch(MDSContext *fin)
C_GatherBuilder gather(g_ceph_context, new C_OnFinisher(c, mdcache->mds->finisher));

object_t oid = CInode::get_object_name(ino(), frag_t(), "");
object_locator_t oloc(mdcache->mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mdcache->mds->get_metadata_pool());

// Old on-disk format: inode stored in xattr of a dirfrag
ObjectOperation rd;
Expand Down Expand Up @@ -5188,7 +5188,7 @@ void CInode::scrub_finished() {
int64_t CInode::get_backtrace_pool() const
{
if (is_dir()) {
return mdcache->mds->mdsmap->get_metadata_pool();
return mdcache->mds->get_metadata_pool();
} else {
// Files are required to have an explicit layout that specifies
// a pool
Expand Down
2 changes: 1 addition & 1 deletion src/mds/MDBalancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ int MDBalancer::localize_balancer()

/* we assume that balancer is in the metadata pool */
object_t oid = object_t(mds->mdsmap->get_balancer());
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
ceph_tid_t tid = mds->objecter->read(oid, oloc, 0, 0, CEPH_NOSNAP, &lua_src, 0,
new C_SafeCond(lock, cond, &ack, &r));
dout(15) << "launched non-blocking read tid=" << tid
Expand Down
8 changes: 4 additions & 4 deletions src/mds/MDCache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8648,7 +8648,7 @@ void MDCache::open_remote_dentry(CDentry *dn, bool projected, MDSContext *fin, b
dout(10) << "open_remote_dentry " << *dn << dendl;
CDentry::linkage_t *dnl = projected ? dn->get_projected_linkage() : dn->get_linkage();
inodeno_t ino = dnl->get_remote_ino();
int64_t pool = dnl->get_remote_d_type() == DT_DIR ? mds->mdsmap->get_metadata_pool() : -1;
int64_t pool = dnl->get_remote_d_type() == DT_DIR ? mds->get_metadata_pool() : -1;
open_ino(ino, pool,
new C_MDC_OpenRemoteDentry(this, dn, ino, fin, want_xlocked), true, want_xlocked); // backtrace
}
Expand Down Expand Up @@ -8778,7 +8778,7 @@ void MDCache::_open_ino_backtrace_fetched(inodeno_t ino, bufferlist& bl, int err
return;
}
} else if (err == -CEPHFS_ENOENT) {
int64_t meta_pool = mds->mdsmap->get_metadata_pool();
int64_t meta_pool = mds->get_metadata_pool();
if (info.pool != meta_pool) {
dout(10) << " no object in pool " << info.pool
<< ", retrying pool " << meta_pool << dendl;
Expand Down Expand Up @@ -9023,7 +9023,7 @@ void MDCache::do_open_ino(inodeno_t ino, open_ino_info_t& info, int err)
} else {
ceph_assert(!info.ancestors.empty());
info.checking = mds->get_nodeid();
open_ino(info.ancestors[0].dirino, mds->mdsmap->get_metadata_pool(),
open_ino(info.ancestors[0].dirino, mds->get_metadata_pool(),
new C_MDC_OpenInoParentOpened(this, ino), info.want_replica);
}
}
Expand Down Expand Up @@ -11949,7 +11949,7 @@ void MDCache::_fragment_committed(dirfrag_t basedirfrag, const MDRequestRef& mdr
mds->finisher));

SnapContext nullsnapc;
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
for (const auto& fg : uf.old_frags) {
object_t oid = CInode::get_object_name(basedirfrag.ino, fg, "");
ObjectOperation op;
Expand Down
12 changes: 6 additions & 6 deletions src/mds/MDLog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ void MDLog::create(MDSContext *c)

// Instantiate Journaler and start async write to RADOS
ceph_assert(journaler == NULL);
journaler = new Journaler("mdlog", ino, mds->mdsmap->get_metadata_pool(),
journaler = new Journaler("mdlog", ino, mds->get_metadata_pool(),
CEPH_FS_ONDISK_MAGIC, mds->objecter, logger,
l_mdl_jlat, mds->finisher);
ceph_assert(journaler->is_readonly());
Expand All @@ -171,7 +171,7 @@ void MDLog::create(MDSContext *c)
journaler->write_head(gather.new_sub());

// Async write JournalPointer to RADOS
JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool());
JournalPointer jp(mds->get_nodeid(), mds->get_metadata_pool());
jp.front = ino;
jp.back = 0;
jp.save(mds->objecter, gather.new_sub());
Expand Down Expand Up @@ -966,7 +966,7 @@ void MDLog::_recovery_thread(MDSContext *completion)
// First, read the pointer object.
// If the pointer object is not present, then create it with
// front = default ino and back = null
JournalPointer jp(mds->get_nodeid(), mds->mdsmap->get_metadata_pool());
JournalPointer jp(mds->get_nodeid(), mds->get_metadata_pool());
const int read_result = jp.load(mds->objecter);
if (read_result == -CEPHFS_ENOENT) {
inodeno_t const default_log_ino = MDS_INO_LOG_OFFSET + mds->get_nodeid();
Expand Down Expand Up @@ -1001,7 +1001,7 @@ void MDLog::_recovery_thread(MDSContext *completion)
}
dout(1) << "Erasing journal " << jp.back << dendl;
C_SaferCond erase_waiter;
Journaler back("mdlog", jp.back, mds->mdsmap->get_metadata_pool(),
Journaler back("mdlog", jp.back, mds->get_metadata_pool(),
CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat,
mds->finisher);

Expand Down Expand Up @@ -1041,7 +1041,7 @@ void MDLog::_recovery_thread(MDSContext *completion)

/* Read the header from the front journal */
Journaler *front_journal = new Journaler("mdlog", jp.front,
mds->mdsmap->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter,
mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter,
logger, l_mdl_jlat, mds->finisher);

// Assign to ::journaler so that we can be aborted by ::shutdown while
Expand Down Expand Up @@ -1128,7 +1128,7 @@ void MDLog::_reformat_journal(JournalPointer const &jp_in, Journaler *old_journa

/* Create the new Journaler file */
Journaler *new_journal = new Journaler("mdlog", jp.back,
mds->mdsmap->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher);
mds->get_metadata_pool(), CEPH_FS_ONDISK_MAGIC, mds->objecter, logger, l_mdl_jlat, mds->finisher);
dout(4) << "Writing new journal header " << jp.back << dendl;
file_layout_t new_layout = old_journal->get_layout();
new_journal->set_writeable();
Expand Down
10 changes: 5 additions & 5 deletions src/mds/MDSRank.cc
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,11 @@ MDSRank::MDSRank(
{
hb = g_ceph_context->get_heartbeat_map()->add_worker("MDSRank", pthread_self());

// The metadata pool won't change in the whole life time
// of the fs, with this we can get rid of the mds_lock
// in many places too.
metadata_pool = mdsmap->get_metadata_pool();

purge_queue.update_op_limit(*mdsmap);

objecter->unset_honor_pool_full();
Expand Down Expand Up @@ -883,11 +888,6 @@ class C_MDS_VoidFn : public MDSInternalContext
}
};

int64_t MDSRank::get_metadata_pool()
{
return mdsmap->get_metadata_pool();
}

MDSTableClient *MDSRank::get_table_client(int t)
{
switch (t) {
Expand Down
9 changes: 8 additions & 1 deletion src/mds/MDSRank.h
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,10 @@ class MDSRank {

mds_rank_t get_nodeid() const { return whoami; }
std::string_view get_fs_name() const { return fs_name; }
int64_t get_metadata_pool();
int64_t get_metadata_pool() const
{
return metadata_pool;
}

mono_time get_starttime() const {
return starttime;
Expand Down Expand Up @@ -601,6 +604,10 @@ class MDSRank {
private:
bool send_status = true;

// The metadata pool won't change in the whole life time of the fs,
// with this we can get rid of the mds_lock in many places too.
int64_t metadata_pool = -1;

// "task" string that gets displayed in ceph status
inline static const std::string SCRUB_STATUS_KEY = "scrub status";

Expand Down
4 changes: 2 additions & 2 deletions src/mds/MDSTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ void MDSTable::save(MDSContext *onfinish, version_t v)
// write (async)
SnapContext snapc;
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
mds->objecter->write_full(oid, oloc,
snapc,
bl, ceph::real_clock::now(), 0,
Expand Down Expand Up @@ -159,7 +159,7 @@ void MDSTable::load(MDSContext *onfinish)

C_IO_MT_Load *c = new C_IO_MT_Load(this, onfinish);
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0,
new C_OnFinisher(c, mds->finisher));
}
Expand Down
10 changes: 5 additions & 5 deletions src/mds/OpenFileTable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ void OpenFileTable::_journal_finish(int r, uint64_t log_seq, MDSContext *c,
new C_OnFinisher(new C_IO_OFT_Save(this, log_seq, c),
mds->finisher));
SnapContext snapc;
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
for (auto& [idx, vops] : ops_map) {
object_t oid = get_object_name(idx);
for (auto& op : vops) {
Expand Down Expand Up @@ -345,7 +345,7 @@ void OpenFileTable::commit(MDSContext *c, uint64_t log_seq, int op_prio)
C_GatherBuilder gather(g_ceph_context);

SnapContext snapc;
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());

const unsigned max_write_size = mds->mdcache->max_dir_commit_size;

Expand Down Expand Up @@ -728,7 +728,7 @@ void OpenFileTable::_read_omap_values(const std::string& key, unsigned idx,
{
object_t oid = get_object_name(idx);
dout(10) << __func__ << ": load from '" << oid << ":" << key << "'" << dendl;
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
C_IO_OFT_Load *c = new C_IO_OFT_Load(this, idx, first);
ObjectOperation op;
if (first)
Expand Down Expand Up @@ -867,7 +867,7 @@ void OpenFileTable::_load_finish(int op_r, int header_r, int values_r,
C_GatherBuilder gather(g_ceph_context,
new C_OnFinisher(new C_IO_OFT_Recover(this),
mds->finisher));
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
SnapContext snapc;

for (unsigned omap_idx = 0; omap_idx < loaded_journals.size(); omap_idx++) {
Expand Down Expand Up @@ -1113,7 +1113,7 @@ void OpenFileTable::_prefetch_inodes()

int64_t pool;
if (prefetch_state == DIR_INODES)
pool = mds->mdsmap->get_metadata_pool();
pool = mds->get_metadata_pool();
else if (prefetch_state == FILE_INODES)
pool = mds->mdsmap->get_first_data_pool();
else
Expand Down
6 changes: 3 additions & 3 deletions src/mds/Server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2459,7 +2459,7 @@ void Server::handle_osd_map()
* using osdmap_full_flag(), because we want to know "is the flag set"
* rather than "does the flag apply to us?" */
mds->objecter->with_osdmap([this](const OSDMap& o) {
auto pi = o.get_pg_pool(mds->mdsmap->get_metadata_pool());
auto pi = o.get_pg_pool(mds->get_metadata_pool());
is_full = pi && pi->has_flag(pg_pool_t::FLAG_FULL);
dout(7) << __func__ << ": full = " << is_full << " epoch = "
<< o.get_epoch() << dendl;
Expand Down Expand Up @@ -4035,7 +4035,7 @@ void Server::_lookup_snap_ino(MDRequestRef& mdr)
if (parent_ino) {
diri = mdcache->get_inode(parent_ino);
if (!diri) {
mdcache->open_ino(parent_ino, mds->mdsmap->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr));
mdcache->open_ino(parent_ino, mds->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr));
return;
}

Expand Down Expand Up @@ -4067,7 +4067,7 @@ void Server::_lookup_snap_ino(MDRequestRef& mdr)

respond_to_request(mdr, -CEPHFS_ESTALE);
} else {
mdcache->open_ino(vino.ino, mds->mdsmap->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr), false);
mdcache->open_ino(vino.ino, mds->get_metadata_pool(), new C_MDS_LookupIno2(this, mdr), false);
}
}

Expand Down
10 changes: 5 additions & 5 deletions src/mds/SessionMap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ void SessionMap::_load_finish(
dout(10) << __func__ << ": continue omap load from '"
<< last_key << "'" << dendl;
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
C_IO_SM_Load *c = new C_IO_SM_Load(this, false);
ObjectOperation op;
op.omap_get_vals(last_key, "", g_conf()->mds_sessionmap_keys_per_op,
Expand Down Expand Up @@ -279,7 +279,7 @@ void SessionMap::load(MDSContext *onload)

C_IO_SM_Load *c = new C_IO_SM_Load(this, true);
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());

ObjectOperation op;
op.omap_get_header(&c->header_bl, &c->header_r);
Expand Down Expand Up @@ -316,7 +316,7 @@ void SessionMap::load_legacy()

C_IO_SM_LoadLegacy *c = new C_IO_SM_LoadLegacy(this);
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());

mds->objecter->read_full(oid, oloc, CEPH_NOSNAP, &c->bl, 0,
new C_OnFinisher(c, mds->finisher));
Expand Down Expand Up @@ -388,7 +388,7 @@ void SessionMap::save(MDSContext *onsave, version_t needv)
committing = version;
SnapContext snapc;
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());

ObjectOperation op;

Expand Down Expand Up @@ -885,7 +885,7 @@ void SessionMap::save_if_dirty(const std::set<entity_name_t> &tgt_sessions,

SnapContext snapc;
object_t oid = get_object_name();
object_locator_t oloc(mds->mdsmap->get_metadata_pool());
object_locator_t oloc(mds->get_metadata_pool());
MDSContext *on_safe = gather_bld->new_sub();
mds->objecter->mutate(oid, oloc, op, snapc,
ceph::real_clock::now(), 0,
Expand Down

0 comments on commit 5cc44b6

Please sign in to comment.