Skip to content

Commit

Permalink
mds: fix alternate_name durability
Browse files Browse the repository at this point in the history
This is a collection of fixes to Xiubo's prior work. Namely:

- Add new mds_alternate_name_max option to limit the size of
  alternate_name. Otherwise a Client could trick the MDS into creating
  an alternate_name of any size!

- Clean up how alternate_name is assigned to CDentry. In the general
  case, this should be assigned as part of creating the dentry. We want
  this value to be immutable for the life of the dentry. Even for the
  very special case of rename(2) where the destination dentry already
  exists.  We explicitly check (after discussion with Jeff) that the
  target dentry alternate_name already matches what the rename RPC is
  giving.

- The MDS is now properly journaling the alternate_name.

- The MDS rejoin phase is properly transmitting each dentry's
  alternate_name. I've discovered that this MMDSCacheRejoin message
  actually wasn't versioned which I've raised in a tracker [1]. In the
  mean time, we'll just bump CEPH_MDS_PROTOCOL as usual.

[1] https://tracker.ceph.com/issues/48886

Signed-off-by: Patrick Donnelly <[email protected]>
  • Loading branch information
batrick committed Jan 16, 2021
1 parent 9e25022 commit b91490d
Show file tree
Hide file tree
Showing 18 changed files with 238 additions and 121 deletions.
5 changes: 5 additions & 0 deletions src/common/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7953,6 +7953,11 @@ static std::vector<Option> get_immutable_object_cache_options() {

std::vector<Option> get_mds_options() {
return std::vector<Option>({
Option("mds_alternate_name_max", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
.set_default(8192)
.set_flag(Option::FLAG_RUNTIME)
.set_description("set the maximum length of alternate names for dentries"),

Option("mds_numa_node", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(-1)
.set_flag(Option::FLAG_STARTUP)
Expand Down
18 changes: 9 additions & 9 deletions src/mds/CDentry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ ostream& operator<<(ostream& out, const CDentry& dn)
dn.print_pin_set(out);
}

if (dn.get_alternate_name().size()) {
out << " altname=" << binstrprint(dn.get_alternate_name(), 16);
}

out << " " << &dn;
out << "]";
return out;
Expand Down Expand Up @@ -235,12 +239,6 @@ void CDentry::make_path(filepath& fp, bool projected) const
fp.push_dentry(get_name());
}

void CDentry::set_alternate_name(std::string_view _alternate_name)
{
dout(10) << "setting alternate_name on " << *this << dendl;
alternate_name = _alternate_name;
}

/*
* we only add ourselves to remote_parents when the linkage is
* active (no longer projected). if the passed dnl is projected,
Expand Down Expand Up @@ -317,9 +315,11 @@ CDentry::linkage_t *CDentry::pop_projected_linkage()
linkage.inode = n.inode;
linkage.inode->add_remote_parent(this);
}
} else if (n.inode) {
dir->link_primary_inode(this, n.inode);
n.inode->pop_projected_parent();
} else {
if (n.inode) {
dir->link_primary_inode(this, n.inode);
n.inode->pop_projected_parent();
}
}

ceph_assert(n.inode == linkage.inode);
Expand Down
25 changes: 18 additions & 7 deletions src/mds/CDentry.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
unsigned char get_remote_d_type() const { return remote_d_type; }
std::string get_remote_d_type_string() const;

void set_remote(inodeno_t ino, unsigned char d_type) {
void set_remote(inodeno_t ino, unsigned char d_type) {
remote_ino = ino;
remote_d_type = d_type;
inode = 0;
Expand Down Expand Up @@ -100,22 +100,27 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>


CDentry(std::string_view n, __u32 h,
mempool::mds_co::string alternate_name,
snapid_t f, snapid_t l) :
hash(h),
first(f), last(l),
item_dirty(this),
lock(this, &lock_type),
versionlock(this, &versionlock_type),
name(n)
name(n),
alternate_name(std::move(alternate_name))
{}
CDentry(std::string_view n, __u32 h, inodeno_t ino, unsigned char dt,
CDentry(std::string_view n, __u32 h,
mempool::mds_co::string alternate_name,
inodeno_t ino, unsigned char dt,
snapid_t f, snapid_t l) :
hash(h),
first(f), last(l),
item_dirty(this),
lock(this, &lock_type),
versionlock(this, &versionlock_type),
name(n)
name(n),
alternate_name(std::move(alternate_name))
{
linkage.remote_ino = ino;
linkage.remote_d_type = dt;
Expand Down Expand Up @@ -151,9 +156,15 @@ class CDentry : public MDSCacheObject, public LRUObject, public Counter<CDentry>
const CDir *get_dir() const { return dir; }
CDir *get_dir() { return dir; }
std::string_view get_name() const { return std::string_view(name); }
void set_alternate_name(std::string_view _alternate_name);
std::string_view get_alternate_name() const { return std::string_view(alternate_name); }
void decode_alternate_name(bufferlist::const_iterator &bl) { decode(alternate_name, bl); }
std::string_view get_alternate_name() const {
return std::string_view(alternate_name);
}
void set_alternate_name(mempool::mds_co::string altn) {
alternate_name = std::move(altn);
}
void set_alternate_name(std::string_view altn) {
alternate_name = mempool::mds_co::string(altn);
}

__u32 get_hash() const { return hash; }

Expand Down
45 changes: 23 additions & 22 deletions src/mds/CDir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -345,7 +345,7 @@ CDentry* CDir::add_null_dentry(std::string_view dname,
ceph_assert(lookup_exact_snap(dname, last) == 0);

// create dentry
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), "", first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);

Expand Down Expand Up @@ -382,13 +382,14 @@ CDentry* CDir::add_null_dentry(std::string_view dname,


CDentry* CDir::add_primary_dentry(std::string_view dname, CInode *in,
mempool::mds_co::string alternate_name,
snapid_t first, snapid_t last)
{
// primary
ceph_assert(lookup_exact_snap(dname, last) == 0);

// create dentry
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), std::move(alternate_name), first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);
if (is_auth() || !inode->is_stray()) {
Expand Down Expand Up @@ -431,13 +432,14 @@ CDentry* CDir::add_primary_dentry(std::string_view dname, CInode *in,
}

CDentry* CDir::add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type,
mempool::mds_co::string alternate_name,
snapid_t first, snapid_t last)
{
// foreign
ceph_assert(lookup_exact_snap(dname, last) == 0);

// create dentry
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), ino, d_type, first, last);
CDentry* dn = new CDentry(dname, inode->hash_dentry_name(dname), std::move(alternate_name), ino, d_type, first, last);
if (is_auth())
dn->state_set(CDentry::STATE_AUTH);
mdcache->lru.lru_insert_mid(dn);
Expand Down Expand Up @@ -1769,8 +1771,6 @@ CDentry *CDir::_load_dentry(
mempool::mds_co::string alternate_name;

CDentry::decode_remote(type, ino, d_type, alternate_name, q);
if (alternate_name.length())
dn->set_alternate_name(std::move(alternate_name));

if (stale) {
if (!dn) {
Expand All @@ -1787,14 +1787,15 @@ CDentry *CDir::_load_dentry(
dnl->is_remote() &&
dn->is_dirty() &&
ino == dnl->get_remote_ino() &&
d_type == dnl->get_remote_d_type()) {
d_type == dnl->get_remote_d_type() &&
alternate_name == dn->get_alternate_name()) {
// see comment below
dout(10) << "_fetched had underwater dentry " << *dn << ", marking clean" << dendl;
dn->mark_clean();
}
} else {
// (remote) link
dn = add_remote_dentry(dname, ino, d_type, first, last);
dn = add_remote_dentry(dname, ino, d_type, std::move(alternate_name), first, last);

// link to inode?
CInode *in = mdcache->get_inode(ino); // we may or may not have it.
Expand All @@ -1808,12 +1809,14 @@ CDentry *CDir::_load_dentry(
}
else if (type == 'I' || type == 'i') {
InodeStore inode_data;
mempool::mds_co::string alternate_name;
// inode
// Load inode data before looking up or constructing CInode
if (type == 'i') {
DECODE_START(2, q);
if (struct_v >= 2)
dn->decode_alternate_name(q);
if (struct_v >= 2) {
decode(alternate_name, q);
}
inode_data.decode(q);
DECODE_FINISH(q);
} else {
Expand Down Expand Up @@ -1891,7 +1894,7 @@ CDentry *CDir::_load_dentry(

if (!undef_inode) {
mdcache->add_inode(in); // add
dn = add_primary_dentry(dname, in, first, last); // link
dn = add_primary_dentry(dname, in, std::move(alternate_name), first, last); // link
}
dout(12) << "_fetched got " << *dn << " " << *in << dendl;

Expand All @@ -1904,7 +1907,7 @@ CDentry *CDir::_load_dentry(
//num_new_inodes_loaded++;
} else if (g_conf().get_val<bool>("mds_hack_allow_loading_invalid_metadata")) {
dout(20) << "hack: adding duplicate dentry for " << *in << dendl;
dn = add_primary_dentry(dname, in, first, last);
dn = add_primary_dentry(dname, in, std::move(alternate_name), first, last);
} else {
dout(0) << "_fetched badness: got (but i already had) " << *in
<< " mode " << in->get_inode()->mode
Expand Down Expand Up @@ -2233,7 +2236,6 @@ void CDir::_encode_primary_inode_base(dentry_commit_item &item, bufferlist &dfts

encode(item.oldest_snap, bl);
encode(item.damage_flags, bl);
encode(item.alternate_name, bl);
ENCODE_FINISH(bl);
}

Expand Down Expand Up @@ -2330,6 +2332,7 @@ void CDir::_omap_commit_ops(int r, int op_prio, int64_t metapool, version_t vers
bl.append('i'); // inode

ENCODE_START(2, 1, bl);
encode(item.alternate_name, bl);
_encode_primary_inode_base(item, dfts, bl);
ENCODE_FINISH(bl);
}
Expand Down Expand Up @@ -2450,19 +2453,17 @@ void CDir::_parse_dentry(CDentry *dn, dentry_commit_item &item,

item.first = dn->first;

// Only in very rare case the dn->alternate_name not empty,
// so it won't cost much to copy it here
item.alternate_name = dn->get_alternate_name();

// primary or remote?
if (dn->linkage.is_remote()) {
auto& linkage = dn->linkage;
item.alternate_name = dn->get_alternate_name();
if (linkage.is_remote()) {
item.is_remote = true;
item.ino = dn->linkage.get_remote_ino();
item.d_type = dn->linkage.get_remote_d_type();
item.ino = linkage.get_remote_ino();
item.d_type = linkage.get_remote_d_type();
dout(14) << " dn '" << dn->get_name() << "' remote ino " << item.ino << dendl;
} else if (dn->linkage.is_primary()) {
} else if (linkage.is_primary()) {
// primary link
CInode *in = dn->linkage.get_inode();
CInode *in = linkage.get_inode();
ceph_assert(in);

dout(14) << " dn '" << dn->get_name() << "' inode " << *in << dendl;
Expand Down Expand Up @@ -2491,7 +2492,7 @@ void CDir::_parse_dentry(CDentry *dn, dentry_commit_item &item,
item.oldest_snap = in->oldest_snap;
item.damage_flags = in->damage_flags;
} else {
ceph_assert(!dn->linkage.is_null());
ceph_assert(!linkage.is_null());
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/mds/CDir.h
Original file line number Diff line number Diff line change
Expand Up @@ -370,9 +370,10 @@ class CDir : public MDSCacheObject, public Counter<CDir> {

CDentry* add_null_dentry(std::string_view dname,
snapid_t first=2, snapid_t last=CEPH_NOSNAP);
CDentry* add_primary_dentry(std::string_view dname, CInode *in,
CDentry* add_primary_dentry(std::string_view dname, CInode *in, mempool::mds_co::string alternate_name,
snapid_t first=2, snapid_t last=CEPH_NOSNAP);
CDentry* add_remote_dentry(std::string_view dname, inodeno_t ino, unsigned char d_type,
mempool::mds_co::string alternate_name,
snapid_t first=2, snapid_t last=CEPH_NOSNAP);
void remove_dentry( CDentry *dn ); // delete dentry
void link_remote_inode( CDentry *dn, inodeno_t ino, unsigned char d_type);
Expand Down
4 changes: 2 additions & 2 deletions src/mds/Locker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4252,15 +4252,15 @@ void Locker::issue_client_lease(CDentry *dn, MDRequestRef &mdr, int mask,
lstat.mask = CEPH_LEASE_VALID | mask;
lstat.duration_ms = (uint32_t)(1000 * mdcache->client_lease_durations[pool]);
lstat.seq = ++l->seq;
lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name());
lstat.alternate_name = std::string(dn->alternate_name);
encode_lease(bl, session->info, lstat);
dout(20) << "issue_client_lease seq " << lstat.seq << " dur " << lstat.duration_ms << "ms "
<< " on " << *dn << dendl;
} else {
// null lease
LeaseStat lstat;
lstat.mask = mask;
lstat.alternate_name = mempool::mds_co::string(dn->get_alternate_name());
lstat.alternate_name = std::string(dn->alternate_name);
encode_lease(bl, session->info, lstat);
dout(20) << "issue_client_lease no/null lease on " << *dn << dendl;
}
Expand Down
Loading

0 comments on commit b91490d

Please sign in to comment.