Skip to content

Commit

Permalink
mds: detach mdsdir inode from hierarchy
Browse files Browse the repository at this point in the history
Attaching mdsdir to the root hierarchy means that random mds X
can't push replicas of everything needed for mds Y to reach a
stray item, because Y may not have the /.ceph/mds# dentries and
inode.  So detach myin from the hierarchy.

This simplifies startup/mkfs somewhat because non-root mds's
don't have to traverse through /.ceph.

We can still make it reachable via /.ceph/mds# with a remote
dentry (although we haven't done that yet).

MDS stopping is currently broken, since we can't export the stray
dir any more (its auth mds is fixed).
  • Loading branch information
liewegas committed Mar 11, 2010
1 parent 0e44c85 commit 6c88cf3
Show file tree
Hide file tree
Showing 15 changed files with 149 additions and 158 deletions.
3 changes: 3 additions & 0 deletions src/TODO
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ bugs
- rm -r failure (on kernel tree)
- dbench 1, restart mds (may take a few times), dbench will error out.

- multi-mds: the stray dir should be its own root/base (with /.ceph/mds$n/stray a remote dentry?)
...otherwise mds X can't always push a stray replica to Y and have it fully linked into the hierarchical cache

- multi-mds, dbench 10, null dn is migrated, linked on new node, but remains null on old mds. a little later we crash when we unlink due to the discrepancy.
mds/MDCache.cc: In function 'void MDCache::handle_dentry_unlink(MDentryUnlink*)':
mds/MDCache.cc:8057: FAILED assert(dnl->is_remote())
Expand Down
2 changes: 1 addition & 1 deletion src/mds/CDentry.cc
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ void CDentry::make_path_string(string& s)
void CDentry::make_path(filepath& fp)
{
assert(dir);
if (dir->inode->is_root())
if (dir->inode->is_base())
fp = filepath(dir->inode->ino()); // base case
else if (dir->inode->get_parent_dn())
dir->inode->get_parent_dn()->make_path(fp); // recurse
Expand Down
4 changes: 2 additions & 2 deletions src/mds/CDir.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ boost::pool<> CDir::pool(sizeof(CDir));

ostream& operator<<(ostream& out, CDir& dir)
{
filepath path;
dir.get_inode()->make_path(path);
string path;
dir.get_inode()->make_path_string(path);
out << "[dir " << dir.dirfrag() << " " << path << "/"
<< " [" << dir.first << ",head]";
if (dir.is_auth()) {
Expand Down
11 changes: 8 additions & 3 deletions src/mds/CInode.cc
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,11 @@ void CInode::make_path_string(string& s, bool force, CDentry *use_parent)
else if (is_root()) {
s = ""; // root
}
else if (is_mdsdir()) {
char t[30];
sprintf(t, "~mds%d", (int)ino() - MDS_INO_MDSDIR_OFFSET);
s = t;
}
else {
char n[20];
snprintf(n, sizeof(n), "#%llx", (unsigned long long)(ino()));
Expand Down Expand Up @@ -514,7 +519,7 @@ void CInode::make_anchor_trace(vector<Anchor>& trace)
if (get_projected_parent_dn())
get_projected_parent_dn()->make_anchor_trace(trace, this);
else
assert(is_root() || is_stray());
assert(is_base());
}

void CInode::name_stray_dentry(string& dname)
Expand Down Expand Up @@ -585,7 +590,7 @@ version_t CInode::pre_dirty()
pv = get_projected_parent_dn()->pre_dirty(get_projected_version());
dout(10) << "pre_dirty " << pv << " (current v " << inode.version << ")" << dendl;
} else {
assert(is_root());
assert(is_base());
pv = get_projected_version() + 1;
}
return pv;
Expand Down Expand Up @@ -659,7 +664,7 @@ struct C_Inode_Stored : public Context {
void CInode::store(Context *fin)
{
dout(10) << "store " << get_version() << dendl;
assert(is_root());
assert(is_base());

bufferlist bl;
nstring magic = CEPH_FS_ONDISK_MAGIC;
Expand Down
2 changes: 2 additions & 0 deletions src/mds/CInode.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ class CInode : public MDSCacheObject {

bool is_root() { return inode.ino == MDS_INO_ROOT; }
bool is_stray() { return MDS_INO_IS_STRAY(inode.ino); }
bool is_mdsdir() { return MDS_INO_IS_MDSDIR(inode.ino); }
bool is_base() { return is_root() || is_mdsdir(); }
bool is_system() { return inode.ino < MDS_INO_SYSTEM_BASE; }

// note: this overloads MDSCacheObject
Expand Down
4 changes: 2 additions & 2 deletions src/mds/Locker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2823,7 +2823,7 @@ void Locker::scatter_writebehind(ScatterLock *lock)
dout(10) << "scatter_writebehind " << in->inode.mtime << " on " << *lock << " on " << *in << dendl;

// hack:
if (in->is_root()) {
if (in->is_base()) {
dout(10) << "scatter_writebehind just clearing updated flag for base inode " << *in << dendl;
lock->clear_dirty();
if (!lock->is_stable())
Expand Down Expand Up @@ -2895,7 +2895,7 @@ void Locker::scatter_eval(ScatterLock *lock, bool *need_issue)
}

CInode *in = (CInode*)lock->get_parent();
if (!in->has_subtree_root_dirfrag() || in->is_root()) {
if (!in->has_subtree_root_dirfrag() || in->is_base()) {
// i _should_ be sync.
if (!lock->is_wrlocked() &&
!lock->is_xlocked() &&
Expand Down
8 changes: 4 additions & 4 deletions src/mds/MDBalancer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,7 @@ void MDBalancer::export_empties()
dir->is_frozen())
continue;

if (!dir->inode->is_root() && dir->get_num_head_items() == 0)
if (!dir->inode->is_base() && dir->get_num_head_items() == 0)
mds->mdcache->migrator->export_empty_import(dir);
}
}
Expand Down Expand Up @@ -595,7 +595,7 @@ void MDBalancer::try_rebalance()
dout(5) << "considering " << *dir << " from " << (*p.first).first << dendl;
multimap<int,CDir*>::iterator plast = p.first++;

if (dir->inode->is_root()) continue;
if (dir->inode->is_base()) continue;
if (dir->is_freezing() || dir->is_frozen()) continue; // export pbly already in progress
double pop = dir->pop_auth_subtree.meta_load(rebalance_time, mds->mdcache->decayrate);
assert(dir->inode->authority().first == target); // cuz that's how i put it in the map, dummy
Expand Down Expand Up @@ -625,7 +625,7 @@ void MDBalancer::try_rebalance()
import != import_pop_map.end();
import++) {
CDir *imp = (*import).second;
if (imp->inode->is_root()) continue;
if (imp->inode->is_base()) continue;

double pop = (*import).first;
if (pop < amount-have || pop < MIN_REEXPORT) {
Expand Down Expand Up @@ -878,7 +878,7 @@ void MDBalancer::hit_dir(utime_t now, CDir *dir, int type, int who, double amoun
// hit modify counter, if this was a modify
if (//g_conf.num_mds > 2 && // FIXME >2 thing
g_conf.mds_bal_frag &&
!dir->inode->is_root() && // not root (for now at least)
!dir->inode->is_base() && // not root/base (for now at least)
dir->is_auth() &&

((g_conf.mds_bal_split_size > 0 &&
Expand Down
Loading

0 comments on commit 6c88cf3

Please sign in to comment.