
Merge pull request ceph#4226 from ceph/wip-11276
ceph#11276: make ceph-fuse well behaved with pool quotas

Reviewed-by: Greg Farnum <[email protected]>
gregsfortytwo committed May 14, 2015
2 parents 0373e49 + 0083445 commit e63c44b
Showing 4 changed files with 128 additions and 44 deletions.
109 changes: 76 additions & 33 deletions src/client/Client.cc
@@ -2091,37 +2091,78 @@ void Client::handle_client_reply(MClientReply *reply)
   mount_cond.Signal();
 }
 
+void Client::_handle_full_flag(int64_t pool)
+{
+  ldout(cct, 1) << __func__ << ": FULL: cancelling outstanding operations "
+    << "on " << pool << dendl;
+  // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
+  // to do this rather than blocking, because otherwise when we fill up we
+  // potentially lock caps forever on files with dirty pages, and we need
+  // to be able to release those caps to the MDS so that it can delete files
+  // and free up space.
+  epoch_t cancelled_epoch = objecter->op_cancel_writes(-ENOSPC, pool);
+
+  // For all inodes with layouts in this pool and a pending flush write op
+  // (i.e. one of the ones we will cancel), we've got to purge_set their data
+  // from ObjectCacher so that it doesn't re-issue the write in response to
+  // the ENOSPC error.
+  // Fortunately since we're cancelling everything in a given pool, we don't
+  // need to know which ops belong to which ObjectSet, we can just blow all
+  // the un-flushed cached data away and mark any dirty inodes' async_err
+  // field with -ENOSPC as long as we're sure all the ops we cancelled were
+  // affecting this pool, and all the objectsets we're purging were also
+  // in this pool.
+  for (unordered_map<vinodeno_t,Inode*>::iterator i = inode_map.begin();
+       i != inode_map.end(); ++i)
+  {
+    Inode *inode = i->second;
+    if (inode->oset.dirty_or_tx
+        && (pool == -1 || inode->layout.fl_pg_pool == pool)) {
+      ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec
+        << " has dirty objects, purging and setting ENOSPC" << dendl;
+      objectcacher->purge_set(&inode->oset);
+      inode->async_err = -ENOSPC;
+    }
+  }
+
+  if (cancelled_epoch != (epoch_t)-1) {
+    set_cap_epoch_barrier(cancelled_epoch);
+  }
+}
+
 void Client::handle_osd_map(MOSDMap *m)
 {
   if (objecter->osdmap_full_flag()) {
-    ldout(cct, 1) << __func__ << ": FULL: cancelling outstanding operations" << dendl;
-    // Cancel all outstanding ops with -ENOSPC: it is necessary to do this rather than blocking,
-    // because otherwise when we fill up we potentially lock caps forever on files with
-    // dirty pages, and we need to be able to release those caps to the MDS so that it can
-    // delete files and free up space.
-    epoch_t cancelled_epoch = objecter->op_cancel_writes(-ENOSPC);
-
-    // For all inodes with a pending flush write op (i.e. one of the ones we
-    // will cancel), we've got to purge_set their data from ObjectCacher
-    // so that it doesn't re-issue the write in response to the ENOSPC error.
-    // Fortunately since we're cancelling *everything*, we don't need to know
-    // which ops belong to which ObjectSet, we can just blow all the un-flushed
-    // cached data away and mark any dirty inodes' async_err field with -ENOSPC
-    // (i.e. we only need to know which inodes had outstanding ops, not the exact
-    // op-to-inode relation)
-    for (unordered_map<vinodeno_t,Inode*>::iterator i = inode_map.begin();
-         i != inode_map.end(); ++i)
-    {
-      Inode *inode = i->second;
-      if (inode->oset.dirty_or_tx) {
-        ldout(cct, 4) << __func__ << ": FULL: inode 0x" << std::hex << i->first << std::dec
-          << " has dirty objects, purging and setting ENOSPC" << dendl;
-        objectcacher->purge_set(&inode->oset);
-        inode->async_err = -ENOSPC;
+    _handle_full_flag(-1);
+  } else {
+    // Accumulate local list of full pools so that I can drop
+    // the objecter lock before re-entering objecter in
+    // cancel_writes
+    std::vector<int64_t> full_pools;
+
+    const OSDMap *osd_map = objecter->get_osdmap_read();
+    const map<int64_t,pg_pool_t>& pools = osd_map->get_pools();
+    for (map<int64_t,pg_pool_t>::const_iterator i = pools.begin();
+         i != pools.end(); ++i) {
+      if (i->second.has_flag(pg_pool_t::FLAG_FULL)) {
+        full_pools.push_back(i->first);
       }
     }
 
-    set_cap_epoch_barrier(cancelled_epoch);
+    objecter->put_osdmap_read();
+
+    for (std::vector<int64_t>::iterator i = full_pools.begin();
+         i != full_pools.end(); ++i) {
+      _handle_full_flag(*i);
+    }
+
+    // Subscribe to subsequent maps to watch for the full flag going
+    // away. For the global full flag objecter does this for us, but
+    // it pays no attention to the per-pool full flag so in this branch
+    // we do it ourselves.
+    if (!full_pools.empty()) {
+      objecter->maybe_request_map();
+    }
   }
 
   m->put();
@@ -3300,7 +3341,7 @@ bool Client::_flush(Inode *in, Context *onfinish)
     return true;
   }
 
-  if (objecter->osdmap_full_flag()) {
+  if (objecter->osdmap_pool_full(in->layout.fl_pg_pool)) {
     ldout(cct, 1) << __func__ << ": FULL, purging for ENOSPC" << dendl;
     objectcacher->purge_set(&in->oset);
     if (onfinish) {
@@ -7353,13 +7394,13 @@ int Client::_write(Fh *f, int64_t offset, uint64_t size, const char *buf)
   if ((uint64_t)(offset+size) > mdsmap->get_max_filesize()) //too large!
     return -EFBIG;
 
-  if (objecter->osdmap_full_flag()) {
-    return -ENOSPC;
-  }
-
   //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
   Inode *in = f->inode;
 
+  if (objecter->osdmap_pool_full(in->layout.fl_pg_pool)) {
+    return -ENOSPC;
+  }
+
   assert(in->snapid == CEPH_NOSNAP);
 
   // was Fh opened as writeable?
@@ -10125,11 +10166,13 @@ int Client::_fallocate(Fh *fh, int mode, int64_t offset, int64_t length)
   if ((mode & FALLOC_FL_PUNCH_HOLE) && !(mode & FALLOC_FL_KEEP_SIZE))
     return -EOPNOTSUPP;
 
-  if (objecter->osdmap_full_flag() && !(mode & FALLOC_FL_PUNCH_HOLE))
-    return -ENOSPC;
-
   Inode *in = fh->inode;
 
+  if (objecter->osdmap_pool_full(in->layout.fl_pg_pool)
+      && !(mode & FALLOC_FL_PUNCH_HOLE)) {
+    return -ENOSPC;
+  }
+
   if (in->snapid != CEPH_NOSNAP)
     return -EROFS;
 
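The three write-path hunks above (_flush, _write, _fallocate) converge on one guard: consult the pool that the file's layout actually targets, not just the cluster-wide full flag. Condensed from those hunks (a sketch, not an additional change in this commit; names as in Client.cc):

  // Common write-path guard after this commit: ask about the file's own
  // pool and fail fast instead of queueing writes the full pool can
  // never accept.
  Inode *in = f->inode;
  if (objecter->osdmap_pool_full(in->layout.fl_pg_pool)) {
    return -ENOSPC;
  }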
8 changes: 8 additions & 0 deletions src/client/Client.h
@@ -482,6 +482,14 @@ class Client : public Dispatcher {
   list<Cond*> waiting_for_pool_perm;
   int check_pool_perm(Inode *in, int need);
 
+  /**
+   * Call this when an OSDMap is seen with a full flag (global or per pool)
+   * set.
+   *
+   * @param pool the pool ID affected, or -1 if all.
+   */
+  void _handle_full_flag(int64_t pool);
+
 public:
   void set_filer_flags(int flags);
   void clear_filer_flags(int flags);
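The declaration above is the per-pool entry point; Client::handle_osd_map (first hunk in Client.cc) drives it roughly like this (condensed from that hunk; full_pools is the list of pool ids gathered from the new OSDMap, error paths elided):

  // -1 means "all pools": used when the cluster-wide full flag is set.
  if (objecter->osdmap_full_flag()) {
    _handle_full_flag(-1);
  } else {
    // Otherwise cancel/purge only in pools carrying pg_pool_t::FLAG_FULL.
    for (std::vector<int64_t>::iterator i = full_pools.begin();
         i != full_pools.end(); ++i)
      _handle_full_flag(*i);
  }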
36 changes: 26 additions & 10 deletions src/osdc/Objecter.cc
@@ -2297,14 +2297,8 @@ int Objecter::_op_cancel(ceph_tid_t tid, int r)
   return ret;
 }
 
-/**
- * Any write op which is in progress at the start of this call shall no longer
- * be in progress when this call ends. Operations started after the start
- * of this call may still be in progress when this call ends.
- *
- * @return the latest possible epoch in which a cancelled op could have existed
- */
-epoch_t Objecter::op_cancel_writes(int r)
+
+epoch_t Objecter::op_cancel_writes(int r, int64_t pool)
 {
   rwlock.get_write();
 
@@ -2314,7 +2308,8 @@ epoch_t Objecter::op_cancel_writes(int r)
     OSDSession *s = siter->second;
     s->lock.get_read();
     for (map<ceph_tid_t, Op*>::iterator op_i = s->ops.begin(); op_i != s->ops.end(); ++op_i) {
-      if (op_i->second->target.flags & CEPH_OSD_FLAG_WRITE) {
+      if (op_i->second->target.flags & CEPH_OSD_FLAG_WRITE
+          && (pool == -1 || op_i->second->target.target_oloc.pool == pool)) {
         to_cancel.push_back(op_i->first);
       }
     }
@@ -2331,7 +2326,11 @@ epoch_t Objecter::op_cancel_writes(int r)
 
   rwlock.unlock();
 
-  return epoch;
+  if (to_cancel.size()) {
+    return epoch;
+  } else {
+    return -1;
+  }
 }
 
 bool Objecter::is_pg_changed(
@@ -2372,6 +2371,23 @@ bool Objecter::osdmap_full_flag() const
   return _osdmap_full_flag();
 }
 
+bool Objecter::osdmap_pool_full(const int64_t pool_id) const
+{
+  RWLock::RLocker rl(rwlock);
+
+  if (_osdmap_full_flag()) {
+    return true;
+  }
+
+  const pg_pool_t *pool = osdmap->get_pg_pool(pool_id);
+  if (pool == NULL) {
+    ldout(cct, 4) << __func__ << ": DNE pool " << pool_id << dendl;
+    return false;
+  }
+
+  return pool->has_flag(pg_pool_t::FLAG_FULL);
+}
+
 /**
  * Wrapper around osdmap->test_flag for special handling of the FULL flag.
  */
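Note the return convention introduced here: op_cancel_writes() now yields (epoch_t)-1 when no in-flight write matched the pool filter, so a caller should only raise its cap epoch barrier when something was actually cancelled. A minimal sketch of that contract, mirroring Client::_handle_full_flag above:

  // Cancel in-flight writes to one pool with -ENOSPC; the returned epoch
  // bounds when the cancelled ops could have existed.
  epoch_t cancelled_epoch = objecter->op_cancel_writes(-ENOSPC, pool);
  if (cancelled_epoch != (epoch_t)-1) {
    // Only publish a barrier if ops were really cancelled.
    set_cap_epoch_barrier(cancelled_epoch);
  }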
19 changes: 18 additions & 1 deletion src/osdc/Objecter.h
@@ -1694,6 +1694,14 @@ class Objecter : public md_config_obs_t, public Dispatcher {
 
   bool osdmap_full_flag() const;
 
+  /**
+   * Test pg_pool_t::FLAG_FULL on a pool
+   *
+   * @return true if the pool exists and has the flag set, or
+   * the global full flag is set, else false
+   */
+  bool osdmap_pool_full(const int64_t pool_id) const;
+
 private:
   map<uint64_t, LingerOp*> linger_ops;
   // we use this just to confirm a cookie is valid before dereferencing the ptr
@@ -1993,7 +2001,16 @@ class Objecter : public md_config_obs_t, public Dispatcher {
   friend class C_CancelOp;
 public:
   int op_cancel(ceph_tid_t tid, int r);
-  epoch_t op_cancel_writes(int r);
+
+  /**
+   * Any write op which is in progress at the start of this call shall no
+   * longer be in progress when this call ends. Operations started after the
+   * start of this call may still be in progress when this call ends.
+   *
+   * @return the latest possible epoch in which a cancelled op could have
+   * existed, or -1 if nothing was cancelled.
+   */
+  epoch_t op_cancel_writes(int r, int64_t pool=-1);
 
   // commands
   int osd_command(int osd, vector<string>& cmd,

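End to end, the point of the commit is that a client writing into a pool that has hit its quota now gets a prompt ENOSPC instead of wedging while holding caps on dirty files. A hypothetical libcephfs check of that behavior (a sketch under assumptions: /quota_dir has its file layout in a pool whose quota is exhausted, i.e. pg_pool_t::FLAG_FULL is set in the OSDMap; the path and mount defaults are illustrative):

  #include <cephfs/libcephfs.h>
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
    struct ceph_mount_info *cmount;
    char buf[4096];
    memset(buf, 'x', sizeof(buf));

    ceph_create(&cmount, NULL);         // NULL id -> default client
    ceph_conf_read_file(cmount, NULL);  // default ceph.conf search path
    ceph_mount(cmount, "/");

    // Assumed: /quota_dir's layout points at a pool with FLAG_FULL set.
    int fd = ceph_open(cmount, "/quota_dir/f", O_CREAT | O_WRONLY, 0644);
    int r = ceph_write(cmount, fd, buf, sizeof(buf), 0);
    printf("write returned %d (expect -ENOSPC while the pool is full)\n", r);

    ceph_close(cmount, fd);
    ceph_unmount(cmount);
    ceph_shutdown(cmount);
    return 0;
  }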