diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 9f95918ce278a..13cf69bee740a 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -267,12 +267,21 @@ int ImageWatcher::request_lock( } bool ImageWatcher::try_request_lock() { - RWLock::WLocker l(m_image_ctx.owner_lock); + assert(m_image_ctx.owner_lock.is_locked()); if (is_lock_owner()) { return true; } - int r = try_lock(); + int r = 0; + m_image_ctx.owner_lock.put_read(); + { + RWLock::WLocker l(m_image_ctx.owner_lock); + if (!is_lock_owner()) { + r = try_lock(); + } + } + m_image_ctx.owner_lock.get_read(); + if (r < 0) { ldout(m_image_ctx.cct, 5) << "failed to acquire exclusive lock:" << cpp_strerror(r) << dendl; @@ -292,8 +301,14 @@ bool ImageWatcher::try_request_lock() { void ImageWatcher::finalize_request_lock() { cancel_retry_aio_requests(); - if (try_request_lock()) { + bool owned_lock; + { + RWLock::RLocker l(m_image_ctx.owner_lock); + owned_lock = try_request_lock(); + } + if (owned_lock) { retry_aio_requests(); + } else { schedule_retry_aio_requests(); } @@ -450,6 +465,9 @@ int ImageWatcher::notify_async_complete(const RemoteAsyncRequest &request, } int ImageWatcher::notify_flatten(ProgressContext &prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; uint64_t async_request_id; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); @@ -461,6 +479,9 @@ int ImageWatcher::notify_flatten(ProgressContext &prog_ctx) { } int ImageWatcher::notify_resize(uint64_t size, ProgressContext &prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; uint64_t async_request_id; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); @@ -473,6 +494,9 @@ int ImageWatcher::notify_resize(uint64_t size, ProgressContext &prog_ctx) { } int ImageWatcher::notify_snap_create(const std::string &snap_name) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); ::encode(NOTIFY_OP_SNAP_CREATE, bl); @@ -599,10 +623,14 @@ uint64_t ImageWatcher::encode_async_request(bufferlist &bl) { int ImageWatcher::decode_response_code(bufferlist &bl) { int r; - bufferlist::iterator iter = bl.begin(); - DECODE_START(NOTIFY_VERSION, iter); - ::decode(r, iter); - DECODE_FINISH(iter); + try { + bufferlist::iterator iter = bl.begin(); + DECODE_START(NOTIFY_VERSION, iter); + ::decode(r, iter); + DECODE_FINISH(iter); + } catch (const buffer::error &err) { + r = -EINVAL; + } return r; } @@ -617,7 +645,9 @@ void ImageWatcher::notify_released_lock() { void ImageWatcher::notify_request_lock() { cancel_retry_aio_requests(); + m_image_ctx.owner_lock.get_read(); if (try_request_lock()) { + m_image_ctx.owner_lock.put_read(); retry_aio_requests(); return; } @@ -629,6 +659,8 @@ void ImageWatcher::notify_request_lock() { bufferlist response; int r = notify_lock_owner(bl, response); + m_image_ctx.owner_lock.put_read(); + if (r == -ETIMEDOUT) { ldout(m_image_ctx.cct, 5) << "timed out requesting lock: retrying" << dendl; retry_aio_requests(); @@ -640,9 +672,15 @@ void ImageWatcher::notify_request_lock() { } int ImageWatcher::notify_lock_owner(bufferlist &bl, bufferlist& response) { + assert(m_image_ctx.owner_lock.is_locked()); + + // since we need to ack our own notifications, release the owner lock just in + // case another notification occurs before this one and it requires the lock bufferlist response_bl; + m_image_ctx.owner_lock.put_read(); int r = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, NOTIFY_TIMEOUT, &response_bl); + m_image_ctx.owner_lock.get_read(); if (r < 0 && r != -ETIMEDOUT) { lderr(m_image_ctx.cct) << "lock owner notification failed: " << cpp_strerror(r) << dendl; @@ -683,6 +721,8 @@ int ImageWatcher::notify_lock_owner(bufferlist &bl, bufferlist& response) { int ImageWatcher::notify_async_request(uint64_t async_request_id, bufferlist &in, ProgressContext& prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + Mutex my_lock("librbd::ImageWatcher::notify_async_request::my_lock"); Cond cond; bool done = false; @@ -734,13 +774,16 @@ void ImageWatcher::handle_acquired_lock() { void ImageWatcher::handle_released_lock() { ldout(m_image_ctx.cct, 20) << "exclusive lock released" << dendl; + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::cancel_async_requests, this, -ERESTART)); + m_finisher->queue(ctx); Mutex::Locker l(m_aio_request_lock); if (!m_aio_requests.empty()) { ldout(m_image_ctx.cct, 20) << "queuing lock request" << dendl; - FunctionContext *ctx = new FunctionContext( + FunctionContext *req_ctx = new FunctionContext( boost::bind(&ImageWatcher::finalize_request_lock, this)); - m_finisher->queue(ctx); + m_finisher->queue(req_ctx); } } @@ -867,7 +910,6 @@ void ImageWatcher::handle_snap_create(bufferlist::iterator iter, bufferlist *out ::decode(snap_name, iter); ldout(m_image_ctx.cct, 20) << "remote snap_create request: " << snap_name << dendl; - int r = librbd::snap_create(&m_image_ctx, snap_name.c_str(), false); ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, *out); ::encode(r, *out); diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index b30ab5ffcf7fa..1ef02f140f845 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -151,7 +151,6 @@ namespace librbd { int notify_async_request(uint64_t async_request_id, bufferlist &in, ProgressContext& prog_ctx); void notify_request_leadership(); - int notify_leader(bufferlist &bl, bufferlist &response); void handle_header_update(); void handle_acquired_lock(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 5087a92ed7f46..db2be22d5a746 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -157,6 +157,10 @@ namespace librbd { void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + Mutex my_lock("librbd::trim_image::my_lock"); Cond cond; bool done; @@ -445,7 +449,7 @@ namespace librbd { { assert(ictx->owner_lock.is_locked() && !ictx->owner_lock.is_wlocked()); if (ictx->image_watcher == NULL) { - return -EROFS;; + return -EROFS; } else if (!ictx->image_watcher->is_lock_supported() || ictx->image_watcher->is_lock_owner()) { return 0; @@ -476,13 +480,19 @@ namespace librbd { if (r < 0) return r; - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_snap_create(snap_name); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } + + r = ictx->image_watcher->notify_snap_create(snap_name); + if (r != -ETIMEDOUT) { + return r; + } + ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl; } RWLock::RLocker l2(ictx->md_lock); @@ -1443,8 +1453,20 @@ namespace librbd { unknown_format = false; id = ictx->id; + ictx->owner_lock.get_read(); + if (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0 || !ictx->image_watcher->is_lock_owner()) { + lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl; + ictx->owner_lock.put_read(); + close_image(ictx); + return -EBUSY; + } + } + if (ictx->snaps.size()) { lderr(cct) << "image has snapshots - not removing" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return -ENOTEMPTY; } @@ -1453,11 +1475,13 @@ namespace librbd { r = io_ctx.list_watchers(header_oid, &watchers); if (r < 0) { lderr(cct) << "error listing watchers" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return r; } if (watchers.size() > 1) { lderr(cct) << "image has watchers - not removing" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return -EBUSY; } @@ -1476,9 +1500,12 @@ namespace librbd { parent_info.spec, id); if (r < 0 && r != -ENOENT) { lderr(cct) << "error removing child from children list" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return r; } + + ictx->owner_lock.put_read(); close_image(ictx); ldout(cct, 2) << "removing header..." << dendl; @@ -1532,39 +1559,50 @@ namespace librbd { ldout(cct, 20) << "resize " << ictx << " " << ictx->size << " -> " << size << dendl; - { - RWLock::RLocker l(ictx->owner_lock); - int r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_resize(size, prog_ctx); - } - } + int r; + do { + Mutex my_lock("librbd::resize::my_lock"); + Cond cond; + bool done; + { + RWLock::RLocker l(ictx->owner_lock); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } - Mutex my_lock("librbd::resize::my_lock"); - Cond cond; - bool done; - int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + r = ictx->image_watcher->notify_resize(size, prog_ctx); + if (r != -ETIMEDOUT && r != -ERESTART) { + return r; + } + ldout(ictx->cct, 5) << "resize timed out notifying lock owner" << dendl; + } - ret = async_resize(ictx, ctx, size, prog_ctx); - if (ret < 0) { - delete ctx; - return ret; - } + Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r); + r = async_resize(ictx, ctx, size, prog_ctx); + if (r < 0) { + delete ctx; + return r; + } + } - my_lock.Lock(); - while (!done) { - cond.Wait(my_lock); - } - my_lock.Unlock(); + my_lock.Lock(); + while (!done) { + cond.Wait(my_lock); + } + my_lock.Unlock(); + + if (r == -ERESTART) { + ldout(ictx->cct, 5) << "resize interrupted: restarting" << dendl; + } + } while (r == -ERESTART); notify_change(ictx->md_ctx, ictx->header_oid, ictx); ldout(cct, 2) << "resize finished" << dendl; - return ret; + return r; } class AsyncResizeFinishContext : public Context { @@ -1588,6 +1626,10 @@ namespace librbd { int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size, ProgressContext &prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + CephContext *cct = ictx->cct; ldout(cct, 20) << "async_resize " << ictx << " " << ictx->size << " -> " << size << dendl; @@ -1603,17 +1645,7 @@ namespace librbd { uint64_t original_size; { - RWLock::RLocker l(ictx->owner_lock); - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return -EROFS; - } - - RWLock::RLocker l2(ictx->md_lock); + RWLock::RLocker l(ictx->md_lock); original_size = ictx->size; if (size < ictx->size) { ictx->wait_for_pending_copyup(); @@ -1662,7 +1694,7 @@ namespace librbd { RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -1745,7 +1777,7 @@ namespace librbd { RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - return -EROFS; + return -ERESTART; } string oid = m_ictx->get_object_name(m_object_no); @@ -1782,7 +1814,7 @@ namespace librbd { RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -2588,7 +2620,7 @@ namespace librbd { RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - return -EROFS; + return -ERESTART; } RWLock::RLocker l2(m_ictx->md_lock); @@ -2661,7 +2693,7 @@ namespace librbd { RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -2713,43 +2745,58 @@ namespace librbd { return -EROFS; } - { - RWLock::RLocker l(ictx->owner_lock); - int r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_flatten(prog_ctx); - } - } + int r; + do { + Mutex my_lock("librbd::flatten:my_lock"); + Cond cond; + bool done; + { + RWLock::RLocker l(ictx->owner_lock); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } - Mutex my_lock("librbd::flatten:my_lock"); - Cond cond; - bool done; - int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + r = ictx->image_watcher->notify_flatten(prog_ctx); + if (r != -ETIMEDOUT && r != -ERESTART) { + return r; + } + ldout(ictx->cct, 5) << "flatten timed out notifying lock owner" << dendl; + } - ret = async_flatten(ictx, ctx, prog_ctx); - if (ret < 0) { - delete ctx; - return ret; - } + Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r); + r = async_flatten(ictx, ctx, prog_ctx); + if (r < 0) { + delete ctx; + return r; + } + } - my_lock.Lock(); - while (!done) { - cond.Wait(my_lock); - } - my_lock.Unlock(); + my_lock.Lock(); + while (!done) { + cond.Wait(my_lock); + } + my_lock.Unlock(); + + if (r == -ERESTART) { + ldout(ictx->cct, 5) << "flatten interrupted: restarting" << dendl; + } + } while (r == -ERESTART); notify_change(ictx->md_ctx, ictx->header_oid, ictx); ldout(cct, 20) << "flatten finished" << dendl; - return ret; + return r; } int async_flatten(ImageCtx *ictx, Context *ctx, ProgressContext &prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + CephContext *cct = ictx->cct; ldout(cct, 20) << "flatten" << dendl; @@ -2793,19 +2840,6 @@ namespace librbd { overlap_objects = Striper::get_num_objects(ictx->layout, overlap); } - { - RWLock::RLocker l(ictx->owner_lock); - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - // TODO: temporary until request proxied to lock owner - return -EROFS; - } - } - AsyncObjectThrottle::ContextFactory context_factory( boost::lambda::bind(boost::lambda::new_ptr(), boost::lambda::_1, ictx, object_size, snapc, diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc index 0d13fd68b5926..f74e83ec769e8 100644 --- a/src/test/librbd/test_internal.cc +++ b/src/test/librbd/test_internal.cc @@ -225,19 +225,19 @@ TEST_F(TestInternal, FlattenFailsToLockImage) { int order = ictx->order; ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap1", m_ioctx, clone_name.c_str(), features, &order, 0, 0)); - BOOST_SCOPE_EXIT( (&m_ioctx) (clone_name) ) { + + TestInternal *parent = this; + librbd::ImageCtx *ictx2 = NULL; + BOOST_SCOPE_EXIT( (&m_ioctx) (clone_name) (parent) (&ictx2) ) { + if (ictx2 != NULL) { + parent->close_image(ictx2); + parent->unlock_image(); + } librbd::NoOpProgressContext no_op; ASSERT_EQ(0, librbd::remove(m_ioctx, clone_name.c_str(), no_op)); } BOOST_SCOPE_EXIT_END; - librbd::ImageCtx *ictx2; ASSERT_EQ(0, open_image(clone_name, &ictx2)); - - TestInternal *parent = this; - BOOST_SCOPE_EXIT( (parent) (ictx2) ) { - parent->close_image(ictx2); - } BOOST_SCOPE_EXIT_END; - ASSERT_EQ(0, lock_image(*ictx2, LOCK_EXCLUSIVE, "manually locked")); librbd::NoOpProgressContext no_op;