From f26752a8a8b5041189a4e6d638946826b5403feb Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Fri, 7 Sep 2018 12:06:11 -0700
Subject: [PATCH] mon: test if gid exists in pending for prepare_beacon

If it does not, send a null map.

Bug introduced by 624efc64323f99b2e843f376879c1080276e036f which made preprocess_beacon only look at the current fsmap (correctly). prepare_beacon relied on preprocess_beacon doing that check on pending.

Running:

    while sleep 0.5; do bin/ceph mds fail 0; done

is sufficient to reproduce this bug. You will see:

    2018-09-07 15:33:30.350 7fffe36a8700 5 mon.a@0(leader).mds e69 preprocess_beacon mdsbeacon(24412/a up:reconnect seq 2 v69) v7 from mds.0 127.0.0.1:6813/2891525302 compat={},rocompat={},incompat={1=base v0.20,2=client writeable ranges,3=default file layouts on dirs,4=dir inode in separate object,5=mds uses versioned encoding,6=dirfrag is stored in omap,8=no anchor table,9=file layout v2,10=snaprealm v2}
    2018-09-07 15:33:30.350 7fffe36a8700 10 mon.a@0(leader).mds e69 preprocess_beacon: GID exists in map: 24412
    2018-09-07 15:33:30.350 7fffe36a8700 5 mon.a@0(leader).mds e69 _note_beacon mdsbeacon(24412/a up:reconnect seq 2 v69) v7 noting time
    2018-09-07 15:33:30.350 7fffe36a8700 7 mon.a@0(leader).mds e69 prepare_update mdsbeacon(24412/a up:reconnect seq 2 v69) v7
    2018-09-07 15:33:30.350 7fffe36a8700 12 mon.a@0(leader).mds e69 prepare_beacon mdsbeacon(24412/a up:reconnect seq 2 v69) v7 from mds.0 127.0.0.1:6813/2891525302
    2018-09-07 15:33:30.350 7fffe36a8700 15 mon.a@0(leader).mds e69 prepare_beacon got health from gid 24412 with 0 metrics.
    2018-09-07 15:33:30.350 7fffe36a8700 5 mon.a@0(leader).mds e69 mds_beacon mdsbeacon(24412/a up:reconnect seq 2 v69) v7 is not in fsmap (state up:reconnect)

in the mon leader log. The last line indicates the problem was safely handled.

Fixes: http://tracker.ceph.com/issues/35848 Signed-off-by: Patrick Donnelly --- src/mon/MDSMonitor.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/mon/MDSMonitor.cc b/src/mon/MDSMonitor.cc index e9317d474280c..f127833d6a4bb 100644 --- a/src/mon/MDSMonitor.cc +++ b/src/mon/MDSMonitor.cc @@ -644,6 +644,31 @@ bool MDSMonitor::prepare_beacon(MonOpRequestRef op) update_metadata(m->get_global_id(), m->get_sys_info()); } else { // state update + + if (!pending.gid_exists(gid)) { + /* gid has been removed from pending, send null map */ + dout(5) << "mds_beacon " << *m << " is not in fsmap (state " + << ceph_mds_state_name(state) << ")" << dendl; + + /* We can't send an MDSMap this MDS was a part of because we no longer + * know which FS it was part of. Nor does this matter. Sending an empty + * MDSMap is sufficient for getting the MDS to respawn. + */ + wait_for_finished_proposal(op, new FunctionContext([op, this](int r){ + if (r >= 0) { + const auto& fsmap = get_fsmap(); + MDSMap null_map; + null_map.epoch = fsmap.epoch; + null_map.compat = fsmap.compat; + auto m = MMDSMap::create(mon->monmap->fsid, null_map); + mon->send_reply(op, m.detach()); + } else { + dispatch(op); // try again + } + })); + return true; + } + const MDSMap::mds_info_t &info = pending.get_info_gid(gid); // Old MDS daemons don't mention that they're standby replay until // after they've sent their boot beacon, so update this field.