Skip to content

Commit

Permalink
Merge pull request ceph#25126 from xiexingguo/wip-heartbeat-bs
Browse files Browse the repository at this point in the history
osd: two heartbeat fixes

Reviewed-by: Sage Weil <[email protected]>
Reviewed-by: Greg Farnum <[email protected]>
  • Loading branch information
xiexingguo authored Dec 1, 2018
2 parents 6d64cf5 + 114c65f commit e50be3e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
33 changes: 24 additions & 9 deletions src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5622,8 +5622,17 @@ bool OSD::_is_healthy()
}

if (is_waiting_for_healthy()) {
utime_t now = ceph_clock_now();
utime_t grace = utime_t(cct->_conf->osd_max_markdown_period, 0);
while (!osd_markdown_log.empty() &&
osd_markdown_log.front() + grace < now)
osd_markdown_log.pop_front();
if (osd_markdown_log.size() <= 1) {
dout(5) << __func__ << " first time marked as down,"
<< " try reboot unconditionally" << dendl;
return true;
}
std::lock_guard l(heartbeat_lock);
utime_t now = ceph_clock_now();
int num = 0, up = 0;
for (map<int,HeartbeatInfo>::iterator p = heartbeat_peers.begin();
p != heartbeat_peers.end();
Expand Down Expand Up @@ -5879,6 +5888,18 @@ void OSD::send_still_alive(epoch_t epoch, int osd, const entity_addrvec_t &addrs
monc->send_mon_message(m);
}

/**
 * Withdraw every failure report currently tracked in failure_pending.
 *
 * Holds heartbeat_lock for the whole walk. For each tracked entry a
 * still-alive message is issued through send_still_alive() using the
 * epoch of the current osdmap, and the entry is then removed, so
 * failure_pending is empty when this returns.
 */
void OSD::cancel_pending_failures()
{
  std::lock_guard hb_guard(heartbeat_lock);
  // map::erase() hands back the successor, which drives the iteration.
  for (auto pending = failure_pending.begin();
       pending != failure_pending.end();
       pending = failure_pending.erase(pending)) {
    const auto& peer = pending->first;
    const auto& peer_addrs = pending->second.second;
    dout(10) << __func__ << " canceling in-flight failure report for osd."
             << peer << dendl;
    send_still_alive(osdmap->get_epoch(), peer, peer_addrs);
  }
}

void OSD::send_beacon(const ceph::coarse_mono_clock::time_point& now)
{
const auto& monmap = monc->monmap;
Expand Down Expand Up @@ -7838,6 +7859,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)
// set incarnation so that osd_reqid_t's we generate for our
// objecter requests are unique across restarts.
service.objecter->set_client_incarnation(osdmap->get_epoch());
cancel_pending_failures();
}
}

Expand Down Expand Up @@ -7979,14 +8001,7 @@ void OSD::_committed_osd_maps(epoch_t first, epoch_t last, MOSDMap *m)

if (do_shutdown) {
if (network_error) {
std::lock_guard l(heartbeat_lock);
auto it = failure_pending.begin();
while (it != failure_pending.end()) {
dout(10) << "handle_osd_ping canceling in-flight failure report for osd."
<< it->first << dendl;
send_still_alive(osdmap->get_epoch(), it->first, it->second.second);
failure_pending.erase(it++);
}
cancel_pending_failures();
}
// trigger shutdown in a different thread
dout(0) << __func__ << " shutdown OSD via async signal" << dendl;
Expand Down
1 change: 1 addition & 0 deletions src/osd/OSD.h
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,7 @@ class OSD : public Dispatcher,
void requeue_failures();
void send_failures();
void send_still_alive(epoch_t epoch, int osd, const entity_addrvec_t &addrs);
void cancel_pending_failures();

ceph::coarse_mono_clock::time_point last_sent_beacon;
Mutex min_last_epoch_clean_lock{"OSD::min_last_epoch_clean_lock"};
Expand Down

0 comments on commit e50be3e

Please sign in to comment.