Skip to content

Commit

Permalink
Merge PR ceph#56271 into main
Browse files Browse the repository at this point in the history
* refs/pull/56271/head:
	qa/cephfs: stop ignoring MON_DOWN globally
	qa: extend mon timeout coming up after mondb creation
	qa: update dashboard schema for mon_status
	mon: do not log MON_DOWN if monitor uptime is less than threshold

Reviewed-by: Leonid Usov <[email protected]>
Reviewed-by: Venky Shankar <[email protected]>
  • Loading branch information
batrick committed Mar 22, 2024
2 parents 7e0efd1 + ae96ed4 commit 2716b6e
Show file tree
Hide file tree
Showing 7 changed files with 33 additions and 4 deletions.
2 changes: 2 additions & 0 deletions qa/cephfs/conf/mon.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ overrides:
conf:
mon:
mon op complaint time: 120
# cephadm can take up to 5 minutes to bring up remaining mons
mon down mkfs grace: 300
1 change: 0 additions & 1 deletion qa/cephfs/overrides/ignorelist_wrongly_marked_down.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ overrides:
- but it is still running
# MDS daemon 'b' is not responding, replacing it as rank 0 with standby 'a'
- is not responding
- MON_DOWN
3 changes: 2 additions & 1 deletion qa/tasks/mgr/dashboard/test_health.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ def test_full_health(self):
'state': str,
# @TODO: What type should be expected here?
'sync_provider': JList(JAny(none=True)),
'stretch_mode': bool
'stretch_mode': bool,
'uptime': int,
}),
'osd_map': JObj({
# @TODO: define schema for crush map and osd_metadata, among
Expand Down
9 changes: 9 additions & 0 deletions src/common/options/mon.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ options:
default: 1_min
services:
- mon
- name: mon_down_uptime_grace
type: secs
level: advanced
desc: Period in seconds that the cluster may have a mon down after this (leader) monitor comes up.
default: 1_min
services:
- mon
flags:
- runtime
- name: mon_mgr_beacon_grace
type: secs
level: advanced
Expand Down
9 changes: 7 additions & 2 deletions src/mon/HealthMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -803,9 +803,14 @@ void HealthMonitor::check_for_mon_down(health_check_map_t *checks)
{
int max = mon.monmap->size();
int actual = mon.get_quorum().size();
const auto now = ceph::real_clock::now();
const auto rcnow = ceph::real_clock::now();
const auto created = mon.monmap->created.to_real_time();
const auto mcnow = ceph::coarse_mono_clock::now();
const auto starttime = mon.get_starttime();

if (actual < max &&
now > mon.monmap->created.to_real_time() + g_conf().get_val<std::chrono::seconds>("mon_down_mkfs_grace")) {
(rcnow - created) > g_conf().get_val<std::chrono::seconds>("mon_down_mkfs_grace") &&
(mcnow - starttime) > g_conf().get_val<std::chrono::seconds>("mon_down_uptime_grace")) {
ostringstream ss;
ss << (max-actual) << "/" << max << " mons down, quorum "
<< mon.get_quorum_names();
Expand Down
1 change: 1 addition & 0 deletions src/mon/Monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2672,6 +2672,7 @@ void Monitor::get_mon_status(Formatter *f)
f->dump_int("rank", rank);
f->dump_string("state", get_state_name());
f->dump_int("election_epoch", get_epoch());
f->dump_int("uptime", get_uptime().count());

f->open_array_section("quorum");
for (set<int>::iterator p = quorum.begin(); p != quorum.end(); ++p) {
Expand Down
12 changes: 12 additions & 0 deletions src/mon/Monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,18 @@ class Monitor : public Dispatcher,
}

bool is_keyring_required();

public:
/// Return the coarse monotonic-clock timestamp stored in `starttime`.
/// `starttime` is a const member, so the value never changes for the
/// lifetime of this Monitor object.
ceph::coarse_mono_time get_starttime() const {
return starttime;
}
/// Return how long this monitor has been up, in milliseconds.
///
/// The value is the difference between the current coarse monotonic clock
/// reading and `starttime`, truncated to whole milliseconds.
std::chrono::milliseconds get_uptime() const {
  const auto elapsed = ceph::coarse_mono_clock::now() - starttime;
  return std::chrono::duration_cast<std::chrono::milliseconds>(elapsed);
}

private:
ceph::coarse_mono_time const starttime = coarse_mono_clock::now();
};

#define CEPH_MON_FEATURE_INCOMPAT_BASE CompatSet::Feature (1, "initial feature set (~v.18)")
Expand Down

0 comments on commit 2716b6e

Please sign in to comment.