Skip to content

Commit

Permalink
Merge pull request ceph#18847 from jcsp/wip-mgr-mon-upness
Browse files Browse the repository at this point in the history
mgr/dashboard: show warnings if data is out of date or mons are down

Reviewed-by: Kefu Chai <[email protected]>
Reviewed-by: Jan Fajerski <[email protected]>
  • Loading branch information
tchaikov authored Nov 18, 2017
2 parents e0a7ef5 + cbe2507 commit d5bb6e4
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 23 deletions.
16 changes: 16 additions & 0 deletions doc/mgr/plugins.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,22 @@ function. This will result in a circular locking exception.
.. automethod:: MgrModule.get_metadata
.. automethod:: MgrModule.get_counter

What if the mons are down?
--------------------------

The manager daemon gets much of its state (such as the cluster maps)
from the monitor. If the monitor cluster is inaccessible, whichever
manager was active will continue to run, with the latest state it saw
still in memory.

However, if you are creating a module that shows the cluster state
to the user, you probably do not want to mislead them by presenting
that out-of-date state as if it were current.

To check if the manager daemon currently has a connection to
the monitor cluster, use this function:

.. automethod:: MgrModule.have_mon_connection

Sending commands
----------------
Expand Down
14 changes: 14 additions & 0 deletions src/mgr/BaseMgrModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,16 @@ ceph_set_uri(BaseMgrModule *self, PyObject *args)
Py_RETURN_NONE;
}

/**
 * Python binding: tell the calling module whether this mgr daemon's
 * monitor client currently reports itself as connected.
 *
 * @param self  the module object; its py_modules handle supplies the
 *              monitor client that is queried
 * @param args  not read by this function
 * @return a new reference to Py_True when is_connected() is true,
 *         otherwise a new reference to Py_False
 */
static PyObject*
ceph_have_mon_connection(BaseMgrModule *self, PyObject *args)
{
  const bool connected = self->py_modules->get_monc().is_connected();
  if (!connected) {
    Py_RETURN_FALSE;
  }
  Py_RETURN_TRUE;
}


PyMethodDef BaseMgrModule_methods[] = {
{"_ceph_get", (PyCFunction)ceph_state_get, METH_VARARGS,
Expand Down Expand Up @@ -555,6 +565,10 @@ PyMethodDef BaseMgrModule_methods[] = {
{"_ceph_set_uri", (PyCFunction)ceph_set_uri, METH_VARARGS,
"Advertize a service URI served by this module"},

{"_ceph_have_mon_connection", (PyCFunction)ceph_have_mon_connection,
METH_NOARGS, "Find out whether this mgr daemon currently has "
"a connection to a monitor"},

{NULL, NULL, 0, NULL}
};

Expand Down
2 changes: 2 additions & 0 deletions src/mon/MonClient.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ class MonClient : public Dispatcher {
int authenticate(double timeout=0.0);
bool is_authenticated() const {return authenticated;}

/// Returns true while active_con is non-null.
/// NOTE(review): presumably active_con is the currently active monitor
/// connection and is reset when that connection is lost -- confirm.
bool is_connected() const { return active_con != nullptr; }

/**
* Try to flush as many log messages as we can in a single
* message. Use this before shutting down to transmit your
Expand Down
44 changes: 22 additions & 22 deletions src/pybind/mgr/dashboard/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@
var refresh_interval = 5000;

// Poll the server for fresh top-level data and merge it into the model
// that the page bindings observe.
//
// Exactly one request is issued per cycle. On success the response is
// merged into toplevel_data and the next poll is scheduled; on failure
// the "#mgr-load-banner" element is revealed and no further poll is
// scheduled.
var refresh = function() {
  $.get("{{ url_prefix }}/toplevel_data")
    .done(function(data) {
      _.extend(toplevel_data, data);
      // Re-arm only after a successful response so that polls never
      // pile up behind a slow or failing server.
      setTimeout(refresh, refresh_interval);
    })
    .fail(function() {
      $("#mgr-load-banner").show();
    });
};

rivets.configure({
Expand All @@ -60,14 +64,6 @@
}
}

// Rivets formatter: yields true when the given status string compares
// equal to "HEALTH_OK", and false for anything else.
rivets.formatters.health_ok = function(status_str) {
  return status_str == "HEALTH_OK";
};

var truncate = function(n, max_width) {
var stringized = n.toString();
var parts = stringized.split(".");
Expand Down Expand Up @@ -150,8 +146,12 @@
return (isNaN(value) || value == 0)
};

rivets.bind($("#health"), toplevel_data);
// Bindings for the dynamic menu content
rivets.bind($("section.sidebar"), toplevel_data);

// Bindings for the danger header when there's no mon connection
rivets.bind($("#mon-conn-banner"), toplevel_data);

setTimeout(refresh, refresh_interval);
});
</script>
Expand Down Expand Up @@ -293,8 +293,16 @@
</head>

<body class="hold-transition sidebar-mini sidebar-collapse">
<div class="wrapper">

<div style="display:none;" id="mon-conn-banner" class="alert alert-danger" rv-hide="have_mon_connection">
No monitor connection from manager daemon {mgr_id}: data may be stale
</div>

<div style="display:none;" id="mgr-load-banner" class="alert alert-warning">
Failed to load data from server <a href="javascript:window.location.reload(true)">Reload</a>
</div>

<div class="wrapper">
<!-- Main Header -->
<header class="main-header">
<!-- Logo -->
Expand All @@ -317,14 +325,6 @@
<span class="sr-only">Toggle navigation</span>
</a>

<div id="health" style="font-size: 18px; padding: 12px 12px;">
<span rv-hide="health_status | health_ok" >
<span rv-style="health_status | health_color">
{health_status}
</span>
</span>
</div>

<!-- Navbar Right Menu -->
<div class="navbar-custom-menu">
<ul class="nav navbar-nav">
Expand Down
4 changes: 3 additions & 1 deletion src/pybind/mgr/dashboard/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,9 @@ def _toplevel_data(self):
'rbd_pools': rbd_pools,
'rbd_mirroring': rbd_mirroring,
'health_status': self._health_data()['status'],
'filesystems': filesystems
'filesystems': filesystems,
'mgr_id': global_instance().get_mgr_id(),
'have_mon_connection': global_instance().have_mon_connection()
}

class Root(EndPoint):
Expand Down
10 changes: 10 additions & 0 deletions src/pybind/mgr/mgr_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,3 +594,13 @@ def set_uri(self, uri):
:return: a string
"""
return self._ceph_set_uri(uri)

def have_mon_connection(self):
    """
    Report whether this ceph-mgr daemon currently holds an open
    connection to a monitor.

    When it does not, the information this daemon has about the
    cluster is likely to be out of date, and/or the monitor cluster
    may be down.

    :return: the value returned by ``_ceph_have_mon_connection``
    """
    connected = self._ceph_have_mon_connection()
    return connected

0 comments on commit d5bb6e4

Please sign in to comment.