Skip to content

Commit

Permalink
msg: add min delay packets support for mons
Browse files Browse the repository at this point in the history
When the network is a bottleneck (for example, switch QoS limiting traffic to only 50 Mbps),
the mon cluster may get stuck electing for a long time and hold elections again and again,
which causes many problems; for instance, it becomes hard to find where the problem is.
Even worse, it may lead to an IO hang when an OSD can't serve IO.
So it's better to always keep the mon cluster stable.

Signed-off-by: Song Shun <[email protected]>
  • Loading branch information
shun-s authored and tchaikov committed Aug 6, 2022
1 parent e460893 commit 13d675b
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 1 deletion.
8 changes: 8 additions & 0 deletions src/common/options/mon.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -1321,3 +1321,11 @@ options:
services:
- mon
with_legacy: true
# Read by AsyncConnection: when true and both the local entity and the peer
# are mons, the connection's socket priority is set to
# SOCKET_PRIORITY_MIN_DELAY instead of the messenger's default priority.
- name: mon_use_min_delay_socket
type: bool
level: advanced
default: false
desc: priority packets between mons
with_legacy: true
see_also:
- osd_heartbeat_use_min_delay_socket
14 changes: 13 additions & 1 deletion src/msg/async/AsyncConnection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,12 @@ void AsyncConnection::process() {

SocketOptions opts;
opts.priority = async_msgr->get_socket_priority();
if (async_msgr->cct->_conf->mon_use_min_delay_socket) {
if (async_msgr->get_mytype() == CEPH_ENTITY_TYPE_MON &&
peer_is_mon()) {
opts.priority = SOCKET_PRIORITY_MIN_DELAY;
}
}
opts.connect_bind_addr = msgr->get_myaddrs().front();
ssize_t r = worker->connect(target_addr, opts, &cs);
if (r < 0) {
Expand Down Expand Up @@ -451,7 +457,13 @@ void AsyncConnection::process() {
case STATE_ACCEPTING: {
center->create_file_event(cs.fd(), EVENT_READABLE, read_handler);
state = STATE_CONNECTION_ESTABLISHED;

if (async_msgr->cct->_conf->mon_use_min_delay_socket) {
if (async_msgr->get_mytype() == CEPH_ENTITY_TYPE_MON &&
peer_is_mon()) {
cs.set_priority(cs.fd(), SOCKET_PRIORITY_MIN_DELAY,
target_addr.get_family());
}
}
break;
}

Expand Down
3 changes: 3 additions & 0 deletions src/msg/async/PosixStack.cc
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,9 @@ class PosixConnectedSocketImpl final : public ConnectedSocketImpl {
// Close the descriptor held in _fd via compat_closesocket(); the call's
// result is not inspected here.
void close() override {
compat_closesocket(_fd);
}
// Forward a socket-priority request to handler.set_priority().
// Note that the descriptor acted on is the caller-supplied `sd`, not this
// object's own _fd. The AsyncConnection caller passes cs.fd() for `sd` and
// target_addr.get_family() for `domain`.
void set_priority(int sd, int prio, int domain) override {
handler.set_priority(sd, prio, domain);
}
// Return the descriptor stored in _fd.
int fd() const override {
return _fd;
}
Expand Down
5 changes: 5 additions & 0 deletions src/msg/async/Stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class ConnectedSocketImpl {
virtual void shutdown() = 0;
virtual void close() = 0;
virtual int fd() const = 0;
// Set priority `prio` on descriptor `sd`; `domain` is supplied by the caller
// (AsyncConnection passes target_addr.get_family()). Pure virtual: every
// concrete ConnectedSocketImpl must provide an implementation.
virtual void set_priority(int sd, int prio, int domain) = 0;
};

class ConnectedSocket;
Expand Down Expand Up @@ -123,6 +124,10 @@ class ConnectedSocket {
return _csi->fd();
}

// Delegate to the wrapped implementation's set_priority(). _csi is
// dereferenced without a null check, so this must only be called on a
// ConnectedSocket that holds an implementation.
void set_priority(int sd, int prio, int domain) {
_csi->set_priority(sd, prio, domain);
}

// True when this ConnectedSocket currently wraps an implementation object,
// i.e. _csi holds a non-null pointer.
explicit operator bool() const {
  return _csi.get() != nullptr;
}
Expand Down
1 change: 1 addition & 0 deletions src/msg/async/dpdk/DPDKStack.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class DPDKServerSocketImpl : public ServerSocketImpl {
// Return the descriptor reported by the underlying _listener.
virtual int fd() const override {
return _listener.fd();
}
// Intentionally a no-op: this DPDK implementation does nothing with the
// requested socket priority.
// NOTE(review): the Stack.h hunk in this change declares set_priority() on
// ConnectedSocketImpl, while this hunk appears to sit at the end of
// DPDKServerSocketImpl (a ServerSocketImpl). Confirm that `override` has a
// matching base-class virtual here, and that the DPDK connected-socket class
// also implements the new pure virtual.
virtual void set_priority(int sd, int prio, int domain) override {}
};

// NativeConnectedSocketImpl
Expand Down
5 changes: 5 additions & 0 deletions src/msg/async/rdma/RDMAConnectedSocketImpl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,11 @@ void RDMAConnectedSocketImpl::close()
active = false;
}

void RDMAConnectedSocketImpl::set_priority(int sd, int prio, int domain) {
ceph::NetHandler net(cct);
net.set_priority(sd, prio, domain);
}

void RDMAConnectedSocketImpl::fault()
{
ldout(cct, 1) << __func__ << " tcp fd " << tcp_fd << dendl;
Expand Down
1 change: 1 addition & 0 deletions src/msg/async/rdma/RDMAStack.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class RDMAConnectedSocketImpl : public ConnectedSocketImpl {
virtual void shutdown() override;
virtual void close() override;
// Return notify_fd as this socket's descriptor.
virtual int fd() const override { return notify_fd; }
// Set priority `prio` on descriptor `sd`; defined in
// RDMAConnectedSocketImpl.cc, where it forwards to ceph::NetHandler.
virtual void set_priority(int sd, int prio, int domain) override;
void fault();
// Return the string form of the queue pair's current state, obtained from
// Infiniband::qp_state_string(qp->get_state()).
const char* get_qp_state() { return Infiniband::qp_state_string(qp->get_state()); }
// Return the stored peer_qpn value.
uint32_t get_peer_qpn () const { return peer_qpn; }
Expand Down

0 comments on commit 13d675b

Please sign in to comment.