diff --git a/src/Makefile.am b/src/Makefile.am index 5c1f1bab38d21..172d5533ea9b1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1291,6 +1291,7 @@ noinst_HEADERS = \ messages/MMonGetVersion.h\ messages/MMonGetVersionReply.h\ messages/MMonGlobalID.h\ + messages/MMonJoin.h\ messages/MMonMap.h\ messages/MMonObserve.h\ messages/MMonObserveNotify.h\ diff --git a/src/messages/MMonJoin.h b/src/messages/MMonJoin.h new file mode 100644 index 0000000000000..c2dc8e8c058ba --- /dev/null +++ b/src/messages/MMonJoin.h @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_MMONJOIN_H +#define CEPH_MMONJOIN_H + +#include "messages/PaxosServiceMessage.h" + +#include +using std::vector; + +class MMonJoin : public PaxosServiceMessage { + public: + ceph_fsid_t fsid; + string name; + entity_addr_t addr; + + MMonJoin() : PaxosServiceMessage(MSG_MON_JOIN, 0) {} + MMonJoin(ceph_fsid_t &f, string n, entity_addr_t a) + : PaxosServiceMessage(MSG_MON_JOIN, 0), + fsid(f), name(n), addr(a) + { } + +private: + ~MMonJoin() {} + +public: + const char *get_type_name() { return "mon_join"; } + void print(ostream& o) { + o << "mon_join(" << name << " " << addr << ")"; + } + + void encode_payload(CephContext *cct) { + paxos_encode(); + ::encode(fsid, payload); + ::encode(name, payload); + ::encode(addr, payload); + } + void decode_payload(CephContext *cct) { + bufferlist::iterator p = payload.begin(); + paxos_decode(p); + ::decode(fsid, p); + ::decode(name, p); + ::decode(addr, p); + } +}; + +#endif diff --git a/src/mon/Monitor.cc b/src/mon/Monitor.cc index 4d1692b8fcc34..39c7957556f3e 100644 --- a/src/mon/Monitor.cc +++ b/src/mon/Monitor.cc @@ -31,6 +31,7 @@ #include "messages/MMonObserve.h" #include "messages/MMonObserveNotify.h" #include "messages/MMonProbe.h" +#include "messages/MMonJoin.h" #include "messages/MMonPaxos.h" #include "messages/MRoute.h" #include "messages/MForward.h" @@ -402,7 +403,14 @@ void Monitor::handle_probe_reply(MMonProbe *m) } } if (ok) { - start_election(); + if (monmap->contains(name)) { + // i'm part of the cluster; just initiate a new election + start_election(); + } else { + dout(10) << " ready to join, but i'm not in the monmap, trying to join" << dendl; + messenger->send_message(new MMonJoin(monmap->fsid, name, messenger->get_myaddr()), + monmap->get_inst(*m->quorum.begin())); + } } else { slurp_source = m->get_source_inst(); slurp_versions = m->paxos_versions; @@ -1234,6 +1242,11 @@ bool Monitor::_ms_dispatch(Message *m) paxos_service[PAXOS_LOG]->dispatch((PaxosServiceMessage*)m); break; + // monmap + case MSG_MON_JOIN: + paxos_service[PAXOS_MONMAP]->dispatch((PaxosServiceMessage*)m); + break; + // paxos case MSG_MON_PAXOS: { diff --git a/src/mon/MonmapMonitor.cc b/src/mon/MonmapMonitor.cc index 2ca54ca8ab781..2dbf429e95a83 100644 --- a/src/mon/MonmapMonitor.cc +++ b/src/mon/MonmapMonitor.cc @@ -17,6 +17,8 @@ #include "MonitorStore.h" #include "messages/MMonCommand.h" +#include "messages/MMonJoin.h" + #include "common/Timer.h" #include "common/ceph_argparse.h" #include "mon/MDSMonitor.h" @@ -123,6 +125,8 @@ bool MonmapMonitor::preprocess_query(PaxosServiceMessage *m) // READs case MSG_MON_COMMAND: return preprocess_command((MMonCommand*)m); + case MSG_MON_JOIN: + return preprocess_join((MMonJoin*)m); default: assert(0); m->put(); @@ -263,6 +267,8 @@ bool MonmapMonitor::prepare_update(PaxosServiceMessage *m) switch (m->get_type()) { case MSG_MON_COMMAND: return prepare_command((MMonCommand*)m); + case MSG_MON_JOIN: + return prepare_join((MMonJoin*)m); default: assert(0); m->put(); @@ -329,6 +335,31 @@ bool MonmapMonitor::prepare_command(MMonCommand *m) return false; } +bool MonmapMonitor::preprocess_join(MMonJoin *join) +{ + dout(10) << "preprocess_join " << join->name << " at " << join->addr << dendl; + + if (pending_map.contains(join->name)) { + dout(10) << " already have " << join->name << dendl; + join->put(); + return true; + } + if (pending_map.contains(join->addr)) { + dout(10) << " already have " << join->addr << dendl; + join->put(); + return true; + } + return false; +} +bool MonmapMonitor::prepare_join(MMonJoin *join) +{ + dout(0) << "adding " << join->name << " at " << join->addr << " to monitor cluster" << dendl; + pending_map.add(join->name, join->addr); + pending_map.last_changed = ceph_clock_now(g_ceph_context); + join->put(); + return true; +} + bool MonmapMonitor::should_propose(double& delay) { delay = 0.0; diff --git a/src/mon/MonmapMonitor.h b/src/mon/MonmapMonitor.h index 152bc56ebb105..0df458cae415e 100644 --- a/src/mon/MonmapMonitor.h +++ b/src/mon/MonmapMonitor.h @@ -33,6 +33,7 @@ using namespace std; class MMonGetMap; class MMonMap; class MMonCommand; +class MMonJoin; class MonmapMonitor : public PaxosService { public: @@ -54,6 +55,9 @@ class MonmapMonitor : public PaxosService { bool preprocess_query(PaxosServiceMessage *m); bool prepare_update(PaxosServiceMessage *m); + bool preprocess_join(MMonJoin *m); + bool prepare_join(MMonJoin *m); + bool preprocess_command(MMonCommand *m); bool prepare_command(MMonCommand *m); diff --git a/src/msg/Message.cc b/src/msg/Message.cc index 31a25a0b28cda..284f7529a8102 100644 --- a/src/msg/Message.cc +++ b/src/msg/Message.cc @@ -30,6 +30,7 @@ using namespace std; #include "messages/MMonObserveNotify.h" #include "messages/MMonProbe.h" +#include "messages/MMonJoin.h" #include "messages/MMonElection.h" #include "messages/MLog.h" @@ -232,6 +233,9 @@ Message *decode_message(CephContext *cct, ceph_msg_header& header, ceph_msg_foot case MSG_MON_PROBE: m = new MMonProbe; break; + case MSG_MON_JOIN: + m = new MMonJoin; + break; case MSG_MON_ELECTION: m = new MMonElection; break; diff --git a/src/msg/Message.h b/src/msg/Message.h index 60c122e20fad3..389220e98cfa0 100644 --- a/src/msg/Message.h +++ b/src/msg/Message.h @@ -23,6 +23,7 @@ #define MSG_MON_ELECTION 65 #define MSG_MON_PAXOS 66 #define MSG_MON_PROBE 67 +#define MSG_MON_JOIN 68 /* monitor <-> mon admin tool */ #define MSG_MON_COMMAND 50