417 lines
15 KiB
C++
417 lines
15 KiB
C++
// /db/repl/rs.h
|
|
|
|
/**
|
|
* Copyright (C) 2008 10gen Inc.
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3,
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Affero General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "../../util/concurrency/list.h"
|
|
#include "../../util/concurrency/value.h"
|
|
#include "../../util/concurrency/msg.h"
|
|
#include "../../util/hostandport.h"
|
|
#include "../commands.h"
|
|
#include "rs_exception.h"
|
|
#include "rs_optime.h"
|
|
#include "rs_member.h"
|
|
#include "rs_config.h"
|
|
|
|
namespace mongo {
|
|
|
|
struct HowToFixUp;
|
|
struct Target;
|
|
class DBClientConnection;
|
|
class ReplSetImpl;
|
|
class OplogReader;
|
|
extern bool replSet; // true if using repl sets
|
|
extern class ReplSet *theReplSet; // null until initialized
|
|
extern Tee *rsLog;
|
|
|
|
/* member of a replica set */
|
|
class Member : public List1<Member>::Base {
|
|
public:
|
|
Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self);
|
|
string fullName() const { return h().toString(); }
|
|
const ReplSetConfig::MemberCfg& config() const { return *_config; }
|
|
const HeartbeatInfo& hbinfo() const { return _hbinfo; }
|
|
string lhb() { return _hbinfo.lastHeartbeatMsg; }
|
|
MemberState state() const { return _hbinfo.hbstate; }
|
|
const HostAndPort& h() const { return _h; }
|
|
unsigned id() const { return _hbinfo.id(); }
|
|
bool potentiallyHot() const { return _config->potentiallyHot(); } // not arbiter, not priority 0
|
|
|
|
void summarizeAsHtml(stringstream& s) const;
|
|
friend class ReplSetImpl;
|
|
private:
|
|
const ReplSetConfig::MemberCfg *_config; /* todo: when this changes??? */
|
|
HostAndPort _h;
|
|
HeartbeatInfo _hbinfo;
|
|
};
|
|
|
|
class Manager : public task::Server {
|
|
ReplSetImpl *rs;
|
|
bool busyWithElectSelf;
|
|
int _primary;
|
|
const Member* findOtherPrimary();
|
|
void noteARemoteIsPrimary(const Member *);
|
|
virtual void starting();
|
|
public:
|
|
Manager(ReplSetImpl *rs);
|
|
~Manager();
|
|
void msgReceivedNewConfig(BSONObj);
|
|
void msgCheckNewState();
|
|
};
|
|
|
|
struct Target;
|
|
|
|
class Consensus {
|
|
ReplSetImpl &rs;
|
|
struct LastYea {
|
|
LastYea() : when(0), who(0xffffffff) { }
|
|
time_t when;
|
|
unsigned who;
|
|
};
|
|
Atomic<LastYea> ly;
|
|
unsigned yea(unsigned memberId); // throws VoteException
|
|
void electionFailed(unsigned meid);
|
|
void _electSelf();
|
|
bool weAreFreshest(bool& allUp, int& nTies);
|
|
bool sleptLast; // slept last elect() pass
|
|
public:
|
|
Consensus(ReplSetImpl *t) : rs(*t) {
|
|
sleptLast = false;
|
|
steppedDown = 0;
|
|
}
|
|
|
|
/* if we've stepped down, this is when we are allowed to try to elect ourself again.
|
|
todo: handle possible weirdnesses at clock skews etc.
|
|
*/
|
|
time_t steppedDown;
|
|
|
|
int totalVotes() const;
|
|
bool aMajoritySeemsToBeUp() const;
|
|
void electSelf();
|
|
void electCmdReceived(BSONObj, BSONObjBuilder*);
|
|
void multiCommand(BSONObj cmd, list<Target>& L);
|
|
};
|
|
|
|
/** most operations on a ReplSet object should be done while locked. that logic implemented here. */
|
|
class RSBase : boost::noncopyable {
|
|
public:
|
|
const unsigned magic;
|
|
void assertValid() { assert( magic == 0x12345677 ); }
|
|
private:
|
|
mutex m;
|
|
int _locked;
|
|
ThreadLocalValue<bool> _lockedByMe;
|
|
protected:
|
|
RSBase() : magic(0x12345677), m("RSBase"), _locked(0) { }
|
|
~RSBase() {
|
|
log() << "~RSBase should never be called?" << rsLog;
|
|
assert(false);
|
|
}
|
|
|
|
class lock {
|
|
RSBase& rsbase;
|
|
auto_ptr<scoped_lock> sl;
|
|
public:
|
|
lock(RSBase* b) : rsbase(*b) {
|
|
if( rsbase._lockedByMe.get() )
|
|
return; // recursive is ok...
|
|
|
|
sl.reset( new scoped_lock(rsbase.m) );
|
|
DEV assert(rsbase._locked == 0);
|
|
rsbase._locked++;
|
|
rsbase._lockedByMe.set(true);
|
|
}
|
|
~lock() {
|
|
if( sl.get() ) {
|
|
assert( rsbase._lockedByMe.get() );
|
|
DEV assert(rsbase._locked == 1);
|
|
rsbase._lockedByMe.set(false);
|
|
rsbase._locked--;
|
|
}
|
|
}
|
|
};
|
|
|
|
public:
|
|
/* for asserts */
|
|
bool locked() const { return _locked != 0; }
|
|
|
|
/* if true, is locked, and was locked by this thread. note if false, it could be in the lock or not for another
|
|
just for asserts & such so we can make the contracts clear on who locks what when.
|
|
we don't use these locks that frequently, so the little bit of overhead is fine.
|
|
*/
|
|
bool lockedByMe() { return _lockedByMe.get(); }
|
|
};
|
|
|
|
// forward declaration: ReplSetImpl (below) keeps a set of pointers to these tasks
class ReplSetHealthPollTask;
|
|
|
|
/* safe container for our state that keeps member pointer and state variables always aligned */
|
|
class StateBox : boost::noncopyable {
|
|
public:
|
|
struct SP { // SP is like pair<MemberState,const Member *> but nicer
|
|
SP() : state(MemberState::RS_STARTUP), primary(0) { }
|
|
MemberState state;
|
|
const Member *primary;
|
|
};
|
|
const SP get() {
|
|
scoped_lock lk(m);
|
|
return sp;
|
|
}
|
|
MemberState getState() const { return sp.state; }
|
|
const Member* getPrimary() const { return sp.primary; }
|
|
void change(MemberState s, const Member *self) {
|
|
scoped_lock lk(m);
|
|
sp.state = s;
|
|
if( s.primary() ) {
|
|
sp.primary = self;
|
|
}
|
|
else {
|
|
if( self == sp.primary )
|
|
sp.primary = 0;
|
|
}
|
|
}
|
|
void set(MemberState s, const Member *p) {
|
|
scoped_lock lk(m);
|
|
sp.state = s; sp.primary = p;
|
|
}
|
|
void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); }
|
|
void setOtherPrimary(const Member *mem) {
|
|
scoped_lock lk(m);
|
|
assert( !sp.state.primary() );
|
|
sp.primary = mem;
|
|
}
|
|
StateBox() : m("StateBox") { }
|
|
private:
|
|
mutex m;
|
|
SP sp;
|
|
};
|
|
|
|
/* Parses cfgString; results are returned through the setname, seeds and seedSet
   reference parameters. Called by the ReplSetCmdline constructor below. */
void parseReplsetCmdLine(string cfgString, string& setname, vector<HostAndPort>& seeds, set<HostAndPort>& seedSet );
|
|
|
|
/** Parameter given to the --replSet command line option (parsed).
|
|
Syntax is "<setname>/<seedhost1>,<seedhost2>"
|
|
where setname is a name and seedhost is "<host>[:<port>]" */
|
|
class ReplSetCmdline {
|
|
public:
|
|
ReplSetCmdline(string cfgString) { parseReplsetCmdLine(cfgString, setname, seeds, seedSet); }
|
|
string setname;
|
|
vector<HostAndPort> seeds;
|
|
set<HostAndPort> seedSet;
|
|
};
|
|
|
|
/* information about the entire repl set, such as the various servers in the set, and their state */
|
|
/* note: We currently do not free mem when the set goes away - it is assumed the replset is a
|
|
singleton and long lived.
|
|
*/
|
|
class ReplSetImpl : protected RSBase {
|
|
public:
|
|
/** info on our state if the replset isn't yet "up". for example, if we are pre-initiation. */
|
|
enum StartupStatus {
|
|
PRESTART=0, LOADINGCONFIG=1, BADCONFIG=2, EMPTYCONFIG=3,
|
|
EMPTYUNREACHABLE=4, STARTED=5, SOON=6
|
|
};
|
|
static StartupStatus startupStatus;
|
|
static string startupStatusMsg;
|
|
static string stateAsStr(MemberState state);
|
|
static string stateAsHtml(MemberState state);
|
|
|
|
/* todo thread */
|
|
void msgUpdateHBInfo(HeartbeatInfo);
|
|
|
|
StateBox box;
|
|
|
|
OpTime lastOpTimeWritten;
|
|
long long lastH; // hash we use to make sure we are reading the right flow of ops and aren't on an out-of-date "fork"
|
|
private:
|
|
set<ReplSetHealthPollTask*> healthTasks;
|
|
void endOldHealthTasks();
|
|
void startHealthTaskFor(Member *m);
|
|
|
|
private:
|
|
Consensus elect;
|
|
bool ok() const { return !box.getState().fatal(); }
|
|
|
|
void relinquish();
|
|
void forgetPrimary();
|
|
|
|
protected:
|
|
bool _stepDown();
|
|
private:
|
|
void assumePrimary();
|
|
void loadLastOpTimeWritten();
|
|
void changeState(MemberState s);
|
|
|
|
protected:
|
|
// "heartbeat message"
|
|
// sent in requestHeartbeat respond in field "hbm"
|
|
char _hbmsg[256]; // we change this unocked, thus not a c++ string
|
|
public:
|
|
void sethbmsg(string s, int logLevel = 0) {
|
|
unsigned sz = s.size();
|
|
if( sz >= 256 )
|
|
memcpy(_hbmsg, s.c_str(), 255);
|
|
else {
|
|
_hbmsg[sz] = 0;
|
|
memcpy(_hbmsg, s.c_str(), sz);
|
|
}
|
|
log(logLevel) << "replSet " << s << rsLog;
|
|
}
|
|
protected:
|
|
bool initFromConfig(ReplSetConfig& c); // true if ok; throws if config really bad; false if config doesn't include self
|
|
void _fillIsMaster(BSONObjBuilder&);
|
|
void _fillIsMasterHost(const Member*, vector<string>&, vector<string>&, vector<string>&);
|
|
const ReplSetConfig& config() { return *_cfg; }
|
|
string name() const { return _name; } /* @return replica set's logical name */
|
|
MemberState state() const { return box.getState(); }
|
|
void _fatal();
|
|
void _getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const;
|
|
void _summarizeAsHtml(stringstream&) const;
|
|
void _summarizeStatus(BSONObjBuilder&) const; // for replSetGetStatus command
|
|
|
|
/* throws exception if a problem initializing. */
|
|
ReplSetImpl(ReplSetCmdline&);
|
|
|
|
/* call afer constructing to start - returns fairly quickly after launching its threads */
|
|
void _go();
|
|
|
|
private:
|
|
string _name;
|
|
const vector<HostAndPort> *_seeds;
|
|
ReplSetConfig *_cfg;
|
|
|
|
/** load our configuration from admin.replset. try seed machines too.
|
|
@return true if ok; throws if config really bad; false if config doesn't include self
|
|
*/
|
|
bool _loadConfigFinish(vector<ReplSetConfig>& v);
|
|
void loadConfig();
|
|
|
|
list<HostAndPort> memberHostnames() const;
|
|
const ReplSetConfig::MemberCfg& myConfig() const { return _self->config(); }
|
|
bool iAmArbiterOnly() const { return myConfig().arbiterOnly; }
|
|
bool iAmPotentiallyHot() const { return myConfig().potentiallyHot(); }
|
|
protected:
|
|
Member *_self;
|
|
private:
|
|
List1<Member> _members; /* all members of the set EXCEPT self. */
|
|
|
|
public:
|
|
unsigned selfId() const { return _self->id(); }
|
|
Manager *mgr;
|
|
|
|
private:
|
|
Member* head() const { return _members.head(); }
|
|
Member* findById(unsigned id) const;
|
|
void _getTargets(list<Target>&, int &configVersion);
|
|
void getTargets(list<Target>&, int &configVersion);
|
|
void startThreads();
|
|
friend class FeedbackThread;
|
|
friend class CmdReplSetElect;
|
|
friend class Member;
|
|
friend class Manager;
|
|
friend class Consensus;
|
|
|
|
private:
|
|
/* pulling data from primary related - see rs_sync.cpp */
|
|
void _syncDoInitialSync();
|
|
void syncDoInitialSync();
|
|
void _syncThread();
|
|
void syncTail();
|
|
void syncApply(const BSONObj &o);
|
|
void syncRollback(OplogReader& r);
|
|
void syncFixUp(HowToFixUp& h, DBClientConnection*);
|
|
public:
|
|
void syncThread();
|
|
};
|
|
|
|
class ReplSet : public ReplSetImpl {
|
|
public:
|
|
ReplSet(ReplSetCmdline& replSetCmdline) : ReplSetImpl(replSetCmdline) { }
|
|
|
|
bool stepDown() { return _stepDown(); }
|
|
|
|
string selfFullName() {
|
|
lock lk(this);
|
|
return _self->fullName();
|
|
}
|
|
|
|
/* call after constructing to start - returns fairly quickly after la[unching its threads */
|
|
void go() { _go(); }
|
|
void fatal() { _fatal(); }
|
|
bool isMaster(const char *client);
|
|
MemberState state() const { return ReplSetImpl::state(); }
|
|
string name() const { return ReplSetImpl::name(); }
|
|
const ReplSetConfig& config() { return ReplSetImpl::config(); }
|
|
void getOplogDiagsAsHtml(unsigned server_id, stringstream& ss) const { _getOplogDiagsAsHtml(server_id,ss); }
|
|
void summarizeAsHtml(stringstream& ss) const { _summarizeAsHtml(ss); }
|
|
void summarizeStatus(BSONObjBuilder& b) const { _summarizeStatus(b); }
|
|
void fillIsMaster(BSONObjBuilder& b) { _fillIsMaster(b); }
|
|
|
|
/* we have a new config (reconfig) - apply it.
|
|
@param comment write a no-op comment to the oplog about it. only makes sense if one is primary and initiating the reconf.
|
|
*/
|
|
void haveNewConfig(ReplSetConfig& c, bool comment);
|
|
|
|
/* if we delete old configs, this needs to assure locking. currently we don't so it is ok. */
|
|
const ReplSetConfig& getConfig() { return config(); }
|
|
|
|
bool lockedByMe() { return RSBase::lockedByMe(); }
|
|
|
|
// heartbeat msg to send to others; descriptive diagnostic info
|
|
string hbmsg() const { return _hbmsg; }
|
|
};
|
|
|
|
/** base class for repl set commands. checks basic things such as in rs mode before the command
|
|
does its real work
|
|
*/
|
|
class ReplSetCommand : public Command {
|
|
protected:
|
|
ReplSetCommand(const char * s, bool show=false) : Command(s) { }
|
|
virtual bool slaveOk() const { return true; }
|
|
virtual bool adminOnly() const { return true; }
|
|
virtual bool logTheOp() { return false; }
|
|
virtual LockType locktype() const { return NONE; }
|
|
virtual void help( stringstream &help ) const { help << "internal"; }
|
|
bool check(string& errmsg, BSONObjBuilder& result) {
|
|
if( !replSet ) {
|
|
errmsg = "not running with --replSet";
|
|
return false;
|
|
}
|
|
if( theReplSet == 0 ) {
|
|
result.append("startupStatus", ReplSet::startupStatus);
|
|
errmsg = ReplSet::startupStatusMsg.empty() ? "replset unknown error 2" : ReplSet::startupStatusMsg;
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
};
|
|
|
|
/** inlines ----------------- */
|
|
|
|
/* Stores the address and config pointer and builds the heartbeat info from ord.
   For our own entry (self == true) the health value starts out at 1.0. */
inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) :
    _config(c), _h(h), _hbinfo(ord) {
    if( !self )
        return;
    _hbinfo.health = 1.0;
}
|
|
|
|
inline bool ReplSet::isMaster(const char *client) {
|
|
/* todo replset */
|
|
return box.getState().primary();
|
|
}
|
|
|
|
}
|