Files
mongo/db/repl/rs.cpp

317 lines
11 KiB
C++
Raw Normal View History

2010-04-13 13:22:42 -04:00
/**
2010-04-14 17:25:03 -04:00
* Copyright (C) 2008 10gen Inc.
2010-04-13 13:22:42 -04:00
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2010-04-27 15:27:52 -04:00
#include "pch.h"
2010-04-13 13:22:42 -04:00
#include "../cmdline.h"
2010-04-18 13:05:57 -04:00
#include "../../util/sock.h"
2010-05-13 17:28:53 -04:00
#include "../client.h"
2010-06-04 15:37:37 -04:00
#include "../../client/dbclient.h"
#include "../dbhelpers.h"
2010-05-29 15:54:46 -04:00
#include "rs.h"
2010-04-13 13:22:42 -04:00
namespace mongo {
2010-04-21 21:19:37 -04:00
// Global replica-set flag, initially false. startReplSets() asserts that it is
// still false when no --replSet string was supplied on the command line.
bool replSet = false;
2010-04-13 17:38:15 -04:00
// The process-wide replica set object. Null until startReplSets() constructs it.
ReplSet *theReplSet = 0;
2010-06-01 16:25:47 -04:00
void ReplSetImpl::assumePrimary() {
2010-06-01 11:08:27 -04:00
writelock lk("admin."); // so we are synchronized with _logOp()
2010-05-24 17:11:47 -04:00
_myState = PRIMARY;
_currentPrimary = _self;
log() << "replSet self is now primary" << rsLog;
}
2010-06-01 16:25:47 -04:00
void ReplSetImpl::relinquish() {
2010-05-24 17:11:47 -04:00
if( state() == PRIMARY ) {
_myState = RECOVERING;
log() << "replSet info relinquished primary state" << rsLog;
}
else if( state() == STARTUP2 )
_myState = RECOVERING;
}
2010-06-01 16:25:47 -04:00
/** Store fresh heartbeat info on the member it belongs to.
 *
 *  @param h heartbeat info; matched against the member list by id(). If no
 *           member in _members has that id, the info is silently dropped.
 */
void ReplSetImpl::msgUpdateHBInfo(HeartbeatInfo h)
{
    // Walk the list until we either run off the end or hit the matching id.
    Member *target = _members.head();
    while( target && !( target->id() == h.id() ) )
        target = target->next();

    if( target )
        target->_hbinfo = h;
}
2010-06-01 16:25:47 -04:00
/** @return the host/port of this node followed by that of every member in
 *          _members, in list order.
 */
list<HostAndPort> ReplSetImpl::memberHostnames() const
{
    list<HostAndPort> hosts;

    // Self always comes first.
    hosts.push_back(_self->h());

    Member *cur = _members.head();
    while( cur ) {
        hosts.push_back(cur->h());
        cur = cur->next();
    }
    return hosts;
}
2010-06-01 16:25:47 -04:00
/** Append this node's replica set view to an isMaster-style reply.
 *
 *  Adds "ismaster", "secondary", a warning "msg", and a "hosts" array that
 *  holds this node at index 0 followed by every member for which hot() is true.
 *
 *  @param b builder receiving the fields
 */
void ReplSetImpl::_fillIsMaster(BSONObjBuilder& b)
{
    b.append("ismaster", isPrimary());
    b.append("secondary", isSecondary());
    b.append("msg", "replica sets not yet fully implemented. do not use yet.");

    BSONObjBuilder hosts;
    hosts.append("0", _self->h().toString());

    // Array keys are consecutive decimal strings; slot 0 is taken by self.
    int nextIdx = 1;
    for( Member *cur = _members.head(); cur; cur = cur->next() ) {
        if( !cur->hot() )
            continue;
        hosts.append(BSONObjBuilder::numStr(nextIdx).c_str(), cur->h().toString());
        nextIdx++;
    }
    b.appendArray("hosts", hosts.done());
}
2010-04-21 14:41:09 -04:00
/** @param cfgString <setname>/<seedhost1>,<seedhost2> */
2010-04-23 17:35:05 -04:00
/*
2010-04-21 16:43:51 -04:00
ReplSet::ReplSet(string cfgString) : fatal(false) {
2010-04-23 17:35:05 -04:00
}
*/
/** @param cfgString <setname>/<seedhost1>,<seedhost2> */
2010-06-01 16:25:47 -04:00
ReplSetImpl::ReplSetImpl(string cfgString) : elect(this),
2010-05-19 14:21:41 -04:00
_self(0),
mgr( new Manager(this) )
{
2010-06-28 17:29:15 -04:00
h = 0;
2010-05-07 15:35:16 -04:00
_myState = STARTUP;
2010-05-12 16:03:09 -04:00
_currentPrimary = 0;
2010-04-23 17:35:05 -04:00
2010-04-21 14:41:09 -04:00
const char *p = cfgString.c_str();
const char *slash = strchr(p, '/');
uassert(13093, "bad --replSet config string format is: <setname>/<seedhost1>,<seedhost2>[,...]", slash != 0 && p != slash);
_name = string(p, slash-p);
2010-05-11 15:58:44 -04:00
log() << "replSet startup " << cfgString << rsLog;
2010-04-14 17:25:03 -04:00
2010-05-07 17:49:24 -04:00
set<HostAndPort> seedSet;
2010-04-18 13:05:57 -04:00
vector<HostAndPort> *seeds = new vector<HostAndPort>;
2010-04-21 14:41:09 -04:00
p = slash + 1;
2010-04-14 17:25:03 -04:00
while( 1 ) {
2010-04-21 14:41:09 -04:00
const char *comma = strchr(p, ',');
if( comma == 0 ) comma = strchr(p,0);
2010-07-01 15:13:50 -04:00
if( p == comma )
break;
//uassert(13094, "bad --replSet config string", p != comma);
2010-04-21 14:41:09 -04:00
{
HostAndPort m;
try {
2010-05-14 14:45:54 -04:00
m = HostAndPort( string(p, comma-p) );
2010-04-21 14:41:09 -04:00
}
catch(...) {
uassert(13114, "bad --replSet seed hostname", false);
}
2010-05-07 17:49:24 -04:00
uassert(13096, "bad --replSet config string - dups?", seedSet.count(m) == 0 );
seedSet.insert(m);
2010-04-21 14:41:09 -04:00
uassert(13101, "can't use localhost in replset host list", !m.isLocalHost());
if( m.isSelf() )
2010-05-07 16:42:55 -04:00
log() << "replSet ignoring seed " << m.toString() << " (=self)" << rsLog;
2010-04-21 14:41:09 -04:00
else
seeds->push_back(m);
if( *comma == 0 )
break;
p = comma + 1;
2010-04-14 17:25:03 -04:00
}
}
2010-04-14 20:50:15 -04:00
2010-04-15 19:12:28 -04:00
_seeds = seeds;
2010-04-21 16:43:51 -04:00
//for( vector<HostAndPort>::iterator i = seeds->begin(); i != seeds->end(); i++ )
// addMemberIfMissing(*i);
2010-05-11 15:58:44 -04:00
log() << "replSet load config from various servers..." << rsLog;
2010-04-21 16:43:51 -04:00
loadConfig();
2010-05-07 17:49:24 -04:00
for( Member *m = head(); m; m = m->next() )
2010-05-20 12:40:22 -04:00
seedSet.erase(m->h());
2010-05-07 17:49:24 -04:00
for( set<HostAndPort>::iterator i = seedSet.begin(); i != seedSet.end(); i++ ) {
2010-05-24 13:38:53 -04:00
log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog;
2010-05-07 17:49:24 -04:00
}
2010-04-14 17:25:03 -04:00
}
2010-06-04 15:37:37 -04:00
// Forward declaration (defined outside this file); invoked as the last step of ReplSetImpl::_go().
void newReplUp();
void ReplSetImpl::loadLastOpTimeWritten() {
assert( lastOpTimeWritten.isNull() );
2010-06-04 15:37:37 -04:00
readlock lk(rsoplog);
BSONObj o;
if( Helpers::getLast(rsoplog.c_str(), o) ) {
2010-06-29 18:27:09 -04:00
cout << "TEMP " << o.toString() << endl;
lastOpTimeWritten = o["ts"]._opTime();
uassert(13290, "bad replSet oplog entry?", !lastOpTimeWritten.isNull());
2010-06-04 15:37:37 -04:00
}
}
/* Call after constructing to start the set - returns fairly quickly after
   launching its threads.

   If the last written optime cannot be loaded, the failure is logged, the
   function waits 30 seconds, and then exits the process via
   dbexit(EXIT_REPLICATION_ERROR).
*/
void ReplSetImpl::_go()
{
    try {
        loadLastOpTimeWritten();
    }
    catch(std::exception& ex) {
        log() << "replSet ERROR FATAL couldn't query the local " << rsoplog << " collection.  Terminating mongod after 30 seconds." << rsLog;
        log() << ex.what() << rsLog;
        sleepsecs(30);
        dbexit( EXIT_REPLICATION_ERROR );
        return;
    }

    // Optime loaded (or oplog empty): move to the next startup phase.
    _myState = STARTUP2;
    startThreads();
    newReplUp();
}
2010-06-01 19:12:38 -04:00
// Static startup progress indicator; starts at PRESTART and is advanced by loadConfig().
ReplSetImpl::StartupStatus ReplSetImpl::startupStatus = PRESTART;
// Human-readable text accompanying startupStatus; also maintained by loadConfig().
string ReplSetImpl::startupStatusMsg;
2010-04-21 17:40:24 -04:00
2010-06-01 16:25:47 -04:00
/** Build the member list from a replica set configuration.
 *
 *  Keeps a private copy of the config in _cfg, adopts its _id as the set name,
 *  and creates one Member per configured entry: the entry that isSelf()
 *  becomes _self (and sets _selfId), all others are pushed onto _members.
 *
 *  Preconditions (asserted): the config is ok(), its _id agrees with any name
 *  already set, and _members is still empty. If this node does not appear
 *  exactly once, the config is logged and uassert 13302 is raised.
 *
 *  @param c configuration to initialize from; it is copied, not retained
 */
void ReplSetImpl::initFromConfig(ReplSetConfig& c) //, bool save)
{
    _cfg = new ReplSetConfig(c);
    assert( _cfg->ok() );
    assert( _name.empty() || _name == _cfg->_id );
    _name = _cfg->_id;
    assert( !_name.empty() );

    assert( _members.head() == 0 );

    typedef vector<ReplSetConfig::MemberCfg> MemberCfgList;
    int me = 0; // how many configured members are this node
    for( MemberCfgList::iterator it = _cfg->members.begin(); it != _cfg->members.end(); ++it ) {
        // Reference into _cfg->members: each Member is handed a pointer to its config entry.
        const ReplSetConfig::MemberCfg& entry = *it;

        if( !entry.h.isSelf() ) {
            Member *other = new Member(entry.h, entry._id, &entry);
            _members.push(other);
            continue;
        }

        me++;
        assert( _self == 0 );
        _self = new Member(entry.h, entry._id, &entry);
        _selfId = entry._id;
    }

    if( me != 1 ) {
        // Dump the offending config before failing so the log shows what was loaded.
        log() << "replSet config : " << _cfg->toString() << rsLog;
        uassert( 13302, "replSet : can't find self in the repl set configuration", me == 1 );
    }

    /* if( save ) {
        _cfg->save();
    }*/
}
2010-05-15 15:09:18 -04:00
// Our own config must be the first one.
2010-06-01 16:25:47 -04:00
void ReplSetImpl::_loadConfigFinish(vector<ReplSetConfig>& cfgs) {
2010-05-03 13:33:49 -04:00
int v = -1;
ReplSetConfig *highest = 0;
2010-05-13 17:18:17 -04:00
int myVersion = -2000;
int n = 0;
2010-05-03 13:33:49 -04:00
for( vector<ReplSetConfig>::iterator i = cfgs.begin(); i != cfgs.end(); i++ ) {
ReplSetConfig& cfg = *i;
2010-05-13 17:18:17 -04:00
if( ++n == 1 ) myVersion = cfg.version;
2010-05-03 13:33:49 -04:00
if( cfg.ok() && cfg.version > v ) {
highest = &cfg;
v = cfg.version;
}
}
assert( highest );
2010-05-15 15:09:18 -04:00
initFromConfig(*highest);
if( highest->version > myVersion && highest->version >= 0 ) {
log() << "replSet got config version " << highest->version << " from a remote, saving locally" << rsLog;
writelock lk("admin.");
2010-06-29 18:27:09 -04:00
highest->saveConfigLocally(BSONObj());
2010-05-15 15:09:18 -04:00
}
2010-05-03 13:33:49 -04:00
}
2010-06-01 16:25:47 -04:00
void ReplSetImpl::loadConfig() {
2010-04-21 17:40:24 -04:00
while( 1 ) {
2010-04-22 16:17:18 -04:00
startupStatus = LOADINGCONFIG;
2010-05-09 15:16:14 -04:00
startupStatusMsg = "loading " + rsConfigNs + " config (LOADINGCONFIG)";
2010-04-21 17:40:24 -04:00
try {
vector<ReplSetConfig> configs;
configs.push_back( ReplSetConfig(HostAndPort::me()) );
2010-04-22 16:17:18 -04:00
for( vector<HostAndPort>::const_iterator i = _seeds->begin(); i != _seeds->end(); i++ ) {
2010-04-21 17:40:24 -04:00
configs.push_back( ReplSetConfig(*i) );
2010-04-22 16:17:18 -04:00
}
2010-04-21 17:40:24 -04:00
int nok = 0;
2010-04-22 16:17:18 -04:00
int nempty = 0;
2010-04-21 17:40:24 -04:00
for( vector<ReplSetConfig>::iterator i = configs.begin(); i != configs.end(); i++ ) {
if( i->ok() )
nok++;
2010-04-22 16:17:18 -04:00
if( i->empty() )
nempty++;
2010-04-21 17:40:24 -04:00
}
2010-04-22 16:17:18 -04:00
if( nok == 0 ) {
2010-04-23 18:51:51 -04:00
if( nempty == (int) configs.size() ) {
2010-04-22 16:17:18 -04:00
startupStatus = EMPTYCONFIG;
2010-05-13 17:18:17 -04:00
startupStatusMsg = "can't get " + rsConfigNs + " config from self or any seed (EMPTYCONFIG)";
2010-05-09 15:16:14 -04:00
log() << "replSet can't get " << rsConfigNs << " config from self or any seed (EMPTYCONFIG)" << rsLog;
2010-05-08 14:12:24 -04:00
log() << "replSet have you ran replSetInitiate yet?" << rsLog;
2010-05-14 14:45:54 -04:00
log() << "replSet sleeping 20sec and will try again." << rsLog;
2010-04-22 16:17:18 -04:00
}
else {
2010-04-23 17:35:05 -04:00
startupStatus = EMPTYUNREACHABLE;
2010-05-09 15:16:14 -04:00
startupStatusMsg = "can't currently get " + rsConfigNs + " config from self or any seed (EMPTYUNREACHABLE)";
log() << "replSet can't get " << rsConfigNs << " config from self or any seed." << rsLog;
2010-05-14 14:45:54 -04:00
log() << "replSet sleeping 20sec and will try again." << rsLog;
2010-04-22 16:17:18 -04:00
}
2010-05-14 14:45:54 -04:00
sleepsecs(20);
2010-04-21 17:40:24 -04:00
continue;
}
2010-05-15 15:09:18 -04:00
_loadConfigFinish(configs);
2010-04-21 17:40:24 -04:00
}
2010-05-10 11:26:02 -04:00
catch(DBException& e) {
2010-04-22 16:17:18 -04:00
startupStatus = BADCONFIG;
startupStatusMsg = "replSet error loading set config (BADCONFIG)";
2010-05-10 11:26:02 -04:00
log() << "replSet error loading configurations " << e.toString() << rsLog;
2010-06-17 23:28:06 +01:00
log() << "replSet error replication will not start" << rsLog;
2010-06-01 16:25:47 -04:00
_fatal();
2010-04-21 17:40:24 -04:00
throw;
}
break;
2010-04-21 17:13:25 -04:00
}
2010-05-11 15:58:44 -04:00
startupStatusMsg = "? started";
startupStatus = STARTED;
2010-04-21 16:43:51 -04:00
}
2010-06-01 16:25:47 -04:00
/** Put the set into the FATAL state and log that replication is stopping. */
void ReplSetImpl::_fatal() {
    // The state change and the log line both happen while holding the object's lock.
    lock guard(this);

    _myState = FATAL;
    log() << "replSet error fatal error, stopping replication" << rsLog;
}
2010-05-19 14:40:10 -04:00
/* forked as a thread during startup
it can run quite a while looking for config. but once found,
2010-06-01 16:25:47 -04:00
a separate thread takes over as ReplSetImpl::Manager, and this thread
2010-05-19 14:40:10 -04:00
terminates.
*/
2010-04-21 16:43:51 -04:00
void startReplSets() {
2010-05-13 17:18:17 -04:00
Client::initThread("startReplSets");
2010-04-21 16:43:51 -04:00
try {
assert( theReplSet == 0 );
2010-04-21 21:19:37 -04:00
if( cmdLine.replSet.empty() ) {
assert(!replSet);
2010-04-21 16:43:51 -04:00
return;
2010-04-21 21:19:37 -04:00
}
2010-05-05 14:00:25 -04:00
(theReplSet = new ReplSet(cmdLine.replSet))->go();
2010-04-21 16:43:51 -04:00
}
2010-04-27 17:07:07 -04:00
catch(std::exception& e) {
2010-05-19 14:40:10 -04:00
log() << "replSet caught exception in startReplSets thread: " << e.what() << rsLog;
2010-04-21 16:43:51 -04:00
if( theReplSet )
theReplSet->fatal();
2010-04-21 16:43:51 -04:00
}
2010-05-13 17:18:17 -04:00
cc().shutdown();
2010-04-13 13:22:42 -04:00
}
}