2008-07-27 18:36:47 -04:00
// repl.cpp
2008-09-05 18:04:29 -04:00
/* TODO
PAIRING
_ on a syncexception, don't allow going back to master state?
*/
2008-07-27 18:36:47 -04:00
/**
* Copyright ( C ) 2008 10 gen Inc .
2008-12-28 20:28:49 -05:00
*
2008-07-27 18:36:47 -04:00
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License , version 3 ,
* as published by the Free Software Foundation .
2008-12-28 20:28:49 -05:00
*
2008-07-27 18:36:47 -04:00
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
2008-12-28 20:28:49 -05:00
*
2008-07-27 18:36:47 -04:00
* You should have received a copy of the GNU Affero General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
2008-12-15 17:23:54 -05:00
/* Collections we use:
local . sources - indicates what sources we pull from as a " slave " , and the last update of each
local . oplog . $ main - our op log as " master "
2009-03-17 13:48:40 -04:00
local . dbinfo . < dbname >
2008-12-15 17:23:54 -05:00
local . pair . startup - can contain a special value indicating for a pair that we have the master copy .
used when replacing other half of the pair which has permanently failed .
2008-12-28 20:28:49 -05:00
local . pair . sync - { initialsynccomplete : 1 }
2008-12-15 17:23:54 -05:00
*/
2010-04-27 15:27:52 -04:00
# include "pch.h"
2008-07-27 18:36:47 -04:00
# include "jsobj.h"
# include "../util/goodies.h"
# include "repl.h"
2009-02-03 13:30:28 -05:00
# include "../util/message.h"
2010-03-30 14:32:15 -04:00
# include "../util/background.h"
2008-10-19 17:46:53 -05:00
# include "../client/dbclient.h"
2010-02-11 14:35:55 -05:00
# include "../client/connpool.h"
2008-07-27 18:36:47 -04:00
# include "pdfile.h"
2008-07-28 13:51:39 -04:00
# include "query.h"
2008-08-02 14:58:15 -04:00
# include "db.h"
2008-09-11 09:15:34 -04:00
# include "commands.h"
2009-01-20 11:05:53 -05:00
# include "security.h"
2009-08-25 14:35:22 -04:00
# include "cmdline.h"
2010-04-13 11:45:01 -04:00
# include "repl_block.h"
2010-05-29 15:45:47 -04:00
# include "repl/rs.h"
2008-07-27 18:36:47 -04:00
2009-01-14 17:09:51 -05:00
namespace mongo {
2010-03-01 13:55:31 -08:00
2010-03-05 13:50:46 -05:00
// our config from command line etc.
2010-02-08 21:04:09 -05:00
ReplSettings replSettings ;
2009-01-15 10:17:11 -05:00
/* Sync status flag.
0 : sync() is not running
1 : sync() is running
-1 : assigned by the replacePeer command once it has succeeded (it refuses to run again while the value is negative)
*/
2010-03-27 12:23:39 -04:00
volatile int syncing = 0 ;
2010-03-27 12:37:38 -04:00
// Set to 1 by the commands in this file while they wait for `syncing` to drop to 0.
// NOTE(review): the reader of this flag is not visible here -- presumably sync() yields when it sees it; confirm.
static volatile int relinquishSyncingSome = 0 ;
2009-01-15 10:17:11 -05:00
/* If true, replace our peer in a replication pair -- don't worry about whether its
local.oplog.$main is empty.
*/
bool replacePeer = false ;
2008-12-15 17:23:54 -05:00
2009-01-15 10:17:11 -05:00
/* "dead" means something really bad happened like replication falling completely out of sync.
When non-null, we are dead and the string is informational.
*/
2009-02-04 13:22:02 -05:00
const char * replAllDead = 0 ;
2008-12-01 14:55:36 -05:00
2009-02-02 11:15:24 -05:00
// time() of the most recent resync triggered through ReplSource::throttledForceResyncDead().
time_t lastForcedResync = 0 ;
2009-05-11 10:46:54 -04:00
// NOTE(review): allocated with new and bound to a reference; nothing visible here frees it --
// presumably intentional so it outlives static destruction; confirm.
IdTracker & idTracker = * ( new IdTracker ( ) ) ;
2009-04-23 14:44:05 -04:00
2009-01-14 17:09:51 -05:00
} // namespace mongo
2010-04-21 18:46:31 -04:00
# include "replpair.h"
2008-09-04 10:33:56 -04:00
2009-01-14 17:09:51 -05:00
namespace mongo {
2009-01-15 10:17:11 -05:00
/* Pair sync state object; allocated once at static-initialization time. */
PairSync *pairSync = new PairSync();

/** @return the value reported by pairSync->initialSyncCompleted(). */
bool getInitialSyncCompleted() {
    const bool completed = pairSync->initialSyncCompleted();
    return completed;
}
2008-12-28 22:01:18 -05:00
2009-01-15 10:17:11 -05:00
/* --- ReplPair -------------------------------- */
2008-09-11 15:13:47 -04:00
2009-01-15 10:17:11 -05:00
// The replica pair configuration object; null when this process is not paired
// (the commands in this file report "not paired" when it is 0).
ReplPair * replPair = 0 ;
2008-09-04 10:33:56 -04:00
2009-01-15 10:17:11 -05:00
/* output by the web console */
const char * replInfo = " " ;
/* Scoped helper: points replInfo at `msg` for the lifetime of the object and
resets it to the placeholder string when the object is destroyed.
Only the pointer is stored, so `msg` must stay valid while the object lives. */
struct ReplInfo {
ReplInfo ( const char * msg ) {
replInfo = msg ;
}
~ ReplInfo ( ) {
replInfo = " ? " ;
}
} ;
2008-09-11 15:13:47 -04:00
2009-01-15 10:17:11 -05:00
void ReplPair : : setMaster ( int n , const char * _comment ) {
if ( n = = State_Master & & ! getInitialSyncCompleted ( ) )
return ;
info = _comment ;
2009-08-25 14:35:22 -04:00
if ( n ! = state & & ! cmdLine . quiet )
2010-05-19 12:11:17 -04:00
tlog ( ) < < " pair: setting master= " < < n < < " was " < < state < < endl ;
2009-01-15 10:17:11 -05:00
state = n ;
2008-09-11 15:13:47 -04:00
}
2009-01-15 10:17:11 -05:00
/* peer unreachable, try our arbiter */
void ReplPair : : arbitrate ( ) {
ReplInfo r ( " arbitrate " ) ;
2008-09-11 15:13:47 -04:00
2009-01-15 10:17:11 -05:00
if ( arbHost = = " - " ) {
2009-02-02 09:53:01 -05:00
// no arbiter. we are up, let's assume partner is down and network is not partitioned.
2009-01-15 10:17:11 -05:00
setMasterLocked ( State_Master , " remote unreachable " ) ;
return ;
}
2008-09-11 15:13:47 -04:00
2009-01-15 10:17:11 -05:00
auto_ptr < DBClientConnection > conn ( newClientConnection ( ) ) ;
string errmsg ;
if ( ! conn - > connect ( arbHost . c_str ( ) , errmsg ) ) {
2010-05-19 12:11:17 -04:00
tlog ( ) < < " repl: cantconn arbiter " < < errmsg < < endl ;
2009-01-15 10:17:11 -05:00
setMasterLocked ( State_CantArb , " can't connect to arb " ) ;
return ;
}
2008-09-11 15:13:47 -04:00
2009-02-02 09:53:01 -05:00
negotiate ( conn . get ( ) , " arbiter " ) ;
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
/* --------------------------------------------- */
class CmdReplacePeer : public Command {
public :
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-01-15 10:17:11 -05:00
return true ;
2008-12-28 20:28:49 -05:00
}
2010-04-23 16:41:56 -04:00
virtual bool adminOnly ( ) const {
2009-01-15 10:17:11 -05:00
return true ;
2008-12-15 17:23:54 -05:00
}
2009-01-15 10:17:11 -05:00
virtual bool logTheOp ( ) {
2008-12-15 17:23:54 -05:00
return false ;
}
2010-04-23 15:50:49 -04:00
virtual LockType locktype ( ) const { return WRITE ; }
2010-04-23 16:41:56 -04:00
void help ( stringstream & h ) const { h < < " replace a node in a replica pair " ; }
CmdReplacePeer ( ) : Command ( " replacePeer " , false , " replacepeer " ) { }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool fromRepl ) {
2009-01-15 10:17:11 -05:00
if ( replPair = = 0 ) {
errmsg = " not paired " ;
return false ;
2008-12-15 17:23:54 -05:00
}
2009-01-15 10:17:11 -05:00
if ( ! getInitialSyncCompleted ( ) ) {
errmsg = " not caught up cannot replace peer " ;
2008-12-15 18:00:10 -05:00
return false ;
}
2009-01-15 10:17:11 -05:00
if ( syncing < 0 ) {
errmsg = " replacepeer already invoked " ;
return false ;
}
Timer t ;
while ( 1 ) {
2010-03-27 12:23:39 -04:00
if ( syncing = = 0 | | t . millis ( ) > 30000 )
2009-01-15 10:17:11 -05:00
break ;
{
dbtemprelease t ;
2010-03-27 12:37:38 -04:00
relinquishSyncingSome = 1 ;
sleepmillis ( 1 ) ;
2009-01-15 10:17:11 -05:00
}
}
if ( syncing ) {
assert ( syncing > 0 ) ;
errmsg = " timeout waiting for sync() to finish " ;
return false ;
}
{
2009-04-01 16:00:56 -04:00
ReplSource : : SourceVector sources ;
2009-01-15 10:17:11 -05:00
ReplSource : : loadAll ( sources ) ;
if ( sources . size ( ) ! = 1 ) {
errmsg = " local.sources.count() != 1, cannot replace peer " ;
return false ;
}
}
{
2009-01-18 17:48:44 -05:00
Helpers : : emptyCollection ( " local.sources " ) ;
2009-01-15 10:17:11 -05:00
BSONObj o = fromjson ( " { \" replacepeer \" :1} " ) ;
2009-01-18 17:48:44 -05:00
Helpers : : putSingleton ( " local.pair.startup " , o ) ;
2009-01-15 10:17:11 -05:00
}
syncing = - 1 ;
2009-02-04 13:22:02 -05:00
replAllDead = " replacepeer invoked -- adjust local.sources hostname then restart this db process " ;
2009-01-15 10:17:11 -05:00
result . append ( " info " , " adjust local.sources hostname; db restart now required " ) ;
return true ;
2008-12-15 18:00:10 -05:00
}
2009-01-15 10:17:11 -05:00
} cmdReplacePeer ;
2008-12-15 17:23:54 -05:00
2009-04-23 12:16:18 -04:00
class CmdForceDead : public Command {
public :
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-04-23 12:16:18 -04:00
return true ;
}
2010-04-23 16:41:56 -04:00
virtual bool adminOnly ( ) const {
2009-04-23 12:16:18 -04:00
return true ;
}
virtual bool logTheOp ( ) {
return false ;
}
2010-04-23 16:41:56 -04:00
virtual void help ( stringstream & h ) const { h < < " internal " ; }
2010-04-23 15:50:49 -04:00
virtual LockType locktype ( ) const { return WRITE ; }
2009-04-23 12:16:18 -04:00
CmdForceDead ( ) : Command ( " forcedead " ) { }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool fromRepl ) {
2010-03-05 13:50:46 -05:00
replAllDead = " replication forced to stop by 'forcedead' command " ;
2010-03-08 13:40:24 -05:00
log ( ) < < " ********************************************************* \n " ;
log ( ) < < " received 'forcedead' command, replication forced to stop " < < endl ;
2009-04-23 12:16:18 -04:00
return true ;
}
} cmdForceDead ;
2009-08-10 16:57:59 -04:00
/* operator requested resynchronization of replication (on the slave). { resync : 1 } */
2009-01-29 11:46:45 -05:00
class CmdResync : public Command {
public :
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-01-29 11:46:45 -05:00
return true ;
}
2010-04-23 16:41:56 -04:00
virtual bool adminOnly ( ) const {
2009-01-29 11:46:45 -05:00
return true ;
}
virtual bool logTheOp ( ) {
return false ;
}
2010-04-23 15:50:49 -04:00
virtual LockType locktype ( ) const { return WRITE ; }
2010-04-23 16:41:56 -04:00
void help ( stringstream & h ) const { h < < " resync (from scratch) an out of date replica slave. \n http://www.mongodb.org/display/DOCS/Master+Slave " ; }
2009-01-29 11:46:45 -05:00
CmdResync ( ) : Command ( " resync " ) { }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool fromRepl ) {
2009-04-22 10:52:32 -04:00
if ( cmdObj . getBoolField ( " force " ) ) {
if ( ! waitForSyncToFinish ( errmsg ) )
return false ;
replAllDead = " resync forced " ;
}
2009-02-04 13:22:02 -05:00
if ( ! replAllDead ) {
2009-01-29 11:46:45 -05:00
errmsg = " not dead, no need to resync " ;
return false ;
}
2009-04-22 10:52:32 -04:00
if ( ! waitForSyncToFinish ( errmsg ) )
return false ;
2009-03-31 11:44:35 -04:00
2009-04-22 10:52:32 -04:00
ReplSource : : forceResyncDead ( " client " ) ;
result . append ( " info " , " triggered resync for all sources " ) ;
return true ;
}
bool waitForSyncToFinish ( string & errmsg ) const {
2009-03-31 11:44:35 -04:00
// Wait for slave thread to finish syncing, so sources will be be
// reloaded with new saved state on next pass.
Timer t ;
while ( 1 ) {
2010-03-27 12:23:39 -04:00
if ( syncing = = 0 | | t . millis ( ) > 30000 )
2009-03-31 11:44:35 -04:00
break ;
{
dbtemprelease t ;
2010-03-27 12:37:38 -04:00
relinquishSyncingSome = 1 ;
sleepmillis ( 1 ) ;
2009-03-31 11:44:35 -04:00
}
}
if ( syncing ) {
errmsg = " timeout waiting for sync() to finish " ;
return false ;
}
2009-04-22 10:52:32 -04:00
return true ;
}
2009-01-29 11:46:45 -05:00
} cmdResync ;
2010-02-08 17:17:18 -05:00
bool anyReplEnabled ( ) {
2010-02-08 21:04:09 -05:00
return replPair | | replSettings . slave | | replSettings . master ;
2010-02-08 17:17:18 -05:00
}
2010-02-10 14:18:57 -05:00
/* Append replication status fields to `result`.
- "ismaster" is always appended; "remote" and "info" are appended only for authed
callers, except the "dead: ..." info, which is appended regardless.
- when `level` is nonzero, a "sources" array is appended with one entry per document in local.sources.
- when `level` > 1, each source host is also contacted to report the first/last
oplog timestamps on that host and a lag figure.
*/
void appendReplicationInfo ( BSONObjBuilder & result , bool authed , int level ) {
2010-02-08 17:17:18 -05:00
if ( replAllDead ) {
2010-04-21 17:40:24 -04:00
// replication is dead: never report master
result . append ( " ismaster " , 0 ) ;
2010-02-08 17:17:18 -05:00
if ( authed ) {
if ( replPair )
result . append ( " remote " , replPair - > remote ) ;
}
2010-03-08 13:40:24 -05:00
string s = string ( " dead: " ) + replAllDead ;
result . append ( " info " , s ) ;
2010-02-08 17:17:18 -05:00
}
else if ( replPair ) {
// paired: report the pair state value directly
result . append ( " ismaster " , replPair - > state ) ;
if ( authed ) {
result . append ( " remote " , replPair - > remote ) ;
if ( ! replPair - > info . empty ( ) )
2010-07-19 12:39:14 -04:00
result . append ( " info " , replPair - > info . toString ( ) ) ;
2010-02-08 17:17:18 -05:00
}
}
else {
2010-07-14 14:04:46 -04:00
// parses as ( master || slave == 0 ) ? 1 : 0 -- i.e. master unless configured as a slave only
result . append ( " ismaster " , replSettings . master | | replSettings . slave = = 0 ? 1 : 0 ) ;
2010-07-15 13:08:34 -04:00
//result.append("msg", "not paired");
2010-02-08 17:17:18 -05:00
}
2010-02-10 14:18:57 -05:00
if ( level ) {
BSONObjBuilder sources ( result . subarrayStart ( " sources " ) ) ;
readlock lk ( " local.sources " ) ;
Client : : Context ctx ( " local.sources " ) ;
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > c = findTableScan ( " local.sources " , BSONObj ( ) ) ;
2010-02-10 14:18:57 -05:00
int n = 0 ;
while ( c - > ok ( ) ) {
2010-02-11 14:35:55 -05:00
BSONObj s = c - > current ( ) ;
BSONObjBuilder bb ;
bb . append ( s [ " host " ] ) ;
// NOTE(review): valuestr() is used without checking that the field exists or is a string -- confirm local.sources documents always carry it.
string sourcename = s [ " source " ] . valuestr ( ) ;
// the source name is only reported when it is not the default
if ( sourcename ! = " main " )
bb . append ( s [ " source " ] ) ;
{
BSONElement e = s [ " syncedTo " ] ;
BSONObjBuilder t ( bb . subobjStart ( " syncedTo " ) ) ;
t . appendDate ( " time " , e . timestampTime ( ) ) ;
t . append ( " inc " , e . timestampInc ( ) ) ;
t . done ( ) ;
}
if ( level > 1 ) {
2010-02-11 17:17:24 -05:00
// NOTE(review): the lock is released here while cursor `c` and object `s` are still in use below -- confirm that is safe.
dbtemprelease unlock ;
2010-02-11 14:35:55 -05:00
ScopedDbConnection conn ( s [ " host " ] . valuestr ( ) ) ;
// first and last entries of the oplog on the source host, in natural order
BSONObj first = conn - > findOne ( ( string ) " local.oplog.$ " + sourcename , Query ( ) . sort ( BSON ( " $natural " < < 1 ) ) ) ;
BSONObj last = conn - > findOne ( ( string ) " local.oplog.$ " + sourcename , Query ( ) . sort ( BSON ( " $natural " < < - 1 ) ) ) ;
bb . appendDate ( " masterFirst " , first [ " ts " ] . timestampTime ( ) ) ;
bb . appendDate ( " masterLast " , last [ " ts " ] . timestampTime ( ) ) ;
2010-02-12 08:46:53 -05:00
// lag = newest oplog time on the source minus our syncedTo time
// (presumably milliseconds, given the division by 1000 below -- confirm)
double lag = ( double ) ( last [ " ts " ] . timestampTime ( ) - s [ " syncedTo " ] . timestampTime ( ) ) ;
2010-02-11 14:35:55 -05:00
bb . append ( " lagSeconds " , lag / 1000 ) ;
conn . done ( ) ;
}
// array elements are keyed "0", "1", ...
sources . append ( BSONObjBuilder : : numStr ( n + + ) , bb . obj ( ) ) ;
2010-02-10 14:18:57 -05:00
c - > advance ( ) ;
}
sources . done ( ) ;
}
2010-02-08 17:17:18 -05:00
}
2010-04-26 20:58:41 -04:00
class CmdIsMaster : public Command {
2009-01-15 10:17:11 -05:00
public :
2009-01-24 16:05:12 -05:00
virtual bool requiresAuth ( ) { return false ; }
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-01-15 10:17:11 -05:00
return true ;
}
2010-04-23 15:50:49 -04:00
virtual void help ( stringstream & help ) const {
2010-04-26 20:58:41 -04:00
help < < " Check if this server is primary for a replica pair/set; also if it is --master or --slave in simple master/slave setups. \n " ;
2010-04-23 15:50:49 -04:00
help < < " { isMaster : 1 } " ;
}
virtual LockType locktype ( ) const { return NONE ; }
2010-04-26 21:16:57 -04:00
CmdIsMaster ( ) : Command ( " isMaster " , true , " ismaster " ) { }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool /*fromRepl*/ ) {
2009-01-24 16:05:12 -05:00
/* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
authenticated .
we allow unauthenticated ismaster but we aren ' t as verbose informationally if
one is not authenticated for admin db to be safe .
*/
2010-04-21 17:40:24 -04:00
2010-04-21 21:19:37 -04:00
if ( replSet ) {
2010-04-21 17:40:24 -04:00
if ( theReplSet = = 0 ) {
2010-05-14 16:32:24 -04:00
result . append ( " ismaster " , false ) ;
2010-08-02 13:25:28 -04:00
result . append ( " secondary " , false ) ;
2010-05-14 16:32:24 -04:00
errmsg = " replSet still trying to initialize " ;
2010-04-22 18:43:37 -04:00
result . append ( " info " , ReplSet : : startupStatusMsg ) ;
2010-08-02 13:25:28 -04:00
return true ;
2010-04-21 17:40:24 -04:00
}
theReplSet - > fillIsMaster ( result ) ;
return true ;
}
2009-01-29 17:26:07 -05:00
2010-02-08 17:17:18 -05:00
bool authed = cc ( ) . getAuthenticationInfo ( ) - > isAuthorizedReads ( " admin " ) ;
appendReplicationInfo ( result , authed ) ;
2009-01-15 10:17:11 -05:00
return true ;
2008-11-20 18:03:41 -05:00
}
2009-01-15 10:17:11 -05:00
} cmdismaster ;
2009-04-28 15:33:56 -04:00
class CmdIsInitialSyncComplete : public Command {
public :
virtual bool requiresAuth ( ) { return false ; }
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-04-28 15:33:56 -04:00
return true ;
}
2010-04-23 15:50:49 -04:00
virtual LockType locktype ( ) const { return WRITE ; }
2009-04-28 15:33:56 -04:00
CmdIsInitialSyncComplete ( ) : Command ( " isinitialsynccomplete " ) { }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool /*fromRepl*/ ) {
2009-04-28 15:33:56 -04:00
result . appendBool ( " initialsynccomplete " , getInitialSyncCompleted ( ) ) ;
return true ;
}
} cmdisinitialsynccomplete ;
2009-01-15 10:17:11 -05:00
/* negotiate who is master

   -1 = not set (probably means we just booted)
    0 = was slave
    1 = was master

   remote,local -> new remote,local
   !1,1  -> 0,1
   1,!1  -> 1,0
   -1,-1 -> dominant->1, nondom->0
   0,0   -> dominant->1, nondom->0
   1,1   -> dominant->1, nondom->0

   { negotiatemaster:1, i_was:<state>, your_name:<hostname> }
   returns:
   { ok:1, you_are:..., i_am:... }

   When this process is not paired (replPair == 0) it treats itself as an arbiter
   and forwards the request to the host named by your_name / your_port.
*/
class CmdNegotiateMaster : public Command {
public :
CmdNegotiateMaster ( ) : Command ( " negotiatemaster " ) { }
2010-04-23 15:50:49 -04:00
virtual bool slaveOk ( ) const {
2009-01-15 10:17:11 -05:00
return true ;
2008-09-11 09:15:34 -04:00
}
2010-04-23 16:41:56 -04:00
virtual bool adminOnly ( ) const {
2009-01-15 10:17:11 -05:00
return true ;
2008-09-11 09:15:34 -04:00
}
2010-04-23 15:50:49 -04:00
virtual LockType locktype ( ) const { return WRITE ; }
2010-05-03 16:25:34 -04:00
virtual bool run ( const string & , BSONObj & cmdObj , string & errmsg , BSONObjBuilder & result , bool ) {
2009-01-15 10:17:11 -05:00
if ( replPair = = 0 ) {
2009-12-28 16:43:43 -05:00
// A request that was already forwarded by an arbiter must not land on an unpaired node.
massert ( 10383 , " Another mongod instance believes incorrectly that this node is its peer " , ! cmdObj . getBoolField ( " fromArbiter " ) ) ;
2009-02-02 09:53:01 -05:00
// assume that we are an arbiter and should forward the request
string host = cmdObj . getStringField ( " your_name " ) ;
int port = cmdObj . getIntField ( " your_port " ) ;
// presumably getIntField yields INT_MIN when the field is absent -- confirm
if ( port = = INT_MIN ) {
errmsg = " no port specified " ;
problem ( ) < < errmsg < < endl ;
return false ;
}
stringstream ss ;
ss < < host < < " : " < < port ;
string remote = ss . str ( ) ;
2009-05-27 13:55:30 -04:00
BSONObj ret ;
{
// release the db lock for the duration of the remote call
dbtemprelease t ;
auto_ptr < DBClientConnection > conn ( new DBClientConnection ( ) ) ;
if ( ! conn - > connect ( remote . c_str ( ) , errmsg ) ) {
// target unreachable: tell the requester it is master
result . append ( " you_are " , ReplPair : : State_Master ) ;
return true ;
}
2009-05-28 13:35:39 -04:00
// forward the original command, tagged so the receiver knows it came via an arbiter
BSONObjBuilder forwardCommand ;
forwardCommand . appendElements ( cmdObj ) ;
forwardCommand . appendBool ( " fromArbiter " , true ) ;
ret = conn - > findOne ( " admin.$cmd " , forwardCommand . done ( ) ) ;
2009-02-02 09:53:01 -05:00
}
// copy every field of the reply except "ok" into our own result
BSONObjIterator i ( ret ) ;
2009-06-09 11:43:04 -04:00
while ( i . moreWithEOO ( ) ) {
2009-02-02 09:53:01 -05:00
BSONElement e = i . next ( ) ;
if ( e . eoo ( ) )
break ;
if ( e . fieldName ( ) ! = string ( " ok " ) )
result . append ( e ) ;
}
2010-05-18 11:36:10 -04:00
// our success mirrors the forwarded command's "ok"
return ret [ " ok " ] . trueValue ( ) ;
2009-01-15 10:17:11 -05:00
}
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
// Paired case: the requester tells us its previous state and the name it knows us by.
int was = cmdObj . getIntField ( " i_was " ) ;
string myname = cmdObj . getStringField ( " your_name " ) ;
2009-04-03 14:24:05 -04:00
// NOTE(review): `was < -3` presumably catches a missing i_was field -- confirm
if ( myname . empty ( ) | | was < - 3 ) {
2009-01-15 10:17:11 -05:00
errmsg = " your_name/i_was not specified " ;
return false ;
}
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
int N = ReplPair : : State_Negotiating ;
int M = ReplPair : : State_Master ;
int S = ReplPair : : State_Slave ;
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
// When dominant(myname) is false we do not decide: answer "negotiating" and report our current state.
if ( ! replPair - > dominant ( myname ) ) {
result . append ( " you_are " , N ) ;
2009-04-10 17:03:45 -04:00
result . append ( " i_am " , replPair - > state ) ;
2009-01-15 10:17:11 -05:00
return true ;
}
2008-12-28 20:28:49 -05:00
2009-01-15 10:17:11 -05:00
// We decide. We yield (become slave) if our initial sync is incomplete, or if
// the requester was master and we currently are not; otherwise we take master.
int me , you ;
if ( ! getInitialSyncCompleted ( ) | | ( replPair - > state ! = M & & was = = M ) ) {
me = S ;
you = M ;
}
else {
me = M ;
you = S ;
}
replPair - > setMaster ( me , " CmdNegotiateMaster::run() " ) ;
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
result . append ( " you_are " , you ) ;
result . append ( " i_am " , me ) ;
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
return true ;
}
} cmdnegotiatemaster ;
2009-02-02 09:53:01 -05:00
2009-04-10 17:03:45 -04:00
/* Send a negotiatemaster command over `conn` and apply the answer.
@param conn   connection to the peer or to the arbiter
@param method label used in log and status messages
@return the "i_am" value from the reply, or State_Confused when the command failed.
*/
int ReplPair : : negotiate ( DBClientConnection * conn , string method ) {
2009-01-15 10:17:11 -05:00
// { negotiatemaster : 1 , i_was : <our state> , your_name : <remote host> , your_port : <remote port> }
BSONObjBuilder b ;
b . append ( " negotiatemaster " , 1 ) ;
b . append ( " i_was " , state ) ;
b . append ( " your_name " , remoteHost ) ;
2009-02-02 09:53:01 -05:00
b . append ( " your_port " , remotePort ) ;
2009-01-15 10:17:11 -05:00
BSONObj cmd = b . done ( ) ;
BSONObj res = conn - > findOne ( " admin.$cmd " , cmd ) ;
2010-05-18 11:36:10 -04:00
if ( ! res [ " ok " ] . trueValue ( ) ) {
2009-02-02 09:53:01 -05:00
// command failed: log it and enter the confused state
string message = method + " negotiate failed " ;
problem ( ) < < message < < " : " < < res . toString ( ) < < ' \n ' ;
setMasterLocked ( State_Confused , message . c_str ( ) ) ;
2009-04-10 17:03:45 -04:00
return State_Confused ;
2009-01-15 10:17:11 -05:00
}
int x = res . getIntField ( " you_are " ) ;
2009-04-10 17:03:45 -04:00
int remote = res . getIntField ( " i_am " ) ;
2009-01-15 10:17:11 -05:00
// State_Negotiating means the remote node is not dominant and cannot
// choose who is master.
if ( x ! = State_Slave & & x ! = State_Master & & x ! = State_Negotiating ) {
2009-02-02 09:53:01 -05:00
// unexpected value: logged only, our state is left unchanged
problem ( ) < < method < < " negotiate: bad you_are value " < < res . toString ( ) < < endl ;
2009-04-10 17:03:45 -04:00
} else if ( x ! = State_Negotiating ) {
2009-02-02 09:53:01 -05:00
// the other side decided: adopt the state it assigned to us
string message = method + " negotiation " ;
setMasterLocked ( x , message . c_str ( ) ) ;
2009-01-15 10:17:11 -05:00
}
2009-04-10 17:03:45 -04:00
return remote ;
2009-01-15 10:17:11 -05:00
}
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
/* --------------------------------------------------------------*/
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
/* Default source: not paired, not replacing a peer, nothing cloned this pass. */
ReplSource::ReplSource() {
    paired = false;
    replacing = false;
    nClonedThisPass = 0;
}
2008-09-11 09:15:34 -04:00
2009-01-15 10:17:11 -05:00
ReplSource : : ReplSource ( BSONObj o ) : nClonedThisPass ( 0 ) {
replacing = false ;
paired = false ;
only = o . getStringField ( " only " ) ;
hostName = o . getStringField ( " host " ) ;
_sourceName = o . getStringField ( " source " ) ;
2009-12-28 16:43:43 -05:00
uassert ( 10118 , " 'host' field not set in sources collection object " , ! hostName . empty ( ) ) ;
uassert ( 10119 , " only source='main' allowed for now with replication " , sourceName ( ) = = " main " ) ;
2009-01-15 10:17:11 -05:00
BSONElement e = o . getField ( " syncedTo " ) ;
if ( ! e . eoo ( ) ) {
2009-12-28 16:43:43 -05:00
uassert ( 10120 , " bad sources 'syncedTo' field value " , e . type ( ) = = Date | | e . type ( ) = = Timestamp ) ;
2009-01-15 10:17:11 -05:00
OpTime tmp ( e . date ( ) ) ;
syncedTo = tmp ;
}
2008-07-28 13:51:39 -04:00
2009-03-31 14:05:20 -04:00
BSONObj dbsObj = o . getObjectField ( " dbsNextPass " ) ;
if ( ! dbsObj . isEmpty ( ) ) {
BSONObjIterator i ( dbsObj ) ;
while ( 1 ) {
BSONElement e = i . next ( ) ;
if ( e . eoo ( ) )
break ;
addDbNextPass . insert ( e . fieldName ( ) ) ;
}
}
2009-04-16 11:36:06 -04:00
dbsObj = o . getObjectField ( " incompleteCloneDbs " ) ;
if ( ! dbsObj . isEmpty ( ) ) {
BSONObjIterator i ( dbsObj ) ;
while ( 1 ) {
BSONElement e = i . next ( ) ;
if ( e . eoo ( ) )
break ;
incompleteCloneDbs . insert ( e . fieldName ( ) ) ;
}
}
2010-01-03 16:37:38 -05:00
_lastSavedLocalTs = OpTime ( o . getField ( " localLogTs " ) . date ( ) ) ;
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
/* Serialize this ReplSource into the BSON document form that save() writes.
   "only" and "syncedTo" are emitted only when set; the two database-name sets
   are emitted as sub-objects (name -> true) only when non-empty. */
BSONObj ReplSource::jsobj() {
    BSONObjBuilder doc;
    doc.append(" host ", hostName);
    doc.append(" source ", sourceName());
    if (!only.empty())
        doc.append(" only ", only);
    if (!syncedTo.isNull())
        doc.appendTimestamp(" syncedTo ", syncedTo.asDate());

    doc.appendTimestamp(" localLogTs ", _lastSavedLocalTs.asDate());

    BSONObjBuilder nextPass;
    for (set<string>::iterator it = addDbNextPass.begin(); it != addDbNextPass.end(); ++it)
        nextPass.appendBool(*it, 1);
    if (!addDbNextPass.empty())
        doc.append(" dbsNextPass ", nextPass.done());

    BSONObjBuilder incomplete;
    for (set<string>::iterator it = incompleteCloneDbs.begin(); it != incompleteCloneDbs.end(); ++it)
        incomplete.appendBool(*it, 1);
    if (!incompleteCloneDbs.empty())
        doc.append(" incompleteCloneDbs ", incomplete.done());

    return doc.obj();
}
2008-08-02 14:58:15 -04:00
2009-01-15 10:17:11 -05:00
void ReplSource : : save ( ) {
BSONObjBuilder b ;
assert ( ! hostName . empty ( ) ) ;
b . append ( " host " , hostName ) ;
// todo: finish allowing multiple source configs.
// this line doesn't work right when source is null, if that is allowed as it is now:
//b.append("source", _sourceName);
BSONObj pattern = b . done ( ) ;
2008-08-02 14:58:15 -04:00
2009-01-15 10:17:11 -05:00
BSONObj o = jsobj ( ) ;
2010-07-17 16:07:38 -04:00
log ( 1 ) < < " Saving repl source: " < < o < < endl ;
2008-11-10 11:20:30 -05:00
2010-01-29 17:22:34 -05:00
{
OpDebug debug ;
Client : : Context ctx ( " local.sources " ) ;
UpdateResult res = updateObjects ( " local.sources " , o , pattern , true /*upsert for pair feature*/ , false , false , debug ) ;
assert ( ! res . mod ) ;
assert ( res . num = = 1 ) ;
}
2009-01-15 10:17:11 -05:00
if ( replacing ) {
/* if we were in "replace" mode, we now have synced up with the replacement,
so turn that off .
*/
replacing = false ;
wassert ( replacePeer ) ;
replacePeer = false ;
2009-01-18 17:48:44 -05:00
Helpers : : emptyCollection ( " local.pair.startup " ) ;
2008-12-28 20:28:49 -05:00
}
}
2009-04-01 16:00:56 -04:00
static void addSourceToList ( ReplSource : : SourceVector & v , ReplSource & s , const BSONObj & spec , ReplSource : : SourceVector & old ) {
2009-04-22 11:57:45 -04:00
if ( ! s . syncedTo . isNull ( ) ) { // Don't reuse old ReplSource if there was a forced resync.
2009-04-01 16:00:56 -04:00
for ( ReplSource : : SourceVector : : iterator i = old . begin ( ) ; i ! = old . end ( ) ; ) {
2009-03-31 11:44:35 -04:00
if ( s = = * * i ) {
v . push_back ( * i ) ;
old . erase ( i ) ;
return ;
}
i + + ;
}
}
2009-04-16 11:36:06 -04:00
2009-04-01 16:00:56 -04:00
v . push_back ( shared_ptr < ReplSource > ( new ReplSource ( s ) ) ) ;
2009-01-15 10:17:11 -05:00
}
/* (Re)build the list of replication sources in `v` from the local.sources collection.
We reuse our existing objects so that we can keep our existing connection
and cursor in effect: the previous contents of `v` are handed to addSourceToList()
as the pool of reusable entries.

Steps:
1. with --source: every stored source must match --source / --only (else the process exits);
the source is saved if the collection is empty. Without --source, --only is rejected.
2. when paired: the stored source must be the pair remote (else the process exits);
it is saved if missing. With fastsync the stored sources are discarded first.
3. every stored source is loaded, adjusted (pair / replacePeer / initial syncedTo) and added to `v`.
4. when paired and no stored source matched the remote, an entry for the remote is appended.
*/
2009-04-01 16:00:56 -04:00
void ReplSource : : loadAll ( SourceVector & v ) {
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( " local.sources " ) ;
2009-04-01 16:00:56 -04:00
// keep the previous entries around so they can be reused below
SourceVector old = v ;
v . clear ( ) ;
2009-01-15 10:17:11 -05:00
bool gotPairWith = false ;
2009-08-25 14:35:22 -04:00
if ( ! cmdLine . source . empty ( ) ) {
2009-01-15 10:17:11 -05:00
// --source <host> specified.
// check that no items are in sources other than that
// add if missing
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > c = findTableScan ( " local.sources " , BSONObj ( ) ) ;
2009-01-15 10:17:11 -05:00
int n = 0 ;
while ( c - > ok ( ) ) {
n + + ;
ReplSource tmp ( c - > current ( ) ) ;
2009-08-25 14:35:22 -04:00
if ( tmp . hostName ! = cmdLine . source ) {
2010-02-12 16:05:05 -05:00
// stored host disagrees with the command line: log, wait 30 seconds, exit
log ( ) < < " repl: --source " < < cmdLine . source < < " != " < < tmp . hostName < < " from local.sources collection " < < endl ;
log ( ) < < " repl: terminating mongod after 30 seconds " < < endl ;
2009-02-12 15:03:38 -05:00
sleepsecs ( 30 ) ;
2009-08-05 16:00:27 -04:00
dbexit ( EXIT_REPLICATION_ERROR ) ;
2009-02-12 15:03:38 -05:00
}
2009-08-25 14:35:22 -04:00
if ( tmp . only ! = cmdLine . only ) {
2009-12-28 16:43:43 -05:00
// stored --only disagrees with the command line: same treatment
log ( ) < < " --only " < < cmdLine . only < < " != " < < tmp . only < < " from local.sources collection " < < endl ;
2009-01-15 10:17:11 -05:00
log ( ) < < " terminating after 30 seconds " < < endl ;
sleepsecs ( 30 ) ;
2009-08-05 16:00:27 -04:00
dbexit ( EXIT_REPLICATION_ERROR ) ;
2009-01-15 10:17:11 -05:00
}
c - > advance ( ) ;
}
2009-12-28 16:43:43 -05:00
// at most one stored source is expected
uassert ( 10002 , " local.sources collection corrupt? " , n < 2 ) ;
2009-01-15 10:17:11 -05:00
if ( n = = 0 ) {
// source missing. add.
ReplSource s ;
2009-08-25 14:35:22 -04:00
s . hostName = cmdLine . source ;
s . only = cmdLine . only ;
2009-01-15 10:17:11 -05:00
s . save ( ) ;
}
}
2009-03-24 11:46:55 -04:00
else {
// no --source: --only is not allowed. Any exception from the assertion is turned into an exit.
try {
2009-12-28 16:43:43 -05:00
massert ( 10384 , " --only requires use of --source " , cmdLine . only . empty ( ) ) ;
2009-03-24 11:46:55 -04:00
} catch ( . . . ) {
2009-08-07 15:37:50 -04:00
dbexit ( EXIT_BADOPTIONS ) ;
2009-03-24 11:46:55 -04:00
}
2009-02-12 15:03:38 -05:00
}
2009-03-23 11:38:22 -04:00
if ( replPair ) {
const string & remote = replPair - > remote ;
// --pairwith host specified.
2010-02-16 19:33:24 -08:00
if ( replSettings . fastsync ) {
Helpers : : emptyCollection ( " local.sources " ) ; // ignore saved sources
}
2009-03-23 11:38:22 -04:00
// check that no items are in sources other than that
// add if missing
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > c = findTableScan ( " local.sources " , BSONObj ( ) ) ;
2009-03-23 11:38:22 -04:00
int n = 0 ;
while ( c - > ok ( ) ) {
n + + ;
ReplSource tmp ( c - > current ( ) ) ;
if ( tmp . hostName ! = remote ) {
2009-12-28 16:43:43 -05:00
// stored host is not our pair partner: log, wait 30 seconds, exit
log ( ) < < " pairwith " < < remote < < " != " < < tmp . hostName < < " from local.sources collection " < < endl ;
2009-03-23 11:38:22 -04:00
log ( ) < < " terminating after 30 seconds " < < endl ;
sleepsecs ( 30 ) ;
2009-08-05 16:00:27 -04:00
dbexit ( EXIT_REPLICATION_ERROR ) ;
2009-03-23 11:38:22 -04:00
}
c - > advance ( ) ;
}
2009-12-28 16:43:43 -05:00
uassert ( 10122 , " local.sources collection corrupt? " , n < 2 ) ;
2009-03-23 11:38:22 -04:00
if ( n = = 0 ) {
// source missing. add.
ReplSource s ;
s . hostName = remote ;
s . save ( ) ;
}
}
2008-11-10 17:45:39 -05:00
2010-05-07 17:25:57 -04:00
// Final pass: turn every stored document into an entry of `v`.
shared_ptr < Cursor > c = findTableScan ( " local.sources " , BSONObj ( ) ) ;
2008-12-28 20:28:49 -05:00
while ( c - > ok ( ) ) {
2008-11-10 17:45:39 -05:00
ReplSource tmp ( c - > current ( ) ) ;
2009-01-15 10:17:11 -05:00
if ( replPair & & tmp . hostName = = replPair - > remote & & tmp . sourceName ( ) = = " main " ) {
gotPairWith = true ;
tmp . paired = true ;
if ( replacePeer ) {
// peer was replaced -- start back at the beginning.
tmp . syncedTo = OpTime ( ) ;
tmp . replacing = true ;
}
2010-02-16 19:33:24 -08:00
}
// Unpaired source that has never synced, or paired with fastsync:
// seed the sync position from the newest entry of our own oplog whose op is not "n".
if ( ( ! replPair & & tmp . syncedTo . isNull ( ) ) | |
( replPair & & replSettings . fastsync ) ) {
2010-02-16 11:30:50 -08:00
// note: this `c` shadows the cursor `c` for the rest of this block only
DBDirectClient c ;
if ( c . exists ( " local.oplog.$main " ) ) {
2010-03-29 16:35:06 -07:00
BSONObj op = c . findOne ( " local.oplog.$main " , QUERY ( " op " < < NE < < " n " ) . sort ( BSON ( " $natural " < < - 1 ) ) ) ;
2010-02-16 11:30:50 -08:00
if ( ! op . isEmpty ( ) ) {
tmp . syncedTo = op [ " ts " ] . date ( ) ;
2010-02-16 19:33:24 -08:00
tmp . _lastSavedLocalTs = op [ " ts " ] . date ( ) ;
2010-02-16 11:30:50 -08:00
}
}
2008-11-10 17:45:39 -05:00
}
2009-03-31 11:44:35 -04:00
// reuses a matching entry from `old` when possible (here `c` is the cursor again)
addSourceToList ( v , tmp , c - > current ( ) , old ) ;
2008-11-10 17:45:39 -05:00
c - > advance ( ) ;
}
2009-01-15 10:17:11 -05:00
if ( ! gotPairWith & & replPair ) {
/* add the --pairwith server */
2009-04-01 16:00:56 -04:00
// this entry is appended to `v` only; it is not saved here
shared_ptr < ReplSource > s ( new ReplSource ( ) ) ;
2009-01-15 10:17:11 -05:00
s - > paired = true ;
s - > hostName = replPair - > remote ;
s - > replacing = replacePeer ;
v . push_back ( s ) ;
2008-11-10 17:45:39 -05:00
}
}
2009-01-15 10:17:11 -05:00
// File-scope BSON object parsed once from a JSON literal whose only field is
// "getoptime" with value 1. NOTE(review): presumably sent to a remote server as
// a command to read its current optime -- no use is visible in this part of the file.
BSONObj opTimeQuery = fromjson ( " { \" getoptime \" :1} " ) ;
2009-02-02 11:15:24 -05:00
bool ReplSource : : throttledForceResyncDead ( const char * requester ) {
if ( time ( 0 ) - lastForcedResync > 600 ) {
forceResyncDead ( requester ) ;
lastForcedResync = time ( 0 ) ;
return true ;
}
return false ;
}
void ReplSource : : forceResyncDead ( const char * requester ) {
2009-02-04 13:22:02 -05:00
if ( ! replAllDead )
2009-02-02 11:15:24 -05:00
return ;
2009-04-01 16:00:56 -04:00
SourceVector sources ;
2009-02-02 11:15:24 -05:00
ReplSource : : loadAll ( sources ) ;
2009-04-01 16:00:56 -04:00
for ( SourceVector : : iterator i = sources . begin ( ) ; i ! = sources . end ( ) ; + + i ) {
2009-02-02 11:15:24 -05:00
( * i ) - > forceResync ( requester ) ;
}
2009-02-04 13:22:02 -05:00
replAllDead = 0 ;
2009-02-02 11:15:24 -05:00
}
void ReplSource : : forceResync ( const char * requester ) {
2009-03-18 13:45:32 -04:00
BSONObj info ;
{
dbtemprelease t ;
2010-07-05 12:29:42 -04:00
oplogReader . connect ( hostName ) ;
2010-07-19 09:29:49 -04:00
/* todo use getDatabaseNames() method here */
2010-07-05 12:29:42 -04:00
bool ok = oplogReader . conn ( ) - > runCommand ( " admin " , BSON ( " listDatabases " < < 1 ) , info ) ;
2009-12-28 16:43:43 -05:00
massert ( 10385 , " Unable to get database list " , ok ) ;
2009-03-18 13:45:32 -04:00
}
BSONObjIterator i ( info . getField ( " databases " ) . embeddedObject ( ) ) ;
2009-06-09 11:43:04 -04:00
while ( i . moreWithEOO ( ) ) {
2009-03-18 13:45:32 -04:00
BSONElement e = i . next ( ) ;
if ( e . eoo ( ) )
break ;
string name = e . embeddedObject ( ) . getField ( " name " ) . valuestr ( ) ;
2009-05-01 12:18:17 -04:00
if ( ! e . embeddedObject ( ) . getBoolField ( " empty " ) ) {
if ( name ! = " local " ) {
if ( only . empty ( ) | | only = = name ) {
resyncDrop ( name . c_str ( ) , requester ) ;
}
2009-03-18 13:45:32 -04:00
}
}
}
2009-01-29 11:46:45 -05:00
syncedTo = OpTime ( ) ;
2009-04-22 19:10:23 -04:00
addDbNextPass . clear ( ) ;
2009-03-31 10:23:05 -04:00
save ( ) ;
2009-01-29 11:46:45 -05:00
}
2009-03-18 13:45:32 -04:00
/* Log and drop the database named `db`.
   A Client::Context for `db` is established before dropDatabase() is called.
   @param db         name of the database to drop
   @param requester  not used by this function
   @return the database name, as a string
*/
string ReplSource::resyncDrop( const char *db, const char *requester ) {
    log() << " resync: dropping database " << db << endl;

    Client::Context dropCtx( db );
    dropDatabase( db );

    return string( db );
}
2009-01-29 11:46:45 -05:00
2009-08-10 16:57:59 -04:00
/* grab initial copy of a database from the master */
2009-01-15 10:17:11 -05:00
bool ReplSource : : resync ( string db ) {
2009-03-18 13:45:32 -04:00
string dummyNs = resyncDrop ( db . c_str ( ) , " internal " ) ;
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( dummyNs ) ;
2009-01-15 10:17:11 -05:00
{
2010-02-25 13:52:58 -05:00
log ( ) < < " resync: cloning database " < < db < < " to get an initial copy " < < endl ;
2009-01-15 10:17:11 -05:00
ReplInfo r ( " resync: cloning a database " ) ;
string errmsg ;
2009-10-14 14:34:38 -04:00
bool ok = cloneFrom ( hostName . c_str ( ) , errmsg , cc ( ) . database ( ) - > name , false , /*slaveok*/ true , /*replauth*/ true , /*snapshot*/ false ) ;
2009-01-15 10:17:11 -05:00
if ( ! ok ) {
problem ( ) < < " resync of " < < db < < " from " < < hostName < < " failed " < < errmsg < < endl ;
throw SyncException ( ) ;
}
}
2008-08-02 14:58:15 -04:00
2010-02-25 13:52:58 -05:00
log ( ) < < " resync: done with initial clone for db: " < < db < < endl ;
2008-08-02 14:58:15 -04:00
2009-01-15 10:17:11 -05:00
return true ;
2008-12-28 20:28:49 -05:00
}
2009-01-23 10:17:29 -05:00
void ReplSource : : applyOperation ( const BSONObj & op ) {
try {
2010-06-29 14:49:06 -04:00
applyOperation_inlock ( op ) ;
2009-01-23 10:17:29 -05:00
}
2009-02-06 16:56:14 -05:00
catch ( UserException & e ) {
2010-07-17 16:07:38 -04:00
log ( ) < < " sync: caught user assertion " < < e < < " while applying op: " < < op < < endl ; ;
2009-05-05 12:52:53 -04:00
}
catch ( DBException & e ) {
2010-07-17 16:07:38 -04:00
log ( ) < < " sync: caught db exception " < < e < < " while applying op: " < < op < < endl ; ;
2009-05-05 12:52:53 -04:00
}
2010-06-29 14:49:06 -04:00
2009-01-23 10:17:29 -05:00
}
2010-06-30 15:56:59 -04:00
2009-01-15 10:17:11 -05:00
/* local.oplog.$main entries are of the form:
     { ts: ..., op: <optype>, ns: ..., o: <obj>, o2: <extraobj>, b: <boolflag> }
     ...
   see logOp() comments.
*/
2009-04-23 14:44:05 -04:00
// Process a single operation read from the remote oplog.
//
// @param op            the oplog entry; its op and ns string fields are read here.
// @param localLogTail  may be null. When non-null and this node is the master of a
//                      replica pair, it is passed to updateSetsWithLocalOps() before
//                      the op is handled.
//
// No-op entries and entries for a database other than `only` (when `only` is set)
// are ignored. Throws SyncException when the namespace is empty or when replAllDead
// is set. The optional pretouch phase runs first; dblock is taken after it and held
// for the rest of the function.
void ReplSource : : sync_pullOpLog_applyOperation ( BSONObj & op , OpTime * localLogTail ) {
2010-07-17 16:07:38 -04:00
log ( 6 ) < < " processing op: " < < op < < endl ;
2009-04-22 10:52:32 -04:00
// skip no-op
if ( op . getStringField ( " op " ) [ 0 ] = = ' n ' )
return ;
2009-12-31 16:34:27 -05:00
char clientName [ MaxDatabaseLen ] ;
2009-01-15 10:17:11 -05:00
const char * ns = op . getStringField ( " ns " ) ;
2009-12-31 16:31:07 -05:00
// presumably fills clientName with the database portion of ns -- helper is defined elsewhere
nsToDatabase ( ns , clientName ) ;
2009-01-15 10:17:11 -05:00
// a namespace beginning with '.' is logged and skipped; an empty namespace halts replication
if ( * ns = = ' . ' ) {
problem ( ) < < " skipping bad op in oplog: " < < op . toString ( ) < < endl ;
return ;
}
else if ( * ns = = 0 ) {
problem ( ) < < " halting replication, bad op in oplog: \n " < < op . toString ( ) < < endl ;
2009-02-04 13:22:02 -05:00
replAllDead = " bad object in oplog " ;
2008-12-28 20:28:49 -05:00
throw SyncException ( ) ;
}
2009-01-15 10:17:11 -05:00
// when `only` is set, ignore operations for any other database
if ( ! only . empty ( ) & & only ! = clientName )
return ;
2008-12-24 11:11:10 -05:00
2010-07-05 10:12:32 -04:00
// optional pretouch phase; note that dblock is only declared after this section
if ( cmdLine . pretouch ) {
2010-07-06 17:49:20 -04:00
if ( cmdLine . pretouch > 1 ) {
/* note: this is bad - should be put in ReplSource. but this is first test... */
// function-local static: counts ops that were already pretouched as part of an earlier batch
static int countdown ;
if ( countdown > 0 ) {
countdown - - ; // was pretouched on a prev pass
assert ( countdown > = 0 ) ;
} else {
2010-07-12 12:37:27 -04:00
// number of peeked ops handed to each scheduled pretouch task
const int m = 4 ;
2010-07-06 17:49:20 -04:00
// create the thread pool on first use, with between 1 and 8 threads
if ( tp . get ( ) = = 0 ) {
2010-07-12 12:37:27 -04:00
int nthr = min ( 8 , cmdLine . pretouch ) ;
2010-07-06 17:49:20 -04:00
nthr = max ( nthr , 1 ) ;
tp . reset ( new ThreadPool ( nthr ) ) ;
}
vector < BSONObj > v ;
oplogReader . peek ( v , cmdLine . pretouch ) ;
unsigned a = 0 ;
// schedule pretouchN over consecutive index ranges [a..b] of the peeked ops
while ( 1 ) {
if ( a > = v . size ( ) ) break ;
unsigned b = a + m - 1 ; // v[a..b]
if ( b > = v . size ( ) ) b = v . size ( ) - 1 ;
tp - > schedule ( pretouchN , v , a , b ) ;
DEV cout < < " pretouch task: " < < a < < " .. " < < b < < endl ;
a + = m ;
}
// we do one too...
pretouchOperation ( op ) ;
tp - > join ( ) ;
// the next v.size() calls take the countdown branch above instead of pretouching again
countdown = v . size ( ) ;
}
}
else {
pretouchOperation ( op ) ;
}
2010-06-30 15:56:59 -04:00
}
2009-01-15 10:17:11 -05:00
dblock lk ;
2009-04-17 13:21:50 -04:00
// paired master with a local log position: record local ops in two passes
if ( localLogTail & & replPair & & replPair - > state = = ReplPair : : State_Master ) {
2009-04-24 11:14:29 -04:00
updateSetsWithLocalOps ( * localLogTail , true ) ; // allow unlocking
updateSetsWithLocalOps ( * localLogTail , false ) ; // don't allow unlocking or conversion to db backed storage
2009-04-17 13:21:50 -04:00
}
2009-04-22 13:53:35 -04:00
if ( replAllDead ) {
// hmmm why is this check here and not at top of this function? does it get set between top and here?
2009-12-22 10:00:47 -05:00
log ( ) < < " replAllDead, throwing SyncException: " < < replAllDead < < endl ;
2009-04-22 13:53:35 -04:00
throw SyncException ( ) ;
}
2009-04-17 13:21:50 -04:00
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( ns ) ;
2010-03-26 21:38:39 -04:00
ctx . getClient ( ) - > curop ( ) - > reset ( ) ;
2008-08-25 16:46:39 -04:00
2010-01-29 17:22:34 -05:00
bool empty = ctx . db ( ) - > isEmpty ( ) ;
2009-04-16 11:36:06 -04:00
bool incompleteClone = incompleteCloneDbs . count ( clientName ) ! = 0 ;
2009-04-22 13:53:35 -04:00
2010-07-19 22:32:43 -04:00
if ( logLevel > = 6 )
log ( 6 ) < < " ns: " < < ns < < " , justCreated: " < < ctx . justCreated ( ) < < " , empty: " < < empty < < " , incompleteClone: " < < incompleteClone < < endl ;
2010-01-29 17:22:34 -05:00
// always apply admin command command
// this is a bit hacky -- the semantics of replication/commands aren't well specified
if ( strcmp ( clientName , " admin " ) = = 0 & & * op . getStringField ( " op " ) = = ' c ' ) {
applyOperation ( op ) ;
return ;
}
2009-04-23 12:16:18 -04:00
2010-01-29 17:22:34 -05:00
// database was just created, is empty, or has an unfinished clone: (re)clone it instead of applying the op
if ( ctx . justCreated ( ) | | empty | | incompleteClone ) {
2009-04-22 19:10:23 -04:00
// we must add to incomplete list now that setClient has been called
incompleteCloneDbs . insert ( clientName ) ;
2009-04-22 13:53:35 -04:00
if ( nClonedThisPass ) {
/* we only clone one database per pass, even if a lot need done. This helps us
avoid overflowing the master ' s transaction log by doing too much work before going
back to read more transactions . ( Imagine a scenario of slave startup where we try to
clone 100 databases in one pass . )
*/
addDbNextPass . insert ( clientName ) ;
} else {
2009-04-29 17:28:14 -04:00
if ( incompleteClone ) {
log ( ) < < " An earlier initial clone of ' " < < clientName < < " ' did not complete, now resyncing. " < < endl ;
}
2009-04-22 13:53:35 -04:00
// persist state before the clone starts
save ( ) ;
2010-01-29 17:22:34 -05:00
// NOTE(review): this declaration shadows the outer `ctx` declared earlier in this function
Client : : Context ctx ( ns ) ;
2009-04-22 13:53:35 -04:00
nClonedThisPass + + ;
2010-01-29 17:22:34 -05:00
resync ( ctx . db ( ) - > name ) ;
2009-04-22 13:53:35 -04:00
// clone finished: the database is no longer pending or incomplete
addDbNextPass . erase ( clientName ) ;
incompleteCloneDbs . erase ( clientName ) ;
}
2009-04-16 11:36:06 -04:00
save ( ) ;
2009-03-17 17:20:08 -04:00
} else {
2009-04-09 18:50:29 -04:00
bool mod ;
2009-04-24 18:14:35 -04:00
// paired master: consult idTracker so that ids already recorded there are not overwritten
if ( replPair & & replPair - > state = = ReplPair : : State_Master ) {
BSONObj id = idForOp ( op , mod ) ;
if ( ! idTracker . haveId ( ns , id ) ) {
applyOperation ( op ) ;
} else if ( idTracker . haveModId ( ns , id ) ) {
2010-07-17 16:07:38 -04:00
log ( 6 ) < < " skipping operation matching mod id object " < < op < < endl ;
2009-04-24 18:14:35 -04:00
BSONObj existing ;
// the op is skipped; if the object exists locally, it is logged again as an insert
if ( Helpers : : findOne ( ns , id , existing ) )
logOp ( " i " , ns , existing ) ;
2009-05-13 16:34:35 -04:00
} else {
2010-07-17 16:07:38 -04:00
log ( 6 ) < < " skipping operation matching changed id object " < < op < < endl ;
2009-04-24 18:14:35 -04:00
}
} else {
applyOperation ( op ) ;
2009-04-09 18:50:29 -04:00
}
2009-03-31 14:05:20 -04:00
addDbNextPass . erase ( clientName ) ;
2009-03-17 17:20:08 -04:00
}
2009-01-15 10:17:11 -05:00
}
2009-04-09 18:50:29 -04:00
// Extract the object that identifies the document an oplog entry refers to.
//
// @param op   oplog entry; its op, o and (for updates) o2 fields are read.
// @param mod  output; set to true only for an update whose o object's first field
//             name begins with '$', false in every other case.
// @return  insert: an object holding just the id element of o, or an empty object
//                  when getObjectID() fails;
//          update: o2, or an empty object when o2's first field is not _id;
//          delete: o (an op type with a second character is skipped);
//          any other op type: an empty object.
BSONObj ReplSource : : idForOp ( const BSONObj & op , bool & mod ) {
mod = false ;
const char * opType = op . getStringField ( " op " ) ;
BSONObj o = op . getObjectField ( " o " ) ;
// dispatch on the first character of the op type
switch ( opType [ 0 ] ) {
case ' i ' : {
BSONObjBuilder idBuilder ;
BSONElement id ;
if ( ! o . getObjectID ( id ) )
return BSONObj ( ) ;
idBuilder . append ( id ) ;
return idBuilder . obj ( ) ;
}
case ' u ' : {
BSONObj o2 = op . getObjectField ( " o2 " ) ;
// only updates whose o2 leads with _id yield an id
if ( strcmp ( o2 . firstElement ( ) . fieldName ( ) , " _id " ) ! = 0 )
return BSONObj ( ) ;
// a leading '$' field name in o is reported to the caller through `mod`
if ( o . firstElement ( ) . fieldName ( ) [ 0 ] = = ' $ ' )
mod = true ;
return o2 ;
}
case ' d ' : {
if ( opType [ 1 ] ! = ' \0 ' )
return BSONObj ( ) ; // skip "db" op type
return o ;
}
default :
break ;
}
return BSONObj ( ) ;
}
2009-04-24 11:14:29 -04:00
void ReplSource : : updateSetsWithOp ( const BSONObj & op , bool mayUnlock ) {
if ( mayUnlock ) {
2009-04-27 10:31:32 -04:00
idTracker . mayUpgradeStorage ( ) ;
2009-04-24 11:14:29 -04:00
}
2009-04-17 13:21:50 -04:00
bool mod ;
BSONObj id = idForOp ( op , mod ) ;
if ( ! id . isEmpty ( ) ) {
const char * ns = op . getStringField ( " ns " ) ;
2009-05-19 17:30:11 -04:00
// Since our range of local ops may not be the same as our peer's
// range of unapplied ops, it is always necessary to rewrite objects
// to the oplog after a mod update.
if ( mod )
idTracker . haveModId ( ns , id , true ) ;
2009-04-23 15:00:40 -04:00
idTracker . haveId ( ns , id , true ) ;
2009-04-17 13:21:50 -04:00
}
}
2009-04-22 17:44:23 -04:00
void ReplSource : : syncToTailOfRemoteLog ( ) {
string _ns = ns ( ) ;
2010-02-25 23:06:37 -08:00
BSONObjBuilder b ;
if ( ! only . empty ( ) ) {
b . appendRegex ( " ns " , string ( " ^ " ) + only ) ;
}
2010-07-05 12:29:42 -04:00
BSONObj last = oplogReader . findOne ( _ns . c_str ( ) , Query ( b . done ( ) ) . sort ( BSON ( " $natural " < < - 1 ) ) ) ;
2009-04-22 17:44:23 -04:00
if ( ! last . isEmpty ( ) ) {
2010-01-28 13:41:51 -05:00
BSONElement ts = last . getField ( " ts " ) ;
2010-05-14 13:33:16 -04:00
massert ( 10386 , " non Date ts found: " + last . toString ( ) , ts . type ( ) = = Date | | ts . type ( ) = = Timestamp ) ;
2009-04-22 17:44:23 -04:00
syncedTo = OpTime ( ts . date ( ) ) ;
}
}
2009-04-27 15:30:17 -04:00
/* Read the ts of the first entry returned by a reverse-natural-order scan of
   the local oplog.
   @return that timestamp as an OpTime, or a default-constructed OpTime when the
           scan yields nothing.
*/
OpTime ReplSource::nextLastSavedLocalTs() const {
    Client::Context ctx( " local.oplog.$main " );
    shared_ptr<Cursor> tail = findTableScan( " local.oplog.$main ", BSON( " $natural " << -1 ) );

    if ( !tail->ok() )
        return OpTime();

    return OpTime( tail->current().getField( " ts " ).date() );
}
void ReplSource : : setLastSavedLocalTs ( const OpTime & nextLocalTs ) {
2010-01-03 16:37:38 -05:00
_lastSavedLocalTs = nextLocalTs ;
log ( 3 ) < < " updated _lastSavedLocalTs to: " < < _lastSavedLocalTs < < endl ;
2009-04-22 17:44:23 -04:00
}
void ReplSource : : resetSlave ( ) {
2010-03-08 13:40:24 -05:00
log ( ) < < " ********************************************************** \n " ;
log ( ) < < " Sending forcedead command to slave to stop its replication \n " ;
log ( ) < < " Host: " < < hostName < < " paired: " < < paired < < endl ;
massert ( 10387 , " request to kill slave replication failed " ,
2010-07-05 12:29:42 -04:00
oplogReader . conn ( ) - > simpleCommand ( " admin " , 0 , " forcedead " ) ) ;
2009-04-22 17:44:23 -04:00
syncToTailOfRemoteLog ( ) ;
{
dblock lk ;
2009-04-27 15:30:17 -04:00
setLastSavedLocalTs ( nextLastSavedLocalTs ( ) ) ;
2009-04-22 17:44:23 -04:00
save ( ) ;
2010-07-21 13:13:36 -04:00
oplogReader . resetCursor ( ) ;
2009-04-22 17:44:23 -04:00
}
}
2009-04-24 11:14:29 -04:00
bool ReplSource : : updateSetsWithLocalOps ( OpTime & localLogTail , bool mayUnlock ) {
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( " local.oplog.$main " ) ;
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > localLog = findTableScan ( " local.oplog.$main " , BSON ( " $natural " < < - 1 ) ) ;
2009-04-24 14:35:13 -04:00
OpTime newTail ;
2009-04-22 17:44:23 -04:00
for ( ; localLog - > ok ( ) ; localLog - > advance ( ) ) {
BSONObj op = localLog - > current ( ) ;
OpTime ts ( localLog - > current ( ) . getField ( " ts " ) . date ( ) ) ;
2009-04-24 14:35:13 -04:00
if ( newTail . isNull ( ) ) {
newTail = ts ;
2009-04-22 17:44:23 -04:00
}
2009-04-24 14:35:13 -04:00
if ( ! ( localLogTail < ts ) )
2009-04-22 17:44:23 -04:00
break ;
2009-04-24 11:14:29 -04:00
updateSetsWithOp ( op , mayUnlock ) ;
if ( mayUnlock ) {
2009-04-27 10:31:32 -04:00
RARELY {
2009-04-24 14:35:13 -04:00
dbtemprelease t ;
}
2009-04-22 17:44:23 -04:00
}
}
2009-05-01 13:14:37 -04:00
if ( ! localLogTail . isNull ( ) & & ! localLog - > ok ( ) ) {
2009-04-22 17:52:33 -04:00
// local log filled up
2009-04-23 15:00:40 -04:00
idTracker . reset ( ) ;
2009-04-23 16:00:57 -04:00
dbtemprelease t ;
2009-04-22 17:44:23 -04:00
resetSlave ( ) ;
2009-12-28 16:43:43 -05:00
massert ( 10388 , " local master log filled, forcing slave resync " , false ) ;
2009-04-22 17:44:23 -04:00
}
2009-04-24 14:35:13 -04:00
if ( ! newTail . isNull ( ) )
localLogTail = newTail ;
2009-04-22 17:44:23 -04:00
return true ;
}
2010-01-04 11:15:09 -05:00
/* slave: pull some data from the master's oplog
note : not yet in db mutex at this point .
2010-03-26 14:37:31 -04:00
@ return - 1 error
0 ok , don ' t sleep
1 ok , sleep
2010-01-04 11:15:09 -05:00
*/
2010-03-26 14:37:31 -04:00
// Pull operations from the source's oplog through oplogReader and apply them.
//
// @param nApplied  output; assigned the local counter n when the apply loop ends
//                  because the reader has no more ops. It is not assigned on the
//                  early-return paths or on the slavedelay break.
//                  NOTE(review): n is reset to 0 at each periodic checkpoint, so the
//                  reported count looks like "ops since the last checkpoint" -- confirm
//                  that is intended.
// @return -1 when no cursor could be obtained (the connection is reset),
//         0 or 1 otherwise; 0 is chosen on some tailing paths when the reader is
//         await-capable ("don't sleep").
//
// May throw SyncException or raise massert/uassert failures on bad oplog data, and
// sets replAllDead on the "halting replication" paths.
int ReplSource : : sync_pullOpLog ( int & nApplied ) {
int okResultCode = 1 ;
2009-01-15 10:17:11 -05:00
string ns = string ( " local.oplog.$ " ) + sourceName ( ) ;
2009-01-23 18:24:15 -05:00
log ( 2 ) < < " repl: sync_pullOpLog " < < ns < < " syncedTo: " < < syncedTo . toStringLong ( ) < < ' \n ' ;
2009-01-15 10:17:11 -05:00
// stays true unless a fresh query is issued below
bool tailing = true ;
2010-07-20 13:37:09 -04:00
oplogReader . tailCheck ( ) ;
2009-01-15 10:17:11 -05:00
2009-05-01 13:14:37 -04:00
// paired master: start each pull with an empty idTracker (reset under the db lock)
if ( replPair & & replPair - > state = = ReplPair : : State_Master ) {
2009-04-23 16:00:57 -04:00
dblock lk ;
idTracker . reset ( ) ;
}
2010-01-03 16:37:38 -05:00
OpTime localLogTail = _lastSavedLocalTs ;
2009-04-16 11:36:06 -04:00
2009-03-30 16:28:52 -04:00
// a null syncedTo means nothing has been synced from this source yet
bool initial = syncedTo . isNull ( ) ;
2009-04-10 17:03:45 -04:00
2010-07-05 12:29:42 -04:00
// no open cursor, or first sync: issue a new query against the remote oplog
if ( ! oplogReader . haveCursor ( ) | | initial ) {
2009-04-22 19:10:23 -04:00
if ( initial ) {
2009-03-31 11:44:35 -04:00
// Important to grab last oplog timestamp before listing databases.
2009-04-22 17:44:23 -04:00
syncToTailOfRemoteLog ( ) ;
2009-04-16 11:36:06 -04:00
BSONObj info ;
2010-07-05 12:29:42 -04:00
bool ok = oplogReader . conn ( ) - > runCommand ( " admin " , BSON ( " listDatabases " < < 1 ) , info ) ;
2009-12-28 16:43:43 -05:00
massert ( 10389 , " Unable to get database list " , ok ) ;
2009-04-16 11:36:06 -04:00
BSONObjIterator i ( info . getField ( " databases " ) . embeddedObject ( ) ) ;
2009-06-09 11:43:04 -04:00
// queue every non-empty, non-local database (subject to `only`) in addDbNextPass
while ( i . moreWithEOO ( ) ) {
2009-04-16 11:36:06 -04:00
BSONElement e = i . next ( ) ;
if ( e . eoo ( ) )
break ;
string name = e . embeddedObject ( ) . getField ( " name " ) . valuestr ( ) ;
2009-05-01 12:18:17 -04:00
if ( ! e . embeddedObject ( ) . getBoolField ( " empty " ) ) {
if ( name ! = " local " ) {
if ( only . empty ( ) | | only = = name ) {
log ( 2 ) < < " adding to 'addDbNextPass': " < < name < < endl ;
addDbNextPass . insert ( name ) ;
}
2009-03-17 13:18:54 -04:00
}
}
}
2009-04-22 16:25:53 -04:00
dblock lk ;
save ( ) ;
2009-03-17 13:18:54 -04:00
}
2009-04-16 11:36:06 -04:00
2009-01-15 10:17:11 -05:00
BSONObjBuilder q ;
q . appendDate ( " $gte " , syncedTo . asDate ( ) ) ;
BSONObjBuilder query ;
query . append ( " ts " , q . done ( ) ) ;
2009-02-12 15:03:38 -05:00
if ( ! only . empty ( ) ) {
2010-01-04 11:15:09 -05:00
// note we may here skip a LOT of data table scanning, a lot of work for the master.
2010-07-05 12:29:42 -04:00
query . appendRegex ( " ns " , string ( " ^ " ) + only ) ; // maybe append "\\." here?
2009-02-12 15:03:38 -05:00
}
2009-01-15 10:17:11 -05:00
BSONObj queryObj = query . done ( ) ;
2010-07-05 12:29:42 -04:00
// e.g. queryObj = { ts: { $gte: syncedTo } }
oplogReader . tailingQuery ( ns . c_str ( ) , queryObj ) ;
2009-01-15 10:17:11 -05:00
tailing = false ;
2008-12-28 20:28:49 -05:00
}
else {
2009-01-23 18:24:15 -05:00
log ( 2 ) < < " repl: tailing=true \n " ;
2008-12-28 20:28:49 -05:00
}
2008-08-19 14:39:44 -04:00
2010-07-05 12:29:42 -04:00
if ( ! oplogReader . haveCursor ( ) ) {
problem ( ) < < " repl: dbclient::query returns null (conn closed?) " < < endl ;
oplogReader . resetConnection ( ) ;
2010-03-26 14:37:31 -04:00
return - 1 ;
2009-01-15 10:17:11 -05:00
}
2008-09-05 10:40:00 -04:00
2009-01-15 10:17:11 -05:00
// show any deferred database creates from a previous pass
{
// only the first entry of addDbNextPass is handled on each call
set < string > : : iterator i = addDbNextPass . begin ( ) ;
if ( i ! = addDbNextPass . end ( ) ) {
BSONObjBuilder b ;
b . append ( " ns " , * i + ' . ' ) ;
b . append ( " op " , " db " ) ;
BSONObj op = b . done ( ) ;
2009-04-23 14:44:05 -04:00
sync_pullOpLog_applyOperation ( op , 0 ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-28 20:28:49 -05:00
}
2010-07-05 12:29:42 -04:00
// nothing to read: refresh the saved local timestamp, save, and return
if ( ! oplogReader . more ( ) ) {
2009-01-15 10:17:11 -05:00
if ( tailing ) {
2009-01-23 18:24:15 -05:00
log ( 2 ) < < " repl: tailing & no new activity \n " ;
2010-07-05 12:29:42 -04:00
if ( oplogReader . awaitCapable ( ) )
2010-03-26 14:37:31 -04:00
okResultCode = 0 ; // don't sleep
2009-04-23 14:44:05 -04:00
} else {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: " < < ns < < " oplog is empty \n " ;
2009-04-23 14:44:05 -04:00
}
{
dblock lk ;
2009-04-27 15:30:17 -04:00
OpTime nextLastSaved = nextLastSavedLocalTs ( ) ;
{
// more() is re-checked with the db lock released; the local ts is only updated if still no ops
dbtemprelease t ;
2010-07-05 12:29:42 -04:00
if ( ! oplogReader . more ( ) ) {
2009-04-27 15:30:17 -04:00
setLastSavedLocalTs ( nextLastSaved ) ;
}
}
2009-04-23 14:44:05 -04:00
save ( ) ;
}
2010-03-26 14:37:31 -04:00
return okResultCode ;
2009-01-15 10:17:11 -05:00
}
2010-03-01 13:55:31 -08:00
OpTime nextOpTime ;
// read and validate the first op of this batch
{
2010-07-05 12:29:42 -04:00
BSONObj op = oplogReader . next ( ) ;
2010-03-01 13:55:31 -08:00
BSONElement ts = op . getField ( " ts " ) ;
if ( ts . type ( ) ! = Date & & ts . type ( ) ! = Timestamp ) {
string err = op . getStringField ( " $err " ) ;
if ( ! err . empty ( ) ) {
2010-07-12 17:39:01 -04:00
// 13051 is "tailable cursor requested on non capped collection"
if ( op . getIntField ( " code " ) = = 13051 ) {
problem ( ) < < " trying to slave off of a non-master " < < ' \n ' ;
massert ( 13344 , " trying to slave off of a non-master " , false ) ;
}
else {
problem ( ) < < " repl: $err reading remote oplog: " + err < < ' \n ' ;
massert ( 10390 , " got $err reading remote oplog " , false ) ;
}
2010-03-01 13:55:31 -08:00
}
else {
problem ( ) < < " repl: bad object read from remote oplog: " < < op . toString ( ) < < ' \n ' ;
massert ( 10391 , " repl: bad object read from remote oplog " , false ) ;
}
2009-04-22 10:52:32 -04:00
}
2010-03-01 13:55:31 -08:00
if ( replPair & & replPair - > state = = ReplPair : : State_Master ) {
2009-04-22 10:52:32 -04:00
2010-03-05 13:50:46 -05:00
OpTime next ( ts . date ( ) ) ;
// a fresh, non-initial query must begin exactly at syncedTo; otherwise the peer is reset
if ( ! tailing & & ! initial & & next ! = syncedTo ) {
2010-03-01 13:55:31 -08:00
log ( ) < < " remote slave log filled, forcing slave resync " < < endl ;
resetSlave ( ) ;
2010-03-26 14:37:31 -04:00
return 1 ;
2010-03-01 13:55:31 -08:00
}
2009-04-22 10:52:32 -04:00
2010-03-01 13:55:31 -08:00
dblock lk ;
updateSetsWithLocalOps ( localLogTail , true ) ;
}
2009-04-22 10:52:32 -04:00
2010-03-01 13:55:31 -08:00
nextOpTime = OpTime ( ts . date ( ) ) ;
log ( 2 ) < < " repl: first op time received: " < < nextOpTime . toString ( ) < < ' \n ' ;
if ( tailing | | initial ) {
if ( initial )
log ( 1 ) < < " repl: initial run \n " ;
else
assert ( syncedTo < nextOpTime ) ;
2010-07-05 12:29:42 -04:00
oplogReader . putBack ( op ) ; // op will be processed in the loop below
2010-03-01 13:55:31 -08:00
nextOpTime = OpTime ( ) ; // will reread the op below
}
else if ( nextOpTime ! = syncedTo ) { // didn't get what we queried for - error
Nullstream & l = log ( ) ;
l < < " repl: nextOpTime " < < nextOpTime . toStringLong ( ) < < ' ' ;
if ( nextOpTime < syncedTo )
l < < " <?? " ;
else
l < < " > " ;
l < < " syncedTo " < < syncedTo . toStringLong ( ) < < ' \n ' ;
log ( ) < < " repl: time diff: " < < ( nextOpTime . getSecs ( ) - syncedTo . getSecs ( ) ) < < " sec \n " ;
log ( ) < < " repl: tailing: " < < tailing < < ' \n ' ;
log ( ) < < " repl: data too stale, halting replication " < < endl ;
replInfo = replAllDead = " data too stale halted replication " ;
2009-03-30 16:28:52 -04:00
assert ( syncedTo < nextOpTime ) ;
2010-03-01 13:55:31 -08:00
throw SyncException ( ) ;
}
else {
/* t == syncedTo, so the first op was applied previously. */
}
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
// apply operations
2008-08-02 14:58:15 -04:00
{
2010-03-01 13:55:31 -08:00
// n counts ops applied since the last checkpoint; saveLast is the time of the last checkpoint
int n = 0 ;
2009-01-21 18:48:37 -05:00
time_t saveLast = time ( 0 ) ;
2009-01-15 10:17:11 -05:00
while ( 1 ) {
2009-10-19 13:29:12 -04:00
/* from a.s.:
2009-11-28 11:50:46 -05:00
I think the idea here is that we can establish a sync point between the local op log and the remote log with the following steps :
1 ) identify most recent op in local log - - call it O
2 ) ask " does nextOpTime reflect the tail of the remote op log? " ( in other words , is more ( ) false ? ) - If yes , all subsequent ops after nextOpTime in the remote log must have occurred after O . If no , we can ' t establish a sync point .
Note that we can ' t do step ( 2 ) followed by step ( 1 ) because if we do so ops may be added to both machines between steps ( 2 ) and ( 1 ) and we can ' t establish a sync point . ( In particular , between ( 2 ) and ( 1 ) an op may be added to the remote log before a different op is added to the local log . In this case , the newest remote op will have occurred after nextOpTime but before O . )
Now , for performance reasons we don ' t want to have to identify the most recent op in the local log every time we call c - > more ( ) because in performance sensitive situations more ( ) will be true most of the time . So we do :
0 ) more ( ) ?
1 ) find most recent op in local log
2009-10-19 13:29:12 -04:00
2 ) more ( ) ?
*/
2010-07-05 12:29:42 -04:00
if ( ! oplogReader . more ( ) ) {
2009-04-27 15:30:17 -04:00
dblock lk ;
2010-03-01 13:55:31 -08:00
OpTime nextLastSaved = nextLastSavedLocalTs ( ) ;
2009-04-27 15:30:17 -04:00
{
dbtemprelease t ;
2010-07-05 12:29:42 -04:00
if ( oplogReader . more ( ) ) {
2010-03-30 14:46:53 -07:00
if ( getInitialSyncCompleted ( ) ) { // if initial sync hasn't completed, break out of loop so we can set to completed or clone more dbs
continue ;
}
2009-10-13 17:13:51 -04:00
} else {
2009-04-27 15:30:17 -04:00
setLastSavedLocalTs ( nextLastSaved ) ;
}
}
2010-07-05 12:29:42 -04:00
if ( oplogReader . awaitCapable ( ) & & tailing )
2010-03-26 14:37:31 -04:00
okResultCode = 0 ; // don't sleep
2009-01-15 10:17:11 -05:00
syncedTo = nextOpTime ;
save ( ) ; // note how far we are synced up to now
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: applied " < < n < < " operations " < < endl ;
nApplied = n ;
2010-03-08 15:03:00 -05:00
log ( ) < < " repl: end sync_pullOpLog syncedTo: " < < syncedTo . toStringLong ( ) < < endl ;
2009-01-15 10:17:11 -05:00
break ;
}
2009-01-21 18:48:37 -05:00
2010-03-01 13:55:31 -08:00
OCCASIONALLY if ( n > 0 & & ( n > 100000 | | time ( 0 ) - saveLast > 60 ) ) {
2009-01-21 18:48:37 -05:00
// periodically note our progress, in case we are doing a lot of work and crash
dblock lk ;
2009-05-21 14:30:41 -04:00
syncedTo = nextOpTime ;
// can't update local log ts since there are pending operations from our peer
2009-01-21 18:48:37 -05:00
save ( ) ;
2009-10-13 16:01:02 -04:00
log ( ) < < " repl: checkpoint applied " < < n < < " operations " < < endl ;
log ( ) < < " repl: syncedTo: " < < syncedTo . toStringLong ( ) < < endl ;
2009-01-21 18:48:37 -05:00
saveLast = time ( 0 ) ;
2009-05-21 14:30:41 -04:00
n = 0 ;
2009-01-21 18:48:37 -05:00
}
2010-07-05 12:29:42 -04:00
BSONObj op = oplogReader . next ( ) ;
2010-03-01 13:55:31 -08:00
BSONElement ts = op . getField ( " ts " ) ;
2010-03-05 13:50:46 -05:00
if ( ! ( ts . type ( ) = = Date | | ts . type ( ) = = Timestamp ) ) {
log ( ) < < " sync error: problem querying remote oplog record \n " ;
log ( ) < < " op: " < < op . toString ( ) < < ' \n ' ;
log ( ) < < " halting replication " < < endl ;
replInfo = replAllDead = " sync error: no ts found querying remote oplog record " ;
throw SyncException ( ) ;
}
2009-01-15 10:17:11 -05:00
OpTime last = nextOpTime ;
2010-03-05 13:50:46 -05:00
nextOpTime = OpTime ( ts . date ( ) ) ;
2009-01-15 10:17:11 -05:00
// op timestamps must be strictly increasing; otherwise replication is halted
if ( ! ( last < nextOpTime ) ) {
2010-03-05 14:11:00 -05:00
log ( ) < < " sync error: last applied optime at slave >= nextOpTime from master " < < endl ;
2010-03-05 13:50:46 -05:00
log ( ) < < " last: " < < last . toStringLong ( ) < < ' \n ' ;
log ( ) < < " nextOpTime: " < < nextOpTime . toStringLong ( ) < < ' \n ' ;
log ( ) < < " halting replication " < < endl ;
replInfo = replAllDead = " sync error last >= nextOpTime " ;
2010-03-05 14:11:00 -05:00
uassert ( 10123 , " replication error last applied optime at slave >= nextOpTime from master " , false ) ;
2009-01-15 10:17:11 -05:00
}
2010-03-01 13:55:31 -08:00
// slavedelay: an op that is still too recent is put back and the loop ends for this pass
if ( replSettings . slavedelay & & ( unsigned ( time ( 0 ) ) < nextOpTime . getSecs ( ) + replSettings . slavedelay ) ) {
2010-07-05 12:29:42 -04:00
oplogReader . putBack ( op ) ;
2010-03-01 13:55:31 -08:00
_sleepAdviceTime = nextOpTime . getSecs ( ) + replSettings . slavedelay + 1 ;
dblock lk ;
if ( n > 0 ) {
syncedTo = last ;
save ( ) ;
}
log ( ) < < " repl: applied " < < n < < " operations " < < endl ;
log ( ) < < " repl: syncedTo: " < < syncedTo . toStringLong ( ) < < endl ;
log ( ) < < " waiting until: " < < _sleepAdviceTime < < " to continue " < < endl ;
break ;
}
2008-08-02 14:58:15 -04:00
2009-04-23 14:44:05 -04:00
sync_pullOpLog_applyOperation ( op , & localLogTail ) ;
2009-01-15 10:17:11 -05:00
n + + ;
2008-12-28 20:28:49 -05:00
}
}
2008-12-15 17:23:54 -05:00
2010-03-26 14:37:31 -04:00
return okResultCode ;
2008-08-02 14:58:15 -04:00
}
2009-01-23 18:24:15 -05:00
// File-scope query object parsed once from a JSON literal: matches a document
// whose "user" field is "repl". replAuthenticate() passes it to Helpers::findOne
// on local.system.users.
BSONObj userReplQuery = fromjson ( " { \" user \" : \" repl \" } " ) ;
2010-04-02 11:29:45 -04:00
2009-01-23 18:24:15 -05:00
/* Authenticate `conn` against the remote "local" database using credentials
   stored in our own local.system.users collection.

   The user matching userReplQuery is preferred; otherwise the collection's
   singleton document is used. When no user document exists, the result is true
   if `noauth` is set and false otherwise.

   @return true when authentication succeeded (or was unnecessary), false when
           the current client is not admin, no credentials are available, or the
           remote auth call fails. Asserts (10392/10393) when the user document
           lacks a user or pwd string.
*/
bool replAuthenticate( DBClientConnection *conn ) {
    if ( !cc().isAdmin() ) {
        log() << " replauthenticate: requires admin permissions, failing \n ";
        return false;
    }

    BSONObj user;
    {
        dblock lk;
        Client::Context ctxt( " local. " );
        // prefer the dedicated repl user, otherwise try the first user in local
        const bool haveUser = Helpers::findOne( " local.system.users ", userReplQuery, user ) ||
                              Helpers::getSingleton( " local.system.users ", user );
        if ( !haveUser ) {
            if ( noauth )
                return true; // presumably we are running a --noauth setup all around.
            log() << " replauthenticate: no user in local.system.users to use for authentication \n ";
            return false;
        }
    }

    string userName = user.getStringField( " user " );
    string password = user.getStringField( " pwd " );
    massert( 10392, " bad user object? [1] ", !userName.empty() );
    massert( 10393, " bad user object? [2] ", !password.empty() );

    string err;
    const bool authenticated = conn->auth( " local ", userName.c_str(), password.c_str(), err, false );
    if ( authenticated )
        return true;

    log() << " replauthenticate: can't authenticate to master server, user: " << userName << endl;
    return false;
}
2010-04-02 11:29:45 -04:00
/* Send a handshake command carrying this node's identity to the remote server.
   The identity is the _id of the local.me singleton document, which is created
   (with a generated OID) under dblock if it does not exist yet.
   @return always true; the command's outcome is only logged.
*/
bool replHandshake( DBClientConnection *conn ) {
    BSONObj me;
    {
        dblock l;
        const bool haveIdentity = Helpers::getSingleton( " local.me ", me );
        if ( !haveIdentity ) {
            BSONObjBuilder idBuilder;
            idBuilder.appendOID( " _id ", 0, true );
            me = idBuilder.obj();
            Helpers::putSingleton( " local.me ", me );
        }
    }

    BSONObjBuilder handshakeCmd;
    handshakeCmd.appendAs( me[ " _id " ], " handshake " );

    BSONObj res;
    bool ok = conn->runCommand( " admin ", handshakeCmd.obj(), res );
    // ignoring for now on purpose for older versions
    log( ok ) << " replHandshake res not: " << ok << " res: " << res << endl;
    return true;
}
2010-07-05 12:29:42 -04:00
bool OplogReader : : connect ( string hostName ) {
if ( conn ( ) = = 0 ) {
_conn = auto_ptr < DBClientConnection > ( new DBClientConnection ( false , 0 , replPair ? 20 : 0 /* tcp timeout */ ) ) ;
2009-03-18 13:45:32 -04:00
string errmsg ;
ReplInfo r ( " trying to connect to sync source " ) ;
2010-07-05 12:29:42 -04:00
if ( ! _conn - > connect ( hostName . c_str ( ) , errmsg ) | |
! replAuthenticate ( _conn . get ( ) ) | |
! replHandshake ( _conn . get ( ) ) ) {
2009-03-18 13:45:32 -04:00
resetConnection ( ) ;
2010-02-12 15:56:17 -05:00
log ( ) < < " repl: " < < errmsg < < endl ;
2009-03-18 13:45:32 -04:00
return false ;
}
}
return true ;
}
2009-01-15 10:17:11 -05:00
/* note: not yet in mutex at this point.
2010-03-26 14:37:31 -04:00
returns > = 0 if ok . return - 1 if you want to reconnect .
return value of zero indicates no sleep necessary before next call
2009-01-15 10:17:11 -05:00
*/
2010-03-26 14:37:31 -04:00
int ReplSource : : sync ( int & nApplied ) {
2010-03-01 13:55:31 -08:00
_sleepAdviceTime = 0 ;
2009-01-15 10:17:11 -05:00
ReplInfo r ( " sync " ) ;
2010-03-08 13:56:55 -05:00
if ( ! cmdLine . quiet ) {
Nullstream & l = log ( ) ;
l < < " repl: from " ;
if ( sourceName ( ) ! = " main " ) {
l < < " source: " < < sourceName ( ) < < ' ' ;
}
l < < " host: " < < hostName < < endl ;
}
2009-01-15 10:17:11 -05:00
nClonedThisPass = 0 ;
2009-02-02 16:18:44 -05:00
// FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
2009-08-25 10:24:44 -04:00
if ( ( string ( " localhost " ) = = hostName | | string ( " 127.0.0.1 " ) = = hostName ) & & cmdLine . port = = CmdLine : : DefaultDBPort ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: can't sync from self (localhost). sources configuration may be wrong. " < < endl ;
2009-01-15 10:17:11 -05:00
sleepsecs ( 5 ) ;
2010-03-26 14:37:31 -04:00
return - 1 ;
2008-12-28 20:28:49 -05:00
}
2008-07-28 13:51:39 -04:00
2010-07-05 12:29:42 -04:00
if ( ! oplogReader . connect ( hostName ) ) {
2010-02-12 16:05:05 -05:00
log ( 4 ) < < " repl: can't connect to sync source " < < endl ;
2009-03-18 13:45:32 -04:00
if ( replPair & & paired ) {
assert ( startsWith ( hostName . c_str ( ) , replPair - > remoteHost . c_str ( ) ) ) ;
replPair - > arbitrate ( ) ;
2009-01-15 10:17:11 -05:00
}
2010-03-26 14:37:31 -04:00
return - 1 ;
2009-01-15 10:17:11 -05:00
}
2009-03-18 13:45:32 -04:00
2009-04-10 17:03:45 -04:00
if ( paired ) {
2010-07-05 12:29:42 -04:00
int remote = replPair - > negotiate ( oplogReader . conn ( ) , " direct " ) ;
2009-04-10 17:03:45 -04:00
int nMasters = ( remote = = ReplPair : : State_Master ) + ( replPair - > state = = ReplPair : : State_Master ) ;
if ( getInitialSyncCompleted ( ) & & nMasters ! = 1 ) {
log ( ) < < ( nMasters = = 0 ? " no master " : " two masters " ) < < " , deferring oplog pull " < < endl ;
2010-03-26 14:37:31 -04:00
return 1 ;
2009-04-10 17:03:45 -04:00
}
}
2009-01-15 10:17:11 -05:00
/*
// get current mtime at the server.
BSONObj o = conn - > findOne ( " admin.$cmd " , opTimeQuery ) ;
2010-01-28 13:41:51 -05:00
BSONElement e = o . getField ( " optime " ) ;
2009-01-15 10:17:11 -05:00
if ( e . eoo ( ) ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: failed to get cur optime from master " < < endl ;
2009-01-15 10:17:11 -05:00
log ( ) < < " " < < o . toString ( ) < < endl ;
return false ;
}
2009-12-28 16:43:43 -05:00
uassert ( 10124 , e . type ( ) = = Date ) ;
2009-01-15 10:17:11 -05:00
OpTime serverCurTime ;
serverCurTime . asDate ( ) = e . date ( ) ;
*/
2009-09-22 10:10:02 -04:00
return sync_pullOpLog ( nApplied ) ;
2009-01-15 10:17:11 -05:00
}
2008-08-02 14:58:15 -04:00
2009-01-15 10:17:11 -05:00
/* --------------------------------------------------------------*/
2008-09-11 15:13:47 -04:00
2009-01-15 10:17:11 -05:00
/*
TODO :
_ source has autoptr to the cursor
_ reuse that cursor when we can
*/
2008-08-02 14:58:15 -04:00
2009-09-22 10:10:02 -04:00
/* Run one replication pass: load the source list, then sync() each source in turn.
   sources  - passed to ReplSource::loadAll() under dblock at the start of every call
   nApplied - forwarded to ReplSource::sync() for each source
   returns: # of seconds to sleep before next pass
   0 = no sleep recommended
   1 = special sentinel indicating adaptive sleep recommended
   Fixed values returned directly: 20 when no sources are configured, 10 after a
   SyncException, 60 after a severe AssertionException.
*/
int _replMain ( ReplSource : : SourceVector & sources , int & nApplied ) {
2008-12-15 17:23:54 -05:00
{
2009-01-15 10:17:11 -05:00
ReplInfo r ( " replMain load sources " ) ;
2008-12-15 17:23:54 -05:00
dblock lk ;
2009-01-15 10:17:11 -05:00
ReplSource : : loadAll ( sources ) ;
2010-03-29 14:14:50 -07:00
replSettings . fastsync = false ; // only need this param for initial reset
2008-12-15 17:23:54 -05:00
}
2009-01-15 10:17:11 -05:00
if ( sources . empty ( ) ) {
/* replication is not configured yet (for --slave) in local.sources. Poll for config
every 20 seconds.
*/
return 20 ;
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
2009-09-22 10:10:02 -04:00
// Start from the adaptive-sleep sentinel; each source below may overwrite it, so the
// value returned reflects the last source that assigned it.
int sleepAdvice = 1 ;
2009-04-01 16:00:56 -04:00
for ( ReplSource : : SourceVector : : iterator i = sources . begin ( ) ; i ! = sources . end ( ) ; i + + ) {
ReplSource * s = i - > get ( ) ;
2010-03-26 14:37:31 -04:00
// res stays negative if sync() throws, so the connection reset after the
// try/catch also runs for the exception paths that do not return.
int res = - 1 ;
2009-01-15 10:17:11 -05:00
try {
2010-03-26 14:37:31 -04:00
res = s - > sync ( nApplied ) ;
2009-01-15 10:17:11 -05:00
bool moreToSync = s - > haveMoreDbsToSync ( ) ;
2010-03-26 14:37:31 -04:00
// Sleep advice, in priority order: sync asked for a reconnect -> 3 seconds;
// more dbs left to sync -> 0; the source's own non-zero advice; else sync()'s result.
if ( res < 0 ) {
2009-09-22 10:10:02 -04:00
sleepAdvice = 3 ;
}
else if ( moreToSync ) {
sleepAdvice = 0 ;
}
2010-03-01 13:55:31 -08:00
else if ( s - > sleepAdvice ( ) ) {
sleepAdvice = s - > sleepAdvice ( ) ;
}
2010-03-26 14:37:31 -04:00
else
sleepAdvice = res ;
// A successful pass with nothing left to sync marks the initial sync as complete.
if ( res > = 0 & & ! moreToSync /*&& !s->syncedTo.isNull()*/ ) {
2009-01-15 10:17:11 -05:00
pairSync - > setInitialSyncCompletedLocking ( ) ;
}
}
2009-03-12 11:01:52 -04:00
// Handler order matters: the specific exception types are listed before the
// general ones. Note the early returns skip the resetConnection() call below.
catch ( const SyncException & ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " caught SyncException " < < endl ;
2009-01-15 10:17:11 -05:00
return 10 ;
}
catch ( AssertionException & e ) {
if ( e . severe ( ) ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " replMain AssertionException " < < e . what ( ) < < endl ;
2009-01-15 10:17:11 -05:00
return 60 ;
}
else {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: AssertionException " < < e . what ( ) < < ' \n ' ;
2009-01-15 10:17:11 -05:00
}
replInfo = " replMain caught AssertionException " ;
}
2009-03-12 11:01:52 -04:00
catch ( const DBException & e ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: DBException " < < e . what ( ) < < endl ;
2009-03-12 11:01:52 -04:00
replInfo = " replMain caught DBException " ;
}
catch ( const std : : exception & e ) {
2009-09-22 10:10:02 -04:00
log ( ) < < " repl: std::exception " < < e . what ( ) < < endl ;
2009-03-12 11:01:52 -04:00
replInfo = " replMain caught std::exception " ;
}
2009-02-11 09:58:01 -05:00
// Anything else is unexpected: set replAllDead (replMain() checks it before each pass).
catch ( . . . ) {
2009-03-12 11:01:52 -04:00
log ( ) < < " unexpected exception during replication. replication will halt " < < endl ;
replAllDead = " caught unexpected exception during replication " ;
2009-02-11 09:58:01 -05:00
}
2010-03-26 14:37:31 -04:00
// Negative result (reconnect requested, or sync() threw): drop the connection.
if ( res < 0 )
2010-07-05 12:29:42 -04:00
s - > oplogReader . resetConnection ( ) ;
2008-12-15 17:23:54 -05:00
}
2009-09-22 10:10:02 -04:00
return sleepAdvice ;
2008-12-15 17:23:54 -05:00
}
2009-01-15 10:17:11 -05:00
void replMain ( ) {
2009-04-01 16:00:56 -04:00
ReplSource : : SourceVector sources ;
2009-01-15 10:17:11 -05:00
while ( 1 ) {
int s = 0 ;
{
dblock lk ;
2009-02-04 13:22:02 -05:00
if ( replAllDead ) {
2010-02-08 21:04:09 -05:00
if ( ! replSettings . autoresync | | ! ReplSource : : throttledForceResyncDead ( " auto " ) )
2009-02-02 11:15:24 -05:00
break ;
}
2010-01-24 10:26:24 -05:00
assert ( syncing = = 0 ) ; // i.e., there is only one sync thread running. we will want to change/fix this.
2009-01-15 10:17:11 -05:00
syncing + + ;
}
try {
2009-09-22 10:10:02 -04:00
int nApplied = 0 ;
s = _replMain ( sources , nApplied ) ;
if ( s = = 1 ) {
if ( nApplied = = 0 ) s = 2 ;
else if ( nApplied > 100 ) {
// sleep very little - just enought that we aren't truly hammering master
sleepmillis ( 75 ) ;
s = 0 ;
}
}
2009-01-15 10:17:11 -05:00
} catch ( . . . ) {
2009-09-22 10:10:02 -04:00
out ( ) < < " caught exception in _replMain " < < endl ;
s = 4 ;
2009-01-15 10:17:11 -05:00
}
{
dblock lk ;
assert ( syncing = = 1 ) ;
syncing - - ;
}
2010-03-27 12:37:38 -04:00
if ( relinquishSyncingSome ) {
relinquishSyncingSome = 0 ;
s = 1 ; // sleep before going back in to syncing=1
}
2009-01-15 10:17:11 -05:00
if ( s ) {
stringstream ss ;
2010-03-08 15:03:00 -05:00
ss < < " repl: sleep " < < s < < " sec before next pass " ;
2009-01-15 10:17:11 -05:00
string msg = ss . str ( ) ;
2010-02-05 10:53:22 -05:00
if ( ! cmdLine . quiet )
log ( ) < < msg < < endl ;
2009-01-15 10:17:11 -05:00
ReplInfo r ( msg . c_str ( ) ) ;
sleepsecs ( s ) ;
}
}
}
2008-08-02 14:58:15 -04:00
2009-01-15 10:17:11 -05:00
// Debug switch: when nonzero, replSlaveThread() leaves its retry loop once replMain() returns.
int debug_stop_repl = 0 ;
2008-08-02 14:58:15 -04:00
2010-03-08 15:03:00 -05:00
static void replMasterThread ( ) {
sleepsecs ( 4 ) ;
Client : : initThread ( " replmaster " ) ;
2010-06-04 23:07:41 -04:00
int toSleep = 10 ;
2010-03-08 15:03:00 -05:00
while ( 1 ) {
2010-06-04 23:07:41 -04:00
sleepsecs ( toSleep ) ;
2010-03-08 15:03:00 -05:00
/* write a keep-alive like entry to the log. this will make things like
printReplicationStatus ( ) and printSlaveReplicationStatus ( ) stay up - to - date
even when things are idle .
*/
{
2010-06-04 23:07:41 -04:00
writelocktry lk ( " " , 1 ) ;
if ( lk . got ( ) ) {
toSleep = 10 ;
cc ( ) . getAuthenticationInfo ( ) - > authorize ( " admin " ) ;
try {
logKeepalive ( ) ;
}
catch ( . . . ) {
log ( ) < < " caught exception in replMasterThread() " < < endl ;
}
2010-03-08 15:03:00 -05:00
}
2010-06-04 23:07:41 -04:00
else {
2010-06-08 11:37:30 -04:00
log ( 5 ) < < " couldn't logKeepalive " < < endl ;
2010-06-04 23:07:41 -04:00
toSleep = 1 ;
2010-03-08 15:03:00 -05:00
}
}
}
}
2009-01-15 10:17:11 -05:00
void replSlaveThread ( ) {
sleepsecs ( 1 ) ;
2010-02-10 14:18:57 -05:00
Client : : initThread ( " replslave " ) ;
2009-01-15 10:17:11 -05:00
{
dblock lk ;
2010-02-04 10:49:19 -05:00
cc ( ) . getAuthenticationInfo ( ) - > authorize ( " admin " ) ;
2009-01-23 18:24:15 -05:00
2009-01-15 10:17:11 -05:00
BSONObj obj ;
2009-01-18 17:48:44 -05:00
if ( Helpers : : getSingleton ( " local.pair.startup " , obj ) ) {
2009-01-15 10:17:11 -05:00
// should be: {replacepeer:1}
replacePeer = true ;
pairSync - > setInitialSyncCompleted ( ) ; // we are the half that has all the data
}
2008-12-15 17:23:54 -05:00
}
2009-01-15 10:17:11 -05:00
while ( 1 ) {
try {
replMain ( ) ;
if ( debug_stop_repl )
break ;
sleepsecs ( 5 ) ;
}
catch ( AssertionException & ) {
ReplInfo r ( " Assertion in replSlaveThread() : sleeping 5 minutes before retry " ) ;
problem ( ) < < " Assertion in replSlaveThread(): sleeping 5 minutes before retry " < < endl ;
sleepsecs ( 300 ) ;
2008-12-28 20:28:49 -05:00
}
}
2008-09-03 16:43:00 -04:00
}
2008-12-02 14:24:45 -05:00
2009-01-15 10:17:11 -05:00
void tempThread ( ) {
while ( 1 ) {
2009-12-03 13:12:51 -05:00
out ( ) < < dbMutex . info ( ) . isLocked ( ) < < endl ;
2009-01-15 10:17:11 -05:00
sleepmillis ( 100 ) ;
}
2008-12-28 20:28:49 -05:00
}
2010-05-30 15:38:37 -04:00
// Forward declarations (not defined in the visible part of this file).
// startReplication() calls newRepl() when cmdLine.replSet is non-empty, oldRepl() otherwise.
void newRepl ( ) ;
void oldRepl ( ) ;
2009-01-15 10:17:11 -05:00
void startReplication ( ) {
2010-04-21 16:43:51 -04:00
/* if we are going to be a replica set, we aren't doing other forms of replication. */
2010-05-30 15:04:20 -04:00
if ( ! cmdLine . replSet . empty ( ) ) {
if ( replSettings . slave | | replSettings . master | | replPair ) {
2010-05-30 15:38:37 -04:00
log ( ) < < " *** " < < endl ;
2010-05-30 15:04:20 -04:00
log ( ) < < " ERROR: can't use --slave or --master replication options with --replSet " < < endl ;
2010-05-30 15:38:37 -04:00
log ( ) < < " *** " < < endl ;
2010-05-30 15:04:20 -04:00
}
2010-06-29 18:27:09 -04:00
createOplog ( ) ;
2010-05-30 15:38:37 -04:00
newRepl ( ) ;
2010-04-13 13:22:42 -04:00
return ;
2010-05-30 15:04:20 -04:00
}
2010-04-13 13:22:42 -04:00
2010-05-30 15:38:37 -04:00
oldRepl ( ) ;
2009-01-15 10:17:11 -05:00
/* this was just to see if anything locks for longer than it should -- we need to be careful
not to be locked when trying to connect ( ) or query ( ) the other side .
*/
//boost::thread tempt(tempThread);
2010-04-13 13:22:42 -04:00
if ( ! replSettings . slave & & ! replSettings . master & & ! replPair )
2009-01-15 10:17:11 -05:00
return ;
2008-12-28 20:28:49 -05:00
{
dblock lk ;
2010-03-22 10:50:55 -07:00
cc ( ) . getAuthenticationInfo ( ) - > authorize ( " admin " ) ;
2009-01-15 10:17:11 -05:00
pairSync - > init ( ) ;
}
2010-02-08 21:04:09 -05:00
if ( replSettings . slave | | replPair ) {
if ( replSettings . slave ) {
2010-04-13 13:22:42 -04:00
assert ( replSettings . slave = = SimpleSlave ) ;
2009-01-23 15:15:07 -05:00
log ( 1 ) < < " slave=true " < < endl ;
2010-04-13 13:22:42 -04:00
}
else
replSettings . slave = ReplPairSlave ;
2009-01-15 10:17:11 -05:00
boost : : thread repl_thread ( replSlaveThread ) ;
}
2010-02-08 21:04:09 -05:00
if ( replSettings . master | | replPair ) {
if ( replSettings . master )
2009-01-23 15:15:07 -05:00
log ( 1 ) < < " master=true " < < endl ;
2010-02-08 21:04:09 -05:00
replSettings . master = true ;
2009-01-23 10:17:29 -05:00
createOplog ( ) ;
2010-03-08 15:03:00 -05:00
boost : : thread t ( replMasterThread ) ;
2009-01-15 10:17:11 -05:00
}
2010-03-29 14:14:50 -07:00
while ( replSettings . fastsync ) // don't allow writes until we've set up from log
sleepmillis ( 50 ) ;
2008-12-28 20:28:49 -05:00
}
2008-09-03 16:43:00 -04:00
2009-01-15 10:17:11 -05:00
/* called from main at server startup */
void pairWith ( const char * remoteEnd , const char * arb ) {
replPair = new ReplPair ( remoteEnd , arb ) ;
}
2009-01-14 17:09:51 -05:00
2010-07-12 12:37:27 -04:00
/* Ad-hoc exercise of pretouchN() through a ThreadPool.
   Three identical insert ops are split into index ranges [a..b] of up to m entries,
   one pool task per range, and the pool is joined before returning.
*/
void testPretouch() {
    int nthr = min(8, 8);
    nthr = max(nthr, 1);
    int m = 8 / nthr;
    ThreadPool tp(nthr);

    vector<BSONObj> v;
    BSONObj x = BSON(" ns " << " test.foo " << " o " << BSON(" _id " << 1) << " op " << " i ");
    for (int k = 0; k < 3; ++k)
        v.push_back(x);

    for (unsigned a = 0; a < v.size(); a += m) {
        unsigned b = a + m - 1; // v[a..b]
        if (b >= v.size())
            b = v.size() - 1; // clamp the last range to the end of v
        tp.schedule(pretouchN, v, a, b);
        DEV cout << " pretouch task: " << a << " .. " << b << endl;
    }
    tp.join();
}
2009-03-04 15:57:35 -05:00
2009-01-14 17:09:51 -05:00
} // namespace mongo