Files
mongo/db/repl/rs_sync.cpp

193 lines
6.7 KiB
C++
Raw Normal View History

2010-07-16 10:42:53 -04:00
/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pch.h"
#include "../client.h"
#include "../../client/dbclient.h"
#include "rs.h"
2010-07-19 20:09:00 -04:00
#include "../repl.h"
2010-07-16 10:42:53 -04:00
2010-07-16 12:51:01 -04:00
namespace mongo {
2010-07-16 10:42:53 -04:00
2010-07-19 13:08:24 -04:00
void startSyncThread() {
2010-07-16 12:51:01 -04:00
Client::initThread("rs_sync");
2010-07-19 13:08:24 -04:00
theReplSet->syncThread();
2010-07-20 14:58:51 -04:00
cc().shutdown();
2010-07-19 13:08:24 -04:00
}
2010-07-19 22:32:43 -04:00
void ReplSetImpl::syncApply(const BSONObj &o) {
//const char *op = o.getStringField("op");
char db[MaxDatabaseLen];
const char *ns = o.getStringField("ns");
nsToDatabase(ns, db);
if ( *ns == '.' || *ns == 0 ) {
log() << "replSet skipping bad op in oplog: " << o.toString() << endl;
return;
}
Client::Context ctx(ns);
ctx.getClient()->curop()->reset();
/* todo : if this asserts, do we want to ignore or not? */
applyOperation_inlock(o);
}
2010-07-19 20:09:00 -04:00
void ReplSetImpl::syncTail() {
2010-07-19 22:32:43 -04:00
// todo : locking vis a vis the mgr...
2010-07-22 17:50:54 -04:00
const Member *primary = box.getPrimary();
2010-07-19 22:32:43 -04:00
if( primary == 0 ) return;
string hn = primary->h().toString();
2010-07-19 20:09:00 -04:00
OplogReader r;
2010-07-19 22:32:43 -04:00
if( !r.connect(primary->h().toString()) ) {
log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog;
return;
}
2010-07-21 15:39:59 -04:00
/* first make sure we are not hopelessly out of sync by being very stale. */
{
BSONObj remoteOldestOp = r.findOne(rsoplog, Query());
OpTime ts = remoteOldestOp["ts"]._opTime();
DEV log() << "remoteOldestOp: " << ts.toStringPretty() << endl;
else log(3) << "remoteOldestOp: " << ts.toStringPretty() << endl;
if( lastOpTimeWritten < ts ) {
log() << "replSet error too stale to catch up, at least from primary " << hn << rsLog;
log() << "replSet our last optime : " << lastOpTimeWritten.toStringPretty() << rsLog;
log() << "replSet oldest at " << hn << " : " << ts.toStringPretty() << rsLog;
log() << "replSet See http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member" << rsLog;
sethbmsg("error too stale to catch up");
sleepsecs(120);
return;
}
}
2010-07-19 22:32:43 -04:00
r.tailingQueryGTE(rsoplog, lastOpTimeWritten);
assert( r.haveCursor() );
assert( r.awaitCapable() );
{
BSONObj o = r.nextSafe();
OpTime ts = o["ts"]._opTime();
long long h = o["h"].numberLong();
if( ts != lastOpTimeWritten || h != lastH ) {
2010-07-21 15:39:59 -04:00
log() << "TEMP " << lastOpTimeWritten.toStringPretty() << endl;
log() << "TEMP " << ts.toStringPretty() << endl;
/*
}*/
2010-07-20 16:03:03 -04:00
2010-07-21 13:13:36 -04:00
syncRollback(r);
2010-07-19 22:32:43 -04:00
return;
}
}
// TODO : switch state to secondary here when appropriate...
while( 1 ) {
2010-07-20 11:05:27 -04:00
while( 1 ) {
if( !r.moreInCurrentBatch() ) {
/* we need to occasionally check some things. between
batches is probably a good time. */
/* perhaps we should check this earlier? but not before the rollback checks. */
2010-07-22 17:50:54 -04:00
if( state().recovering() ) {
2010-07-20 11:05:27 -04:00
/* can we go to RS_SECONDARY state? we can if not too old and not minvalid */
bool golive = false;
{
readlock lk("local.replset.minvalid");
BSONObj mv;
if( Helpers::getSingleton("local.replset.minvalid", mv) ) {
2010-07-20 11:34:23 -04:00
if( mv["ts"]._opTime() <= lastOpTimeWritten ) {
2010-07-20 11:05:27 -04:00
golive=true;
}
}
else
golive = true; /* must have been the original member */
}
if( golive )
2010-07-22 17:50:54 -04:00
changeState(MemberState::RS_SECONDARY);
2010-07-20 11:05:27 -04:00
/* todo: too stale capability */
}
2010-07-22 17:50:54 -04:00
if( box.getPrimary() != primary )
2010-07-20 11:05:27 -04:00
return;
}
if( !r.more() )
break;
{
BSONObj o = r.nextSafe(); /* note we might get "not master" at some point */
{
writelock lk("");
syncApply(o);
_logOpObjRS(o); /* with repl sets we write the ops to our oplog too: */
}
2010-07-19 22:32:43 -04:00
}
}
2010-07-20 13:37:09 -04:00
r.tailCheck();
if( !r.haveCursor() ) {
log() << "replSet TEMP end syncTail pass with " << hn << rsLog;
// TODO : reuse our cnonection to the primary.
2010-07-20 11:05:27 -04:00
return;
2010-07-20 13:37:09 -04:00
}
2010-07-22 17:50:54 -04:00
if( box.getPrimary() != primary )
2010-07-20 11:05:27 -04:00
return;
2010-07-19 22:32:43 -04:00
// looping back is ok because this is a tailable cursor
}
2010-07-19 20:09:00 -04:00
}
2010-07-19 13:08:24 -04:00
void ReplSetImpl::_syncThread() {
2010-07-22 17:50:54 -04:00
StateBox::SP sp = box.get();
if( sp.state.primary() )
2010-07-19 13:08:24 -04:00
return;
2010-07-19 18:05:44 -04:00
/* later, we can sync from up secondaries if we want. tbd. */
2010-07-22 17:50:54 -04:00
if( sp.primary == 0 )
2010-07-19 18:05:44 -04:00
return;
2010-07-19 13:08:24 -04:00
2010-07-19 18:05:44 -04:00
/* do we have anything at all? */
if( lastOpTimeWritten.isNull() ) {
syncDoInitialSync();
2010-07-19 18:09:00 -04:00
return; // _syncThread will be recalled, starts from top again in case sync failed.
2010-07-19 18:05:44 -04:00
}
2010-07-19 18:09:00 -04:00
/* we have some data. continue tailing. */
2010-07-19 20:09:00 -04:00
syncTail();
2010-07-19 13:08:24 -04:00
}
void ReplSetImpl::syncThread() {
2010-07-20 11:12:17 -04:00
if( myConfig().arbiterOnly )
return;
2010-07-19 13:08:24 -04:00
while( 1 ) {
2010-07-19 17:23:11 -04:00
try {
_syncThread();
2010-07-19 13:08:24 -04:00
}
catch(DBException& e) {
log() << "replSet syncThread: " << e.toString() << rsLog;
sleepsecs(10);
}
2010-07-20 13:37:09 -04:00
catch(...) {
sethbmsg("unexpected exception in syncThread()");
// TODO : SET NOT SECONDARY here.
sleepsecs(60);
}
2010-07-19 13:08:24 -04:00
sleepsecs(2);
}
2010-07-16 12:51:01 -04:00
}
2010-07-16 10:42:53 -04:00
}