2010-07-21 14:15:48 -04:00
/* @file rs_rollback.cpp
2011-01-04 00:40:41 -05:00
*
2010-07-21 01:45:04 -04:00
* Copyright ( C ) 2008 10 gen Inc .
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License , version 3 ,
* as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
*
* You should have received a copy of the GNU Affero General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
# include "pch.h"
# include "../client.h"
# include "../../client/dbclient.h"
# include "rs.h"
# include "../repl.h"
2011-06-22 15:51:08 -04:00
# include "../ops/query.h"
2011-03-29 00:13:27 -07:00
# include "../cloner.h"
2011-06-20 16:23:54 -04:00
# include "../ops/update.h"
2011-06-20 17:34:16 -04:00
# include "../ops/delete.h"
2010-07-21 01:45:04 -04:00
/* Scenarios
We went offline with ops not replicated out .
2011-01-04 00:40:41 -05:00
2010-07-21 01:45:04 -04:00
F = node that failed and coming back .
P = node that took over , new primary
# 1:
F : a b c d e f g
P : a b c d q
2011-01-04 00:40:41 -05:00
The design is " keep P " . One could argue here that " keep F " has some merits , however , in most cases P
will have significantly more data . Also note that P may have a proper subset of F ' s stream if there were
2010-07-21 12:50:03 -04:00
no subsequent writes .
2010-07-21 01:45:04 -04:00
2011-01-04 00:40:41 -05:00
For now the model is simply : get F back in sync with P . If P was really behind or something , we should have
2010-07-21 01:45:04 -04:00
just chosen not to fail over anyway .
# 2:
F : a b c d e f g - > a b c d
P : a b c d
# 3:
F : a b c d e f g - > a b c d q r s t u v w x z
P : a b c d . q r s t u v w x z
Steps
find an event in common . ' d ' .
2011-01-04 00:40:41 -05:00
undo our events beyond that by :
2010-07-21 01:45:04 -04:00
( 1 ) taking copy from other server of those objects
2011-01-04 00:40:41 -05:00
( 2 ) do not consider copy valid until we pass reach an optime after when we fetched the new version of object
2010-07-21 01:45:04 -04:00
- - i . e . , reset minvalid .
2010-07-21 12:50:03 -04:00
( 3 ) we could skip operations on objects that are previous in time to our capture of the object as an optimization .
2010-07-21 01:45:04 -04:00
*/
namespace mongo {
2010-07-21 14:15:48 -04:00
using namespace bson ;
2010-08-16 10:26:07 -04:00
void incRBID ( ) ;
2010-08-16 10:01:26 -04:00
2011-01-04 00:40:41 -05:00
class rsfatal : public std : : exception {
2010-08-16 09:17:14 -04:00
public :
2011-01-04 00:40:41 -05:00
virtual const char * what ( ) const throw ( ) { return " replica set fatal exception " ; }
2010-08-16 09:17:14 -04:00
} ;
2010-07-26 15:29:18 -04:00
struct DocID {
const char * ns ;
be _id ;
2011-01-04 00:40:41 -05:00
bool operator < ( const DocID & d ) const {
2010-07-26 15:29:18 -04:00
int c = strcmp ( ns , d . ns ) ;
if ( c < 0 ) return true ;
if ( c > 0 ) return false ;
return _id < d . _id ;
}
} ;
2010-07-21 15:39:59 -04:00
struct HowToFixUp {
2011-01-04 00:40:41 -05:00
/* note this is a set -- if there are many $inc's on a single document we need to rollback, we only
2010-07-26 15:29:18 -04:00
need to refetch it once . */
set < DocID > toRefetch ;
2010-07-21 22:20:34 -04:00
2010-08-03 15:00:56 -04:00
/* collections to drop */
set < string > toDrop ;
2010-08-16 09:39:28 -04:00
set < string > collectionsToResync ;
2010-07-21 15:39:59 -04:00
OpTime commonPoint ;
2010-07-26 15:29:18 -04:00
DiskLoc commonPointOurDiskloc ;
2010-07-29 17:21:50 -04:00
int rbid ; // remote server's current rollback sequence #
2010-07-21 15:39:59 -04:00
} ;
2011-01-04 00:40:41 -05:00
static void refetch ( HowToFixUp & h , const BSONObj & ourObj ) {
2010-07-27 02:13:13 -04:00
const char * op = ourObj . getStringField ( " op " ) ;
2011-01-04 00:40:41 -05:00
if ( * op = = ' n ' )
2010-07-27 02:13:13 -04:00
return ;
unsigned long long totSize = 0 ;
totSize + = ourObj . objsize ( ) ;
if ( totSize > 512 * 1024 * 1024 )
throw " rollback too large " ;
DocID d ;
2011-01-31 15:58:37 -08:00
// NOTE The assigned ns value may become invalid if we yield.
2010-07-27 02:13:13 -04:00
d . ns = ourObj . getStringField ( " ns " ) ;
2011-01-04 00:40:41 -05:00
if ( * d . ns = = 0 ) {
2010-08-03 15:00:56 -04:00
log ( ) < < " replSet WARNING ignoring op on rollback no ns TODO : " < < ourObj . toString ( ) < < rsLog ;
2010-07-27 02:13:13 -04:00
return ;
}
bo o = ourObj . getObjectField ( * op = = ' u ' ? " o2 " : " o " ) ;
2011-01-04 00:40:41 -05:00
if ( o . isEmpty ( ) ) {
2010-07-27 02:13:13 -04:00
log ( ) < < " replSet warning ignoring op on rollback : " < < ourObj . toString ( ) < < rsLog ;
return ;
}
2011-01-04 00:40:41 -05:00
if ( * op = = ' c ' ) {
2010-08-03 15:19:34 -04:00
be first = o . firstElement ( ) ;
2010-08-03 15:00:56 -04:00
NamespaceString s ( d . ns ) ; // foo.$cmd
2010-08-16 09:17:14 -04:00
string cmdname = first . fieldName ( ) ;
Command * cmd = Command : : findCommand ( cmdname . c_str ( ) ) ;
2011-01-04 00:40:41 -05:00
if ( cmd = = 0 ) {
2010-08-16 08:57:28 -04:00
log ( ) < < " replSet warning rollback no suchcommand " < < first . fieldName ( ) < < " - different mongod versions perhaps? " < < rsLog ;
2010-08-03 15:19:34 -04:00
return ;
}
2010-08-16 08:57:28 -04:00
else {
2010-08-16 09:39:28 -04:00
/* findandmodify - tranlated?
2011-01-04 00:40:41 -05:00
godinsert ? ,
2010-08-16 09:17:14 -04:00
renamecollection a - > b . just resync a & b
*/
if ( cmdname = = " create " ) {
2011-01-04 00:40:41 -05:00
/* Create collection operation
2010-08-22 10:30:26 -04:00
{ ts : . . . , h : . . . , op : " c " , ns : " foo.$cmd " , o : { create : " abc " , . . . } }
2010-08-16 08:57:28 -04:00
*/
string ns = s . db + ' . ' + o [ " create " ] . String ( ) ; // -> foo.abc
h . toDrop . insert ( ns ) ;
return ;
}
2011-01-04 00:40:41 -05:00
else if ( cmdname = = " drop " ) {
2010-08-16 09:39:28 -04:00
string ns = s . db + ' . ' + first . valuestr ( ) ;
h . collectionsToResync . insert ( ns ) ;
return ;
}
2011-01-04 00:40:41 -05:00
else if ( cmdname = = " dropIndexes " | | cmdname = = " deleteIndexes " ) {
2010-08-16 09:39:28 -04:00
/* TODO: this is bad. we simply full resync the collection here, which could be very slow. */
log ( ) < < " replSet info rollback of dropIndexes is slow in this version of mongod " < < rsLog ;
string ns = s . db + ' . ' + first . valuestr ( ) ;
h . collectionsToResync . insert ( ns ) ;
return ;
}
2011-01-04 00:40:41 -05:00
else if ( cmdname = = " renameCollection " ) {
2010-08-16 09:39:28 -04:00
/* TODO: slow. */
log ( ) < < " replSet info rollback of renameCollection is slow in this version of mongod " < < rsLog ;
string from = first . valuestr ( ) ;
string to = o [ " to " ] . String ( ) ;
h . collectionsToResync . insert ( from ) ;
h . collectionsToResync . insert ( to ) ;
return ;
}
2011-01-04 00:40:41 -05:00
else if ( cmdname = = " reIndex " ) {
2010-08-16 09:17:14 -04:00
return ;
}
2011-01-04 00:40:41 -05:00
else if ( cmdname = = " dropDatabase " ) {
2010-08-31 05:06:53 -04:00
log ( ) < < " replSet error rollback : can't rollback drop database full resync will be required " < < rsLog ;
2010-08-16 09:17:14 -04:00
log ( ) < < " replSet " < < o . toString ( ) < < rsLog ;
throw rsfatal ( ) ;
}
2011-01-04 00:40:41 -05:00
else {
2010-08-31 05:06:53 -04:00
log ( ) < < " replSet error can't rollback this command yet: " < < o . toString ( ) < < rsLog ;
log ( ) < < " replSet cmdname= " < < cmdname < < rsLog ;
2010-08-16 09:17:14 -04:00
throw rsfatal ( ) ;
2010-08-16 08:57:28 -04:00
}
2010-08-03 15:19:34 -04:00
}
2010-08-03 15:00:56 -04:00
}
2010-07-27 02:13:13 -04:00
d . _id = o [ " _id " ] ;
if ( d . _id . eoo ( ) ) {
2010-08-03 15:19:34 -04:00
log ( ) < < " replSet WARNING ignoring op on rollback no _id TODO : " < < d . ns < < ' ' < < ourObj . toString ( ) < < rsLog ;
2010-07-27 02:13:13 -04:00
return ;
}
h . toRefetch . insert ( d ) ;
}
2010-07-29 17:21:50 -04:00
int getRBID ( DBClientConnection * ) ;
2011-01-04 00:40:41 -05:00
static void syncRollbackFindCommonPoint ( DBClientConnection * them , HowToFixUp & h ) {
2010-07-26 15:29:18 -04:00
static time_t last ;
2011-01-04 00:40:41 -05:00
if ( time ( 0 ) - last < 60 ) {
2010-07-26 15:29:18 -04:00
throw " findcommonpoint waiting a while before trying again " ;
}
last = time ( 0 ) ;
assert ( dbMutex . atLeastReadLocked ( ) ) ;
2011-01-19 16:41:57 -05:00
Client : : Context c ( rsoplog ) ;
2010-07-26 15:29:18 -04:00
NamespaceDetails * nsd = nsdetails ( rsoplog ) ;
assert ( nsd ) ;
ReverseCappedCursor u ( nsd ) ;
if ( ! u . ok ( ) )
throw " our oplog empty or unreadable " ;
2010-07-23 14:52:14 -04:00
const Query q = Query ( ) . sort ( reverseNaturalObj ) ;
2010-07-21 14:15:48 -04:00
const bo fields = BSON ( " ts " < < 1 < < " h " < < 1 ) ;
2010-07-26 15:29:18 -04:00
//auto_ptr<DBClientCursor> u = us->query(rsoplog, q, 0, 0, &fields, 0, 0);
2010-07-29 17:21:50 -04:00
2010-08-02 14:44:53 -04:00
h . rbid = getRBID ( them ) ;
2010-07-21 14:15:48 -04:00
auto_ptr < DBClientCursor > t = them - > query ( rsoplog , q , 0 , 0 , & fields , 0 , 0 ) ;
2010-08-02 14:44:53 -04:00
if ( t . get ( ) = = 0 | | ! t - > more ( ) ) throw " remote oplog empty or unreadable " ;
2010-07-21 14:15:48 -04:00
2010-07-26 15:29:18 -04:00
BSONObj ourObj = u . current ( ) ;
2010-07-21 15:39:59 -04:00
OpTime ourTime = ourObj [ " ts " ] . _opTime ( ) ;
2010-07-21 14:15:48 -04:00
BSONObj theirObj = t - > nextSafe ( ) ;
2010-07-21 15:39:59 -04:00
OpTime theirTime = theirObj [ " ts " ] . _opTime ( ) ;
2010-07-21 14:15:48 -04:00
2010-08-09 10:00:13 -04:00
{
2010-07-21 14:15:48 -04:00
long long diff = ( long long ) ourTime . getSecs ( ) - ( ( long long ) theirTime . getSecs ( ) ) ;
/* diff could be positive, negative, or zero */
2010-08-17 16:31:21 -04:00
log ( ) < < " replSet info rollback our last optime: " < < ourTime . toStringPretty ( ) < < rsLog ;
log ( ) < < " replSet info rollback their last optime: " < < theirTime . toStringPretty ( ) < < rsLog ;
log ( ) < < " replSet info rollback diff in end of log times: " < < diff < < " seconds " < < rsLog ;
2011-04-20 13:18:10 -04:00
if ( diff > 1800 ) {
2010-08-17 16:31:21 -04:00
log ( ) < < " replSet rollback too long a time period for a rollback. " < < rsLog ;
2011-04-20 13:18:10 -04:00
throw " error not willing to roll back more than 30 minutes of data " ;
2010-07-26 15:29:18 -04:00
}
2010-07-21 14:15:48 -04:00
}
2010-07-21 15:39:59 -04:00
unsigned long long scanned = 0 ;
while ( 1 ) {
scanned + + ;
/* todo add code to assure no excessive scanning for too long */
2011-01-04 00:40:41 -05:00
if ( ourTime = = theirTime ) {
if ( ourObj [ " h " ] . Long ( ) = = theirObj [ " h " ] . Long ( ) ) {
2010-07-21 15:39:59 -04:00
// found the point back in time where we match.
// todo : check a few more just to be careful about hash collisions.
log ( ) < < " replSet rollback found matching events at " < < ourTime . toStringPretty ( ) < < rsLog ;
2010-07-28 20:24:21 -04:00
log ( ) < < " replSet rollback findcommonpoint scanned : " < < scanned < < rsLog ;
2010-07-21 15:39:59 -04:00
h . commonPoint = ourTime ;
2010-07-26 15:29:18 -04:00
h . commonPointOurDiskloc = u . currLoc ( ) ;
2010-07-21 15:39:59 -04:00
return ;
}
2010-07-27 02:13:13 -04:00
refetch ( h , ourObj ) ;
2011-01-04 00:40:41 -05:00
if ( ! t - > more ( ) ) {
2010-08-17 18:23:40 -04:00
log ( ) < < " replSet rollback error RS100 reached beginning of remote oplog " < < rsLog ;
log ( ) < < " replSet them: " < < them - > toString ( ) < < " scanned: " < < scanned < < rsLog ;
log ( ) < < " replSet theirTime: " < < theirTime . toStringLong ( ) < < rsLog ;
log ( ) < < " replSet ourTime: " < < ourTime . toStringLong ( ) < < rsLog ;
2010-08-18 11:50:52 -04:00
throw " RS100 reached beginning of remote oplog [2] " ;
2010-08-08 18:02:12 -04:00
}
2010-07-21 15:39:59 -04:00
theirObj = t - > nextSafe ( ) ;
theirTime = theirObj [ " ts " ] . _opTime ( ) ;
2010-07-27 02:13:13 -04:00
2010-07-26 15:29:18 -04:00
u . advance ( ) ;
2010-08-17 18:23:40 -04:00
if ( ! u . ok ( ) ) {
log ( ) < < " replSet rollback error RS101 reached beginning of local oplog " < < rsLog ;
log ( ) < < " replSet them: " < < them - > toString ( ) < < " scanned: " < < scanned < < rsLog ;
log ( ) < < " replSet theirTime: " < < theirTime . toStringLong ( ) < < rsLog ;
log ( ) < < " replSet ourTime: " < < ourTime . toStringLong ( ) < < rsLog ;
2010-08-18 11:50:52 -04:00
throw " RS101 reached beginning of local oplog [1] " ;
2010-08-17 18:23:40 -04:00
}
2010-07-26 15:29:18 -04:00
ourObj = u . current ( ) ;
2010-07-21 15:39:59 -04:00
ourTime = ourObj [ " ts " ] . _opTime ( ) ;
}
2011-01-04 00:40:41 -05:00
else if ( theirTime > ourTime ) {
if ( ! t - > more ( ) ) {
2010-08-17 18:23:40 -04:00
log ( ) < < " replSet rollback error RS100 reached beginning of remote oplog " < < rsLog ;
log ( ) < < " replSet them: " < < them - > toString ( ) < < " scanned: " < < scanned < < rsLog ;
log ( ) < < " replSet theirTime: " < < theirTime . toStringLong ( ) < < rsLog ;
log ( ) < < " replSet ourTime: " < < ourTime . toStringLong ( ) < < rsLog ;
2010-08-18 11:50:52 -04:00
throw " RS100 reached beginning of remote oplog [1] " ;
2010-08-08 18:02:12 -04:00
}
2010-07-21 15:39:59 -04:00
theirObj = t - > nextSafe ( ) ;
theirTime = theirObj [ " ts " ] . _opTime ( ) ;
}
2011-01-04 00:40:41 -05:00
else {
2010-07-21 15:39:59 -04:00
// theirTime < ourTime
2010-07-27 02:13:13 -04:00
refetch ( h , ourObj ) ;
2010-07-26 15:29:18 -04:00
u . advance ( ) ;
2011-01-04 00:40:41 -05:00
if ( ! u . ok ( ) ) {
2010-08-17 18:23:40 -04:00
log ( ) < < " replSet rollback error RS101 reached beginning of local oplog " < < rsLog ;
log ( ) < < " replSet them: " < < them - > toString ( ) < < " scanned: " < < scanned < < rsLog ;
log ( ) < < " replSet theirTime: " < < theirTime . toStringLong ( ) < < rsLog ;
log ( ) < < " replSet ourTime: " < < ourTime . toStringLong ( ) < < rsLog ;
2010-08-18 11:50:52 -04:00
throw " RS101 reached beginning of local oplog [2] " ;
2010-08-17 18:23:40 -04:00
}
2010-07-26 15:29:18 -04:00
ourObj = u . current ( ) ;
2010-07-21 15:39:59 -04:00
ourTime = ourObj [ " ts " ] . _opTime ( ) ;
}
2010-07-21 14:15:48 -04:00
}
2010-07-21 13:13:36 -04:00
}
2011-01-04 00:40:41 -05:00
struct X {
2010-07-23 16:19:39 -04:00
const bson : : bo * op ;
bson : : bo goodVersionOfObject ;
} ;
2011-01-04 00:40:41 -05:00
static void setMinValid ( bo newMinValid ) {
try {
log ( ) < < " replSet minvalid= " < < newMinValid [ " ts " ] . _opTime ( ) . toStringLong ( ) < < rsLog ;
}
catch ( . . . ) { }
{
Helpers : : putSingleton ( " local.replset.minvalid " , newMinValid ) ;
Client : : Context cx ( " local. " ) ;
cx . db ( ) - > flushFiles ( true ) ;
}
2010-08-16 12:21:49 -04:00
}
void ReplSetImpl : : syncFixUp ( HowToFixUp & h , OplogReader & r ) {
2011-01-04 00:40:41 -05:00
DBClientConnection * them = r . conn ( ) ;
// fetch all first so we needn't handle interruption in a fancy way
unsigned long long totSize = 0 ;
list < pair < DocID , bo > > goodVersions ;
bo newMinValid ;
/* fetch all the goodVersions of each document from current primary */
DocID d ;
unsigned long long n = 0 ;
try {
for ( set < DocID > : : iterator i = h . toRefetch . begin ( ) ; i ! = h . toRefetch . end ( ) ; i + + ) {
d = * i ;
assert ( ! d . _id . eoo ( ) ) ;
{
/* TODO : slow. lots of round trips. */
n + + ;
bo good = them - > findOne ( d . ns , d . _id . wrap ( ) ) . getOwned ( ) ;
totSize + = good . objsize ( ) ;
uassert ( 13410 , " replSet too much data to roll back " , totSize < 300 * 1024 * 1024 ) ;
// note good might be eoo, indicating we should delete it
goodVersions . push_back ( pair < DocID , bo > ( d , good ) ) ;
}
}
newMinValid = r . getLastOp ( rsoplog ) ;
if ( newMinValid . isEmpty ( ) ) {
sethbmsg ( " rollback error newMinValid empty? " ) ;
return ;
}
}
catch ( DBException & e ) {
sethbmsg ( str : : stream ( ) < < " rollback re-get objects: " < < e . toString ( ) , 0 ) ;
log ( ) < < " rollback couldn't re-get ns: " < < d . ns < < " _id: " < < d . _id < < ' ' < < n < < ' / ' < < h . toRefetch . size ( ) < < rsLog ;
throw e ;
}
MemoryMappedFile : : flushAll ( true ) ;
sethbmsg ( " rollback 3.5 " ) ;
if ( h . rbid ! = getRBID ( r . conn ( ) ) ) {
// our source rolled back itself. so the data we received isn't necessarily consistent.
sethbmsg ( " rollback rbid on source changed during rollback, cancelling this attempt " ) ;
return ;
}
// update them
sethbmsg ( str : : stream ( ) < < " rollback 4 n: " < < goodVersions . size ( ) ) ;
bool warn = false ;
assert ( ! h . commonPointOurDiskloc . isNull ( ) ) ;
dbMutex . assertWriteLocked ( ) ;
/* we have items we are writing that aren't from a point-in-time. thus best not to come online
until we get to that point in freshness . */
setMinValid ( newMinValid ) ;
/** any full collection resyncs required? */
if ( ! h . collectionsToResync . empty ( ) ) {
for ( set < string > : : iterator i = h . collectionsToResync . begin ( ) ; i ! = h . collectionsToResync . end ( ) ; i + + ) {
string ns = * i ;
sethbmsg ( str : : stream ( ) < < " rollback 4.1 coll resync " < < ns ) ;
2011-01-19 16:41:57 -05:00
Client : : Context c ( * i ) ;
2011-01-04 00:40:41 -05:00
try {
bob res ;
string errmsg ;
dropCollection ( ns , errmsg , res ) ;
{
dbtemprelease r ;
2011-04-01 18:33:11 -07:00
bool ok = copyCollectionFromRemote ( them - > getServerAddress ( ) , ns , bo ( ) , errmsg , false , true , false ) ;
2011-01-04 00:40:41 -05:00
if ( ! ok ) {
log ( ) < < " replSet rollback error resyncing collection " < < ns < < ' ' < < errmsg < < rsLog ;
throw " rollback error resyncing rollection [1] " ;
}
}
}
catch ( . . . ) {
log ( ) < < " replset rollback error resyncing collection " < < ns < < rsLog ;
throw " rollback error resyncing rollection [2] " ;
}
}
/* we did more reading from primary, so check it again for a rollback (which would mess us up), and
make minValid newer .
*/
sethbmsg ( " rollback 4.2 " ) ;
{
string err ;
try {
newMinValid = r . getLastOp ( rsoplog ) ;
if ( newMinValid . isEmpty ( ) ) {
err = " can't get minvalid from primary " ;
}
else {
setMinValid ( newMinValid ) ;
}
}
catch ( . . . ) {
err = " can't get/set minvalid " ;
}
if ( h . rbid ! = getRBID ( r . conn ( ) ) ) {
// our source rolled back itself. so the data we received isn't necessarily consistent.
// however, we've now done writes. thus we have a problem.
err + = " rbid at primary changed during resync/rollback " ;
}
if ( ! err . empty ( ) ) {
log ( ) < < " replSet error rolling back : " < < err < < " . A full resync will be necessary. " < < rsLog ;
/* todo: reset minvalid so that we are permanently in fatal state */
/* todo: don't be fatal, but rather, get all the data first. */
sethbmsg ( " rollback error " ) ;
throw rsfatal ( ) ;
}
}
sethbmsg ( " rollback 4.3 " ) ;
}
sethbmsg ( " rollback 4.6 " ) ;
/** drop collections to drop before doing individual fixups - that might make things faster below actually if there were subsequent inserts to rollback */
for ( set < string > : : iterator i = h . toDrop . begin ( ) ; i ! = h . toDrop . end ( ) ; i + + ) {
2011-01-19 16:41:57 -05:00
Client : : Context c ( * i ) ;
2011-01-04 00:40:41 -05:00
try {
bob res ;
string errmsg ;
log ( 1 ) < < " replSet rollback drop: " < < * i < < rsLog ;
dropCollection ( * i , errmsg , res ) ;
}
catch ( . . . ) {
log ( ) < < " replset rollback error dropping collection " < < * i < < rsLog ;
}
}
sethbmsg ( " rollback 4.7 " ) ;
2011-01-19 16:41:57 -05:00
Client : : Context c ( rsoplog ) ;
2011-01-04 00:40:41 -05:00
NamespaceDetails * oplogDetails = nsdetails ( rsoplog ) ;
uassert ( 13423 , str : : stream ( ) < < " replSet error in rollback can't find " < < rsoplog , oplogDetails ) ;
map < string , shared_ptr < RemoveSaver > > removeSavers ;
unsigned deletes = 0 , updates = 0 ;
for ( list < pair < DocID , bo > > : : iterator i = goodVersions . begin ( ) ; i ! = goodVersions . end ( ) ; i + + ) {
const DocID & d = i - > first ;
bo pattern = d . _id . wrap ( ) ; // { _id : ... }
try {
assert ( d . ns & & * d . ns ) ;
if ( h . collectionsToResync . count ( d . ns ) ) {
/* we just synced this entire collection */
continue ;
}
2011-03-11 15:59:18 -05:00
getDur ( ) . commitIfNeeded ( ) ;
2011-01-04 00:40:41 -05:00
/* keep an archive of items rolled back */
shared_ptr < RemoveSaver > & rs = removeSavers [ d . ns ] ;
if ( ! rs )
rs . reset ( new RemoveSaver ( " rollback " , " " , d . ns ) ) ;
// todo: lots of overhead in context, this can be faster
2011-01-19 16:41:57 -05:00
Client : : Context c ( d . ns ) ;
2011-01-04 00:40:41 -05:00
if ( i - > second . isEmpty ( ) ) {
// wasn't on the primary; delete.
/* TODO1.6 : can't delete from a capped collection. need to handle that here. */
deletes + + ;
NamespaceDetails * nsd = nsdetails ( d . ns ) ;
if ( nsd ) {
if ( nsd - > capped ) {
/* can't delete from a capped collection - so we truncate instead. if this item must go,
so must all successors ! ! ! */
try {
/** todo: IIRC cappedTrunateAfter does not handle completely empty. todo. */
// this will crazy slow if no _id index.
long long start = Listener : : getElapsedTimeMillis ( ) ;
DiskLoc loc = Helpers : : findOne ( d . ns , pattern , false ) ;
if ( Listener : : getElapsedTimeMillis ( ) - start > 200 )
log ( ) < < " replSet warning roll back slow no _id index for " < < d . ns < < " perhaps? " < < rsLog ;
//would be faster but requires index: DiskLoc loc = Helpers::findById(nsd, pattern);
if ( ! loc . isNull ( ) ) {
try {
nsd - > cappedTruncateAfter ( d . ns , loc , true ) ;
}
catch ( DBException & e ) {
if ( e . getCode ( ) = = 13415 ) {
// hack: need to just make cappedTruncate do this...
nsd - > emptyCappedCollection ( d . ns ) ;
}
else {
throw ;
}
}
}
}
catch ( DBException & e ) {
log ( ) < < " replSet error rolling back capped collection rec " < < d . ns < < ' ' < < e . toString ( ) < < rsLog ;
}
}
else {
try {
deletes + + ;
deleteObjects ( d . ns , pattern , /*justone*/ true , /*logop*/ false , /*god*/ true , rs . get ( ) ) ;
}
catch ( . . . ) {
log ( ) < < " replSet error rollback delete failed ns: " < < d . ns < < rsLog ;
}
}
// did we just empty the collection? if so let's check if it even exists on the source.
if ( nsd - > stats . nrecords = = 0 ) {
try {
string sys = cc ( ) . database ( ) - > name + " .system.namespaces " ;
bo o = them - > findOne ( sys , QUERY ( " name " < < d . ns ) ) ;
if ( o . isEmpty ( ) ) {
// we should drop
try {
bob res ;
string errmsg ;
dropCollection ( d . ns , errmsg , res ) ;
}
catch ( . . . ) {
log ( ) < < " replset error rolling back collection " < < d . ns < < rsLog ;
}
}
}
catch ( DBException & ) {
/* this isn't *that* big a deal, but is bad. */
log ( ) < < " replSet warning rollback error querying for existence of " < < d . ns < < " at the primary, ignoring " < < rsLog ;
}
}
}
}
else {
// todo faster...
OpDebug debug ;
updates + + ;
_updateObjects ( /*god*/ true , d . ns , i - > second , pattern , /*upsert=*/ true , /*multi=*/ false , /*logtheop=*/ false , debug , rs . get ( ) ) ;
}
}
catch ( DBException & e ) {
log ( ) < < " replSet exception in rollback ns: " < < d . ns < < ' ' < < pattern . toString ( ) < < ' ' < < e . toString ( ) < < " ndeletes: " < < deletes < < rsLog ;
warn = true ;
}
}
removeSavers . clear ( ) ; // this effectively closes all of them
sethbmsg ( str : : stream ( ) < < " rollback 5 d: " < < deletes < < " u: " < < updates ) ;
MemoryMappedFile : : flushAll ( true ) ;
sethbmsg ( " rollback 6 " ) ;
// clean up oplog
log ( 2 ) < < " replSet rollback truncate oplog after " < < h . commonPoint . toStringPretty ( ) < < rsLog ;
// todo: fatal error if this throws?
oplogDetails - > cappedTruncateAfter ( rsoplog , h . commonPointOurDiskloc , false ) ;
/* reset cached lastoptimewritten and h value */
loadLastOpTimeWritten ( ) ;
sethbmsg ( " rollback 7 " ) ;
MemoryMappedFile : : flushAll ( true ) ;
// done
if ( warn )
sethbmsg ( " issues during syncRollback, see log " ) ;
else
sethbmsg ( " rollback done " ) ;
}
void ReplSetImpl : : syncRollback ( OplogReader & r ) {
2010-08-17 17:23:00 -04:00
unsigned s = _syncRollback ( r ) ;
2011-01-04 00:40:41 -05:00
if ( s )
2010-08-17 17:23:00 -04:00
sleepsecs ( s ) ;
}
2011-01-04 00:40:41 -05:00
unsigned ReplSetImpl : : _syncRollback ( OplogReader & r ) {
2010-07-21 14:15:48 -04:00
assert ( ! lockedByMe ( ) ) ;
assert ( ! dbMutex . atLeastReadLocked ( ) ) ;
2010-08-17 16:31:21 -04:00
sethbmsg ( " rollback 0 " ) ;
2010-07-26 15:29:18 -04:00
writelocktry lk ( rsoplog , 20000 ) ;
if ( ! lk . got ( ) ) {
2010-08-17 16:31:21 -04:00
sethbmsg ( " rollback couldn't get write lock in a reasonable time " ) ;
2010-08-17 17:23:00 -04:00
return 2 ;
2010-07-26 15:29:18 -04:00
}
2011-05-02 18:26:08 -04:00
if ( state ( ) . secondary ( ) ) {
2010-08-12 16:06:16 -04:00
/* by doing this, we will not service reads (return an error as we aren't in secondary staate.
2011-01-04 00:40:41 -05:00
that perhaps is moot becasue of the write lock above , but that write lock probably gets deferred
2010-08-12 16:06:16 -04:00
or removed or yielded later anyway .
also , this is better for status reporting - we know what is happening .
*/
2011-05-02 18:26:08 -04:00
changeState ( MemberState : : RS_ROLLBACK ) ;
2010-08-12 16:06:16 -04:00
}
2010-07-21 15:39:59 -04:00
HowToFixUp how ;
2010-08-17 16:31:21 -04:00
sethbmsg ( " rollback 1 " ) ;
2010-07-21 13:50:53 -04:00
{
r . resetCursor ( ) ;
2010-07-21 15:39:59 -04:00
2010-08-17 16:31:21 -04:00
sethbmsg ( " rollback 2 FindCommonPoint " ) ;
2010-07-21 14:15:48 -04:00
try {
2010-07-26 15:29:18 -04:00
syncRollbackFindCommonPoint ( r . conn ( ) , how ) ;
2010-07-21 14:15:48 -04:00
}
2011-01-04 00:40:41 -05:00
catch ( const char * p ) {
2010-08-17 16:31:21 -04:00
sethbmsg ( string ( " rollback 2 error " ) + p ) ;
2010-08-17 17:23:00 -04:00
return 10 ;
2010-07-21 14:15:48 -04:00
}
2011-01-04 00:40:41 -05:00
catch ( rsfatal & ) {
2010-08-16 09:17:14 -04:00
_fatal ( ) ;
2010-08-17 17:23:00 -04:00
return 2 ;
2010-08-16 09:17:14 -04:00
}
2011-01-04 00:40:41 -05:00
catch ( DBException & e ) {
2010-08-17 16:31:21 -04:00
sethbmsg ( string ( " rollback 2 exception " ) + e . toString ( ) + " ; sleeping 1 min " ) ;
2010-08-17 17:23:00 -04:00
dbtemprelease r ;
2010-07-21 15:39:59 -04:00
sleepsecs ( 60 ) ;
throw ;
}
2010-07-21 13:50:53 -04:00
}
2010-07-21 13:13:36 -04:00
2010-08-17 16:31:21 -04:00
sethbmsg ( " replSet rollback 3 fixup " ) ;
2010-07-26 15:29:18 -04:00
2010-08-16 10:26:07 -04:00
{
incRBID ( ) ;
2011-01-04 00:40:41 -05:00
try {
2010-08-16 10:26:07 -04:00
syncFixUp ( how , r ) ;
}
2011-01-04 00:40:41 -05:00
catch ( rsfatal & ) {
2010-09-02 11:26:38 -04:00
sethbmsg ( " rollback fixup error " ) ;
_fatal ( ) ;
return 2 ;
}
2011-01-04 00:40:41 -05:00
catch ( . . . ) {
2010-08-16 10:26:07 -04:00
incRBID ( ) ; throw ;
}
incRBID ( ) ;
2010-08-30 10:20:02 -04:00
2011-01-04 00:40:41 -05:00
/* success - leave "ROLLBACK" state
2010-08-30 10:20:02 -04:00
can go to SECONDARY once minvalid is achieved
*/
2011-05-02 18:26:08 -04:00
changeState ( MemberState : : RS_RECOVERING ) ;
2010-08-16 10:26:07 -04:00
}
2010-08-17 17:23:00 -04:00
return 0 ;
2010-07-21 13:13:36 -04:00
}
2010-07-21 01:45:04 -04:00
}