2008-06-06 09:43:15 -04:00
// pdfile.cpp
2008-07-20 17:37:33 -04:00
/**
* Copyright ( C ) 2008 10 gen Inc .
2008-12-28 20:28:49 -05:00
*
2008-07-20 17:37:33 -04:00
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License , version 3 ,
* as published by the Free Software Foundation .
2008-12-28 20:28:49 -05:00
*
2008-07-20 17:37:33 -04:00
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
2008-12-28 20:28:49 -05:00
*
2008-07-20 17:37:33 -04:00
* You should have received a copy of the GNU Affero General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
2008-12-28 20:28:49 -05:00
/*
todo :
2008-06-06 09:43:15 -04:00
_ table scans must be sequential , not next / prev pointers
2008-07-24 16:07:18 -04:00
_ coalesce deleted
2008-12-02 14:24:45 -05:00
_ disallow system * manipulations from the database .
2008-06-06 09:43:15 -04:00
*/
2010-04-27 15:27:52 -04:00
# include "pch.h"
2008-06-06 09:43:15 -04:00
# include "pdfile.h"
# include "db.h"
# include "../util/mmap.h"
# include "../util/hashtab.h"
2009-04-15 16:10:21 -04:00
# include "../util/file_allocator.h"
2010-02-20 16:25:10 -05:00
# include "../util/processinfo.h"
2011-04-14 15:49:45 -04:00
# include "../util/file.h"
2008-06-06 09:43:15 -04:00
# include "btree.h"
2011-04-11 18:21:48 -04:00
# include "btreebuilder.h"
2008-06-06 09:43:15 -04:00
# include <algorithm>
# include <list>
2008-12-03 18:27:32 -05:00
# include "repl.h"
2009-01-26 17:23:45 -05:00
# include "dbhelpers.h"
2010-11-04 09:07:28 -04:00
# include "namespace-inl.h"
2009-02-26 11:08:23 -05:00
# include "queryutil.h"
2009-09-21 16:31:00 -04:00
# include "extsort.h"
2010-11-04 09:00:39 -04:00
# include "curop-inl.h"
2010-01-22 15:17:03 -05:00
# include "background.h"
2011-04-04 16:30:09 -04:00
# include "compact.h"
2011-06-20 17:34:16 -04:00
# include "ops/delete.h"
2011-06-22 15:51:08 -04:00
# include "instance.h"
2008-06-06 09:43:15 -04:00
2009-01-14 17:09:51 -05:00
namespace mongo {
2011-04-25 10:24:28 -04:00
// Compile-time guards: the build fails if the size of Extent or DataFileHeader drifts
// from the expected values (sizeof minus 4 must equal 48+128 and 8192 respectively).
// NOTE(review): the "- 4" presumably discounts a trailing placeholder member -- confirm against pdfile.h.
BOOST_STATIC_ASSERT ( sizeof ( Extent ) - 4 = = 48 + 128 ) ;
BOOST_STATIC_ASSERT ( sizeof ( DataFileHeader ) - 4 = = 8192 ) ;
2011-05-15 23:05:08 -04:00
/** Minimal sanity check of a namespace string.
 *  @param ns candidate namespace
 *  @return true when ns contains the separator and at least one character follows it;
 *          false when the separator is missing or is the last character.
 */
bool isValidNS( const StringData& ns ) {
    // TODO: should check for invalid characters
    const char *sep = strchr( ns.data() , ' . ' );
    if ( ! sep )
        return false;
    // Compare against the terminator explicitly. The previous "*x > 0" test depended on
    // whether plain char is signed: a byte >= 0x80 right after the separator compared
    // negative on signed-char platforms and the namespace was wrongly rejected.
    return sep[1] != '\0';
}
2010-05-07 12:19:00 -04:00
// True only while a doingRepair object is alive.
bool inDBRepair = false;

/** Scope marker for a repair: the constructor raises inDBRepair (asserting that it was
 *  not already raised, i.e. no nesting) and the destructor lowers it again.
 */
struct doingRepair {
    doingRepair() {
        assert( !inDBRepair );
        inDBRepair = true;
    }

    ~doingRepair() {
        inDBRepair = false;
    }
};
2010-01-22 15:17:03 -05:00
// Count of live BackgroundOperation objects per database name; incremented in the
// BackgroundOperation constructor and decremented in its destructor.
map < string , unsigned > BackgroundOperation : : dbsInProg ;
// Full namespaces that currently have a live BackgroundOperation; inserted in the
// constructor and erased in the destructor.
set < string > BackgroundOperation : : nsInProg ;
bool BackgroundOperation : : inProgForDb ( const char * db ) {
assertInWriteLock ( ) ;
2010-02-10 15:34:41 -05:00
return dbsInProg [ db ] ! = 0 ;
2010-01-22 15:17:03 -05:00
}
2011-01-04 00:40:41 -05:00
bool BackgroundOperation : : inProgForNs ( const char * ns ) {
2010-01-22 15:17:03 -05:00
assertInWriteLock ( ) ;
return nsInProg . count ( ns ) ! = 0 ;
}
2011-01-04 00:40:41 -05:00
void BackgroundOperation : : assertNoBgOpInProgForDb ( const char * db ) {
2010-01-22 15:17:03 -05:00
uassert ( 12586 , " cannot perform operation: a background operation is currently running for this database " ,
2011-01-04 00:40:41 -05:00
! inProgForDb ( db ) ) ;
2010-01-22 15:17:03 -05:00
}
2011-01-04 00:40:41 -05:00
void BackgroundOperation : : assertNoBgOpInProgForNs ( const char * ns ) {
2010-01-22 15:17:03 -05:00
uassert ( 12587 , " cannot perform operation: a background operation is currently running for this collection " ,
2011-01-04 00:40:41 -05:00
! inProgForNs ( ns ) ) ;
}
2010-01-22 15:17:03 -05:00
2011-01-04 00:40:41 -05:00
/** Registers a background operation on namespace ns: bumps the per-database counter and
 *  records the namespace. Asserts that no operation is already registered for the same
 *  namespace. Calls assertInWriteLock() first.
 */
BackgroundOperation::BackgroundOperation(const char *ns) : _ns(ns) {
    assertInWriteLock();
    // Check for a duplicate before touching any bookkeeping. If the check fails in a build
    // where assert throws, the destructor never runs for this object, so an increment done
    // beforehand would leave dbsInProg elevated forever.
    assert( nsInProg.count( _ns.ns() ) == 0 );
    dbsInProg[ _ns.db ]++;
    nsInProg.insert( _ns.ns() );
}
2011-01-04 00:40:41 -05:00
/** Unregisters the operation: decrements the per-database counter and removes the
 *  namespace from nsInProg. Only warns (wassert) if the write lock is not held.
 */
BackgroundOperation::~BackgroundOperation() {
    wassert( dbMutex.isWriteLocked() );
    --dbsInProg[ _ns.db ];
    nsInProg.erase( _ns.ns() );
}
2010-02-10 15:34:41 -05:00
void BackgroundOperation : : dump ( stringstream & ss ) {
2011-01-04 00:40:41 -05:00
if ( nsInProg . size ( ) ) {
2010-02-10 15:34:41 -05:00
ss < < " \n <b>Background Jobs in Progress</b> \n " ;
for ( set < string > : : iterator i = nsInProg . begin ( ) ; i ! = nsInProg . end ( ) ; i + + )
ss < < " " < < * i < < ' \n ' ;
}
2011-01-04 00:40:41 -05:00
for ( map < string , unsigned > : : iterator i = dbsInProg . begin ( ) ; i ! = dbsInProg . end ( ) ; i + + ) {
if ( i - > second )
2010-02-10 15:34:41 -05:00
ss < < " database " < < i - > first < < " : " < < i - > second < < ' \n ' ;
}
}
2010-01-22 15:17:03 -05:00
/* ----------------------------------------- */
2009-08-11 14:29:03 -04:00
// Base directory for data files: used by _deleteDataFiles() and as the default 'path'
// argument of _applyOpToDataFiles().
string dbpath = " /data/db/ " ;
2010-01-26 11:21:31 -08:00
// When true, _deleteDataFiles() removes the whole <dbpath>/<database> directory instead of
// deleting the database's files one by one.
bool directoryperdb = false ;
2010-01-25 11:32:51 -08:00
// Not referenced in this part of the file.
string repairpath ;
2010-07-13 18:02:04 -04:00
// Not referenced in this part of the file.
string pidfilepath ;
2009-03-10 10:14:37 -04:00
2009-01-15 10:17:11 -05:00
// Global DataFileMgr instance.
DataFileMgr theDataFileMgr ;
2010-01-02 01:25:53 -05:00
// Global DatabaseHolder instance.
DatabaseHolder dbHolder ;
2009-01-15 10:17:11 -05:00
// NOTE(review): purpose of this constant is not visible in this part of the file.
int MAGIC = 0x1000 ;
2009-03-19 16:23:04 -04:00
// Forward declaration; _userCreateNS() calls this with the creation options, or with 0
// when the options object is empty.
void addNewNamespaceToCatalog ( const char * ns , const BSONObj * options = 0 ) ;
2009-04-20 20:17:39 -04:00
void ensureIdIndexForNewNs ( const char * ns ) {
2010-02-07 02:04:19 -05:00
if ( ( strstr ( ns , " .system. " ) = = 0 | | legalClientSystemNS ( ns , false ) ) & &
2011-01-04 00:40:41 -05:00
strstr ( ns , " .$freelist " ) = = 0 ) {
2010-06-30 17:28:57 -04:00
log ( 1 ) < < " adding _id index for collection " < < ns < < endl ;
2009-04-20 20:17:39 -04:00
ensureHaveIdIndex ( ns ) ;
2011-01-04 00:40:41 -05:00
}
2009-04-20 20:17:39 -04:00
}
2009-01-15 10:17:11 -05:00
/** Describes the current client's context.
 *  @return the database name followed by the context namespace, each followed by a
 *          separator character; an empty string when there is no current client, no
 *          context, or no database attached to the context.
 */
string getDbContext() {
    stringstream ss;

    Client *client = currentClient.get();
    if ( !client )
        return ss.str();

    Client::Context *ctx = client->getContext();
    if ( !ctx )
        return ss.str();

    Database *db = ctx->db();
    if ( db ) {
        ss << db->name << ' ';
        ss << ctx->ns() << ' ';
    }
    return ss.str();
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/*---------------------------------------------------------------------*/
2010-09-05 10:10:23 -04:00
// inheritable class to implement an operation that may be applied to all
// files in a database using _applyOpToDataFiles()
// Abstract per-file operation. Implementations are handed to _applyOpToDataFiles();
// _deleteDataFiles() supplies one that removes each file.
class FileOp {
public :
// Virtual so that implementations can be destroyed through a FileOp reference/pointer.
virtual ~ FileOp ( ) { }
// Return true if file exists and operation successful
virtual bool apply ( const boost : : filesystem : : path & p ) = 0 ;
// Short name of the operation (the deleter in _deleteDataFiles() returns " remove ").
// NOTE(review): presumably used for logging by _applyOpToDataFiles -- confirm at its definition.
virtual const char * op ( ) const = 0 ;
} ;
// Forward declaration; the definition is not in this part of the file. 'path' defaults to
// the global dbpath and 'afterAllocator' to false.
// NOTE(review): 'afterAllocator' presumably makes the call wait for pending file allocations -- confirm at the definition.
void _applyOpToDataFiles ( const char * database , FileOp & fo , bool afterAllocator = false , const string & path = dbpath ) ;
2010-12-12 19:47:10 -05:00
void _deleteDataFiles ( const char * database ) {
2010-09-05 10:10:23 -04:00
if ( directoryperdb ) {
2011-01-31 11:37:23 -08:00
FileAllocator : : get ( ) - > waitUntilFinished ( ) ;
2011-04-12 11:37:23 -04:00
MONGO_BOOST_CHECK_EXCEPTION_WITH_MSG ( boost : : filesystem : : remove_all ( boost : : filesystem : : path ( dbpath ) / database ) , " delete data files with a directoryperdb " ) ;
2010-09-05 10:10:23 -04:00
return ;
}
class : public FileOp {
virtual bool apply ( const boost : : filesystem : : path & p ) {
return boost : : filesystem : : remove ( p ) ;
}
virtual const char * op ( ) const {
return " remove " ;
}
} deleter ;
_applyOpToDataFiles ( database , deleter , true ) ;
}
2010-12-24 13:43:06 -05:00
int Extent : : initialSize ( int len ) {
2009-01-15 10:17:11 -05:00
long long sz = len * 16 ;
if ( len < 1000 ) sz = len * 64 ;
if ( sz > 1000000000 )
sz = 1000000000 ;
int z = ( ( int ) sz ) & 0xffffff00 ;
assert ( z > len ) ;
return z ;
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2010-06-30 17:28:57 -04:00
bool _userCreateNS ( const char * ns , const BSONObj & options , string & err , bool * deferIdIndex ) {
2009-01-15 10:17:11 -05:00
if ( nsdetails ( ns ) ) {
err = " collection already exists " ;
return false ;
2008-12-28 20:28:49 -05:00
}
2009-01-14 17:17:24 -05:00
2010-12-30 23:59:01 -05:00
log ( 1 ) < < " create collection " < < ns < < ' ' < < options < < endl ;
2009-01-15 10:17:11 -05:00
/* todo: do this only when we have allocated space successfully? or we could insert with a { ok: 0 } field
2009-10-08 12:59:03 -04:00
and then go back and set to ok : 1 after we are done .
2009-01-15 10:17:11 -05:00
*/
2009-12-18 17:43:40 -05:00
bool isFreeList = strstr ( ns , " .$freelist " ) ! = 0 ;
if ( ! isFreeList )
2010-06-30 16:52:29 -04:00
addNewNamespaceToCatalog ( ns , options . isEmpty ( ) ? 0 : & options ) ;
2009-01-15 10:17:11 -05:00
2010-12-24 13:43:06 -05:00
long long size = Extent : : initialSize ( 128 ) ;
2010-09-05 12:16:37 -04:00
{
BSONElement e = options . getField ( " size " ) ;
if ( e . isNumber ( ) ) {
size = e . numberLong ( ) ;
size + = 256 ;
size & = 0xffffffffffffff00LL ;
}
2009-01-13 10:28:42 -05:00
}
2011-01-04 00:40:41 -05:00
2011-06-11 19:45:03 -04:00
uassert ( 10083 , " create collection invalid size spec " , size > 0 ) ;
2009-01-15 10:17:11 -05:00
bool newCapped = false ;
int mx = 0 ;
2011-06-11 12:55:46 -04:00
if ( options [ " capped " ] . trueValue ( ) ) {
2009-01-15 10:17:11 -05:00
newCapped = true ;
2010-09-05 12:16:37 -04:00
BSONElement e = options . getField ( " max " ) ;
2009-01-15 10:17:11 -05:00
if ( e . isNumber ( ) ) {
2010-06-14 15:20:45 -04:00
mx = e . numberInt ( ) ;
2009-01-15 10:17:11 -05:00
}
2009-01-13 10:28:42 -05:00
}
2008-06-06 09:43:15 -04:00
2010-12-27 21:41:07 -08:00
// $nExtents just for debug/testing.
2010-09-05 12:16:37 -04:00
BSONElement e = options . getField ( " $nExtents " ) ;
2009-10-14 14:34:38 -04:00
Database * database = cc ( ) . database ( ) ;
2010-12-27 22:15:20 -08:00
if ( e . type ( ) = = Array ) {
// We create one extent per array entry, with size specified
// by the array value.
BSONObjIterator i ( e . embeddedObject ( ) ) ;
while ( i . more ( ) ) {
BSONElement e = i . next ( ) ;
int size = int ( e . number ( ) ) ;
assert ( size < = 0x7fffffff ) ;
// $nExtents is just for testing - always allocate new extents
// rather than reuse existing extents so we have some predictibility
// in the extent size used by our tests
2011-05-23 20:01:42 -04:00
database - > suitableFile ( ns , ( int ) size , false , false ) - > createExtent ( ns , ( int ) size , newCapped ) ;
2010-12-27 22:15:20 -08:00
}
2011-01-04 00:40:41 -05:00
}
else if ( int ( e . number ( ) ) > 0 ) {
2010-12-27 22:15:20 -08:00
// We create '$nExtents' extents, each of size 'size'.
int nExtents = int ( e . number ( ) ) ;
assert ( size < = 0x7fffffff ) ;
for ( int i = 0 ; i < nExtents ; + + i ) {
assert ( size < = 0x7fffffff ) ;
// $nExtents is just for testing - always allocate new extents
// rather than reuse existing extents so we have some predictibility
// in the extent size used by our tests
2011-05-23 20:01:42 -04:00
database - > suitableFile ( ns , ( int ) size , false , false ) - > createExtent ( ns , ( int ) size , newCapped ) ;
2009-01-15 10:17:11 -05:00
}
2011-01-04 00:40:41 -05:00
}
else {
2010-12-27 22:15:20 -08:00
// This is the non test case, where we don't have a $nExtents spec.
2009-01-15 10:17:11 -05:00
while ( size > 0 ) {
2010-03-31 15:49:28 -04:00
int max = MongoDataFile : : maxSize ( ) - DataFileHeader : : HeaderSize ;
2009-01-18 11:53:33 -05:00
int desiredExtentSize = ( int ) ( size > max ? max : size ) ;
2011-03-01 12:07:56 -08:00
if ( desiredExtentSize < Extent : : minSize ( ) ) {
desiredExtentSize = Extent : : minSize ( ) ;
}
desiredExtentSize & = 0xffffff00 ;
2011-05-23 20:01:42 -04:00
Extent * e = database - > allocExtent ( ns , desiredExtentSize , newCapped , true ) ;
2009-01-15 10:17:11 -05:00
size - = e - > length ;
}
2009-04-15 16:10:34 -04:00
}
2009-01-15 10:17:11 -05:00
NamespaceDetails * d = nsdetails ( ns ) ;
assert ( d ) ;
2009-04-21 15:42:22 -04:00
2010-06-30 17:28:57 -04:00
bool ensure = false ;
2010-06-30 16:52:29 -04:00
if ( options . getField ( " autoIndexId " ) . type ( ) ) {
2011-01-04 00:40:41 -05:00
if ( options [ " autoIndexId " ] . trueValue ( ) ) {
2010-06-30 17:28:57 -04:00
ensure = true ;
2009-05-18 11:18:16 -04:00
}
2011-01-04 00:40:41 -05:00
}
else {
2009-05-18 11:18:16 -04:00
if ( ! newCapped ) {
2010-06-30 17:28:57 -04:00
ensure = true ;
2009-05-18 11:18:16 -04:00
}
}
2011-01-04 00:40:41 -05:00
if ( ensure ) {
2010-06-30 17:28:57 -04:00
if ( deferIdIndex )
* deferIdIndex = true ;
else
ensureIdIndexForNewNs ( ns ) ;
}
2009-01-13 10:28:42 -05:00
2009-01-15 10:17:11 -05:00
if ( mx > 0 )
2010-12-09 14:44:08 -05:00
getDur ( ) . writingInt ( d - > max ) = mx ;
2009-01-14 17:17:24 -05:00
2009-01-15 10:17:11 -05:00
return true ;
}
2008-06-06 09:43:15 -04:00
2010-06-30 17:28:57 -04:00
/** { ..., capped: true, size: ..., max: ... }
@ param deferIdIndex - if not not , defers id index creation . sets the bool value to true if we wanted to create the id index .
@ return true if successful
*/
bool userCreateNS ( const char * ns , BSONObj options , string & err , bool logForReplication , bool * deferIdIndex ) {
2009-05-21 11:07:11 -04:00
const char * coll = strchr ( ns , ' . ' ) + 1 ;
2010-12-22 13:26:54 -05:00
massert ( 10356 , str : : stream ( ) < < " invalid ns: " < < ns , coll & & * coll ) ;
2009-05-21 11:07:11 -04:00
char cl [ 256 ] ;
2009-12-31 16:31:07 -05:00
nsToDatabase ( ns , cl ) ;
2010-06-30 17:28:57 -04:00
bool ok = _userCreateNS ( ns , options , err , deferIdIndex ) ;
2009-05-21 11:07:11 -04:00
if ( logForReplication & & ok ) {
2010-06-30 16:52:29 -04:00
if ( options . getField ( " create " ) . eoo ( ) ) {
2009-05-21 11:07:11 -04:00
BSONObjBuilder b ;
b < < " create " < < coll ;
2010-06-30 16:52:29 -04:00
b . appendElements ( options ) ;
options = b . obj ( ) ;
2009-05-21 11:07:11 -04:00
}
string logNs = string ( cl ) + " .$cmd " ;
2010-06-30 16:52:29 -04:00
logOp ( " c " , logNs . c_str ( ) , options ) ;
2009-05-21 11:07:11 -04:00
}
2009-01-15 10:17:11 -05:00
return ok ;
2009-01-13 10:28:42 -05:00
}
2009-01-15 10:17:11 -05:00
/*---------------------------------------------------------------------*/
2009-01-13 10:28:42 -05:00
2009-01-31 17:27:25 -05:00
int MongoDataFile : : maxSize ( ) {
2010-08-09 12:17:11 -07:00
if ( sizeof ( int * ) = = 4 ) {
2009-01-15 10:17:11 -05:00
return 512 * 1024 * 1024 ;
2011-01-04 00:40:41 -05:00
}
else if ( cmdLine . smallfiles ) {
2010-08-09 12:17:11 -07:00
return 0x7ff00000 > > 2 ;
2011-01-04 00:40:41 -05:00
}
else {
2009-01-15 10:17:11 -05:00
return 0x7ff00000 ;
2010-08-09 12:17:11 -07:00
}
2008-12-27 12:07:20 -05:00
}
2011-05-03 15:53:27 -04:00
/** Reports an invalid offset into this data file by raising user error 13441; the
 *  message carries the offset and the mapped file's name.
 */
NOINLINE_DECL void MongoDataFile::badOfs2(int ofs) const {
    stringstream msg;
    msg << " bad offset: " << ofs;
    msg << " accessing file: " << mmf.filename();
    msg << " - consider repairing database ";
    uasserted( 13441, msg.str() );
}
2011-05-03 15:53:27 -04:00
/** Reports an invalid offset into this data file by raising user error 13440; the
 *  message carries the offset and the mapped file's name.
 */
NOINLINE_DECL void MongoDataFile::badOfs(int ofs) const {
    stringstream msg;
    msg << " bad offset: " << ofs;
    msg << " accessing file: " << mmf.filename();
    msg << " - consider repairing database ";
    uasserted( 13440, msg.str() );
}
2009-01-31 17:27:25 -05:00
int MongoDataFile : : defaultSize ( const char * filename ) const {
2009-01-15 10:17:11 -05:00
int size ;
if ( fileNo < = 4 )
size = ( 64 * 1024 * 1024 ) < < fileNo ;
else
size = 0x7ff00000 ;
2011-01-20 11:01:57 -05:00
if ( cmdLine . smallfiles ) {
2009-10-08 16:29:04 -04:00
size = size > > 2 ;
}
2009-01-15 10:17:11 -05:00
return size ;
2009-01-14 10:11:51 -05:00
}
2009-01-15 10:17:11 -05:00
2009-04-15 16:10:21 -04:00
void MongoDataFile : : open ( const char * filename , int minSize , bool preallocateOnly ) {
2009-07-09 16:56:34 -04:00
long size = defaultSize ( filename ) ;
2009-01-15 10:17:11 -05:00
while ( size < minSize ) {
if ( size < maxSize ( ) / 2 )
size * = 2 ;
else {
size = maxSize ( ) ;
break ;
}
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
if ( size > maxSize ( ) )
size = maxSize ( ) ;
2010-11-25 11:08:44 -05:00
assert ( size > = 64 * 1024 * 1024 | | cmdLine . smallfiles ) ;
2009-01-15 10:17:11 -05:00
assert ( size % 4096 = = 0 ) ;
2009-04-15 16:10:21 -04:00
if ( preallocateOnly ) {
2009-10-08 16:29:04 -04:00
if ( cmdLine . prealloc ) {
2011-01-09 01:45:11 -05:00
FileAllocator : : get ( ) - > requestAllocation ( filename , size ) ;
2009-10-08 12:59:03 -04:00
}
2009-04-15 16:10:21 -04:00
return ;
}
2010-09-09 07:12:17 -04:00
{
2010-11-27 15:25:08 -05:00
assert ( _mb = = 0 ) ;
2010-09-09 07:12:17 -04:00
unsigned long long sz = size ;
2010-11-05 17:15:39 -04:00
if ( mmf . create ( filename , sz , false ) )
_mb = mmf . getView ( ) ;
2010-09-09 07:12:17 -04:00
assert ( sz < = 0x7fffffff ) ;
size = ( int ) sz ;
}
2010-11-05 17:15:39 -04:00
//header = (DataFileHeader *) _p;
2011-01-04 00:40:41 -05:00
if ( sizeof ( char * ) = = 4 )
2010-11-27 15:25:08 -05:00
uassert ( 10084 , " can't map file memory - mongo requires 64 bit build for larger datasets " , _mb ! = 0 ) ;
2009-02-11 11:28:49 -05:00
else
2010-11-27 15:25:08 -05:00
uassert ( 10085 , " can't map file memory " , _mb ! = 0 ) ;
2011-03-01 18:02:38 -05:00
header ( ) - > init ( fileNo , size , filename ) ;
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
2011-01-04 00:40:41 -05:00
void MongoDataFile : : flush ( bool sync ) {
2010-07-26 17:28:24 -04:00
mmf . flush ( sync ) ;
}
2011-01-04 00:40:41 -05:00
// Links extent 'e' (located at 'eloc') into namespace 'ns' and hands the extent's empty
// region ('emptyLoc') to the namespace's deleted-record list.
// If the namespace already exists the extent is appended after its current last extent;
// otherwise the namespace is created with 'eloc' as its first extent.
void addNewExtentToNamespace ( const char * ns , Extent * e , DiskLoc eloc , DiskLoc emptyLoc , bool capped ) {
2009-02-24 13:52:34 -05:00
NamespaceIndex * ni = nsindex ( ns ) ;
NamespaceDetails * details = ni - > details ( ns ) ;
if ( details ) {
2009-10-08 12:59:03 -04:00
// existing namespace: it must already own at least one extent
assert ( ! details - > lastExtent . isNull ( ) ) ;
2009-02-24 13:52:34 -05:00
assert ( ! details - > firstExtent . isNull ( ) ) ;
2010-12-09 14:44:08 -05:00
// new extent points back at the old tail, old tail points forward at the new extent
getDur ( ) . writingDiskLoc ( e - > xprev ) = details - > lastExtent ;
getDur ( ) . writingDiskLoc ( details - > lastExtent . ext ( ) - > xnext ) = eloc ;
2009-10-08 12:59:03 -04:00
assert ( ! eloc . isNull ( ) ) ;
2010-12-09 14:44:08 -05:00
// the new extent becomes the tail
getDur ( ) . writingDiskLoc ( details - > lastExtent ) = eloc ;
2009-02-24 13:52:34 -05:00
}
else {
2009-10-19 16:53:58 -04:00
// first extent of a new namespace: register it, then fetch the fresh details
ni - > add_ns ( ns , eloc , capped ) ;
2009-02-24 13:52:34 -05:00
details = ni - > details ( ns ) ;
}
2010-09-27 12:35:22 -04:00
{
2010-12-28 17:34:59 -08:00
// record the length of the extent just added
NamespaceDetails * dw = details - > writingWithoutExtra ( ) ;
2010-09-27 12:35:22 -04:00
dw - > lastExtentSize = e - > length ;
}
2009-02-24 13:52:34 -05:00
details - > addDeletedRec ( emptyLoc . drec ( ) , emptyLoc ) ;
}
/** Carves a new extent out of this file's unused space and attaches it to namespace ns.
 *  @param ns         namespace the extent is for
 *  @param approxSize requested size; rounded up to a 4096-byte multiple when the rounded
 *                    value stays below Extent::maxSize()
 *  @param newCapped  forwarded to Extent::init and addNewExtentToNamespace
 *  @param loops      number of files already tried (recursion depth)
 *  @return the new extent. When this file has less than Extent::minSize() left, a new
 *          file is added to the current database and the request is retried there.
 *  Raises assertions 10357/10358/10359 on shutdown, bad size, or missing header.
 */
Extent* MongoDataFile::createExtent(const char *ns, int approxSize, bool newCapped, int loops) {
    {
        // make sizes align with VM page size
        int aligned = ( approxSize + 0xfff ) & 0xfffff000;
        assert( aligned >= 0 );
        if ( aligned < Extent::maxSize() )
            approxSize = aligned;
    }

    massert( 10357 , " shutdown in progress ", ! inShutdown() );
    massert( 10358 , " bad new extent size ", approxSize >= Extent::minSize() && approxSize <= Extent::maxSize() );
    massert( 10359 , " header==0 on new extent: 32 bit mmap space exceeded? ", header() ); // null if file open failed

    int extentLen = min( header()->unusedLength, approxSize );
    DiskLoc extentLoc;
    if ( extentLen < Extent::minSize() ) {
        /* note there could be a lot of looping here if the db just started and
           no files are open yet.  we might want to do something about that. */
        if ( loops > 8 ) {
            assert( loops < 10000 );
            out() << " warning: loops= " << loops << " fileno: " << fileNo << ' ' << ns << ' \n ';
        }
        log() << " newExtent: " << ns << " file " << fileNo << " full, adding a new file \n ";
        return cc().database()->addAFile( 0, true )->createExtent( ns, approxSize, newCapped, loops + 1 );
    }

    int offset = header()->unused.getOfs();

    // advance the file's unused pointer past the extent and shrink the unused length
    DataFileHeader *hdr = header();
    hdr->unused.writing().set( fileNo, offset + extentLen );
    getDur().writingInt( hdr->unusedLength ) = hdr->unusedLength - extentLen;

    extentLoc.set( fileNo, offset );
    Extent *ext = _getExtent( extentLoc );
    DiskLoc emptyLoc = getDur().writing( ext )->init( ns, extentLen, fileNo, offset, newCapped );

    addNewExtentToNamespace( ns, ext, extentLoc, emptyLoc, newCapped );

    DEV tlog( 1 ) << " new extent " << ns << " size: 0x " << hex << extentLen << " loc: 0x " << hex << offset
                  << " emptyLoc: " << hex << emptyLoc.getOfs() << dec << endl;
    return ext;
}
2009-01-15 10:17:11 -05:00
2011-01-04 00:40:41 -05:00
/** Tries to satisfy an extent request from the current database's free list.
 *  Walks the free list looking for the extent whose length is closest to approxSize
 *  within an accepted [low, high] window (tight for capped collections, looser
 *  otherwise). The scan stops early on a match within 10% of approxSize, or on any
 *  improved match once the scan has run for 2 seconds.
 *  @return the reused extent, already unlinked from the free list and attached to ns;
 *          0 when there is no free list or no extent in range.
 */
Extent* DataFileMgr::allocFromFreeList(const char *ns, int approxSize, bool capped) {
    string freeListNs = cc().database()->name + " .$freelist ";
    NamespaceDetails *freeList = nsdetails( freeListNs.c_str() );
    if ( !freeList )
        return 0;

    int low, high;
    if ( capped ) {
        // be strict about the size
        low = approxSize;
        if ( low > 2048 ) low -= 256;
        high = (int) ( approxSize * 1.05 ) + 256;
    }
    else {
        low = (int) ( approxSize * 0.8 );
        high = (int) ( approxSize * 1.4 );
    }
    if ( high <= 0 ) {
        // overflowed
        high = max( approxSize, Extent::maxSize() );
    }

    int scanned = 0;
    Extent *best = 0;
    int bestDiff = 0x7fffffff;
    {
        Timer t;
        DiskLoc cur = freeList->firstExtent;
        while ( !cur.isNull() ) {
            Extent *candidate = cur.ext();
            if ( candidate->length >= low && candidate->length <= high ) {
                int diff = abs( candidate->length - approxSize );
                if ( diff < bestDiff ) {
                    bestDiff = diff;
                    best = candidate;
                    if ( ( (double) diff ) / approxSize < 0.1 ) {
                        // close enough
                        break;
                    }
                    if ( t.seconds() >= 2 ) {
                        // have spent lots of time in write lock, and we are in [low,high], so close enough
                        // could come into play if extent freelist is very long
                        break;
                    }
                }
                else {
                    OCCASIONALLY {
                        if ( high < 64 * 1024 && t.seconds() >= 2 ) {
                            // be less picky if it is taking a long time
                            high = 64 * 1024;
                        }
                    }
                }
            }
            cur = candidate->xnext;
            ++scanned;
        }
        if ( t.seconds() >= 10 ) {
            log() << " warning: slow scan in allocFromFreeList (in write lock) " << endl;
        }
    }

    if ( scanned > 128 ) log( scanned < 512 ) << " warning: newExtent " << scanned << " scanned \n ";

    if ( !best )
        return 0;

    // remove from the free list
    if ( !best->xprev.isNull() )
        best->xprev.ext()->xnext.writing() = best->xnext;
    if ( !best->xnext.isNull() )
        best->xnext.ext()->xprev.writing() = best->xprev;
    if ( freeList->firstExtent == best->myLoc )
        freeList->firstExtent.writing() = best->xnext;
    if ( freeList->lastExtent == best->myLoc )
        freeList->lastExtent.writing() = best->xprev;

    // use it
    OCCASIONALLY if ( scanned > 512 ) log() << " warning: newExtent " << scanned << " scanned \n ";
    DiskLoc emptyLoc = best->reuse( ns, capped );
    addNewExtentToNamespace( ns, best, best->myLoc, emptyLoc, capped );
    return best;
}
2009-01-15 10:17:11 -05:00
/*---------------------------------------------------------------------*/
2011-04-04 12:38:38 -04:00
void Extent : : markEmpty ( ) {
xnext . Null ( ) ;
xprev . Null ( ) ;
firstRecord . Null ( ) ;
lastRecord . Null ( ) ;
}
2011-05-03 15:53:27 -04:00
/** Declares the write intent on this extent via getDur().writing() and runs _reuse() on
 *  the returned pointer.
 *  @return the location of the extent's empty region, as returned by _reuse().
 */
DiskLoc Extent::reuse(const char *nsname, bool capped) {
    Extent *writable = getDur().writing( this );
    return writable->_reuse( nsname, capped );
}
2011-05-03 15:53:27 -04:00
void getEmptyLoc ( const char * ns , const DiskLoc extentLoc , int extentLength , bool capped , /*out*/ DiskLoc & emptyLoc , /*out*/ int & delRecLength ) {
emptyLoc = extentLoc ;
emptyLoc . inc ( Extent : : HeaderSize ( ) ) ;
delRecLength = extentLength - Extent : : HeaderSize ( ) ;
if ( delRecLength > = 32 * 1024 & & str : : contains ( ns , ' $ ' ) & & ! capped ) {
// probably an index. so skip forward to keep its records page aligned
int & ofs = emptyLoc . GETOFS ( ) ;
int newOfs = ( ofs + 0xfff ) & ~ 0xfff ;
delRecLength - = ( newOfs - ofs ) ;
dassert ( delRecLength > 0 ) ;
ofs = newOfs ;
}
}
/** Re-initialises an existing extent for namespace nsname: checks the magic value,
 *  relabels the extent, clears its links and record pointers, and writes a single
 *  deleted record covering the space computed by getEmptyLoc().
 *  @return location of that deleted record.
 *  Raises assertion 10360 when the magic value is wrong.
 */
DiskLoc Extent::_reuse(const char *nsname, bool capped) {
    LOG( 3 ) << " reset extent was: " << nsDiagnostic.toString() << " now: " << nsname << ' \n ';
    massert( 10360 , " Extent::reset bad magic value ", magic == 0x41424344 );
    nsDiagnostic = nsname;
    markEmpty();

    DiskLoc emptyLoc;
    int delRecLength;
    getEmptyLoc( nsname, myLoc, length, capped, emptyLoc, delRecLength );

    // todo: some dup code here and below in Extent::init
    DeletedRecord *freeRec = getDur().writing( DataFileMgr::makeDeletedRecord( emptyLoc, delRecLength ) );
    freeRec->lengthWithHeaders = delRecLength;
    freeRec->extentOfs = myLoc.getOfs();
    freeRec->nextDeleted.Null();

    return emptyLoc;
}
2009-01-15 10:17:11 -05:00
/* assumes already zeroed -- insufficient for block 'reuse' perhaps */
2011-05-02 22:02:18 -04:00
DiskLoc Extent : : init ( const char * nsname , int _length , int _fileNo , int _offset , bool capped ) {
2009-01-15 10:17:11 -05:00
magic = 0x41424344 ;
2010-09-09 07:12:17 -04:00
myLoc . set ( _fileNo , _offset ) ;
2009-01-15 10:17:11 -05:00
xnext . Null ( ) ;
xprev . Null ( ) ;
2009-10-19 15:30:16 -04:00
nsDiagnostic = nsname ;
2009-01-15 10:17:11 -05:00
length = _length ;
firstRecord . Null ( ) ;
lastRecord . Null ( ) ;
2011-05-03 15:53:27 -04:00
DiskLoc emptyLoc ;
int delRecLength ;
getEmptyLoc ( nsname , myLoc , _length , capped , emptyLoc , delRecLength ) ;
2009-01-15 10:17:11 -05:00
2011-05-03 15:53:27 -04:00
DeletedRecord * empty = getDur ( ) . writing ( DataFileMgr : : makeDeletedRecord ( emptyLoc , delRecLength ) ) ;
empty - > lengthWithHeaders = delRecLength ;
2009-01-15 10:17:11 -05:00
empty - > extentOfs = myLoc . getOfs ( ) ;
return emptyLoc ;
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/*
2009-10-08 12:59:03 -04:00
Record * Extent : : newRecord ( int len ) {
2010-04-05 17:26:40 -04:00
if ( firstEmptyRegion . isNull ( ) ) 8
2009-10-08 12:59:03 -04:00
return 0 ;
assert ( len > 0 ) ;
int newRecSize = len + Record : : HeaderSize ;
DiskLoc newRecordLoc = firstEmptyRegion ;
Record * r = getRecord ( newRecordLoc ) ;
int left = r - > netLength ( ) - len ;
if ( left < 0 ) {
//
firstEmptyRegion . Null ( ) ;
return 0 ;
}
DiskLoc nextEmpty = r - > next . getNextEmpty ( firstEmptyRegion ) ;
r - > lengthWithHeaders = newRecSize ;
r - > next . markAsFirstOrLastInExtent ( this ) ; // we're now last in the extent
if ( ! lastRecord . isNull ( ) ) {
assert ( getRecord ( lastRecord ) - > next . lastInExtent ( ) ) ; // it was the last one
getRecord ( lastRecord ) - > next . set ( newRecordLoc ) ; // until now
r - > prev . set ( lastRecord ) ;
}
else {
r - > prev . markAsFirstOrLastInExtent ( this ) ; // we are the first in the extent
assert ( firstRecord . isNull ( ) ) ;
firstRecord = newRecordLoc ;
}
lastRecord = newRecordLoc ;
if ( left < Record : : HeaderSize + 32 ) {
firstEmptyRegion . Null ( ) ;
}
else {
firstEmptyRegion . inc ( newRecSize ) ;
Record * empty = getRecord ( firstEmptyRegion ) ;
empty - > next . set ( nextEmpty ) ; // not for empty records, unless in-use records, next and prev can be null.
empty - > prev . Null ( ) ;
empty - > lengthWithHeaders = left ;
}
return r ;
}
2009-01-15 10:17:11 -05:00
*/
2008-06-06 09:43:15 -04:00
2010-08-09 12:17:11 -07:00
int Extent : : maxSize ( ) {
int maxExtentSize = 0x7ff00000 ;
if ( cmdLine . smallfiles ) {
maxExtentSize > > = 2 ;
}
return maxExtentSize ;
}
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
/*---------------------------------------------------------------------*/
2008-12-28 20:28:49 -05:00
2010-05-07 17:25:57 -04:00
/** Returns a forward table-scan cursor over namespace ns.
 *  @param ns       namespace to scan
 *  @param startLoc when not null, the scan starts at this record (or is handed to the
 *                  capped cursor for capped collections)
 *  @return a cursor over nothing when the namespace does not exist; a
 *          ForwardCappedCursor for capped collections; otherwise a BasicCursor starting
 *          at startLoc or at the first record of the first non-empty extent.
 */
shared_ptr<Cursor> DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) {
    NamespaceDetails *d = nsdetails( ns );
    if ( !d )
        return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) );

    DiskLoc loc = d->firstExtent;
    Extent *e = getExtent( loc );

    DEBUGGING {
        out() << " listing extents for " << ns << endl;
        DiskLoc tmp = loc;
        set<DiskLoc> extents;

        while ( 1 ) {
            Extent *f = getExtent( tmp );
            out() << " extent: " << tmp.toString() << endl;
            extents.insert( tmp );
            tmp = f->xnext;
            if ( tmp.isNull() )
                break;
            // value is not used afterwards; kept as in the original debugging code
            f = f->getNextExtent();
        }

        out() << endl;
        d->dumpDeleted( &extents );
    }

    if ( d->capped )
        return shared_ptr<Cursor>( new ForwardCappedCursor( d , startLoc ) );

    if ( !startLoc.isNull() )
        return shared_ptr<Cursor>( new BasicCursor( startLoc ) );

    while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
        /* todo: if extent is empty, free it for reuse elsewhere.
                 that is a bit complicated have to clean up the freelists.
        */
        // Report the extent actually being skipped: 'loc' is never advanced, so logging it
        // named the first extent on every iteration.
        RARELY out() << " info DFM::findAll(): extent " << e->myLoc.toString() << " was empty, skipping ahead. ns: " << ns << endl;
        // find a nonempty extent
        // it might be nice to free the whole extent here!  but have to clean up free recs then.
        e = e->getNextExtent();
    }
    return shared_ptr<Cursor>( new BasicCursor( e->firstRecord ) );
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/* get a table scan cursor, but can be forward or reverse direction.
order . $ natural - if set , > 0 means forward ( asc ) , < 0 backward ( desc ) .
*/
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > findTableScan ( const char * ns , const BSONObj & order , const DiskLoc & startLoc ) {
2010-01-28 13:41:51 -05:00
BSONElement el = order . getField ( " $natural " ) ; // e.g., { $natural : -1 }
2008-07-24 16:07:18 -04:00
2009-01-15 10:17:11 -05:00
if ( el . number ( ) > = 0 )
2009-03-18 17:50:45 -04:00
return DataFileMgr : : findAll ( ns , startLoc ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
// "reverse natural order"
NamespaceDetails * d = nsdetails ( ns ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
if ( ! d )
2010-05-07 17:25:57 -04:00
return shared_ptr < Cursor > ( new BasicCursor ( DiskLoc ( ) ) ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
if ( ! d - > capped ) {
2009-03-18 17:24:10 -04:00
if ( ! startLoc . isNull ( ) )
2011-01-04 00:40:41 -05:00
return shared_ptr < Cursor > ( new ReverseCursor ( startLoc ) ) ;
2009-01-15 10:17:11 -05:00
Extent * e = d - > lastExtent . ext ( ) ;
while ( e - > lastRecord . isNull ( ) & & ! e - > xprev . isNull ( ) ) {
2009-01-15 11:26:38 -05:00
OCCASIONALLY out ( ) < < " findTableScan: extent empty, skipping ahead " < < endl ;
2009-01-15 10:17:11 -05:00
e = e - > getPrevExtent ( ) ;
}
2010-05-07 17:25:57 -04:00
return shared_ptr < Cursor > ( new ReverseCursor ( e - > lastRecord ) ) ;
2011-01-04 00:40:41 -05:00
}
else {
2010-05-07 17:25:57 -04:00
return shared_ptr < Cursor > ( new ReverseCappedCursor ( d , startLoc ) ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-28 20:28:49 -05:00
}
2008-07-24 16:07:18 -04:00
2011-01-04 00:40:41 -05:00
void printFreeList ( ) {
2009-12-18 17:43:40 -05:00
string s = cc ( ) . database ( ) - > name + " .$freelist " ;
2011-04-04 12:38:38 -04:00
log ( ) < < " dump freelist " < < s < < endl ;
2009-12-18 17:43:40 -05:00
NamespaceDetails * freeExtents = nsdetails ( s . c_str ( ) ) ;
2011-01-04 00:40:41 -05:00
if ( freeExtents = = 0 ) {
2009-12-18 17:43:40 -05:00
log ( ) < < " freeExtents==0 " < < endl ;
return ;
}
DiskLoc a = freeExtents - > firstExtent ;
2011-01-04 00:40:41 -05:00
while ( ! a . isNull ( ) ) {
2009-12-18 17:43:40 -05:00
Extent * e = a . ext ( ) ;
2011-04-04 12:38:38 -04:00
log ( ) < < " extent " < < a . toString ( ) < < " len: " < < e - > length < < " prev: " < < e - > xprev . toString ( ) < < endl ;
2009-12-18 17:43:40 -05:00
a = e - > xnext ;
}
2011-04-04 12:38:38 -04:00
log ( ) < < " end freelist " < < endl ;
}
/** Hand a chain of no-longer-used extents over to the database's " .$freelist " namespace.
    The chain is doubly linked and may consist of a single extent (firstExt == lastExt).
    firstExt must have a null xprev and lastExt a null xnext; both are asserted.
    The freelist namespace is created on demand.
*/
void freeExtents(DiskLoc firstExt, DiskLoc lastExt) {
    // validate the shape of the incoming chain
    assert( !firstExt.isNull() && !lastExt.isNull() );
    {
        Extent *head = firstExt.ext();
        Extent *tail = lastExt.ext();
        assert( head->xprev.isNull() );
        assert( tail->xnext.isNull() );
        assert( head == tail || !head->xnext.isNull() );
        assert( head == tail || !tail->xprev.isNull() );
    }

    const string freelistNs = cc().database()->name + " .$freelist ";
    NamespaceDetails *freelist = nsdetails( freelistNs.c_str() );
    if ( freelist == 0 ) {
        string err;
        _userCreateNS( freelistNs.c_str(), BSONObj(), err, 0 ); // todo: this actually allocates an extent, which is bad!
        freelist = nsdetails( freelistNs.c_str() );
        massert( 10361, " can't create .$freelist ", freelist );
    }

    if ( !freelist->firstExtent.isNull() ) {
        // non-empty freelist: splice the new chain in front of the current head
        DiskLoc oldHead = freelist->firstExtent;
        assert( oldHead.ext()->xprev.isNull() );
        getDur().writingDiskLoc( oldHead.ext()->xprev ) = lastExt;
        getDur().writingDiskLoc( lastExt.ext()->xnext ) = oldHead;
        getDur().writingDiskLoc( freelist->firstExtent ) = firstExt;
    }
    else {
        // empty freelist: the new chain becomes the whole list
        freelist->firstExtent.writing() = firstExt;
        freelist->lastExtent.writing() = lastExt;
    }

    //printFreeList();
}
2009-01-15 10:17:11 -05:00
/* drop a collection/namespace */
2009-01-23 10:15:36 -05:00
void dropNS ( const string & nsToDrop ) {
2009-02-24 13:52:34 -05:00
NamespaceDetails * d = nsdetails ( nsToDrop . c_str ( ) ) ;
2009-12-28 16:43:43 -05:00
uassert ( 10086 , ( string ) " ns not found: " + nsToDrop , d ) ;
2009-02-24 13:52:34 -05:00
2010-01-22 15:17:03 -05:00
BackgroundOperation : : assertNoBgOpInProgForNs ( nsToDrop . c_str ( ) ) ;
2009-10-05 16:40:24 -04:00
NamespaceString s ( nsToDrop ) ;
2009-10-14 14:34:38 -04:00
assert ( s . db = = cc ( ) . database ( ) - > name ) ;
2009-10-05 16:40:24 -04:00
if ( s . isSystem ( ) ) {
2011-01-04 00:40:41 -05:00
if ( s . coll = = " system.profile " )
2009-12-28 16:43:43 -05:00
uassert ( 10087 , " turn off profiling before dropping system.profile collection " , cc ( ) . database ( ) - > profile = = 0 ) ;
2009-10-05 16:40:24 -04:00
else
2009-12-28 16:43:43 -05:00
uasserted ( 12502 , " can't drop system ns " ) ;
2009-10-05 16:40:24 -04:00
}
2009-01-15 10:17:11 -05:00
{
// remove from the system catalog
2009-02-24 13:52:34 -05:00
BSONObj cond = BSON ( " name " < < nsToDrop ) ; // { name: "colltodropname" }
2009-10-14 14:34:38 -04:00
string system_namespaces = cc ( ) . database ( ) - > name + " .system.namespaces " ;
2009-04-09 13:30:28 -04:00
/*int n = */ deleteObjects ( system_namespaces . c_str ( ) , cond , false , false , true ) ;
2011-01-04 00:40:41 -05:00
// no check of return code as this ns won't exist for some of the new storage engines
2008-12-28 20:28:49 -05:00
}
2009-02-24 13:52:34 -05:00
// free extents
if ( ! d - > firstExtent . isNull ( ) ) {
2011-04-04 12:38:38 -04:00
freeExtents ( d - > firstExtent , d - > lastExtent ) ;
2011-03-30 21:15:40 -04:00
getDur ( ) . writingDiskLoc ( d - > firstExtent ) . setInvalid ( ) ;
getDur ( ) . writingDiskLoc ( d - > lastExtent ) . setInvalid ( ) ;
2009-02-24 13:52:34 -05:00
}
2009-01-15 10:17:11 -05:00
// remove from the catalog hashtable
2009-10-19 16:53:58 -04:00
cc ( ) . database ( ) - > namespaceIndex . kill_ns ( nsToDrop . c_str ( ) ) ;
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
2009-05-11 10:45:10 -04:00
void dropCollection ( const string & name , string & errmsg , BSONObjBuilder & result ) {
2009-08-06 11:54:30 -04:00
log ( 1 ) < < " dropCollection: " < < name < < endl ;
2009-05-11 10:45:10 -04:00
NamespaceDetails * d = nsdetails ( name . c_str ( ) ) ;
2010-08-02 14:03:33 -04:00
if ( d = = 0 )
return ;
2010-01-22 15:17:03 -05:00
BackgroundOperation : : assertNoBgOpInProgForNs ( name . c_str ( ) ) ;
2009-05-11 10:45:10 -04:00
if ( d - > nIndexes ! = 0 ) {
2011-01-04 00:40:41 -05:00
try {
2010-01-22 15:58:49 -05:00
assert ( dropIndexes ( d , name . c_str ( ) , " * " , errmsg , result , true ) ) ;
2009-08-06 11:37:33 -04:00
}
2010-08-31 09:43:39 -04:00
catch ( DBException & e ) {
stringstream ss ;
ss < < " drop: dropIndexes for collection failed - consider trying repair " ;
ss < < " cause: " < < e . what ( ) ;
uasserted ( 12503 , ss . str ( ) ) ;
2009-08-06 11:37:33 -04:00
}
2009-05-11 10:45:10 -04:00
assert ( d - > nIndexes = = 0 ) ;
}
2010-01-22 15:17:03 -05:00
log ( 1 ) < < " \t dropIndexes done " < < endl ;
2009-05-11 10:45:10 -04:00
result . append ( " ns " , name . c_str ( ) ) ;
ClientCursor : : invalidate ( name . c_str ( ) ) ;
2010-03-16 16:18:36 -04:00
Top : : global . collectionDropped ( name ) ;
2011-08-03 15:00:02 -07:00
NamespaceDetailsTransient : : eraseForPrefix ( name . c_str ( ) ) ;
2011-01-04 00:40:41 -05:00
dropNS ( name ) ;
2009-05-11 10:45:10 -04:00
}
2011-01-04 00:40:41 -05:00
2010-01-25 12:25:29 -05:00
/* Remove from index `id` every key that `obj` generates for the record at `dl`.
   An AssertionException from unindex is reported and swallowed; the key then counts
   as "not removed". When logMissing is true, every key that was not removed is logged.
*/
static void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) {
    BSONObjSet keys;
    id.getKeysFromObject( obj, keys );
    IndexInterface& ii = id.idxInterface();

    for ( BSONObjSet::iterator it = keys.begin(), last = keys.end(); it != last; ++it ) {
        BSONObj key = *it;

        bool removed = false;
        try {
            removed = ii.unindex( id.head, id, key, dl );
        }
        catch ( AssertionException& e ) {
            problem() << " Assertion failure: _unindex failed " << id.indexNamespace() << endl;
            out() << " Assertion failure: _unindex failed: " << e.what() << ' \n ';
            out() << " obj: " << obj.toString() << ' \n ';
            out() << " key: " << key.toString() << ' \n ';
            out() << " dl: " << dl.toString() << endl;
            sayDbContext();
        }

        if ( removed || !logMissing )
            continue;

        log() << " unindex failed (key too big?) " << id.indexNamespace() << " key: " << key << " " << obj[ " _id " ] << endl;
    }
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/* unindex all keys in all indexes for this record. */
2010-01-25 12:25:29 -05:00
static void unindexRecord ( NamespaceDetails * d , Record * todelete , const DiskLoc & dl , bool noWarn = false ) {
2009-01-15 10:17:11 -05:00
BSONObj obj ( todelete ) ;
2010-01-29 14:58:10 -05:00
int n = d - > nIndexes ;
for ( int i = 0 ; i < n ; i + + )
2010-01-25 12:25:29 -05:00
_unindexRecord ( d - > idx ( i ) , obj , dl , ! noWarn ) ;
2011-01-14 16:26:26 -08:00
if ( d - > indexBuildInProgress ) { // background index
2010-01-29 14:58:10 -05:00
// always pass nowarn here, as this one may be missing for valid reasons as we are concurrently building it
2011-01-04 00:40:41 -05:00
_unindexRecord ( d - > idx ( n ) , obj , dl , false ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2011-01-04 00:40:41 -05:00
/* deletes a record, just the pdfile portion -- no index cleanup, no cursor cleanup, etc.
2009-01-26 17:23:45 -05:00
caller must check if capped
*/
2011-01-04 00:40:41 -05:00
void DataFileMgr : : _deleteRecord ( NamespaceDetails * d , const char * ns , Record * todelete , const DiskLoc & dl ) {
2009-01-15 10:17:11 -05:00
/* remove ourself from the record next/prev chain */
{
if ( todelete - > prevOfs ! = DiskLoc : : NullOfs )
2010-12-09 14:44:08 -05:00
getDur ( ) . writingInt ( todelete - > getPrev ( dl ) . rec ( ) - > nextOfs ) = todelete - > nextOfs ;
2009-01-15 10:17:11 -05:00
if ( todelete - > nextOfs ! = DiskLoc : : NullOfs )
2010-12-09 14:44:08 -05:00
getDur ( ) . writingInt ( todelete - > getNext ( dl ) . rec ( ) - > prevOfs ) = todelete - > prevOfs ;
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/* remove ourself from extent pointers */
{
2010-12-09 14:44:08 -05:00
Extent * e = getDur ( ) . writing ( todelete - > myExtent ( dl ) ) ;
2009-01-15 10:17:11 -05:00
if ( e - > firstRecord = = dl ) {
if ( todelete - > nextOfs = = DiskLoc : : NullOfs )
e - > firstRecord . Null ( ) ;
else
2010-09-09 07:12:17 -04:00
e - > firstRecord . set ( dl . a ( ) , todelete - > nextOfs ) ;
2009-01-15 10:17:11 -05:00
}
if ( e - > lastRecord = = dl ) {
if ( todelete - > prevOfs = = DiskLoc : : NullOfs )
e - > lastRecord . Null ( ) ;
else
2010-09-09 07:12:17 -04:00
e - > lastRecord . set ( dl . a ( ) , todelete - > prevOfs ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-28 20:28:49 -05:00
}
2009-01-15 10:17:11 -05:00
/* add to the free list */
{
2010-09-27 12:35:22 -04:00
{
2010-12-09 14:44:08 -05:00
NamespaceDetails : : Stats * s = getDur ( ) . writing ( & d - > stats ) ;
2010-09-27 12:35:22 -04:00
s - > datasize - = todelete - > netLength ( ) ;
s - > nrecords - - ;
}
2009-01-15 10:17:11 -05:00
if ( strstr ( ns , " .system.indexes " ) ) {
2010-09-27 12:35:22 -04:00
/* temp: if in system.indexes, don't reuse, and zero out: we want to be
careful until validated more , as IndexDetails has pointers
to this disk location . so an incorrectly done remove would cause
a lot of problems .
*/
2010-12-09 14:44:08 -05:00
memset ( getDur ( ) . writingPtr ( todelete , todelete - > lengthWithHeaders ) , 0 , todelete - > lengthWithHeaders ) ;
2009-01-15 10:17:11 -05:00
}
else {
2010-09-27 12:35:22 -04:00
DEV {
unsigned long long * p = ( unsigned long long * ) todelete - > data ;
2010-12-09 14:44:08 -05:00
* getDur ( ) . writing ( p ) = 0 ;
2010-09-27 12:35:22 -04:00
//DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
}
2009-01-15 10:17:11 -05:00
d - > addDeletedRec ( ( DeletedRecord * ) todelete , dl ) ;
}
2008-12-28 20:28:49 -05:00
}
}
2009-01-15 10:17:11 -05:00
2011-07-28 17:23:59 -04:00
void DataFileMgr : : deleteRecord ( const char * ns , Record * todelete , const DiskLoc & dl , bool cappedOK , bool noWarn , bool doLog ) {
2009-01-26 17:23:45 -05:00
dassert ( todelete = = dl . rec ( ) ) ;
NamespaceDetails * d = nsdetails ( ns ) ;
if ( d - > capped & & ! cappedOK ) {
out ( ) < < " failing remove on a capped ns " < < ns < < endl ;
2009-12-28 16:43:43 -05:00
uassert ( 10089 , " can't remove from a capped collection " , 0 ) ;
2009-01-26 17:23:45 -05:00
return ;
}
2011-07-28 17:23:59 -04:00
BSONObj toDelete ;
if ( doLog ) {
BSONElement e = dl . obj ( ) [ " _id " ] ;
if ( e . type ( ) ) {
toDelete = e . wrap ( ) ;
}
}
2009-01-26 17:23:45 -05:00
/* check if any cursors point to us. if so, advance them. */
2009-10-08 12:04:27 -04:00
ClientCursor : : aboutToDelete ( dl ) ;
2009-01-26 17:23:45 -05:00
2009-08-05 16:02:20 -04:00
unindexRecord ( d , todelete , dl , noWarn ) ;
2009-01-26 17:23:45 -05:00
_deleteRecord ( d , ns , todelete , dl ) ;
2009-12-09 18:13:36 -05:00
NamespaceDetailsTransient : : get_w ( ns ) . notifyOfWriteOp ( ) ;
2011-07-28 17:23:59 -04:00
if ( ! toDelete . isEmpty ( ) ) {
logOp ( " d " , ns , toDelete ) ;
}
2009-01-26 17:23:45 -05:00
}
2009-06-18 13:30:49 -04:00
2009-06-05 17:12:49 -04:00
/** Note: if the object shrinks a lot, we don't free up space, we leave extra at end of the record.
2009-10-08 12:59:03 -04:00
*/
2010-02-03 17:31:52 -05:00
const DiskLoc DataFileMgr : : updateRecord (
const char * ns ,
NamespaceDetails * d ,
NamespaceDetailsTransient * nsdt ,
Record * toupdate , const DiskLoc & dl ,
2011-01-04 00:40:41 -05:00
const char * _buf , int _len , OpDebug & debug , bool god ) {
2011-05-07 17:00:57 -04:00
2009-01-15 10:17:11 -05:00
dassert ( toupdate = = dl . rec ( ) ) ;
2009-02-03 19:13:27 -05:00
BSONObj objOld ( toupdate ) ;
2009-06-19 13:26:58 -04:00
BSONObj objNew ( _buf ) ;
2010-02-03 17:31:52 -05:00
DEV assert ( objNew . objsize ( ) = = _len ) ;
DEV assert ( objNew . objdata ( ) = = _buf ) ;
2009-06-19 13:26:58 -04:00
if ( ! objNew . hasElement ( " _id " ) & & objOld . hasElement ( " _id " ) ) {
2011-01-04 00:40:41 -05:00
/* add back the old _id value if the update removes it. Note this implementation is slow
2009-06-19 13:26:58 -04:00
( copies entire object multiple times ) , but this shouldn ' t happen often , so going for simple
code , not speed .
2009-10-08 12:59:03 -04:00
*/
2009-06-19 13:26:58 -04:00
BSONObjBuilder b ;
BSONElement e ;
assert ( objOld . getObjectID ( e ) ) ;
b . append ( e ) ; // put _id first, for best performance
b . appendElements ( objNew ) ;
objNew = b . obj ( ) ;
}
2009-06-18 13:30:49 -04:00
2011-01-04 00:40:41 -05:00
/* duplicate key check. we descend the btree twice - once for this check, and once for the actual inserts, further
2009-06-19 16:03:44 -04:00
below . that is suboptimal , but it ' s pretty complicated to do it the other way without rollbacks . . .
*/
vector < IndexChanges > changes ;
2010-12-14 00:26:46 -05:00
bool changedId = false ;
2010-04-27 00:33:35 -07:00
getIndexChanges ( changes , * d , objNew , objOld , changedId ) ;
2010-12-14 00:26:46 -05:00
uassert ( 13596 , str : : stream ( ) < < " cannot change _id of a document old: " < < objOld < < " new: " < < objNew , ! changedId ) ;
2010-02-04 14:25:49 -05:00
dupCheck ( changes , * d , dl ) ;
2009-06-19 16:03:44 -04:00
2009-06-19 13:26:58 -04:00
if ( toupdate - > netLength ( ) < objNew . objsize ( ) ) {
2009-06-05 17:12:49 -04:00
// doesn't fit. reallocate -----------------------------------------------------
2010-08-19 11:51:04 -04:00
uassert ( 10003 , " failing update: objects in a capped ns cannot grow " , ! ( d & & d - > capped ) ) ;
2009-01-15 10:17:11 -05:00
d - > paddingTooSmall ( ) ;
2011-05-07 17:00:57 -04:00
debug . moved = true ;
2009-01-15 10:17:11 -05:00
deleteRecord ( ns , toupdate , dl ) ;
2010-07-14 16:13:44 -04:00
return insert ( ns , objNew . objdata ( ) , objNew . objsize ( ) , god ) ;
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2010-02-03 17:31:52 -05:00
nsdt - > notifyOfWriteOp ( ) ;
2009-01-15 10:17:11 -05:00
d - > paddingFits ( ) ;
2009-06-05 17:12:49 -04:00
/* have any index keys changed? */
2010-01-15 17:33:44 -05:00
{
unsigned keyUpdates = 0 ;
2010-02-04 14:25:49 -05:00
int z = d - > nIndexesBeingBuilt ( ) ;
for ( int x = 0 ; x < z ; x + + ) {
2009-10-21 16:00:40 -04:00
IndexDetails & idx = d - > idx ( x ) ;
2011-04-21 14:53:10 -04:00
IndexInterface & ii = idx . idxInterface ( ) ;
2009-06-05 17:12:49 -04:00
for ( unsigned i = 0 ; i < changes [ x ] . removed . size ( ) ; i + + ) {
try {
2011-04-21 14:53:10 -04:00
bool found = ii . unindex ( idx . head , idx , * changes [ x ] . removed [ i ] , dl ) ;
2011-02-28 18:16:47 -05:00
if ( ! found ) {
RARELY warning ( ) < < " ns: " < < ns < < " couldn't unindex key: " < < * changes [ x ] . removed [ i ]
< < " for doc: " < < objOld [ " _id " ] < < endl ;
}
2009-06-05 17:12:49 -04:00
}
catch ( AssertionException & ) {
2011-05-07 17:00:57 -04:00
debug . extra < < " exception update unindex " ;
2009-06-05 17:12:49 -04:00
problem ( ) < < " caught assertion update unindex " < < idx . indexNamespace ( ) < < endl ;
}
}
assert ( ! dl . isNull ( ) ) ;
BSONObj idxKey = idx . info . obj ( ) . getObjectField ( " key " ) ;
2010-04-20 19:42:53 -04:00
Ordering ordering = Ordering : : make ( idxKey ) ;
2010-01-15 17:33:44 -05:00
keyUpdates + = changes [ x ] . added . size ( ) ;
2009-06-05 17:12:49 -04:00
for ( unsigned i = 0 ; i < changes [ x ] . added . size ( ) ; i + + ) {
try {
2009-07-07 16:43:49 -04:00
/* we did the dupCheck() above. so we don't have to worry about it here. */
2011-04-21 14:53:10 -04:00
ii . bt_insert (
2011-01-04 00:40:41 -05:00
idx . head ,
dl , * changes [ x ] . added [ i ] , ordering , /*dupsAllowed*/ true , idx ) ;
2009-06-05 17:12:49 -04:00
}
2010-05-11 15:12:17 -04:00
catch ( AssertionException & e ) {
2011-05-07 17:00:57 -04:00
debug . extra < < " exception update index " ;
2011-02-28 16:09:48 -05:00
problem ( ) < < " caught assertion update index " < < idx . indexNamespace ( ) < < " " < < e < < " " < < objNew [ " _id " ] < < endl ;
2009-06-05 17:12:49 -04:00
}
}
}
2011-05-07 17:00:57 -04:00
debug . keyUpdates = keyUpdates ;
2009-06-05 17:12:49 -04:00
}
2009-01-15 10:17:11 -05:00
2011-01-04 00:40:41 -05:00
// update in place
2010-09-28 17:55:08 -04:00
int sz = objNew . objsize ( ) ;
2010-12-09 14:44:08 -05:00
memcpy ( getDur ( ) . writingPtr ( toupdate - > data , sz ) , objNew . objdata ( ) , sz ) ;
2009-10-30 15:50:21 -04:00
return dl ;
2008-12-28 20:28:49 -05:00
}
2010-12-24 13:43:06 -05:00
int Extent : : followupSize ( int len , int lastExtentLen ) {
2010-08-09 12:17:11 -07:00
assert ( len < Extent : : maxSize ( ) ) ;
2010-12-24 13:43:06 -05:00
int x = initialSize ( len ) ;
2009-01-15 10:17:11 -05:00
int y = ( int ) ( lastExtentLen < 4000000 ? lastExtentLen * 4.0 : lastExtentLen * 1.2 ) ;
int sz = y > x ? y : x ;
2010-04-21 11:41:05 -04:00
2011-01-04 00:40:41 -05:00
if ( sz < lastExtentLen ) {
2010-12-24 13:31:46 -05:00
// this means there was an int overflow
// so we should turn it into maxSize
2010-08-09 12:17:11 -07:00
sz = Extent : : maxSize ( ) ;
2010-12-24 13:31:46 -05:00
}
2011-01-04 00:40:41 -05:00
else if ( sz > Extent : : maxSize ( ) ) {
2010-12-24 13:31:46 -05:00
sz = Extent : : maxSize ( ) ;
}
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
sz = ( ( int ) sz ) & 0xffffff00 ;
assert ( sz > len ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
return sz ;
}
2010-01-25 12:25:29 -05:00
/* Insert into index number idxNo every key that `obj` generates for the new record at
   recordLoc. The index is flagged multikey as soon as a second key is seen.
   An AssertionException from bt_insert is
     - skipped when its code is 10287 and idxNo == d->nIndexes (the slot used for an
       index being built in the background),
     - rethrown when dupsAllowed is false (presumably a duplicate key),
     - otherwise reported through problem() and ignored.
*/
static inline void _indexRecord(NamespaceDetails *d, int idxNo, BSONObj& obj, DiskLoc recordLoc, bool dupsAllowed) {
    IndexDetails& idx = d->idx( idxNo );

    BSONObjSet keys;
    idx.getKeysFromObject( obj, keys );
    if ( keys.empty() )
        return;

    BSONObj order = idx.keyPattern();
    IndexInterface& ii = idx.idxInterface();
    Ordering ordering = Ordering::make( order );

    int seen = 0;
    for ( BSONObjSet::iterator it = keys.begin(); it != keys.end(); ++it ) {
        seen++;
        if ( seen == 2 ) {
            d->setIndexIsMultikey( idxNo );
        }
        assert( !recordLoc.isNull() );
        try {
            ii.bt_insert( idx.head, recordLoc, *it, ordering, dupsAllowed, idx );
        }
        catch ( AssertionException& e ) {
            const bool alreadyThereDuringBgBuild = e.getCode() == 10287 && idxNo == d->nIndexes;
            if ( alreadyThereDuringBgBuild ) {
                DEV log() << " info: caught key already in index on bg indexing (ok) " << endl;
                continue;
            }

            if ( !dupsAllowed ) {
                // dup key exception, presumably.
                throw;
            }
            problem() << " caught assertion _indexRecord " << idx.indexNamespace() << " " << obj[ " _id " ] << endl;
        }
    }
}
2009-01-15 10:17:11 -05:00
2011-06-16 16:35:07 -04:00
#if 0
/* Disabled smoke test for BSONObjExternalSorter: adds the same key at five different
   locations, sorts, and walks the resulting iterator.
*/
void testSorting() {
    BSONObjBuilder b;
    b.appendNull( " " );
    BSONObj x = b.obj();

    BSONObjExternalSorter sorter( *IndexDetails::iis[1] );

    // (a, ofs) pairs, added in this order
    const int locs[5][2] = { { 3, 7 }, { 4, 7 }, { 2, 7 }, { 1, 7 }, { 3, 77 } };
    for ( int k = 0; k < 5; k++ )
        sorter.add( x, DiskLoc( locs[k][0], locs[k][1] ) );

    sorter.sort();

    auto_ptr<BSONObjExternalSorter::Iterator> it = sorter.iterator();
    while ( it->more() ) {
        BSONObjExternalSorter::Data d = it->next();
        /*cout << d.second.toString() << endl;
        cout << d.first.objsize() << endl;
        cout<<"SORTER next:" << d.first.toString() << endl;*/
    }
}
#endif
2009-09-24 17:10:45 -04:00
2011-04-04 16:30:09 -04:00
/* Optional, externally prepared phase-one sort result. fastBuildIndex() uses it instead
   of scanning and sorting the collection itself whenever it is non-null. */
SortPhaseOne * precalced = 0 ;
2011-04-22 18:16:43 -04:00
template < class V >
void buildBottomUpPhases2And3 ( bool dupsAllowed , IndexDetails & idx , BSONObjExternalSorter & sorter ,
bool dropDups , list < DiskLoc > & dupsToDrop , CurOp * op , SortPhaseOne * phase1 , ProgressMeterHolder & pm ,
Timer & t
)
{
BtreeBuilder < V > btBuilder ( dupsAllowed , idx ) ;
BSONObj keyLast ;
auto_ptr < BSONObjExternalSorter : : Iterator > i = sorter . iterator ( ) ;
assert ( pm = = op - > setMessage ( " index: (2/3) btree bottom up " , phase1 - > nkeys , 10 ) ) ;
while ( i - > more ( ) ) {
RARELY killCurrentOp . checkForInterrupt ( ) ;
BSONObjExternalSorter : : Data d = i - > next ( ) ;
try {
2011-07-18 14:51:07 -07:00
if ( ! dupsAllowed & & dropDups ) {
LastError : : Disabled led ( lastError . get ( ) ) ;
btBuilder . addKey ( d . first , d . second ) ;
}
else {
btBuilder . addKey ( d . first , d . second ) ;
}
2011-04-22 18:16:43 -04:00
}
catch ( AssertionException & e ) {
if ( dupsAllowed ) {
// unknow exception??
throw ;
}
2011-07-18 14:51:07 -07:00
if ( e . interrupted ( ) ) {
killCurrentOp . checkForInterrupt ( ) ;
}
2011-04-22 18:16:43 -04:00
if ( ! dropDups )
throw ;
/* we could queue these on disk, but normally there are very few dups, so instead we
keep in ram and have a limit .
*/
dupsToDrop . push_back ( d . second ) ;
uassert ( 10092 , " too may dups on index build with dropDups=true " , dupsToDrop . size ( ) < 1000000 ) ;
}
pm . hit ( ) ;
}
pm . finished ( ) ;
op - > setMessage ( " index: (3/3) btree-middle " ) ;
log ( t . seconds ( ) > 10 ? 0 : 1 ) < < " \t done building bottom layer, going to commit " < < endl ;
btBuilder . commit ( ) ;
if ( btBuilder . getn ( ) ! = phase1 - > nkeys & & ! dropDups ) {
warning ( ) < < " not all entries were added to the index, probably some keys were too large " < < endl ;
}
}
2010-01-15 13:19:35 -05:00
// throws DBException
2009-09-24 12:11:55 -04:00
unsigned long long fastBuildIndex ( const char * ns , NamespaceDetails * d , IndexDetails & idx , int idxNo ) {
2010-03-15 11:18:08 -04:00
CurOp * op = cc ( ) . curop ( ) ;
2010-02-04 14:25:49 -05:00
2009-09-26 23:47:01 -04:00
Timer t ;
2009-09-24 17:10:45 -04:00
2010-08-17 22:24:35 -04:00
tlog ( 1 ) < < " fastBuildIndex " < < ns < < " idxNo: " < < idxNo < < ' ' < < idx . info . obj ( ) . toString ( ) < < endl ;
2009-09-24 17:10:45 -04:00
2009-09-21 16:31:00 -04:00
bool dupsAllowed = ! idx . unique ( ) ;
2010-05-07 12:19:00 -04:00
bool dropDups = idx . dropDups ( ) | | inDBRepair ;
2009-09-21 16:31:00 -04:00
BSONObj order = idx . keyPattern ( ) ;
2010-12-09 14:44:08 -05:00
getDur ( ) . writingDiskLoc ( idx . head ) . Null ( ) ;
2011-01-04 00:40:41 -05:00
2010-02-20 17:37:12 -05:00
if ( logLevel > 1 ) printMemInfo ( " before index start " ) ;
2009-09-24 12:11:55 -04:00
/* get and sort all the keys ----- */
2010-09-27 12:35:22 -04:00
ProgressMeterHolder pm ( op - > setMessage ( " index: (1/3) external sort " , d->stats.nrecords , 10 ) ) ;
2011-04-04 16:30:09 -04:00
SortPhaseOne _ours ;
SortPhaseOne * phase1 = precalced ;
if ( phase1 = = 0 ) {
phase1 = & _ours ;
SortPhaseOne & p1 = * phase1 ;
shared_ptr < Cursor > c = theDataFileMgr . findAll ( ns ) ;
2011-06-16 16:35:07 -04:00
p1 . sorter . reset ( new BSONObjExternalSorter ( idx . idxInterface ( ) , order ) ) ;
2011-04-04 16:30:09 -04:00
p1 . sorter - > hintNumObjects ( d - > stats . nrecords ) ;
const IndexSpec & spec = idx . getSpec ( ) ;
while ( c - > ok ( ) ) {
BSONObj o = c - > current ( ) ;
DiskLoc loc = c - > currLoc ( ) ;
2011-04-04 16:41:58 -04:00
p1 . addKeys ( spec , o , loc ) ;
2011-04-04 16:30:09 -04:00
c - > advance ( ) ;
pm . hit ( ) ;
if ( logLevel > 1 & & p1 . n % 10000 = = 0 ) {
printMemInfo ( " \t iterating objects " ) ;
}
} ;
}
2010-03-15 11:18:08 -04:00
pm . finished ( ) ;
2011-04-04 16:30:09 -04:00
BSONObjExternalSorter & sorter = * ( phase1 - > sorter ) ;
if ( phase1 - > multi )
d - > setIndexIsMultikey ( idxNo ) ;
2010-02-20 16:25:10 -05:00
if ( logLevel > 1 ) printMemInfo ( " before final sort " ) ;
2011-04-04 16:30:09 -04:00
phase1 - > sorter - > sort ( ) ;
2010-02-20 16:25:10 -05:00
if ( logLevel > 1 ) printMemInfo ( " after final sort " ) ;
2011-01-04 00:40:41 -05:00
2009-11-19 11:19:48 -05:00
log ( t . seconds ( ) > 5 ? 0 : 1 ) < < " \t external sort used : " < < sorter . numFiles ( ) < < " files " < < " in " < < t . seconds ( ) < < " secs " < < endl ;
2009-09-26 23:47:01 -04:00
2009-09-24 14:21:40 -04:00
list < DiskLoc > dupsToDrop ;
2011-01-04 00:40:41 -05:00
/* build index --- */
2011-04-22 18:16:43 -04:00
if ( idx . version ( ) = = 0 )
buildBottomUpPhases2And3 < V0 > ( dupsAllowed , idx , sorter , dropDups , dupsToDrop , op , phase1 , pm , t ) ;
else if ( idx . version ( ) = = 1 )
buildBottomUpPhases2And3 < V1 > ( dupsAllowed , idx , sorter , dropDups , dupsToDrop , op , phase1 , pm , t ) ;
else
assert ( false ) ;
2011-01-04 00:40:41 -05:00
2009-09-26 23:47:01 -04:00
log ( 1 ) < < " \t fastBuildIndex dupsToDrop: " < < dupsToDrop . size ( ) < < endl ;
2009-09-24 17:10:45 -04:00
2011-03-28 17:38:06 -04:00
for ( list < DiskLoc > : : iterator i = dupsToDrop . begin ( ) ; i ! = dupsToDrop . end ( ) ; i + + ) {
2011-07-28 17:23:59 -04:00
theDataFileMgr . deleteRecord ( ns , i - > rec ( ) , * i , false , true , true ) ;
2011-03-28 17:38:06 -04:00
getDur ( ) . commitIfNeeded ( ) ;
}
2009-09-21 16:31:00 -04:00
2011-04-04 16:30:09 -04:00
return phase1 - > n ;
2009-09-21 16:31:00 -04:00
}
2011-01-04 00:40:41 -05:00
class BackgroundIndexBuildJob : public BackgroundOperation {
2009-01-26 17:23:45 -05:00
2010-01-18 14:34:33 -05:00
unsigned long long addExistingToIndex ( const char * ns , NamespaceDetails * d , IndexDetails & idx , int idxNo ) {
bool dupsAllowed = ! idx . unique ( ) ;
bool dropDups = idx . dropDups ( ) ;
2010-09-27 12:35:22 -04:00
ProgressMeter & progress = cc ( ) . curop ( ) - > setMessage ( " bg index build " , d - > stats . nrecords ) ;
2010-03-22 21:47:03 -04:00
2010-01-18 14:34:33 -05:00
unsigned long long n = 0 ;
2010-01-21 16:35:35 -05:00
auto_ptr < ClientCursor > cc ;
{
2010-05-07 17:25:57 -04:00
shared_ptr < Cursor > c = theDataFileMgr . findAll ( ns ) ;
2010-03-25 16:16:59 -04:00
cc . reset ( new ClientCursor ( QueryOption_NoCursorTimeout , c , ns ) ) ;
2010-01-21 16:35:35 -05:00
}
2010-10-29 11:00:38 -04:00
CursorId id = cc - > cursorid ( ) ;
2010-01-21 16:35:35 -05:00
2010-10-29 11:00:38 -04:00
while ( cc - > ok ( ) ) {
BSONObj js = cc - > current ( ) ;
2011-01-04 00:40:41 -05:00
try {
2011-07-18 14:51:07 -07:00
{
if ( ! dupsAllowed & & dropDups ) {
LastError : : Disabled led ( lastError . get ( ) ) ;
_indexRecord ( d , idxNo , js , cc - > currLoc ( ) , dupsAllowed ) ;
}
else {
_indexRecord ( d , idxNo , js , cc - > currLoc ( ) , dupsAllowed ) ;
}
}
2010-10-29 11:00:38 -04:00
cc - > advance ( ) ;
2011-01-04 00:40:41 -05:00
}
catch ( AssertionException & e ) {
2011-07-18 14:51:07 -07:00
if ( e . interrupted ( ) ) {
killCurrentOp . checkForInterrupt ( ) ;
}
2010-02-04 14:25:49 -05:00
2010-01-18 14:34:33 -05:00
if ( dropDups ) {
2010-10-29 11:00:38 -04:00
DiskLoc toDelete = cc - > currLoc ( ) ;
bool ok = cc - > advance ( ) ;
2010-01-21 16:35:35 -05:00
cc - > updateLocation ( ) ;
2011-07-28 17:23:59 -04:00
theDataFileMgr . deleteRecord ( ns , toDelete . rec ( ) , toDelete , false , true , true ) ;
2010-01-21 16:35:35 -05:00
if ( ClientCursor : : find ( id , false ) = = 0 ) {
cc . release ( ) ;
2011-01-04 00:40:41 -05:00
if ( ! ok ) {
2010-02-02 13:01:11 -05:00
/* we were already at the end. normal. */
}
else {
uasserted ( 12585 , " cursor gone during bg index; dropDups " ) ;
}
2010-01-21 16:35:35 -05:00
break ;
}
2011-01-04 00:40:41 -05:00
}
else {
2010-01-21 16:35:35 -05:00
log ( ) < < " background addExistingToIndex exception " < < e . what ( ) < < endl ;
2010-01-18 14:34:33 -05:00
throw ;
}
2009-08-05 16:02:20 -04:00
}
2010-01-18 14:34:33 -05:00
n + + ;
2010-03-22 21:47:03 -04:00
progress . hit ( ) ;
2011-03-28 17:45:07 -04:00
getDur ( ) . commitIfNeeded ( ) ;
2011-06-23 15:46:10 -04:00
if ( cc - > yieldSometimes ( ClientCursor : : WillNeed ) ) {
2011-03-23 17:07:38 -04:00
progress . setTotalWhileRunning ( d - > stats . nrecords ) ;
}
else {
2010-01-21 16:35:35 -05:00
cc . release ( ) ;
2010-01-22 15:17:03 -05:00
uasserted ( 12584 , " cursor gone during bg index " ) ;
2010-01-21 16:35:35 -05:00
break ;
}
2010-03-22 21:47:03 -04:00
}
2010-03-28 00:33:33 -04:00
progress . finished ( ) ;
2010-01-18 14:34:33 -05:00
return n ;
}
2011-01-04 00:40:41 -05:00
/* we do set a flag in the namespace for quick checking, but this is our authoritative info -
2010-01-18 14:34:33 -05:00
that way on a crash / restart , we don ' t think we are still building one . */
set < NamespaceDetails * > bgJobsInProgress ;
2010-01-22 15:17:03 -05:00
void prep ( const char * ns , NamespaceDetails * d ) {
2010-01-18 14:34:33 -05:00
assertInWriteLock ( ) ;
2010-04-27 14:19:10 -04:00
uassert ( 13130 , " can't start bg index b/c in recursive lock (db.eval?) " , dbMutex . getState ( ) = = 1 ) ;
2010-01-18 14:34:33 -05:00
bgJobsInProgress . insert ( d ) ;
}
2010-03-16 00:58:47 -07:00
void done ( const char * ns , NamespaceDetails * d ) {
NamespaceDetailsTransient : : get_w ( ns ) . addedIndex ( ) ; // clear query optimizer cache
2010-01-22 12:15:24 -05:00
assertInWriteLock ( ) ;
}
2010-01-18 14:34:33 -05:00
public :
2010-01-22 15:17:03 -05:00
/** Create a job for namespace ns; ns is forwarded to the BackgroundOperation base. */
BackgroundIndexBuildJob ( const char * ns ) : BackgroundOperation ( ns ) { }
2010-01-18 14:34:33 -05:00
2011-01-04 00:40:41 -05:00
/** Run the background build of idx over the existing records of ns.
 *
 *  Calls prep(), allocates the index head bucket, then adds every existing
 *  record through addExistingToIndex(). Whether the build succeeds or throws,
 *  done() is invoked - on failure only if the current database still exists and
 *  ns still resolves to d. Any exception is rethrown to the caller.
 *
 *  @param idxNo must equal d->nIndexes (asserted before and after the build)
 *  @return the value returned by addExistingToIndex()
 */
unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
    const char *nsStr = ns.c_str();   // ns is never modified below, so this stays valid
    unsigned long long added = 0;

    prep( nsStr, d );
    assert( idxNo == d->nIndexes );
    try {
        idx.head.writing() = idx.idxInterface().addBucket( idx );
        added = addExistingToIndex( nsStr, d, idx, idxNo );
    }
    catch (...) {
        const bool stillThere = cc().database() && nsdetails( nsStr ) == d;
        if ( !stillThere ) {
            log() << " ERROR: db gone during bg index? " << endl;
        }
        else {
            assert( idxNo == d->nIndexes );
            done( nsStr, d );
        }
        throw;
    }

    assert( idxNo == d->nIndexes );
    done( nsStr, d );
    return added;
}
2010-01-22 15:17:03 -05:00
} ;
2009-01-15 10:17:11 -05:00
2011-01-14 15:48:11 -08:00
/**
* For the lifetime of this object , an index build is indicated on the specified
* namespace and the newest index is marked as absent . This simplifies
* the cleanup required on recovery .
*/
class RecoverableIndexState {
public :
RecoverableIndexState ( NamespaceDetails * d ) : _d ( d ) {
2011-01-14 16:26:26 -08:00
indexBuildInProgress ( ) = 1 ;
2011-01-14 15:48:11 -08:00
nIndexes ( ) - - ;
}
~ RecoverableIndexState ( ) {
DESTRUCTOR_GUARD (
nIndexes ( ) + + ;
2011-01-14 16:26:26 -08:00
indexBuildInProgress ( ) = 0 ;
2011-01-14 15:48:11 -08:00
)
}
private :
int & nIndexes ( ) { return getDur ( ) . writingInt ( _d - > nIndexes ) ; }
2011-01-14 16:26:26 -08:00
int & indexBuildInProgress ( ) { return getDur ( ) . writingInt ( _d - > indexBuildInProgress ) ; }
2011-01-14 15:48:11 -08:00
NamespaceDetails * _d ;
} ;
2010-01-15 13:19:35 -05:00
// throws DBException
2011-01-04 00:40:41 -05:00
static void buildAnIndex ( string ns , NamespaceDetails * d , IndexDetails & idx , int idxNo , bool background ) {
2011-06-06 13:06:43 -04:00
tlog ( ) < < " build index " < < ns < < ' ' < < idx . keyPattern ( ) < < ( background ? " background " : " " ) < < endl ;
2009-09-21 16:31:00 -04:00
Timer t ;
2011-01-04 00:40:41 -05:00
unsigned long long n ;
2010-01-18 14:34:33 -05:00
2010-02-04 14:25:49 -05:00
assert ( ! BackgroundOperation : : inProgForNs ( ns . c_str ( ) ) ) ; // should have been checked earlier, better not be...
2011-01-14 16:26:26 -08:00
assert ( d - > indexBuildInProgress = = 0 ) ;
2011-01-14 15:48:11 -08:00
assertInWriteLock ( ) ;
RecoverableIndexState recoverable ( d ) ;
2011-03-14 12:50:44 -04:00
// Build index spec here in case the collection is empty and the index details are invalid
idx . getSpec ( ) ;
2010-05-07 12:19:00 -04:00
if ( inDBRepair | | ! background ) {
2011-01-04 00:40:41 -05:00
n = fastBuildIndex ( ns . c_str ( ) , d , idx , idxNo ) ;
assert ( ! idx . head . isNull ( ) ) ;
}
else {
2010-01-22 15:17:03 -05:00
BackgroundIndexBuildJob j ( ns . c_str ( ) ) ;
n = j . go ( ns , d , idx , idxNo ) ;
2011-01-04 00:40:41 -05:00
}
2011-06-06 13:06:43 -04:00
tlog ( ) < < " build index done " < < n < < " records " < < t . millis ( ) / 1000.0 < < " secs " < < endl ;
2009-09-18 13:28:40 -04:00
}
2009-01-15 10:17:11 -05:00
/* add keys to indexes for a new record */
2010-01-29 14:12:02 -05:00
static void indexRecord ( NamespaceDetails * d , BSONObj obj , DiskLoc loc ) {
2010-01-25 12:25:29 -05:00
int n = d - > nIndexesBeingBuilt ( ) ;
for ( int i = 0 ; i < n ; i + + ) {
2011-01-04 00:40:41 -05:00
try {
2009-10-21 16:00:40 -04:00
bool unique = d - > idx ( i ) . unique ( ) ;
2010-01-29 14:12:02 -05:00
_indexRecord ( d , i , obj , loc , /*dupsAllowed*/ ! unique ) ;
2009-04-20 10:29:26 -04:00
}
2011-01-04 00:40:41 -05:00
catch ( DBException & ) {
2009-09-21 16:31:00 -04:00
/* try to roll back previously added index entries
note < = i ( not < i ) is important here as the index we were just attempted
may be multikey and require some cleanup .
*/
2011-01-04 00:40:41 -05:00
for ( int j = 0 ; j < = i ; j + + ) {
2009-04-20 10:29:26 -04:00
try {
2010-01-29 14:12:02 -05:00
_unindexRecord ( d - > idx ( j ) , obj , loc , false ) ;
2009-04-20 10:29:26 -04:00
}
2011-01-04 00:40:41 -05:00
catch ( . . . ) {
2009-04-20 10:29:26 -04:00
log ( 3 ) < < " unindex fails on rollback after unique failure \n " ;
}
}
throw ;
}
2009-01-15 10:17:11 -05:00
}
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2010-01-25 12:25:29 -05:00
extern BSONObj id_obj ; // { _id : 1 }
2008-08-19 17:13:20 -04:00
2009-01-15 10:17:11 -05:00
void ensureHaveIdIndex ( const char * ns ) {
NamespaceDetails * d = nsdetails ( ns ) ;
if ( d = = 0 | | ( d - > flags & NamespaceDetails : : Flag_HaveIdIndex ) )
return ;
2008-08-19 17:13:20 -04:00
2010-12-09 14:44:08 -05:00
* getDur ( ) . writing ( & d - > flags ) | = NamespaceDetails : : Flag_HaveIdIndex ;
2008-08-19 17:13:20 -04:00
2009-10-21 16:00:40 -04:00
{
NamespaceDetails : : IndexIterator i = d - > ii ( ) ;
while ( i . more ( ) ) {
if ( i . next ( ) . isIdIndex ( ) )
return ;
}
}
2009-10-14 14:34:38 -04:00
string system_indexes = cc ( ) . database ( ) - > name + " .system.indexes " ;
2008-08-19 17:13:20 -04:00
2009-01-15 10:17:11 -05:00
BSONObjBuilder b ;
b . append ( " name " , " _id_ " ) ;
b . append ( " ns " , ns ) ;
b . append ( " key " , id_obj ) ;
BSONObj o = b . done ( ) ;
2008-08-19 17:13:20 -04:00
2009-01-15 10:17:11 -05:00
/* edge case: note the insert could fail if we have hit maxindexes already */
2009-05-26 14:33:19 -04:00
theDataFileMgr . insert ( system_indexes . c_str ( ) , o . objdata ( ) , o . objsize ( ) , true ) ;
2009-01-15 10:17:11 -05:00
}
2008-07-07 18:57:04 -04:00
2009-02-01 22:21:32 -05:00
# pragma pack(1)
2011-01-04 00:40:41 -05:00
struct IDToInsert_ {
2009-01-30 18:18:38 -05:00
char type ;
char _id [ 4 ] ;
OID oid ;
2009-02-03 19:13:27 -05:00
IDToInsert_ ( ) {
2009-01-30 18:18:38 -05:00
type = ( char ) jstOID ;
strcpy ( _id , " _id " ) ;
2009-02-03 19:13:27 -05:00
assert ( sizeof ( IDToInsert_ ) = = 17 ) ;
2009-01-30 18:18:38 -05:00
}
2009-02-03 19:13:27 -05:00
} idToInsert_ ;
struct IDToInsert : public BSONElement {
IDToInsert ( ) : BSONElement ( ( char * ) ( & idToInsert_ ) ) { }
2009-01-30 18:18:38 -05:00
} idToInsert ;
2009-02-01 22:21:32 -05:00
# pragma pack()
2011-01-04 00:40:41 -05:00
2009-08-13 13:26:03 -04:00
void DataFileMgr : : insertAndLog ( const char * ns , const BSONObj & o , bool god ) {
2009-05-21 11:07:11 -04:00
BSONObj tmp = o ;
2011-05-29 00:31:17 -04:00
insertWithObjMod ( ns , tmp , god ) ;
2009-05-21 11:07:11 -04:00
logOp ( " i " , ns , tmp ) ;
}
2011-01-04 00:40:41 -05:00
2011-05-24 10:05:31 -04:00
/** @param o the object to insert. can be modified to add _id and thus be an in/out param
*/
2010-04-28 08:25:56 -04:00
DiskLoc DataFileMgr : : insertWithObjMod ( const char * ns , BSONObj & o , bool god ) {
2011-05-29 00:31:17 -04:00
bool addedID = false ;
DiskLoc loc = insert ( ns , o . objdata ( ) , o . objsize ( ) , god , true , & addedID ) ;
if ( addedID & & ! loc . isNull ( ) )
2009-02-03 22:34:51 -05:00
o = BSONObj ( loc . rec ( ) ) ;
return loc ;
}
2009-04-20 12:38:36 -04:00
2010-09-23 16:52:19 -04:00
bool prepareToBuildIndex ( const BSONObj & io , bool god , string & sourceNS , NamespaceDetails * & sourceCollection , BSONObj & fixedIndexObject ) ;
2010-01-15 16:05:14 -05:00
2010-01-20 18:09:27 -08:00
// We are now doing two btree scans for all unique indexes (one here, and one when we've
// written the record to the collection. This could be made more efficient inserting
// dummy data here, keeping pointers to the btree nodes holding the dummy data and then
2011-01-04 00:40:41 -05:00
// updating the dummy data with the DiskLoc of the real record.
2010-01-20 18:09:27 -08:00
void checkNoIndexConflicts ( NamespaceDetails * d , const BSONObj & obj ) {
for ( int idxNo = 0 ; idxNo < d - > nIndexes ; idxNo + + ) {
if ( d - > idx ( idxNo ) . unique ( ) ) {
IndexDetails & idx = d - > idx ( idxNo ) ;
2011-06-02 13:34:26 -04:00
BSONObjSet keys ;
2010-01-20 18:09:27 -08:00
idx . getKeysFromObject ( obj , keys ) ;
BSONObj order = idx . keyPattern ( ) ;
2011-04-21 14:53:10 -04:00
IndexInterface & ii = idx . idxInterface ( ) ;
2011-06-02 13:34:26 -04:00
for ( BSONObjSet : : iterator i = keys . begin ( ) ; i ! = keys . end ( ) ; i + + ) {
2011-04-11 18:21:48 -04:00
// WARNING: findSingle may not be compound index safe. this may need to change. see notes in
// findSingle code.
2010-01-20 18:09:27 -08:00
uassert ( 12582 , " duplicate key insert for unique index of capped collection " ,
2011-04-21 14:53:10 -04:00
ii . findSingle ( idx , idx . head , * i ) . isNull ( ) ) ;
2010-01-20 18:09:27 -08:00
}
}
2011-01-04 00:40:41 -05:00
}
2010-01-20 18:09:27 -08:00
}
2010-07-23 22:44:11 -04:00
2011-04-04 12:38:38 -04:00
/** Add a record to the end of the linked list chain within this extent.
 *  Require: you must have already declared write intent for the record header
 *  (r's prevOfs / nextOfs are written directly here).
 *  @param r   the record to link in
 *  @param loc location of r (dassert'ed to resolve to r)
 */
void addRecordToRecListInExtent( Record *r, DiskLoc loc ) {
    dassert( loc.rec() == r );
    Extent *extent = r->myExtent( loc );

    if ( extent->lastRecord.isNull() ) {
        // no last record in the extent: r becomes both first and last
        Extent::FL *ends = getDur().writing( extent->fl() );
        ends->firstRecord = ends->lastRecord = loc;
        r->nextOfs = DiskLoc::NullOfs;
        r->prevOfs = DiskLoc::NullOfs;
        return;
    }

    // append after the current last record
    Record *previousLast = extent->lastRecord.rec();
    r->prevOfs = extent->lastRecord.getOfs();
    r->nextOfs = DiskLoc::NullOfs;
    getDur().writingInt( previousLast->nextOfs ) = loc.getOfs();
    getDur().writingDiskLoc( extent->lastRecord ) = loc;
}
2011-05-23 20:01:42 -04:00
/** Slow path taken when d->alloc() could not find room for a new record.
 *
 *  For a collection with d->capped set nothing is attempted and a
 *  default-constructed DiskLoc is returned. Otherwise a follow-up extent is
 *  allocated and the allocation retried; if that still fails, up to 10 further
 *  extents are added while lenWHdr exceeds d->lastExtentSize.
 *
 *  @param god the extent allocations are passed !god as their last argument
 *  @return the allocated location, or a null loc if none could be obtained
 */
NOINLINE_DECL DiskLoc outOfSpace( const char *ns, NamespaceDetails *d, int lenWHdr, bool god, DiskLoc extentLoc ) {
    if ( d->capped != 0 ) {
        // size capped doesn't grow
        return DiskLoc();
    }

    log( 1 ) << " allocating new extent for " << ns << " padding: " << d->paddingFactor << " lenWHdr: " << lenWHdr << endl;
    cc().database()->allocExtent( ns, Extent::followupSize( lenWHdr, d->lastExtentSize ), false, !god );
    DiskLoc loc = d->alloc( ns, lenWHdr, extentLoc );
    if ( !loc.isNull() )
        return loc;

    log() << " warning: alloc() failed after allocating new extent. lenWHdr: " << lenWHdr << " last extent size: " << d->lastExtentSize << " ; trying again \n ";
    for ( int attempt = 0; attempt < 10 && lenWHdr > d->lastExtentSize; attempt++ ) {
        log() << " try # " << attempt << endl;
        cc().database()->allocExtent( ns, Extent::followupSize( lenWHdr, d->lastExtentSize ), false, !god );
        loc = d->alloc( ns, lenWHdr, extentLoc );
        if ( !loc.isNull() )
            break;
    }
    return loc;
}
/** Allocate room for a record of lenWHdr bytes (header included) in ns.
 *  Tries d->alloc() first and falls back to outOfSpace() when that fails.
 *  Used by insert and also compact.
 *  @return null loc if out of space
 */
DiskLoc allocateSpaceForANewRecord( const char *ns, NamespaceDetails *d, int lenWHdr, bool god ) {
    DiskLoc extentLoc;
    DiskLoc where = d->alloc( ns, lenWHdr, extentLoc );
    if ( !where.isNull() )
        return where;
    where = outOfSpace( ns, d, lenWHdr, god, extentLoc );
    return where;
}
2011-05-15 23:05:08 -04:00
/** Validate an insert whose namespace contains "system.".
 *
 *  @param sys           pointer to the match found by the caller inside ns;
 *                       uasserts (10095) when it equals ns itself
 *  @param wouldAddIndex set to true when ns is a system.indexes namespace
 *  @param obuf          object being inserted (may be null); only inspected for
 *                       system.users inserts, where 'user' and 'pwd' must be
 *                       non-empty strings (uasserts 14051-14054)
 *  @param god           when true, inserts into other system namespaces are allowed
 *  @return false if the insert must be refused (it is logged, not asserted)
 */
bool NOINLINE_DECL insert_checkSys( const char *sys, const char *ns, bool& wouldAddIndex, const void *obuf, bool god ) {
    uassert( 10095 , " attempt to insert in reserved database name 'system' " , sys != ns );
    if ( !strstr( ns, " .system. " ) )
        return true;

    // later: check for dba-type permissions here if have that at some point separate
    if ( strstr( ns, " .system.indexes " ) ) {
        wouldAddIndex = true;
        return true;
    }

    if ( legalClientSystemNS( ns, true ) ) {
        if ( obuf && strstr( ns, " .system.users " ) ) {
            BSONObj userDoc( reinterpret_cast<const char *>( obuf ) );
            uassert( 14051 , " system.user entry needs 'user' field to be a string " , userDoc[" user "].type() == String );
            uassert( 14052 , " system.user entry needs 'pwd' field to be a string " , userDoc[" pwd "].type() == String );
            uassert( 14053 , " system.user entry needs 'user' field to be non-empty " , userDoc[" user "].String().size() );
            uassert( 14054 , " system.user entry needs 'pwd' field to be non-empty " , userDoc[" pwd "].String().size() );
        }
        return true;
    }

    if ( god )
        return true;

    // todo: this should probably uassert rather than doing this:
    log() << " ERROR: attempt to insert in system namespace " << ns << endl;
    return false;
}
/** Create namespace ns on first insert: register it in the catalog, allocate
 *  its first extent and, unless god is set, ensure the _id index for it.
 *
 *  @param len size of the object about to be inserted; used to size the first extent
 *  @return nsdetails(ns) as looked up after the extent allocation
 */
NOINLINE_DECL NamespaceDetails* insert_newNamespace( const char *ns, int len, bool god ) {
    addNewNamespaceToCatalog( ns );
    /* todo: shouldn't be in the namespace catalog until after the allocations here work.
       also if this is an addIndex, those checks should happen before this!
    */

    // This may create first file in the database.
    int firstExtentSize = Extent::initialSize( len );
    const int recLen = len + Record::HeaderSize;
    const bool bucketSized = recLen >= BtreeData_V1::BucketSize - 256 &&
                             recLen <= BtreeData_V1::BucketSize + 256;
    if ( str::contains( ns, ' $ ' ) && bucketSized ) {
        // probably an index. so we pick a value here for the first extent instead of using
        // initialExtentSize() which is more for user collections.
        // TODO: we could look at the # of records in the parent collection to be smarter here.
        firstExtentSize = ( 32 + 4 ) * 1024;
    }
    cc().database()->allocExtent( ns, firstExtentSize, false, false );

    NamespaceDetails *details = nsdetails( ns );
    if ( !god )
        ensureIdIndexForNewNs( ns );
    return details;
}
/** Build the index described by the system.indexes record at loc on tableToIndex.
 *
 *  Adds the IndexDetails slot, points it at loc and runs buildAnIndex(). If the
 *  build throws a DBException the index is dropped again, the saved error is
 *  re-raised through raiseError() and the exception is rethrown.
 *
 *  @param tabletoidxns namespace of the collection being indexed; must not be
 *                      a system.indexes namespace (uasserts 13143)
 */
void NOINLINE_DECL insert_makeIndex( NamespaceDetails *tableToIndex, const string& tabletoidxns, const DiskLoc& loc ) {
    uassert( 13143 , " can't create index on system.indexes " , tabletoidxns.find( " .system.indexes " ) == string::npos );

    BSONObj info = loc.obj();
    bool background = info[" background "].trueValue();
    if ( background && cc().isSyncThread() ) {
        /* don't do background indexing on slaves. there are nuances. this could be added later
           but requires more code.
        */
        log() << " info: indexing in foreground on this replica; was a background index build on the primary " << endl;
        background = false;
    }

    const int idxNo = tableToIndex->nIndexes;
    // clear transient info caches so they refresh; increments nIndexes
    IndexDetails& idx = tableToIndex->addIndex( tabletoidxns.c_str(), !background );
    getDur().writingDiskLoc( idx.info ) = loc;

    try {
        buildAnIndex( tabletoidxns, tableToIndex, idx, idxNo, background );
    }
    catch ( DBException& e ) {
        // save our error msg string as an exception or dropIndexes will overwrite our message
        LastError *le = lastError.get();
        int savedCode = 0;
        string savedMsg;
        if ( !le ) {
            savedCode = e.getCode();
            savedMsg = e.what();
        }
        else {
            savedCode = le->code;
            savedMsg = le->msg;
        }

        // roll back this index
        string indexName = idx.indexName();
        BSONObjBuilder result;
        string dropErr;
        const bool dropped = dropIndexes( tableToIndex, tabletoidxns.c_str(), indexName.c_str(), dropErr, result, true );
        if ( !dropped ) {
            log() << " failed to drop index after a unique key error building it: " << dropErr << ' ' << tabletoidxns << ' ' << indexName << endl;
        }

        // NOTE(review): this assert requires le to be non-null even though the branch
        // above handles a null le - confirm which behaviour is intended.
        assert( le && !savedMsg.empty() );
        raiseError( savedCode, savedMsg.c_str() );
        throw;
    }
}
/* if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc
after the call - - that will prevent a double buffer copy in some cases ( btree . cpp ) .
@ param mayAddIndex almost always true , except for invocation from rename namespace command .
2011-05-28 13:11:43 -04:00
@ param addedID if not null , set to true if adding _id element . you must assure false before calling
if using .
2009-09-27 14:46:51 -04:00
*/
2011-05-28 13:11:43 -04:00
DiskLoc DataFileMgr : : insert ( const char * ns , const void * obuf , int len , bool god , bool mayAddIndex , bool * addedID ) {
2009-07-29 15:53:14 -04:00
bool wouldAddIndex = false ;
2011-05-23 20:01:42 -04:00
massert ( 10093 , " cannot insert into reserved $ collection " , god | | NamespaceString : : normal ( ns ) ) ;
2010-12-30 23:59:01 -05:00
uassert ( 10094 , str : : stream ( ) < < " invalid ns: " < < ns , isValidNS ( ns ) ) ;
2011-05-15 23:05:08 -04:00
{
const char * sys = strstr ( ns , " system. " ) ;
if ( sys & & ! insert_checkSys ( sys , ns , wouldAddIndex , obuf , god ) )
return DiskLoc ( ) ;
2008-12-28 20:28:49 -05:00
}
2009-08-13 13:26:03 -04:00
bool addIndex = wouldAddIndex & & mayAddIndex ;
2009-07-29 15:53:14 -04:00
2009-01-15 10:17:11 -05:00
NamespaceDetails * d = nsdetails ( ns ) ;
if ( d = = 0 ) {
2011-05-15 23:05:08 -04:00
d = insert_newNamespace ( ns , len , god ) ;
2009-01-15 10:17:11 -05:00
}
d - > paddingFits ( ) ;
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
NamespaceDetails * tableToIndex = 0 ;
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
string tabletoidxns ;
2010-09-23 16:52:19 -04:00
BSONObj fixedIndexObject ;
2009-01-15 10:17:11 -05:00
if ( addIndex ) {
2010-05-06 22:12:13 -04:00
assert ( obuf ) ;
2010-01-21 16:35:35 -05:00
BSONObj io ( ( const char * ) obuf ) ;
2011-04-22 18:16:43 -04:00
if ( ! prepareToBuildIndex ( io , god , tabletoidxns , tableToIndex , fixedIndexObject ) ) {
// prepare creates _id itself, or this indicates to fail the build silently (such
// as if index already exists)
2009-05-26 14:33:19 -04:00
return DiskLoc ( ) ;
2011-04-22 18:16:43 -04:00
}
2011-01-04 00:40:41 -05:00
if ( ! fixedIndexObject . isEmpty ( ) ) {
2010-09-23 16:52:19 -04:00
obuf = fixedIndexObject . objdata ( ) ;
len = fixedIndexObject . objsize ( ) ;
}
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2011-05-28 13:11:43 -04:00
int addID = 0 ; // 0 if not adding _id; if adding, the length of that new element
2009-01-30 18:18:38 -05:00
if ( ! god ) {
2011-01-04 00:40:41 -05:00
/* Check if we have an _id field. If we don't, we'll add it.
2009-01-30 18:18:38 -05:00
Note that btree buckets which we insert aren ' t BSONObj ' s , but in that case god = = true .
*/
BSONObj io ( ( const char * ) obuf ) ;
2009-04-07 11:02:29 -04:00
BSONElement idField = io . getField ( " _id " ) ;
2009-12-28 16:43:43 -05:00
uassert ( 10099 , " _id cannot be an array " , idField . type ( ) ! = Array ) ;
2011-06-03 14:49:41 -04:00
// we don't add _id for capped collections as they don't have an _id index
2011-06-03 15:47:07 -04:00
if ( idField . eoo ( ) & & ! wouldAddIndex & & strstr ( ns , " .local. " ) = = 0 & & d - > haveIdIndex ( ) ) {
2011-05-28 13:11:43 -04:00
if ( addedID )
* addedID = true ;
2009-01-30 18:18:38 -05:00
addID = len ;
2011-05-24 10:05:31 -04:00
idToInsert_ . oid . init ( ) ;
len + = idToInsert . size ( ) ;
2009-01-30 18:18:38 -05:00
}
2011-01-04 00:40:41 -05:00
2009-02-28 18:55:04 -05:00
BSONElementManipulator : : lookForTimestamps ( io ) ;
2009-01-30 18:18:38 -05:00
}
2009-01-15 10:17:11 -05:00
int lenWHdr = len + Record : : HeaderSize ;
lenWHdr = ( int ) ( lenWHdr * d - > paddingFactor ) ;
if ( lenWHdr = = 0 ) {
// old datafiles, backward compatible here.
assert ( d - > paddingFactor = = 0 ) ;
2011-01-03 11:59:13 -05:00
* getDur ( ) . writing ( & d - > paddingFactor ) = 1.0 ;
2009-01-15 10:17:11 -05:00
lenWHdr = len + Record : : HeaderSize ;
2008-12-28 20:28:49 -05:00
}
2011-01-04 00:40:41 -05:00
2010-01-20 18:09:27 -08:00
// If the collection is capped, check if the new object will violate a unique index
// constraint before allocating space.
if ( d - > nIndexes & & d - > capped & & ! god ) {
checkNoIndexConflicts ( d , BSONObj ( reinterpret_cast < const char * > ( obuf ) ) ) ;
}
2011-01-04 00:40:41 -05:00
2011-05-23 20:01:42 -04:00
DiskLoc loc = allocateSpaceForANewRecord ( ns , d , lenWHdr , god ) ;
2008-12-28 20:28:49 -05:00
if ( loc . isNull ( ) ) {
2011-04-04 12:38:38 -04:00
log ( ) < < " insert: couldn't alloc space for object ns: " < < ns < < " capped: " < < d - > capped < < endl ;
assert ( d - > capped ) ;
return DiskLoc ( ) ;
2008-12-28 20:28:49 -05:00
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
Record * r = loc . rec ( ) ;
2010-11-21 19:36:40 -05:00
{
assert ( r - > lengthWithHeaders > = lenWHdr ) ;
2010-12-09 14:44:08 -05:00
r = ( Record * ) getDur ( ) . writingPtr ( r , lenWHdr ) ;
2011-01-04 00:40:41 -05:00
if ( addID ) {
2010-11-21 19:36:40 -05:00
/* a little effort was made here to avoid a double copy when we add an ID */
2011-05-24 10:05:31 -04:00
( ( int & ) * r - > data ) = * ( ( int * ) obuf ) + idToInsert . size ( ) ;
memcpy ( r - > data + 4 , idToInsert . rawdata ( ) , idToInsert . size ( ) ) ;
memcpy ( r - > data + 4 + idToInsert . size ( ) , ( ( char * ) obuf ) + 4 , addID - 4 ) ;
2010-11-21 19:36:40 -05:00
}
else {
2011-05-28 13:11:43 -04:00
if ( obuf ) // obuf can be null from internal callers
2010-11-21 19:36:40 -05:00
memcpy ( r - > data , obuf , len ) ;
}
2009-01-15 10:17:11 -05:00
}
2010-11-21 19:36:40 -05:00
2011-04-04 12:38:38 -04:00
addRecordToRecListInExtent ( r , loc ) ;
2008-06-06 09:43:15 -04:00
2010-09-27 12:35:22 -04:00
/* durability todo : this could be a bit annoying / slow to record constantly */
{
2010-12-09 14:44:08 -05:00
NamespaceDetails : : Stats * s = getDur ( ) . writing ( & d - > stats ) ;
2010-09-27 12:35:22 -04:00
s - > datasize + = r - > netLength ( ) ;
s - > nrecords + + ;
}
2008-06-06 09:43:15 -04:00
2011-05-28 13:11:43 -04:00
// we don't bother resetting query optimizer stats for the god tables - also god is true when adding a btree bucket
2009-02-24 17:48:06 -05:00
if ( ! god )
2009-12-09 18:13:36 -05:00
NamespaceDetailsTransient : : get_w ( ns ) . notifyOfWriteOp ( ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
if ( tableToIndex ) {
2011-05-15 23:05:08 -04:00
insert_makeIndex ( tableToIndex , tabletoidxns , loc ) ;
2009-01-15 10:17:11 -05:00
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
/* add this record to our indexes */
2009-01-26 17:23:45 -05:00
if ( d - > nIndexes ) {
2011-01-04 00:40:41 -05:00
try {
2010-01-29 14:12:02 -05:00
BSONObj obj ( r - > data ) ;
indexRecord ( d , obj , loc ) ;
2011-01-04 00:40:41 -05:00
}
catch ( AssertionException & e ) {
2009-01-26 17:23:45 -05:00
// should be a dup key error on _id index
2010-01-20 18:09:27 -08:00
if ( tableToIndex | | d - > capped ) {
massert ( 12583 , " unexpected index insertion failure on capped collection " , ! d - > capped ) ;
2009-01-26 17:23:45 -05:00
string s = e . toString ( ) ;
s + = " : on addIndex/capped - collection and its index will not match " ;
uassert_nothrow ( s . c_str ( ) ) ;
2010-10-24 18:10:59 -04:00
error ( ) < < s < < endl ;
2009-01-26 17:23:45 -05:00
}
2011-01-04 00:40:41 -05:00
else {
2009-01-26 17:23:45 -05:00
// normal case -- we can roll back
_deleteRecord ( d , ns , r , loc ) ;
2009-02-12 15:03:38 -05:00
throw ;
2009-01-26 17:23:45 -05:00
}
}
}
2008-06-06 09:43:15 -04:00
2009-01-15 10:17:11 -05:00
return loc ;
2008-12-28 20:28:49 -05:00
}
2008-07-28 17:52:44 -04:00
2009-01-15 10:17:11 -05:00
/** Special version of insert for transaction logging -- streamlined a bit.
 *  Assumes ns is capped and has no indexes.
 *
 *  Allocates len bytes plus the record header, links the new record at the end
 *  of its extent's record list and updates the collection stats. The record
 *  data itself is not written here.
 *
 *  @param d must be the details for ns (asserted in DEV builds, and RARELY otherwise)
 *  @return the newly allocated record; allocation failure is asserted, not returned
 */
Record* DataFileMgr::fast_oplog_insert( NamespaceDetails *d, const char *ns, int len ) {
    assert( d );
    RARELY assert( d == nsdetails( ns ) );
    DEV assert( d == nsdetails( ns ) );

    DiskLoc extentLoc;
    const int lenWHdr = len + Record::HeaderSize;
    DiskLoc loc = d->alloc( ns, lenWHdr, extentLoc );
    assert( !loc.isNull() );

    Record *rec = loc.rec();
    assert( rec->lengthWithHeaders >= lenWHdr );

    Extent *extent = rec->myExtent( loc );
    if ( extent->lastRecord.isNull() ) {
        // no last record in the extent: the new record is both first and last
        Extent::FL *ends = getDur().writing( extent->fl() );
        ends->firstRecord = ends->lastRecord = loc;

        Record::NP *links = getDur().writing( rec->np() );
        links->nextOfs = links->prevOfs = DiskLoc::NullOfs;
    }
    else {
        // append after the current last record
        Record *previousLast = extent->lastRecord.rec();
        Record::NP *links = getDur().writing( rec->np() );
        links->prevOfs = extent->lastRecord.getOfs();
        links->nextOfs = DiskLoc::NullOfs;
        getDur().writingInt( previousLast->nextOfs ) = loc.getOfs();
        extent->lastRecord.writing() = loc;
    }

    /* todo: don't update for oplog? seems wasteful. */
    {
        NamespaceDetails::Stats *stats = getDur().writing( &d->stats );
        stats->datasize += rec->netLength();
        stats->nrecords++;
    }

    return rec;
}
2009-01-14 17:09:51 -05:00
} // namespace mongo
2008-07-24 16:07:18 -04:00
# include "clientcursor.h"
2009-01-14 17:09:51 -05:00
namespace mongo {
2011-01-04 00:40:41 -05:00
void dropAllDatabasesExceptLocal ( ) {
2010-07-22 13:13:50 -04:00
writelock lk ( " " ) ;
2010-07-16 12:51:01 -04:00
vector < string > n ;
getDatabaseNames ( n ) ;
if ( n . size ( ) = = 0 ) return ;
log ( ) < < " dropAllDatabasesExceptLocal " < < n . size ( ) < < endl ;
for ( vector < string > : : iterator i = n . begin ( ) ; i ! = n . end ( ) ; i + + ) {
2010-07-22 13:13:50 -04:00
if ( * i ! = " local " ) {
Client : : Context ctx ( * i ) ;
2010-07-16 12:51:01 -04:00
dropDatabase ( * i ) ;
}
}
}
2010-05-03 16:25:34 -04:00
/** Drop database db, which must be the current client's database.
 *
 *  Requires the write lock and no background operation in progress for the
 *  database. Syncs data and truncates the journal, closes the database and
 *  then deletes its data files.
 */
void dropDatabase( string db ) {
    log( 1 ) << " dropDatabase " << db << endl;

    Database *current = cc().database();
    assert( current );
    assert( current->name == db );

    BackgroundOperation::assertNoBgOpInProgForDb( current->name.c_str() );

    dbMutex.assertWriteLocked();

    // Not sure we need this here, so removed. If we do, we need to move it down
    // within other calls both (1) as they could be called from elsewhere and
    // (2) to keep the lock order right - groupcommitmutex must be locked before
    // mmmutex (if both are locked).
    //
    //  RWLockRecursive::Exclusive lk(MongoFile::mmmutex);

    getDur().syncDataAndTruncateJournal();

    Database::closeDatabase( current->name.c_str(), current->path );
    current = 0; // the Database object is now deleted

    _deleteDataFiles( db.c_str() );
}
2008-12-16 10:20:24 -05:00
2009-01-15 10:17:11 -05:00
typedef boost : : filesystem : : path Path ;
2008-12-16 10:20:24 -05:00
2010-06-21 18:01:02 -07:00
void boostRenameWrapper ( const Path & from , const Path & to ) {
try {
boost : : filesystem : : rename ( from , to ) ;
2011-01-04 00:40:41 -05:00
}
catch ( const boost : : filesystem : : filesystem_error & ) {
2010-06-21 18:01:02 -07:00
// boost rename doesn't work across partitions
boost : : filesystem : : copy_file ( from , to ) ;
boost : : filesystem : : remove ( from ) ;
}
}
2011-01-04 00:40:41 -05:00
2009-10-08 12:59:03 -04:00
// back up original database files to 'temp' dir
2009-01-15 10:17:11 -05:00
void _renameForBackup ( const char * database , const Path & reservedPath ) {
2010-01-26 14:40:06 -08:00
Path newPath ( reservedPath ) ;
if ( directoryperdb )
newPath / = database ;
2009-01-15 10:17:11 -05:00
class Renamer : public FileOp {
public :
2010-01-26 14:40:06 -08:00
Renamer ( const Path & newPath ) : newPath_ ( newPath ) { }
2009-01-15 10:17:11 -05:00
private :
2010-01-26 14:40:06 -08:00
const boost : : filesystem : : path & newPath_ ;
2009-01-15 10:17:11 -05:00
virtual bool apply ( const Path & p ) {
if ( ! boost : : filesystem : : exists ( p ) )
return false ;
2010-06-21 18:01:02 -07:00
boostRenameWrapper ( p , newPath_ / ( p . leaf ( ) + " .bak " ) ) ;
2009-01-15 10:17:11 -05:00
return true ;
}
virtual const char * op ( ) const {
return " renaming " ;
}
2010-01-26 14:40:06 -08:00
} renamer ( newPath ) ;
2009-05-27 17:23:47 -04:00
_applyOpToDataFiles ( database , renamer , true ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-16 10:20:24 -05:00
2009-10-08 12:59:03 -04:00
// move temp files to standard data dir
2009-01-15 10:17:11 -05:00
void _replaceWithRecovered ( const char * database , const char * reservedPathString ) {
2010-01-26 14:40:06 -08:00
Path newPath ( dbpath ) ;
if ( directoryperdb )
newPath / = database ;
class Replacer : public FileOp {
public :
Replacer ( const Path & newPath ) : newPath_ ( newPath ) { }
private :
const boost : : filesystem : : path & newPath_ ;
2009-01-15 10:17:11 -05:00
virtual bool apply ( const Path & p ) {
if ( ! boost : : filesystem : : exists ( p ) )
return false ;
2010-06-21 18:01:02 -07:00
boostRenameWrapper ( p , newPath_ / p . leaf ( ) ) ;
2009-01-15 10:17:11 -05:00
return true ;
}
virtual const char * op ( ) const {
return " renaming " ;
}
2010-01-26 14:40:06 -08:00
} replacer ( newPath ) ;
_applyOpToDataFiles ( database , replacer , true , reservedPathString ) ;
2009-01-15 10:17:11 -05:00
}
2008-12-16 10:20:24 -05:00
2009-10-08 12:59:03 -04:00
// generate a directory name for storing temp data files
2009-01-15 10:17:11 -05:00
Path uniqueReservedPath ( const char * prefix ) {
2010-01-25 11:32:51 -08:00
Path repairPath = Path ( repairpath ) ;
2009-01-15 10:17:11 -05:00
Path reservedPath ;
int i = 0 ;
bool exists = false ;
do {
stringstream ss ;
ss < < prefix < < " _repairDatabase_ " < < i + + ;
2010-01-25 11:32:51 -08:00
reservedPath = repairPath / ss . str ( ) ;
2009-01-15 10:17:11 -05:00
BOOST_CHECK_EXCEPTION ( exists = boost : : filesystem : : exists ( reservedPath ) ) ;
2011-01-04 00:40:41 -05:00
}
while ( exists ) ;
2009-01-15 10:17:11 -05:00
return reservedPath ;
}
/** @return the sum of boost::filesystem::file_size() over the data files of
 *          `database` visited by _applyOpToDataFiles()
 */
boost::intmax_t dbSize( const char *database ) {
    // FileOp that accumulates the size of every existing file it is applied to.
    class SizeAccumulator : public FileOp {
    public:
        SizeAccumulator() : bytes_( 0 ) {}
        boost::intmax_t size() const {
            return bytes_;
        }
    private:
        virtual bool apply( const boost::filesystem::path &p ) {
            if ( !boost::filesystem::exists( p ) )
                return false;
            bytes_ += boost::filesystem::file_size( p );
            return true;
        }
        virtual const char *op() const {
            return " checking size ";
        }
        boost::intmax_t bytes_;
    };

    SizeAccumulator accumulator;
    _applyOpToDataFiles( database, accumulator );
    return accumulator.size();
}
2008-12-29 11:56:13 -05:00
2010-05-03 16:25:34 -04:00
// Repair a database by cloning it from this server into a newly created
// directory under repairpath, then swapping the cloned files in for the
// originals.
//
//   dbNameS  - database (or namespace) name; it is passed through
//              nsToDatabase() before use.
//   errmsg   - set to a description of the problem whenever false is returned.
//   preserveClonedFilesOnFailure - when true, the reserved directory is left
//              on disk if the clone step fails.
//   backupOriginalFiles - when true, the original files are handed to
//              _renameForBackup() together with the reserved directory instead
//              of being deleted, and the reserved directory is not removed at
//              the end.
//
// Returns true when the whole sequence completed, false when there is not
// enough free space or the clone failed.
bool repairDatabase ( string dbNameS , string & errmsg ,
2009-01-15 10:17:11 -05:00
bool preserveClonedFilesOnFailure , bool backupOriginalFiles ) {
2010-05-07 12:19:00 -04:00
// doingRepair is declared elsewhere; presumably a scope marker that stays
// active for the duration of this call - TODO confirm.
doingRepair dr ;
2010-05-03 16:25:34 -04:00
dbNameS = nsToDatabase ( dbNameS ) ;
const char * dbName = dbNameS . c_str ( ) ;
2009-01-15 10:17:11 -05:00
// Address of this server on its configured port; used below as the clone source.
stringstream ss ;
2009-08-25 10:24:44 -04:00
ss < < " localhost: " < < cmdLine . port ;
2009-01-15 10:17:11 -05:00
string localhost = ss . str ( ) ;
2011-01-04 00:40:41 -05:00
2009-01-15 10:17:11 -05:00
problem ( ) < < " repairDatabase " < < dbName < < endl ;
2009-10-14 14:34:38 -04:00
// The calling client's current database must be the one being repaired and
// must live at the default dbpath.
assert ( cc ( ) . database ( ) - > name = = dbName ) ;
2010-08-26 10:42:07 -04:00
assert ( cc ( ) . database ( ) - > path = = dbpath ) ;
2009-01-15 10:17:11 -05:00
2010-01-22 15:17:03 -05:00
BackgroundOperation : : assertNoBgOpInProgForDb ( dbName ) ;
2011-01-12 15:54:31 -05:00
getDur ( ) . syncDataAndTruncateJournal ( ) ; // Must be done before and after repair
2011-01-10 18:50:17 -05:00
2009-01-15 10:17:11 -05:00
// Refuse to start when the repair path reports less free space than the
// database currently occupies. A freeSize of -1 or lower skips the check
// (presumably "unknown" - TODO confirm against File::freeSpace).
boost : : intmax_t totalSize = dbSize ( dbName ) ;
2011-04-14 15:49:45 -04:00
boost : : intmax_t freeSize = File : : freeSpace ( repairpath ) ;
2009-01-15 10:17:11 -05:00
if ( freeSize > - 1 & & freeSize < totalSize ) {
stringstream ss ;
ss < < " Cannot repair database " < < dbName < < " having size: " < < totalSize
2009-10-08 12:59:03 -04:00
< < " (bytes) because free disk space is: " < < freeSize < < " (bytes) " ;
2009-01-15 10:17:11 -05:00
errmsg = ss . str ( ) ;
problem ( ) < < errmsg < < endl ;
return false ;
}
2008-12-28 20:28:49 -05:00
2009-01-15 10:17:11 -05:00
// The reserved directory gets the "backup" prefix when its contents may
// outlive this call (either flag set), and the "$tmp" prefix otherwise.
Path reservedPath =
uniqueReservedPath ( ( preserveClonedFilesOnFailure | | backupOriginalFiles ) ?
2010-01-26 14:40:06 -08:00
" backup " : " $tmp " ) ;
2009-01-15 10:17:11 -05:00
BOOST_CHECK_EXCEPTION ( boost : : filesystem : : create_directory ( reservedPath ) ) ;
string reservedPathString = reservedPath . native_directory_string ( ) ;
2011-01-04 00:40:41 -05:00
2010-01-29 17:22:34 -05:00
bool res ;
2011-01-04 00:40:41 -05:00
// Inner scope: the Context bound to the reserved path is destroyed before
// the result of the clone is examined.
{
// clone to temp location, which effectively does repair
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( dbName , reservedPathString ) ;
// The database must not already have existed at the reserved path.
assert ( ctx . justCreated ( ) ) ;
2011-01-04 00:40:41 -05:00
res = cloneFrom ( localhost . c_str ( ) , errmsg , dbName ,
2011-04-01 18:33:11 -07:00
/*logForReplication=*/ false , /*slaveok*/ false , /*replauth*/ false ,
/*snapshot*/ false , /*mayYield*/ false , /*mayBeInterrupted*/ true ) ;
2010-08-26 10:42:07 -04:00
Database : : closeDatabase ( dbName , reservedPathString . c_str ( ) ) ;
2010-01-29 17:22:34 -05:00
}
2009-01-15 10:17:11 -05:00
// Clone failed: report, optionally discard the partial clone, and leave the
// original files untouched.
if ( ! res ) {
2011-03-27 02:33:10 -04:00
errmsg = str : : stream ( ) < < " clone failed for " < < dbName < < " with error: " < < errmsg ;
problem ( ) < < errmsg < < endl ;
2009-01-15 10:17:11 -05:00
if ( ! preserveClonedFilesOnFailure )
BOOST_CHECK_EXCEPTION ( boost : : filesystem : : remove_all ( reservedPath ) ) ;
2011-01-12 15:54:31 -05:00
getDur ( ) . syncDataAndTruncateJournal ( ) ; // Must be done before and after repair
2009-01-15 10:17:11 -05:00
return false ;
}
2008-12-28 20:28:49 -05:00
2011-01-10 18:50:17 -05:00
// Clone succeeded: flush mapped files and close the original database
// before its files are renamed or deleted.
MongoFile : : flushAll ( true ) ;
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( dbName ) ;
2010-08-26 10:42:07 -04:00
Database : : closeDatabase ( dbName , dbpath ) ;
2008-12-28 20:28:49 -05:00
2010-01-26 14:40:06 -08:00
// Get the original files out of the way: either hand them to
// _renameForBackup() with the reserved directory, or delete them and
// recreate <dbpath>/<dbName>.
if ( backupOriginalFiles ) {
2009-01-15 10:17:11 -05:00
_renameForBackup ( dbName , reservedPath ) ;
2011-01-04 00:40:41 -05:00
}
else {
2009-01-15 10:17:11 -05:00
_deleteDataFiles ( dbName ) ;
2010-01-26 14:40:06 -08:00
BOOST_CHECK_EXCEPTION ( boost : : filesystem : : create_directory ( Path ( dbpath ) / dbName ) ) ;
}
2008-12-28 20:28:49 -05:00
2009-01-15 10:17:11 -05:00
// NOTE(review): _replaceWithRecovered is defined outside this view;
// presumably it moves the cloned files from the reserved directory into place.
_replaceWithRecovered ( dbName , reservedPathString . c_str ( ) ) ;
2008-12-28 20:28:49 -05:00
2009-01-15 10:17:11 -05:00
// Without a backup there is nothing left worth keeping in the reserved directory.
if ( ! backupOriginalFiles )
BOOST_CHECK_EXCEPTION ( boost : : filesystem : : remove_all ( reservedPath ) ) ;
2008-12-16 10:20:24 -05:00
2011-01-12 15:54:31 -05:00
getDur ( ) . syncDataAndTruncateJournal ( ) ; // Must be done before and after repair
2009-01-15 10:17:11 -05:00
return true ;
}
2009-01-14 17:09:51 -05:00
2009-08-11 14:29:03 -04:00
void _applyOpToDataFiles ( const char * database , FileOp & fo , bool afterAllocator , const string & path ) {
2009-05-27 17:23:47 -04:00
if ( afterAllocator )
2011-01-09 01:45:11 -05:00
FileAllocator : : get ( ) - > waitUntilFinished ( ) ;
2009-05-27 17:23:47 -04:00
string c = database ;
c + = ' . ' ;
boost : : filesystem : : path p ( path ) ;
2010-01-26 14:40:06 -08:00
if ( directoryperdb )
p / = database ;
2009-05-27 17:23:47 -04:00
boost : : filesystem : : path q ;
q = p / ( c + " ns " ) ;
bool ok = false ;
BOOST_CHECK_EXCEPTION ( ok = fo . apply ( q ) ) ;
if ( ok )
2011-04-15 17:42:22 -04:00
log ( 2 ) < < fo . op ( ) < < " file " < < q . string ( ) < < endl ;
2009-05-27 17:23:47 -04:00
int i = 0 ;
int extra = 10 ; // should not be necessary, this is defensive in case there are missing files
while ( 1 ) {
assert ( i < = DiskLoc : : MaxFiles ) ;
stringstream ss ;
ss < < c < < i ;
q = p / ss . str ( ) ;
BOOST_CHECK_EXCEPTION ( ok = fo . apply ( q ) ) ;
if ( ok ) {
2011-01-04 00:40:41 -05:00
if ( extra ! = 10 ) {
2010-12-30 23:59:01 -05:00
log ( 1 ) < < fo . op ( ) < < " file " < < q . string ( ) < < endl ;
2009-05-27 17:23:47 -04:00
log ( ) < < " _applyOpToDataFiles() warning: extra == " < < extra < < endl ;
}
}
else if ( - - extra < = 0 )
break ;
i + + ;
}
}
2009-09-24 14:21:40 -04:00
// Plain forwarding wrapper: hands ns to nsdetails() and returns its result
// unchanged. Presumably provided so callers can reach nsdetails() without
// pulling in its inline definition - TODO confirm.
NamespaceDetails* nsdetails_notinline( const char *ns ) {
    NamespaceDetails *details = nsdetails( ns );
    return details;
}
2011-01-04 00:40:41 -05:00
bool DatabaseHolder : : closeAll ( const string & path , BSONObjBuilder & result , bool force ) {
2010-02-04 10:52:59 -05:00
log ( ) < < " DatabaseHolder::closeAll path: " < < path < < endl ;
2010-01-02 01:25:53 -05:00
dbMutex . assertWriteLocked ( ) ;
2011-01-04 00:40:41 -05:00
2010-01-02 01:25:53 -05:00
map < string , Database * > & m = _paths [ path ] ;
_size - = m . size ( ) ;
2011-01-04 00:40:41 -05:00
2010-01-02 01:25:53 -05:00
set < string > dbs ;
for ( map < string , Database * > : : iterator i = m . begin ( ) ; i ! = m . end ( ) ; i + + ) {
2010-08-26 10:42:07 -04:00
wassert ( i - > second - > path = = path ) ;
2010-01-02 01:25:53 -05:00
dbs . insert ( i - > first ) ;
}
2011-01-04 00:40:41 -05:00
2010-06-21 13:17:05 -04:00
currentClient . get ( ) - > getContext ( ) - > clear ( ) ;
2010-01-02 01:25:53 -05:00
BSONObjBuilder bb ( result . subarrayStart ( " dbs " ) ) ;
int n = 0 ;
2010-02-10 15:34:41 -05:00
int nNotClosed = 0 ;
2010-01-02 01:25:53 -05:00
for ( set < string > : : iterator i = dbs . begin ( ) ; i ! = dbs . end ( ) ; + + i ) {
string name = * i ;
log ( 2 ) < < " DatabaseHolder::closeAll path: " < < path < < " name: " < < name < < endl ;
2010-01-29 17:22:34 -05:00
Client : : Context ctx ( name , path ) ;
2010-02-10 15:34:41 -05:00
if ( ! force & & BackgroundOperation : : inProgForDb ( name . c_str ( ) ) ) {
log ( ) < < " WARNING: can't close database " < < name < < " because a bg job is in progress - try killOp command " < < endl ;
nNotClosed + + ;
}
else {
2010-08-26 10:42:07 -04:00
Database : : closeDatabase ( name . c_str ( ) , path ) ;
2010-07-20 12:07:14 -04:00
bb . append ( bb . numStr ( n + + ) , name ) ;
2010-02-10 15:34:41 -05:00
}
2010-01-02 01:25:53 -05:00
}
bb . done ( ) ;
2010-02-10 15:34:41 -05:00
if ( nNotClosed )
result . append ( " nNotClosed " , nNotClosed ) ;
2010-08-23 11:12:35 -04:00
else {
ClientCursor : : assertNoCursors ( ) ;
}
2010-01-02 01:25:53 -05:00
return true ;
}
2010-12-30 23:59:01 -05:00
2009-01-14 17:09:51 -05:00
} // namespace mongo