From d5bf9b19d9cfec9c4c66e51a9d533e0adb84e347 Mon Sep 17 00:00:00 2001 From: Dwight Date: Tue, 1 Jun 2010 15:17:38 -0400 Subject: [PATCH 01/29] rs --- db/db.vcxproj.filters | 118 +++++++++++++++++------------------ db/oplog.cpp | 11 ++-- db/repl/heartbeat.cpp | 12 ++-- db/repl/replset_commands.cpp | 39 ++---------- db/repl/rs.h | 15 ++++- db/repl/rs_initiate.cpp | 9 +-- 6 files changed, 89 insertions(+), 115 deletions(-) diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters index a6d4e2254c6..1273553ab6f 100755 --- a/db/db.vcxproj.filters +++ b/db/db.vcxproj.filters @@ -188,7 +188,7 @@ db\core - util\util + util\core db\core @@ -212,7 +212,7 @@ db\core - util\util + util\core db\core @@ -224,22 +224,22 @@ db\core - util\util + util\core - util\util + util\core - util\util + util\core db\modules - util\util + util\core - util\util + util\core db\core @@ -257,7 +257,7 @@ db\core - util\util + util\core db\core @@ -287,46 +287,46 @@ db\sharding - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core util\concurrency - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core db @@ -493,25 +493,25 @@ db\core - util\util + util\core - util\util + util\core db\storage engine - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core db\core @@ -526,16 +526,16 @@ db\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core db\core @@ -553,10 +553,10 @@ db\core - util\util + util\core - util\util + util\core db\core @@ -565,19 +565,19 @@ db\core - util\util + util\core - util\util + util\core - util\util + util\core db\core - util\util + util\core client @@ -586,7 +586,7 @@ db\core - util\util + util\core bson @@ -631,31 +631,31 @@ repl_old - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core db\core @@ -664,31 +664,31 @@ util\concurrency - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core - util\util + util\core db @@ -746,12 +746,12 @@ {d7f08f93-36bf-49cd-9e1c-ba1fec3234ce} - - {9775f24c-3a29-4e0d-b5de-991c592cf376} - {e899caa1-9a90-4604-ac2e-68d5ca12425c} + + {9775f24c-3a29-4e0d-b5de-991c592cf376} + diff --git a/db/oplog.cpp b/db/oplog.cpp index 86aeb1b5afe..6a88c876c99 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -44,17 +44,14 @@ namespace mongo { static void _logOpRS(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { DEV assertInWriteLock(); DEV assert( theReplSet ); - static BufBuilder bufbuilder(32*1024); + static BufBuilder bufbuilder(8*1024); if ( strncmp(ns, "local.", 6) == 0 ){ - if ( strncmp(ns, "local.slaves", 12) == 0 ){ + if ( strncmp(ns, "local.slaves", 12) == 0 ) resetSlaveCache(); - } return; } - //Client::Context context; - /* we jump through a bunch of hoops here to avoid copying the obj buffer twice -- instead we do a single copy to the destination position in the memory mapped file. */ @@ -102,7 +99,7 @@ namespace mongo { if ( logLevel >= 6 ) { BSONObj temp(r); - log( 6 ) << "logging op:" << temp << endl; + log( 6 ) << "logOp:" << temp << endl; } } @@ -128,7 +125,7 @@ namespace mongo { */ static void _logOpOld(const char *opstr, const char *ns, const char *logNS, const BSONObj& obj, BSONObj *o2, bool *bb ) { DEV assertInWriteLock(); - static BufBuilder bufbuilder(32*1024); + static BufBuilder bufbuilder(8*1024); if ( strncmp(ns, "local.", 6) == 0 ){ if ( strncmp(ns, "local.slaves", 12) == 0 ){ diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 8c375081c1e..6187db25e77 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -36,17 +36,15 @@ namespace mongo { using namespace bson; /* { replSetHeartbeat : } */ - class CmdReplSetHeartbeat : public Command { + class CmdReplSetHeartbeat : public ReplSetCommand { public: - virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } - virtual bool logTheOp() { return false; } - virtual LockType locktype() const { return NONE; } - virtual void help( stringstream &help ) const { help<<"internal"; } - CmdReplSetHeartbeat() : Command("replSetHeartbeat") { } + CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + /* we don't call ReplSetCommand::check() here because heartbeat + checks many things that are pre-initialization. */ if( !replSet ) { - errmsg = "not a replset member"; + errmsg = "not running with --replSet"; return false; } if( cmdObj["pv"].Int() != 1 ) { diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp index d902c3d45da..0c2cd6b65d6 100644 --- a/db/repl/replset_commands.cpp +++ b/db/repl/replset_commands.cpp @@ -28,61 +28,34 @@ namespace mongo { replSetInitiate - rs_mod.cpp */ - class CmdReplSetGetStatus : public Command { + class CmdReplSetGetStatus : public ReplSetCommand { public: - virtual bool slaveOk() const { return true; } - virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { return false; } - virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "Report status of a replica set from the POV of this server\n"; help << "{ replSetGetStatus : 1 }"; help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } - - CmdReplSetGetStatus() : Command("replSetGetStatus", true) { } + CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( !replSet ) { - errmsg = "not running with --replSet"; + if( !check(errmsg, result) ) return false; - } - if( theReplSet == 0 ) { - result.append("startupStatus", ReplSet::startupStatus); - errmsg = ReplSet::startupStatusMsg.empty() ? "replset unknown error 1" : ReplSet::startupStatusMsg; - return false; - } - theReplSet->summarizeStatus(result); - return true; } } cmdReplSetGetStatus; - class CmdReplSetFreeze : public Command { + class CmdReplSetFreeze : public ReplSetCommand { public: - virtual bool slaveOk() const { return true; } - virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { return false; } - virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "Enable / disable failover for the set - locks current primary as primary even if issues occur.\nFor use during system maintenance.\n"; help << "{ replSetFreeze : }"; help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } - CmdReplSetFreeze() : Command("replSetFreeze", true) { } + CmdReplSetFreeze() : ReplSetCommand("replSetFreeze", true) { } virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( !replSet ) { - errmsg = "not running with --replSet"; + if( !check(errmsg, result) ) return false; - } - if( theReplSet == 0 ) { - result.append("startupStatus", ReplSet::startupStatus); - errmsg = ReplSet::startupStatusMsg.empty() ? - errmsg = "replset unknown error 1" : ReplSet::startupStatusMsg; - return false; - } - errmsg = "not yet implemented"; /*TODO*/ return false; } diff --git a/db/repl/rs.h b/db/repl/rs.h index 0e0e96eea41..1c950e226fb 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -45,8 +45,14 @@ namespace mongo { class lock : scoped_lock { RSBase& _b; public: - lock(RSBase* b) : scoped_lock(b->m), _b(*b) { b->_locked++; } - ~lock() { _b._locked--; } + lock(RSBase* b) : scoped_lock(b->m), _b(*b) { + DEV assert(b->_locked == 0); + b->_locked++; + } + ~lock() { + DEV assert(_b._locked == 1); + _b._locked--; + } }; bool locked() const { return _locked; } }; @@ -195,9 +201,12 @@ namespace mongo { return false; } + /** base class for repl set commands. checks basic things such as in rs mode before the command + does its real work + */ class ReplSetCommand : public Command { protected: - ReplSetCommand(const char * s) : Command(s) { } + ReplSetCommand(const char * s, bool show=false) : Command(s) { } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } virtual bool logTheOp() { return false; } diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp index 5a619881f5e..2baedea1b97 100644 --- a/db/repl/rs_initiate.cpp +++ b/db/repl/rs_initiate.cpp @@ -67,13 +67,10 @@ namespace mongo { } } - class CmdReplSetInitiate : public Command { + class CmdReplSetInitiate : public ReplSetCommand { public: virtual LockType locktype() const { return WRITE; } - virtual bool slaveOk() const { return true; } - virtual bool adminOnly() const { return true; } - virtual bool logTheOp() { return false; } - CmdReplSetInitiate() : Command("replSetInitiate") { } + CmdReplSetInitiate() : ReplSetCommand("replSetInitiate") { } virtual void help(stringstream& h) const { h << "Initiate/christen a replica set."; h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; @@ -81,7 +78,7 @@ namespace mongo { virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetInitiate admin command received from client" << rsLog; - { + if( 0 ) { // just make sure we can get a write lock before doing anything else. we'll reacquire one // later. of course it could be stuck then, but this check lowers the risk if weird things // are up. From fd70ada38016311d609791575285a54d7fbbee61 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 29 May 2010 00:38:12 +0800 Subject: [PATCH 02/29] Ignoring cache files created by ReSharper, a popular and awesome Visual Studio Plugin. http://www.jetbrains.com/resharper/index.html --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ec40a731bf9..d43a15dfd87 100644 --- a/.gitignore +++ b/.gitignore @@ -42,6 +42,7 @@ shell/mongo-server.cpp */*/Release/ db/.gdb* db/makefile.local +db/_ReSharper.db config.log settings.py buildinfo.cpp From 7fb0763b38c93c79cbc9a080f850e558e16a3772 Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 29 May 2010 04:19:03 +0800 Subject: [PATCH 03/29] Added Windows Service Control Manager section. Windows SCM related arguments go in there own section. --- db/db.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/db/db.cpp b/db/db.cpp index a4e5a92bde8..0ccefd2fa5a 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -593,6 +593,9 @@ int main(int argc, char* argv[], char *envp[] ) getcurns = ourgetns; po::options_description general_options("General options"); + #if defined(_WIN32) + po::options_description windows_scm_options("Windows Service Control Manager options"); + #endif po::options_description replication_options("Replication options"); po::options_description sharding_options("Sharding options"); po::options_description visible_options("Allowed options"); @@ -635,18 +638,21 @@ int main(int argc, char* argv[], char *envp[] ) ("profile",po::value(), "0=off 1=slow, 2=all") ("slowms",po::value(&cmdLine.slowMS)->default_value(100), "value of slow for profile and console log" ) ("maxConns",po::value(), "max number of simultaneous connections") -#if defined(_WIN32) - ("install", "install mongodb service") + #if !defined(_WIN32) + ("nounixsocket", "disable listening on unix sockets") + #endif + ("ipv6", "enable IPv6 support (disabled by default)") + ; + #if defined(_WIN32) + windows_scm_options.add_options() + ("install", "install mongodb service") ("remove", "remove mongodb service") ("service", "start mongodb service") ("serviceName", po::value(), "windows service name") -#else - ("nounixsocket", "disable listening on unix sockets") -#endif - ("ipv6", "enable IPv6 support (disabled by default)") - ; + ; + #endif - replication_options.add_options() + replication_options.add_options() ("master", "master mode") ("slave", "slave mode") ("source", po::value(), "when slave: specify master as ") @@ -674,6 +680,9 @@ int main(int argc, char* argv[], char *envp[] ) positional_options.add("command", 3); visible_options.add(general_options); + #if defined(_WIN32) + visible_options.add(windows_scm_options); + #endif visible_options.add(replication_options); visible_options.add(sharding_options); Module::addOptions( visible_options ); From 285568137248721603b4afe9030eba64a5b2af56 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Tue, 1 Jun 2010 15:38:40 -0400 Subject: [PATCH 04/29] debugging for SERVER-1179 --- db/queryoptimizer.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index 0f4b209c0c9..ecdc2e5f255 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -455,7 +455,20 @@ namespace mongo { QueryPlanSet::PlanPtr QueryPlanSet::getBestGuess() const { assert( plans_.size() ); - massert( 13284, "best guess plan requested, but scan and order required", !plans_[ 0 ]->scanAndOrderRequired() ); + if ( plans_[ 0 ]->scanAndOrderRequired() ){ + for ( unsigned i=1; iscanAndOrderRequired() ) + return plans_[i]; + } + + stringstream ss; + ss << "best guess plan requested, but scan and order required:"; + ss << " query: " << query_; + ss << " order: " << order_; + + string s = ss.str(); + msgassertedNoTrace( 13284, s.c_str() ); + } return plans_[0]; } From 741d9f7a19b569beefd6156517df2cc13eed92c9 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Tue, 1 Jun 2010 15:48:02 -0400 Subject: [PATCH 05/29] more debugging for SERVER-1179 --- db/queryoptimizer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index ecdc2e5f255..5740080af7c 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -465,7 +465,11 @@ namespace mongo { ss << "best guess plan requested, but scan and order required:"; ss << " query: " << query_; ss << " order: " << order_; - + ss << " choices: "; + for ( unsigned i=0; iindexKey() << " "; + } + string s = ss.str(); msgassertedNoTrace( 13284, s.c_str() ); } From 7d76c5e6b346fcb354ca41b3a882577e19d32d55 Mon Sep 17 00:00:00 2001 From: Richard Kreuter Date: Tue, 1 Jun 2010 16:08:47 -0400 Subject: [PATCH 06/29] Aesthetic change to old RedHat sysv init script. --- rpm/init.d-mongod | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/rpm/init.d-mongod b/rpm/init.d-mongod index 12068c802b3..5ee837967f8 100644 --- a/rpm/init.d-mongod +++ b/rpm/init.d-mongod @@ -29,7 +29,8 @@ start() echo -n $"Starting mongod: " daemon --user "$MONGO_USER" $mongod $OPTIONS RETVAL=$? - [ $RETVAL -eq 0 ] && touch /var/lock/subsys/mongod && success + echo + [ $RETVAL -eq 0 ] && touch /var/lock/subsys/mongod } stop() @@ -37,7 +38,8 @@ stop() echo -n $"Stopping mongod: " killproc -p /var/lib/mongo/mongod.lock -t30 -TERM /usr/bin/mongod RETVAL=$? - [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod && success + echo + [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod } restart () { From dd63f76c38f28dd8799350478367beb10a088501 Mon Sep 17 00:00:00 2001 From: Dwight Date: Tue, 1 Jun 2010 16:25:47 -0400 Subject: [PATCH 07/29] rs --- db/repl/consensus.cpp | 48 +++++++---- db/repl/health.cpp | 10 +-- db/repl/heartbeat.cpp | 8 +- db/repl/manager.cpp | 12 +-- db/repl/rs.cpp | 29 ++++--- db/repl/rs.h | 180 +++++++++++++++++++++++------------------- 6 files changed, 164 insertions(+), 123 deletions(-) diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index 137ab85daa1..d8e4cc5caf5 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -40,14 +40,14 @@ namespace mongo { virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; - //task::lam f = boost::bind(&ReplSet::Consensus::electCmdReceived, &theReplSet->elect, cmdObj, &result); + //task::lam f = boost::bind(&Consensus::electCmdReceived, &theReplSet->elect, cmdObj, &result); //theReplSet->mgr->call(f); theReplSet->elect.electCmdReceived(cmdObj, &result); return true; } } cmdReplSetElect; - int ReplSet::Consensus::totalVotes() const { + int Consensus::totalVotes() const { static int complain = 0; int vTot = rs._self->config().votes; for( Member *m = rs.head(); m; m=m->next() ) @@ -57,7 +57,7 @@ namespace mongo { return vTot; } - bool ReplSet::Consensus::aMajoritySeemsToBeUp() const { + bool Consensus::aMajoritySeemsToBeUp() const { int vUp = rs._self->config().votes; for( Member *m = rs.head(); m; m=m->next() ) vUp += m->hbinfo().up() ? m->config().votes : 0; @@ -71,21 +71,24 @@ namespace mongo { const time_t LeaseTime = 30; - unsigned ReplSet::Consensus::yea(unsigned memberId) /* throws VoteException */ { + unsigned Consensus::yea(unsigned memberId) /* throws VoteException */ { Atomic::tran t(ly); LastYea &ly = t.ref(); time_t now = time(0); - if( ly.when + LeaseTime >= now ) + if( ly.when + LeaseTime >= now && ly.who != memberId ) { + log() << "replSet TEMP not voting yea for " << memberId << rsLog; + log() << "replSet TEMP voted for " << ly.who << ' ' << now-ly.when << " secs ago" << rsLog; throw VoteException(); + } ly.when = now; ly.who = memberId; return rs._self->config().votes; } /* todo: threading **************** !!!!!!!!!!!!!!!! */ - void ReplSet::Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { + void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { BSONObjBuilder& b = *_b; - log() << "replSet TEMP ELECT " << cmd.toString() << rsLog; + log() << "replSet TEMP RECEIVED ELECT MSG " << cmd.toString() << rsLog; string set = cmd["set"].String(); unsigned whoid = cmd["whoid"].Int(); int cfgver = cmd["cfgver"].Int(); @@ -109,7 +112,7 @@ namespace mongo { try { vote = yea(whoid); rs.relinquish(); - log() << "replSet info voting yea" << rsLog; + log() << "replSet info voting yea for " << whoid << rsLog; } catch(VoteException&) { log() << "replSet voting no already voted for another" << rsLog; @@ -120,13 +123,14 @@ namespace mongo { b.append("round", round); } - void ReplSet::getTargets(list& L) { + void ReplSetImpl::getTargets(list& L) { for( Member *m = head(); m; m=m->next() ) if( m->hbinfo().up() ) L.push_back( Target(m->fullName()) ); } - bool ReplSet::Consensus::weAreFreshest() { + /* allUp only meaningful when true returned! */ + bool Consensus::weAreFreshest(bool& allUp) { BSONObj cmd = BSON( "replSetFresh" << 1 << "set" << rs.name() << @@ -136,25 +140,36 @@ namespace mongo { rs.getTargets(L); multiCommand(cmd, L); int nok = 0; + allUp = true; for( list::iterator i = L.begin(); i != L.end(); i++ ) { if( i->ok ) { nok++; if( i->result["fresher"].trueValue() ) return false; } + else { + log() << "replSet TEMP freshest returns " << i->result.toString() << rsLog; + allUp = false; + } } - log() << "replSet temp we are freshest, nok:" << nok << rsLog; + log() << "replSet TEMP we are freshest of up nodes, nok:" << nok << rsLog; return true; } - void ReplSet::Consensus::_electSelf() { - if( !weAreFreshest() ) { - log() << "replSet info not going to elect self, we are not freshest" << rsLog; + extern time_t started; + + void Consensus::_electSelf() { + bool allUp; + if( !weAreFreshest(allUp) ) { + log() << "replSet info not electing self, we are not freshest" << rsLog; return; } + if( !allUp && time(0) - started < 60 * 5 ) { + log() << "replSet info not electing self, not all members up and we have been up less than 5 minutes" << rsLog; + } time_t start = time(0); - ReplSet::Member& me = *rs._self; + Member& me = *rs._self; int tally = yea( me.id() ); log() << "replSet info electSelf" << rsLog; @@ -185,6 +200,7 @@ namespace mongo { log() << "replSet too much time passed during election, ignoring result" << rsLog; } /* succeeded. */ + log() << "replSet election succeeded assuming primary role" << rsLog; rs.assumePrimary(); return; } @@ -193,7 +209,7 @@ namespace mongo { } } - void ReplSet::Consensus::electSelf() { + void Consensus::electSelf() { try { _electSelf(); } diff --git a/db/repl/health.cpp b/db/repl/health.cpp index 36fce22f603..836be3a19c8 100644 --- a/db/repl/health.cpp +++ b/db/repl/health.cpp @@ -62,7 +62,7 @@ namespace mongo { return s.str(); } - void ReplSet::Member::summarizeAsHtml(stringstream& s) const { + void Member::summarizeAsHtml(stringstream& s) const { s << tr(); { stringstream u; @@ -86,7 +86,7 @@ namespace mongo { s << _tr(); } - string ReplSet::stateAsHtml(MemberState s) { + string ReplSetImpl::stateAsHtml(MemberState s) { if( s == STARTUP ) return a("", "serving still starting up, or still trying to initiate the set", "STARTUP"); if( s == PRIMARY ) return a("", "this server thinks it is primary", "PRIMARY"); if( s == SECONDARY ) return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY"); @@ -96,7 +96,7 @@ namespace mongo { return ""; } - string ReplSet::stateAsStr(MemberState s) { + string ReplSetImpl::stateAsStr(MemberState s) { if( s == STARTUP ) return "STARTUP"; if( s == PRIMARY ) return "PRIMARY"; if( s == SECONDARY ) return "SECONDARY"; @@ -108,7 +108,7 @@ namespace mongo { extern time_t started; - void ReplSet::summarizeAsHtml(stringstream& s) const { + void ReplSetImpl::_summarizeAsHtml(stringstream& s) const { s << table(0, false); s << tr("Set name:", _name); s << tr("Majority up:", elect.aMajoritySeemsToBeUp()?"yes":"no" ); @@ -223,7 +223,7 @@ namespace mongo { s << "\n"; } - void ReplSet::summarizeStatus(BSONObjBuilder& b) const { + void ReplSetImpl::_summarizeStatus(BSONObjBuilder& b) const { Member *m =_members.head(); vector v; diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 6187db25e77..5972ae4988a 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -120,7 +120,7 @@ namespace mongo { if( cfg.ok() ) { // received a new config boost::function f = - boost::bind(&ReplSet::Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy()); + boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy()); theReplSet->mgr->send(f); } } @@ -138,7 +138,7 @@ namespace mongo { time_t now = time(0); if( mem.changed(old) || now-last>4 ) { last = now; - theReplSet->mgr->send( boost::bind(&ReplSet::Manager::msgCheckNewState, theReplSet->mgr) ); + theReplSet->mgr->send( boost::bind(&Manager::msgCheckNewState, theReplSet->mgr) ); } } @@ -157,7 +157,7 @@ namespace mongo { note ReplSet object is only created once we get a config - so this won't run until the initiation. */ - void ReplSet::startThreads() { + void ReplSetImpl::startThreads() { task::fork(mgr->taskPtr()); Member* m = _members.head(); @@ -167,7 +167,7 @@ namespace mongo { m = m->next(); } - mgr->send( boost::bind(&ReplSet::Manager::msgCheckNewState, theReplSet->mgr) ); + mgr->send( boost::bind(&Manager::msgCheckNewState, theReplSet->mgr) ); } } diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp index 0035f050b7f..8372f718761 100644 --- a/db/repl/manager.cpp +++ b/db/repl/manager.cpp @@ -28,7 +28,7 @@ namespace mongo { }; /* check members OTHER THAN US to see if they think they are primary */ - const ReplSet::Member * ReplSet::Manager::findOtherPrimary() { + const Member * Manager::findOtherPrimary() { Member *m = rs->head(); Member *p = 0; while( m ) { @@ -43,19 +43,19 @@ namespace mongo { return p; } - ReplSet::Manager::Manager(ReplSet *_rs) : - task::Server("ReplSet::Manager"), rs(_rs), _primary(NOPRIMARY) + Manager::Manager(ReplSetImpl *_rs) : + task::Server("Manager"), rs(_rs), _primary(NOPRIMARY) { } - - void ReplSet::Manager::noteARemoteIsPrimary(const Member *m) { + + void Manager::noteARemoteIsPrimary(const Member *m) { rs->_currentPrimary = m; rs->_self->lhb() = ""; rs->_myState = RECOVERING; } /** called as the health threads get new results */ - void ReplSet::Manager::msgCheckNewState() { + void Manager::msgCheckNewState() { const Member *p = rs->currentPrimary(); const Member *p2; try { p2 = findOtherPrimary(); } diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index c2b7c04fe8e..15e5e0a9bfe 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -26,14 +26,14 @@ namespace mongo { ReplSet *theReplSet = 0; RSOpTime rsOpTime; - void ReplSet::assumePrimary() { + void ReplSetImpl::assumePrimary() { writelock lk("admin."); // so we are synchronized with _logOp() _myState = PRIMARY; _currentPrimary = _self; log() << "replSet self is now primary" << rsLog; } - void ReplSet::relinquish() { + void ReplSetImpl::relinquish() { if( state() == PRIMARY ) { _myState = RECOVERING; log() << "replSet info relinquished primary state" << rsLog; @@ -42,7 +42,7 @@ namespace mongo { _myState = RECOVERING; } - void ReplSet::msgUpdateHBInfo(HeartbeatInfo h) { + void ReplSetImpl::msgUpdateHBInfo(HeartbeatInfo h) { for( Member *m = _members.head(); m; m=m->next() ) { if( m->id() == h.id() ) { m->_hbinfo = h; @@ -51,7 +51,7 @@ namespace mongo { } } - list ReplSet::memberHostnames() const { + list ReplSetImpl::memberHostnames() const { list L; L.push_back(_self->h()); for( Member *m = _members.head(); m; m = m->next() ) @@ -59,7 +59,7 @@ namespace mongo { return L; } - void ReplSet::fillIsMaster(BSONObjBuilder& b) { + void ReplSetImpl::_fillIsMaster(BSONObjBuilder& b) { b.append("ismaster", 0); b.append("ok", false); b.append("msg", "not yet implemented"); @@ -80,7 +80,7 @@ namespace mongo { } */ /** @param cfgString /, */ - ReplSet::ReplSet(string cfgString) : elect(this), + ReplSetImpl::ReplSetImpl(string cfgString) : elect(this), _self(0), mgr( new Manager(this) ) { @@ -140,7 +140,7 @@ namespace mongo { ReplSet::StartupStatus ReplSet::startupStatus = PRESTART; string ReplSet::startupStatusMsg; - void ReplSet::initFromConfig(ReplSetConfig& c) { //, bool save) { + void ReplSetImpl::initFromConfig(ReplSetConfig& c) { //, bool save) { _cfg = new ReplSetConfig(c); assert( _cfg->ok() ); assert( _name.empty() || _name == _cfg->_id ); @@ -168,7 +168,7 @@ namespace mongo { } // Our own config must be the first one. - void ReplSet::_loadConfigFinish(vector& cfgs) { + void ReplSetImpl::_loadConfigFinish(vector& cfgs) { int v = -1; ReplSetConfig *highest = 0; int myVersion = -2000; @@ -190,7 +190,7 @@ namespace mongo { } } - void ReplSet::loadConfig() { + void ReplSetImpl::loadConfig() { while( 1 ) { startupStatus = LOADINGCONFIG; startupStatusMsg = "loading " + rsConfigNs + " config (LOADINGCONFIG)"; @@ -234,7 +234,7 @@ namespace mongo { startupStatusMsg = "replSet error loading set config (BADCONFIG)"; log() << "replSet error loading configurations " << e.toString() << rsLog; log() << "replSet replication will not start" << rsLog; - fatal(); + _fatal(); throw; } break; @@ -243,9 +243,16 @@ namespace mongo { startupStatus = STARTED; } + void ReplSetImpl::_fatal() + { + lock l(this); + _myState = FATAL; + log() << "replSet error fatal error, stopping replication" << rsLog; + } + /* forked as a thread during startup it can run quite a while looking for config. but once found, - a separate thread takes over as ReplSet::Manager, and this thread + a separate thread takes over as ReplSetImpl::Manager, and this thread terminates. */ void startReplSets() { diff --git a/db/repl/rs.h b/db/repl/rs.h index 1c950e226fb..ded9c8afb90 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -31,10 +31,62 @@ namespace mongo { void newReplUp(); struct Target; + class ReplSetInfo; extern bool replSet; // true if using repl sets extern class ReplSet *theReplSet; // null until initialized extern Tee *rsLog; + class Member : public List1::Base { + public: + Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c); + + string fullName() const { return h().toString(); } + const ReplSetConfig::MemberCfg& config() const { return *_config; } + const HeartbeatInfo& hbinfo() const { return _hbinfo; } + string lhb() { return _hbinfo.lastHeartbeatMsg; } + MemberState state() const { return _hbinfo.hbstate; } + const HostAndPort& h() const { return _h; } + unsigned id() const { return _hbinfo.id(); } + + void summarizeAsHtml(stringstream& s) const; + friend class ReplSetImpl; + private: + const ReplSetConfig::MemberCfg *_config; /* todo: when this changes??? */ + HostAndPort _h; + HeartbeatInfo _hbinfo; + }; + + class Manager : public task::Server { + bool got(const any&); + ReplSetImpl *rs; + int _primary; + const Member* findOtherPrimary(); + void noteARemoteIsPrimary(const Member *); + public: + Manager(ReplSetImpl *rs); + void msgReceivedNewConfig(BSONObj) { assert(false); } + void msgCheckNewState(); + }; + + class Consensus { + ReplSetImpl &rs; + struct LastYea { + LastYea() : when(0), who(0xffffffff) { } + time_t when; + unsigned who; + }; + Atomic ly; + unsigned yea(unsigned memberId); // throws VoteException + void _electSelf(); + bool weAreFreshest(bool& allUp); + public: + Consensus(ReplSetImpl *t) : rs(*t) { } + int totalVotes() const; + bool aMajoritySeemsToBeUp() const; + void electSelf(); + void electCmdReceived(BSONObj, BSONObjBuilder*); + }; + /** most operations on a ReplSet object should be done while locked. */ class RSBase : boost::noncopyable { private: @@ -61,7 +113,7 @@ namespace mongo { /* note: We currently do not free mem when the set goes away - it is assumed the replset is a singleton and long lived. */ - class ReplSet : RSBase { + class ReplSetImpl : RSBase { public: /** info on our state if the replset isn't yet "up". for example, if we are pre-initiation. */ enum StartupStatus { @@ -70,36 +122,44 @@ namespace mongo { }; static StartupStatus startupStatus; static string startupStatusMsg; + static string stateAsStr(MemberState state); + static string stateAsHtml(MemberState state); - void fatal(); - bool isMaster(const char *client); - void fillIsMaster(BSONObjBuilder&); + /* todo thread */ + void msgUpdateHBInfo(HeartbeatInfo); + bool isPrimary() const { return _myState == PRIMARY; } + + private: + Consensus elect; bool ok() const { return _myState != FATAL; } - MemberState state() const { return _myState; } - string name() const { return _name; } /* @return replica set's logical name */ void relinquish(); void assumePrimary(); + protected: + void _fillIsMaster(BSONObjBuilder&); + const ReplSetConfig& config() { return *_cfg; } + string name() const { return _name; } /* @return replica set's logical name */ + MemberState state() const { return _myState; } + void _fatal(); + void _summarizeAsHtml(stringstream&) const; + void _summarizeStatus(BSONObjBuilder&) const; // for replSetGetStatus command + /* cfgString format is replsetname/host1,host2:port,... where :port is optional. throws exception if a problem initializing. */ - ReplSet(string cfgString); + ReplSetImpl(string cfgString); /* call after constructing to start - returns fairly quickly after launching its threads */ - void go() { + void _go() { _myState = STARTUP2; startThreads(); newReplUp(); } - // for replSetGetStatus command - void summarizeStatus(BSONObjBuilder&) const; - void summarizeAsHtml(stringstream&) const; - const ReplSetConfig& config() { return *_cfg; } - private: + MemberState _myState; string _name; const vector *_seeds; @@ -112,50 +172,9 @@ namespace mongo { void loadConfig(); void initFromConfig(ReplSetConfig& c);//, bool save); - class Consensus { - ReplSet &rs; - struct LastYea { - LastYea() : when(0), who(0xffffffff) { } - time_t when; - unsigned who; - }; - Atomic ly; - unsigned yea(unsigned memberId); // throws VoteException - void _electSelf(); - bool weAreFreshest(); - public: - Consensus(ReplSet *t) : rs(*t) { } - int totalVotes() const; - bool aMajoritySeemsToBeUp() const; - void electSelf(); - void electCmdReceived(BSONObj, BSONObjBuilder*); - } elect; - - public: - class Member : public List1::Base { - public: - Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c); - - string fullName() const { return h().toString(); } - const ReplSetConfig::MemberCfg& config() const { return *_config; } - const HeartbeatInfo& hbinfo() const { return _hbinfo; } - string lhb() { return _hbinfo.lastHeartbeatMsg; } - MemberState state() const { return _hbinfo.hbstate; } - const HostAndPort& h() const { return _h; } - unsigned id() const { return _hbinfo.id(); } - - void summarizeAsHtml(stringstream& s) const; - friend class ReplSet; - private: - const ReplSetConfig::MemberCfg *_config; /* todo: when this changes??? */ - HostAndPort _h; - HeartbeatInfo _hbinfo; - }; list memberHostnames() const; const Member* currentPrimary() const { return _currentPrimary; } - bool isPrimary() const { return _myState == PRIMARY; } const ReplSetConfig::MemberCfg& myConfig() const { return _self->config(); } - void msgUpdateHBInfo(HeartbeatInfo); private: const Member *_currentPrimary; @@ -163,43 +182,33 @@ namespace mongo { List1 _members; /* all members of the set EXCEPT self. */ public: - class Manager : public task::Server { - bool got(const any&); - ReplSet *rs; - int _primary; - const Member* findOtherPrimary(); - void noteARemoteIsPrimary(const Member *); - public: - Manager(ReplSet *rs); - void msgReceivedNewConfig(BSONObj) { assert(false); } - void msgCheckNewState(); - }; shared_ptr mgr; private: Member* head() const { return _members.head(); } void getTargets(list&); - static string stateAsStr(MemberState state); - static string stateAsHtml(MemberState state); void startThreads(); friend class FeedbackThread; friend class CmdReplSetElect; + friend class Member; + friend class Manager; + friend class Consensus; }; - inline void ReplSet::fatal() - { - lock l(this); - _myState = FATAL; - log() << "replSet error fatal error, stopping replication" << rsLog; - } - - inline ReplSet::Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c) : - _config(c), _h(h), _hbinfo(ord) { } - - inline bool ReplSet::isMaster(const char *client) { - /* todo replset */ - return false; - } + class ReplSet : public ReplSetImpl { + public: + ReplSet(string cfgString) : ReplSetImpl(cfgString) { } + /* call after constructing to start - returns fairly quickly after launching its threads */ + void go() { _go(); } + void fatal() { _fatal(); } + bool isMaster(const char *client); + MemberState state() const { return ReplSetImpl::state(); } + string name() const { return ReplSetImpl::name(); } + const ReplSetConfig& config() { return ReplSetImpl::config(); } + void summarizeAsHtml(stringstream& ss) const { _summarizeAsHtml(ss); } + void summarizeStatus(BSONObjBuilder& b) const { _summarizeStatus(b); } + void fillIsMaster(BSONObjBuilder& b) { _fillIsMaster(b); } + }; /** base class for repl set commands. checks basic things such as in rs mode before the command does its real work @@ -225,6 +234,15 @@ namespace mongo { return true; } }; + + /** inlines ----------------- */ + inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c) : + _config(c), _h(h), _hbinfo(ord) { } + + inline bool ReplSet::isMaster(const char *client) { + /* todo replset */ + return false; + } } From 2c5209350cb2222065d5ade2f9089d3636a5a594 Mon Sep 17 00:00:00 2001 From: Dwight Date: Tue, 1 Jun 2010 16:36:20 -0400 Subject: [PATCH 08/29] some work on replsetfresh --- db/repl/consensus.cpp | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index d8e4cc5caf5..ba119ddabb8 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -28,8 +28,23 @@ namespace mongo { virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; - errmsg = "not done"; - return false; + + if( cmdObj["set"].String() != theReplSet->name() ) { + errmsg = "wrong repl set name"; + return false; + } + string who = cmdObj["who"].String(); + int cfgver = cmdObj["cfgver"].Int(); + + bool weAreFresher = false; + if( theReplSet->config().version > cfgver ) { + log() << "replSet member " << who << " is not yet aware its cfg version " << cfgver << " is stale" << rsLog; + weAreFresher = true; + } + result.append("fresher", weAreFresher); + + log() << "replSet error: replSetFresh command not implemented yet." << rsLog; + return true; } } cmdReplSetFresh; From d869691eef5d8f49960f0717e4ca67b13d30205a Mon Sep 17 00:00:00 2001 From: Dwight Date: Tue, 1 Jun 2010 17:25:25 -0400 Subject: [PATCH 09/29] some scons fixes for widnows --- SConstruct | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/SConstruct b/SConstruct index 554aef732cf..6595c7c11e9 100644 --- a/SConstruct +++ b/SConstruct @@ -49,7 +49,6 @@ AddOption('--distmod', metavar='DIR', help='additional piece for full dist name') - AddOption( "--64", dest="force64", type="string", @@ -554,8 +553,8 @@ elif os.sys.platform.startswith( "freebsd" ): elif "win32" == os.sys.platform: windows = True - if force64: - release = True + #if force64: + # release = True for pathdir in env['ENV']['PATH'].split(os.pathsep): if os.path.exists(os.path.join(pathdir, 'cl.exe')): @@ -600,24 +599,36 @@ elif "win32" == os.sys.platform: env.Append( CPPPATH=[ boostDir , "pcre-7.4" , winSDKHome + "/Include" ] ) + # consider adding /MP build with multiple processes option. + + # /EHsc exception handling style for visual studio + # /W3 warning level env.Append( CPPFLAGS=" /EHsc /W3 " ) - env.Append( CPPFLAGS=" /wd4355 /wd4800 " ) #some warnings we don't like + + # some warnings we don't like: + env.Append( CPPFLAGS=" /wd4355 /wd4800 /wd4267 /wd4244 " ) + env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","_UNICODE","UNICODE","SUPPORT_UCP","SUPPORT_UTF8,PSAPI_VERSION=1" ] ) #env.Append( CPPFLAGS=' /Yu"pch.h" ' ) # this would be for pre-compiled headers, could play with it later + # docs say don't use /FD from command line + # /Gy funtion level linking + # /Gm is minimal rebuild, but may not work in parallel mode. if release: env.Append( CPPDEFINES=[ "NDEBUG" ] ) - env.Append( CPPFLAGS= " /O2 /FD /MT /Gy /Zi /TP /errorReport:prompt /Gm " ) + env.Append( CPPFLAGS= " /O2 /MT /Gy /Zi /TP /errorReport:none " ) # TODO: this has caused some linking problems : + # /GL whole program optimization env.Append( CPPFLAGS= " /GL " ) env.Append( LINKFLAGS=" /LTCG " ) else: env.Append( CPPDEFINES=[ "_DEBUG" ] ) # /Od disable optimization # /ZI debug info w/edit & continue + # /TP it's a c++ file # RTC1 /GZ (Enable Stack Frame Run-Time Error Checking) - env.Append( CPPFLAGS=" /Od /Gm /RTC1 /MDd /ZI " ) + env.Append( CPPFLAGS=" /Od /RTC1 /MDd /Zi /TP /errorReport:none " ) env.Append( CPPFLAGS=' /Fd"mongod.pdb" ' ) env.Append( LINKFLAGS=" /incremental:yes /debug " ) From 1262fcd3a3b7bdfaf936b1535564eebcc7f47e04 Mon Sep 17 00:00:00 2001 From: Dwight Date: Tue, 1 Jun 2010 18:40:31 -0400 Subject: [PATCH 10/29] scons --- SConstruct | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/SConstruct b/SConstruct index 6595c7c11e9..d3b2cc6d5c9 100644 --- a/SConstruct +++ b/SConstruct @@ -607,7 +607,7 @@ elif "win32" == os.sys.platform: # some warnings we don't like: env.Append( CPPFLAGS=" /wd4355 /wd4800 /wd4267 /wd4244 " ) - + env.Append( CPPDEFINES=["WIN32","_CONSOLE","_CRT_SECURE_NO_WARNINGS","HAVE_CONFIG_H","PCRE_STATIC","_UNICODE","UNICODE","SUPPORT_UCP","SUPPORT_UTF8,PSAPI_VERSION=1" ] ) #env.Append( CPPFLAGS=' /Yu"pch.h" ' ) # this would be for pre-compiled headers, could play with it later @@ -675,10 +675,11 @@ elif "win32" == os.sys.platform: env.Append( LIBS=Split(winLibString) ) - if force64: - env.Append( CPPDEFINES=["_AMD64_=1"] ) - else: - env.Append( CPPDEFINES=["_X86_=1"] ) + # dm these should automatically be defined by the compiler. commenting out to see if works. jun2010 + #if force64: + # env.Append( CPPDEFINES=["_AMD64_=1"] ) + #else: + # env.Append( CPPDEFINES=["_X86_=1"] ) env.Append( CPPPATH=["../winpcap/Include"] ) env.Append( LIBPATH=["../winpcap/Lib"] ) From 783bf1e4d7f5f9caadf73233df59f6672b47120f Mon Sep 17 00:00:00 2001 From: dwight Date: Tue, 1 Jun 2010 19:12:38 -0400 Subject: [PATCH 11/29] compile --- db/repl/rs.cpp | 4 ++-- db/repl/rs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 15e5e0a9bfe..2c30b936e50 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -137,8 +137,8 @@ namespace mongo { } } - ReplSet::StartupStatus ReplSet::startupStatus = PRESTART; - string ReplSet::startupStatusMsg; + ReplSetImpl::StartupStatus ReplSetImpl::startupStatus = PRESTART; + string ReplSetImpl::startupStatusMsg; void ReplSetImpl::initFromConfig(ReplSetConfig& c) { //, bool save) { _cfg = new ReplSetConfig(c); diff --git a/db/repl/rs.h b/db/repl/rs.h index ded9c8afb90..8701cf0c946 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -31,7 +31,7 @@ namespace mongo { void newReplUp(); struct Target; - class ReplSetInfo; + class ReplSetImpl; extern bool replSet; // true if using repl sets extern class ReplSet *theReplSet; // null until initialized extern Tee *rsLog; From 6b0b96f48a1159e5222d0aec4d0d6808a8c94513 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 10:01:42 -0400 Subject: [PATCH 12/29] defensive asserts --- db/instance.cpp | 8 +++++--- db/pdfile.cpp | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/db/instance.cpp b/db/instance.cpp index 34cd605922d..d9872bb85b6 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -100,11 +100,13 @@ namespace mongo { scoped_lock bl(Client::clientsMutex); for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { Client *c = *i; + assert( c ); if ( c == &me ) continue; - CurOp& co = *(c->curop()); - if( all || co.active() ) - vals.push_back( co.infoNoauth() ); + CurOp* co = c->curop(); + assert( co ); + if( all || co->active() ) + vals.push_back( co->infoNoauth() ); } } b.append("inprog", vals); diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 117eaecf3cd..ba5e6139179 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -1562,7 +1562,9 @@ namespace mongo { assumes ns is capped and no indexes */ Record* DataFileMgr::fast_oplog_insert(NamespaceDetails *d, const char *ns, int len) { + assert( d ); RARELY assert( d == nsdetails(ns) ); + DEV assert( d == nsdetails(ns) ); DiskLoc extentLoc; int lenWHdr = len + Record::HeaderSize; From 701240d1baa2303b871e232371e19fcb7d36e71f Mon Sep 17 00:00:00 2001 From: Alberto Lerner Date: Wed, 2 Jun 2010 10:53:51 -0400 Subject: [PATCH 13/29] Initial support for histograms (eg. cumulative stats) --- SConstruct | 3 +- dbtests/histogram_test.cpp | 77 +++++++++++++++++++++++ util/histogram.cpp | 121 +++++++++++++++++++++++++++++++++++++ util/histogram.h | 99 ++++++++++++++++++++++++++++++ 4 files changed, 299 insertions(+), 1 deletion(-) create mode 100644 dbtests/histogram_test.cpp create mode 100644 util/histogram.cpp create mode 100644 util/histogram.h diff --git a/SConstruct b/SConstruct index d3b2cc6d5c9..1379a91cc70 100644 --- a/SConstruct +++ b/SConstruct @@ -386,7 +386,8 @@ if distBuild: commonFiles = Split( "pch.cpp buildinfo.cpp db/common.cpp db/jsobj.cpp db/json.cpp db/lasterror.cpp db/nonce.cpp db/queryutil.cpp shell/mongo.cpp" ) commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/ramstore.cpp", "util/sock.cpp" , "util/util.cpp" , "util/message.cpp" , "util/assert_util.cpp" , "util/httpclient.cpp" , "util/md5main.cpp" , "util/base64.cpp", "util/concurrency/vars.cpp", "util/concurrency/task.cpp", "util/debug_util.cpp", - "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp" ] + "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp", + "util/histogram.cpp"] commonFiles += Glob( "util/*.c" ) commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/dbclientcursor.cpp client/model.cpp client/syncclusterconnection.cpp s/shardconnection.cpp" ) diff --git a/dbtests/histogram_test.cpp b/dbtests/histogram_test.cpp new file mode 100644 index 00000000000..0ef07d3e9e2 --- /dev/null +++ b/dbtests/histogram_test.cpp @@ -0,0 +1,77 @@ +// histogramtests.cpp : histogram.{h,cpp} unit tests + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include + +#include "dbtests.h" +#include "../util/histogram.h" + +namespace { + + using mongo::Histogram; + + class BoundariesInit{ + public: + void run(){ + Histogram::Options opts; + opts.numBuckets = 3; + opts.bucketSize = 10; + Histogram h( opts ); + + ASSERT_EQUALS( h.getBucketsNum(), 3u ); + + ASSERT_EQUALS( h.getCount( 0 ), 0u ); + ASSERT_EQUALS( h.getCount( 1 ), 0u ); + ASSERT_EQUALS( h.getCount( 2 ), 0u ); + + ASSERT_EQUALS( h.getBoundary( 0 ), 10u ); + ASSERT_EQUALS( h.getBoundary( 1 ), 20u ); + ASSERT_EQUALS( h.getBoundary( 2 ), numeric_limits::max() ); + } + }; + + class BoundariesFind{ + public: + void run(){ + Histogram::Options opts; + opts.numBuckets = 3; + opts.bucketSize = 10; + Histogram h( opts ); + + h.insert( 10 ); // end of first bucket + h.insert( 15 ); // second bucket + h.insert( 18 ); // second bucket + + ASSERT_EQUALS( h.getCount( 0 ), 1u ); + ASSERT_EQUALS( h.getCount( 1 ), 2u ); + ASSERT_EQUALS( h.getCount( 2 ), 0u ); + } + }; + + class HistogramSuite : public Suite { + public: + HistogramSuite() : Suite( "histogram" ){} + + void setupTests(){ + add< BoundariesInit >(); + add< BoundariesFind >(); + // TODO: complete the test suite + } + } histogramSuite; + +} // anonymous namespace diff --git a/util/histogram.cpp b/util/histogram.cpp new file mode 100644 index 00000000000..45bc9a8ddd2 --- /dev/null +++ b/util/histogram.cpp @@ -0,0 +1,121 @@ +// histogram.cc + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include +#include +#include + +#include "histogram.h" + +namespace mongo { + + using std::ostringstream; + using std::setfill; + using std::setw; + + Histogram::Histogram( const Options& opts ) + : _initialValue( opts.initialValue ) + , _numBuckets( opts.numBuckets ) + , _boundaries( new uint32_t[_numBuckets] ) + , _buckets( new uint64_t[_numBuckets] ){ + + // TODO more sanity checks + // + not too few buckets + // + initialBucket and bucketSize fit within 32 bit ints + + // _boundaries store the maximum value falling in that bucket. + _boundaries[0] = _initialValue + opts.bucketSize; + for ( uint32_t i = 1; i < _numBuckets - 1; i++ ){ + _boundaries[i] = _boundaries[ i-1 ] + opts.bucketSize; + } + _boundaries[ _numBuckets-1 ] = std::numeric_limits::max(); + + for ( uint32_t i = 0; i < _numBuckets; i++ ) { + _buckets[i] = 0; + } + } + + Histogram::~Histogram() { + delete [] _boundaries; + delete [] _buckets; + } + + void Histogram::insert( uint32_t element ){ + if ( element < _initialValue) return; + + _buckets[ findBucket(element) ] += 1; + } + + string Histogram::toHTML() const{ + uint64_t max = 0; + for ( uint32_t i = 0; i < _numBuckets; i++ ){ + if ( _buckets[i] > max ){ + max = _buckets[i]; + } + } + if ( max == 0 ) { + return "histogram is empty\n"; + } + + // normalize buckets to max + const int maxBar = 20; + ostringstream ss; + for ( uint32_t i = 0; i < _numBuckets; i++ ){ + int barSize = _buckets[i] * maxBar / max; + ss << string( barSize,'*' ) + << setfill(' ') << setw( maxBar-barSize + 12 ) + << _boundaries[i] << '\n'; + } + + return ss.str(); + } + + uint64_t Histogram::getCount( uint32_t bucket ) const { + if ( bucket >= _numBuckets ) return 0; + + return _buckets[ bucket ]; + } + + uint32_t Histogram::getBoundary( uint32_t bucket ) const { + if ( bucket >= _numBuckets ) return 0; + + return _boundaries[ bucket ]; + } + + uint32_t Histogram::getBucketsNum() const { + return _numBuckets; + } + + uint32_t Histogram::findBucket( uint32_t element ) const{ + // TODO assert not too small a value? + + uint32_t low = 0; + uint32_t high = _numBuckets - 1; + while ( low < high ){ + // low + ( (high - low) / 2 ); + uint32_t mid = ( low + high ) >> 1; + if ( element > _boundaries[ mid ] ){ + low = mid + 1; + } else { + high = mid; + } + } + return low; + } + +} // namespace mongo diff --git a/util/histogram.h b/util/histogram.h new file mode 100644 index 00000000000..d3fb41692f5 --- /dev/null +++ b/util/histogram.h @@ -0,0 +1,99 @@ +// histogram.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef UTIL_HISTOGRAM_HEADER +#define UTIL_HISTOGRAM_HEADER + +#include +#include + +namespace mongo { + + using std::string; + + /** + * A histogram for a 32-bit integer range. + */ + class Histogram { + public: + /** + * Construct a histogram with 'numBuckets' buckets, each or + * which 'bucketSize' wide. Optionally have the first bucket + * start at 'initialValue' rather than 0. + */ + struct Options { + uint32_t numBuckets; + uint32_t bucketSize; + uint32_t initialValue; + + Options() : numBuckets(0), bucketSize(0), initialValue(0){} + }; + explicit Histogram( const Options& opts ); + ~Histogram(); + + /** + * Find the bucket that 'element' falls into and increment its count. + */ + void insert( uint32_t element ); + + /** + * Render the histogram as string that can be used inside an + * HTML doc. + */ + string toHTML() const; + + // testing interface below -- consider it private + + /** + * Return the count for the 'bucket'-th bucket. + */ + uint64_t getCount( uint32_t bucket ) const; + + /** + * Return the maximum element that would fall in the + * 'bucket'-th bucket. + */ + uint32_t getBoundary( uint32_t bucket ) const; + + /** + * Return the number of buckets in this histogram. + */ + uint32_t getBucketsNum() const; + + private: + /** + * Returns the bucket where 'element' should fall + * into. Currently assumes that 'element' is greater than the + * minimum 'inialValue'. + */ + uint32_t findBucket( uint32_t element ) const; + + uint32_t _initialValue; // no value lower than it is recorded + uint32_t _numBuckets; // total buckets in the histogram + + // all below owned here + uint32_t* _boundaries; // maximum element of each bucket + uint64_t* _buckets; // current count of each bucket + + Histogram( const Histogram& ); + Histogram& operator=( const Histogram& ); + }; + +} // namespace mongo + +#endif // UTIL_HISTOGRAM_HEADER From 49535d34bfd92e105e0da2b9f38ce13c7b8a0d46 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 12:30:08 -0400 Subject: [PATCH 14/29] make getLastError smarter in a sharded environment --- client/dbclient.cpp | 6 +++++- client/dbclient.h | 2 ++ s/commands_admin.cpp | 36 +++++++++++++++++++++++++++++++----- s/request.cpp | 1 + s/request.h | 7 +++++++ 5 files changed, 46 insertions(+), 6 deletions(-) diff --git a/client/dbclient.cpp b/client/dbclient.cpp index bb31fd25908..ef06794a68c 100644 --- a/client/dbclient.cpp +++ b/client/dbclient.cpp @@ -165,10 +165,14 @@ namespace mongo { string DBClientWithCommands::getLastError() { BSONObj info = getLastErrorDetailed(); + return getLastErrorString( info ); + } + + string DBClientWithCommands::getLastErrorString( const BSONObj& info ){ BSONElement e = info["err"]; if( e.eoo() ) return ""; if( e.type() == Object ) return e.toString(); - return e.str(); + return e.str(); } BSONObj getpreverrorcmdobj = fromjson("{getpreverror:1}"); diff --git a/client/dbclient.h b/client/dbclient.h index 1c9ee87b2bf..0724b6f8636 100644 --- a/client/dbclient.h +++ b/client/dbclient.h @@ -326,6 +326,8 @@ namespace mongo { */ BSONObj getLastErrorDetailed(); + static string getLastErrorString( const BSONObj& res ); + /** Return the last error which has occurred, even if not the very last operation. @return { err : , nPrev : , ok : 1 } diff --git a/s/commands_admin.cpp b/s/commands_admin.cpp index c33f1179aee..3770a26ae8d 100644 --- a/s/commands_admin.cpp +++ b/s/commands_admin.cpp @@ -783,8 +783,6 @@ namespace mongo { } } - DBConfigPtr conf = grid.getDBConfig( dbName , false ); - ClientInfo * client = ClientInfo::get(); set * shards = client->getPrev(); @@ -793,27 +791,55 @@ namespace mongo { return true; } + // handle single server if ( shards->size() == 1 ){ string theShard = *(shards->begin() ); result.append( "theshard" , theShard.c_str() ); ShardConnection conn( theShard , "" ); BSONObj res; - bool ok = conn->runCommand( conf->getName() , cmdObj , res ); + bool ok = conn->runCommand( dbName , cmdObj , res ); result.appendElements( res ); conn.done(); + + // hit other machines just to block + for ( set::iterator i=client->sinceLastGetError().begin(); i!=client->sinceLastGetError().end(); ++i ){ + string temp = *i; + if ( temp == theShard ) + continue; + + ShardConnection conn( temp , "" ); + conn->getLastError(); + conn.done(); + } + client->clearSinceLastGetError(); return ok; } + // hit each shard vector errors; for ( set::iterator i = shards->begin(); i != shards->end(); i++ ){ string theShard = *i; ShardConnection conn( theShard , "" ); - string temp = conn->getLastError(); - if ( temp.size() ) + BSONObj res; + bool ok = conn->runCommand( dbName , cmdObj , res ); + string temp = DBClientWithCommands::getLastErrorString( res ); + if ( ok == false || temp.size() ) errors.push_back( temp ); conn.done(); } + // hit other machines just to block + for ( set::iterator i=client->sinceLastGetError().begin(); i!=client->sinceLastGetError().end(); ++i ){ + string temp = *i; + if ( shards->count( temp ) ) + continue; + + ShardConnection conn( temp , "" ); + conn->getLastError(); + conn.done(); + } + client->clearSinceLastGetError(); + if ( errors.size() == 0 ){ result.appendNull( "err" ); return true; diff --git a/s/request.cpp b/s/request.cpp index 8e8fd022365..a2ee0fb9997 100644 --- a/s/request.cpp +++ b/s/request.cpp @@ -168,6 +168,7 @@ namespace mongo { void ClientInfo::addShard( const string& shard ){ _cur->insert( shard ); + _sinceLastGetError.insert( shard ); } void ClientInfo::newRequest( AbstractMessagingPort* p ){ diff --git a/s/request.h b/s/request.h index 6b98738e438..478828a73bc 100644 --- a/s/request.h +++ b/s/request.h @@ -123,6 +123,11 @@ namespace mongo { static ClientInfo * get( int clientId = 0 , bool create = true ); static void disconnect( int clientId ); + const set& sinceLastGetError() const { return _sinceLastGetError; } + void clearSinceLastGetError(){ + _sinceLastGetError.clear(); + } + private: int _id; string _remote; @@ -133,6 +138,8 @@ namespace mongo { set * _prev; int _lastAccess; + set _sinceLastGetError; + static mongo::mutex _clientsLock; static ClientCache& _clients; static boost::thread_specific_ptr _tlInfo; From 3114e51429e588783176e2275972896caeda2352 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 12:32:22 -0400 Subject: [PATCH 15/29] try to fix solaris compile --- util/histogram.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/util/histogram.h b/util/histogram.h index d3fb41692f5..3c7b6a15cb2 100644 --- a/util/histogram.h +++ b/util/histogram.h @@ -19,6 +19,8 @@ #ifndef UTIL_HISTOGRAM_HEADER #define UTIL_HISTOGRAM_HEADER +#include "../pch.h" + #include #include @@ -37,9 +39,9 @@ namespace mongo { * start at 'initialValue' rather than 0. */ struct Options { - uint32_t numBuckets; - uint32_t bucketSize; - uint32_t initialValue; + boost::uint32_t numBuckets; + boost::uint32_t bucketSize; + boost::uint32_t initialValue; Options() : numBuckets(0), bucketSize(0), initialValue(0){} }; @@ -49,7 +51,7 @@ namespace mongo { /** * Find the bucket that 'element' falls into and increment its count. */ - void insert( uint32_t element ); + void insert( boost::uint32_t element ); /** * Render the histogram as string that can be used inside an @@ -62,18 +64,18 @@ namespace mongo { /** * Return the count for the 'bucket'-th bucket. */ - uint64_t getCount( uint32_t bucket ) const; + boost::uint64_t getCount( boost::uint32_t bucket ) const; /** * Return the maximum element that would fall in the * 'bucket'-th bucket. */ - uint32_t getBoundary( uint32_t bucket ) const; + boost::uint32_t getBoundary( boost::uint32_t bucket ) const; /** * Return the number of buckets in this histogram. */ - uint32_t getBucketsNum() const; + boost::uint32_t getBucketsNum() const; private: /** @@ -81,14 +83,14 @@ namespace mongo { * into. Currently assumes that 'element' is greater than the * minimum 'inialValue'. */ - uint32_t findBucket( uint32_t element ) const; + boost::uint32_t findBucket( boost::uint32_t element ) const; - uint32_t _initialValue; // no value lower than it is recorded - uint32_t _numBuckets; // total buckets in the histogram + boost::uint32_t _initialValue; // no value lower than it is recorded + boost::uint32_t _numBuckets; // total buckets in the histogram // all below owned here - uint32_t* _boundaries; // maximum element of each bucket - uint64_t* _buckets; // current count of each bucket + boost::uint32_t* _boundaries; // maximum element of each bucket + boost::uint64_t* _buckets; // current count of each bucket Histogram( const Histogram& ); Histogram& operator=( const Histogram& ); From 441794c5f1e0a176293b3e61cfadeb1027da30c6 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:06:58 -0400 Subject: [PATCH 16/29] fix test --- jstests/sharding/features2.js | 1 + 1 file changed, 1 insertion(+) diff --git a/jstests/sharding/features2.js b/jstests/sharding/features2.js index 3ee9db75b19..1892e396a22 100644 --- a/jstests/sharding/features2.js +++ b/jstests/sharding/features2.js @@ -54,6 +54,7 @@ assert.eq( 0 , db.foo.count() , "D7" ); db.foo2.save( { _id : new ObjectId() } ); db.foo2.save( { _id : new ObjectId() } ); db.foo2.save( { _id : new ObjectId() } ); +db.getLastError(); assert.eq( 1 , s.onNumShards( "foo2" ) , "F1" ); From 70cc54c5082471a1ef85a6bedf1d12c4ae986c77 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:09:43 -0400 Subject: [PATCH 17/29] solaris --- pch.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pch.h b/pch.h index 08de1a2e764..f50c750b4d9 100644 --- a/pch.h +++ b/pch.h @@ -137,4 +137,7 @@ namespace mongo { typedef char _TCHAR; + using boost::uint32_t; + using boost::uint64_t; + } // namespace mongo From dd276b99da24ad337cefb217aa067e0299190a26 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:21:29 -0400 Subject: [PATCH 18/29] more aggressive killing --- buildscripts/cleanbb.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/buildscripts/cleanbb.py b/buildscripts/cleanbb.py index 68a80127525..1afa9d47c18 100644 --- a/buildscripts/cleanbb.py +++ b/buildscripts/cleanbb.py @@ -4,16 +4,28 @@ import os import utils import time +cwd = os.getcwd(); +if cwd.find("buildscripts" ) > 0 : + cwd = cwd.partition( "buildscripts" )[0] + +print( "cwd [" + cwd + "]" ) + +def shouldKill( c ): + if c.find( cwd ) >= 0: + return True + + if c.find( "buildbot" ) >= 0 and c.find( "/mongo/" ) >= 0: + return True + + return False + def killprocs( signal="" ): - cwd = os.getcwd(); - if cwd.find("buildscripts" ) > 0 : - cwd = cwd.partition( "buildscripts" )[0] killed = 0 for x in utils.getprocesslist(): x = x.lstrip() - if x.find( cwd ) < 0: + if not shouldKill( x ): continue pid = x.partition( " " )[0] From 191594cdc3fa255f8f765bcae20084cecbcea016 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:24:41 -0400 Subject: [PATCH 19/29] solaris --- dbtests/histogram_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbtests/histogram_test.cpp b/dbtests/histogram_test.cpp index 0ef07d3e9e2..fcf2e1c2713 100644 --- a/dbtests/histogram_test.cpp +++ b/dbtests/histogram_test.cpp @@ -16,6 +16,7 @@ * along with this program. If not, see . */ +#include "../pch.h" #include #include "dbtests.h" From 9a5638cee679eebf849cf22d67f9843ec56a07d2 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:25:59 -0400 Subject: [PATCH 20/29] win64 --- dbtests/histogram_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/dbtests/histogram_test.cpp b/dbtests/histogram_test.cpp index fcf2e1c2713..680cc51daa1 100644 --- a/dbtests/histogram_test.cpp +++ b/dbtests/histogram_test.cpp @@ -17,7 +17,6 @@ */ #include "../pch.h" -#include #include "dbtests.h" #include "../util/histogram.h" From b0451e7853d7488c1d203bd873c0335f14f6621b Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:35:09 -0400 Subject: [PATCH 21/29] namespace for histogram test for solaris --- dbtests/histogram_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbtests/histogram_test.cpp b/dbtests/histogram_test.cpp index 680cc51daa1..066e1202b7b 100644 --- a/dbtests/histogram_test.cpp +++ b/dbtests/histogram_test.cpp @@ -21,7 +21,7 @@ #include "dbtests.h" #include "../util/histogram.h" -namespace { +namespace mongo { using mongo::Histogram; From 8e7d38906d4bf8a5ba355a0c6cc360f93d37c970 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 13:39:01 -0400 Subject: [PATCH 22/29] kill procs before data --- buildscripts/cleanbb.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/buildscripts/cleanbb.py b/buildscripts/cleanbb.py index 1afa9d47c18..e7df61028e4 100644 --- a/buildscripts/cleanbb.py +++ b/buildscripts/cleanbb.py @@ -38,15 +38,17 @@ def killprocs( signal="" ): def cleanup( root ): + + if killprocs() > 0: + time.sleep(3) + killprocs("-9") + # delete all regular files, directories can stay # NOTE: if we delete directories later, we can't delete diskfulltest for ( dirpath , dirnames , filenames ) in os.walk( root , topdown=False ): for x in filenames: os.remove( dirpath + "/" + x ) - if killprocs() > 0: - time.sleep(3) - killprocs("-9") if __name__ == "__main__": root = "/data/db/" From 4921fd2d1ded9c08e2c9043cbaa9dda682159ca7 Mon Sep 17 00:00:00 2001 From: Alberto Lerner Date: Wed, 2 Jun 2010 13:43:18 -0400 Subject: [PATCH 23/29] Support for cumulative stats for mongod --- SConstruct | 2 +- db/stats/fine_clock.h | 66 ++++++++++++++++++++ db/stats/service_stats.cpp | 69 +++++++++++++++++++++ db/stats/service_stats.h | 66 ++++++++++++++++++++ dbtests/spin_lock_test.cpp | 110 +++++++++++++++++++++++++++++++++ util/concurrency/spin_lock.cpp | 66 ++++++++++++++++++++ util/concurrency/spin_lock.h | 48 ++++++++++++++ 7 files changed, 426 insertions(+), 1 deletion(-) create mode 100644 db/stats/fine_clock.h create mode 100644 db/stats/service_stats.cpp create mode 100644 db/stats/service_stats.h create mode 100644 dbtests/spin_lock_test.cpp create mode 100644 util/concurrency/spin_lock.cpp create mode 100644 util/concurrency/spin_lock.h diff --git a/SConstruct b/SConstruct index 1379a91cc70..cd507892a94 100644 --- a/SConstruct +++ b/SConstruct @@ -387,7 +387,7 @@ commonFiles = Split( "pch.cpp buildinfo.cpp db/common.cpp db/jsobj.cpp db/json.c commonFiles += [ "util/background.cpp" , "util/mmap.cpp" , "util/ramstore.cpp", "util/sock.cpp" , "util/util.cpp" , "util/message.cpp" , "util/assert_util.cpp" , "util/httpclient.cpp" , "util/md5main.cpp" , "util/base64.cpp", "util/concurrency/vars.cpp", "util/concurrency/task.cpp", "util/debug_util.cpp", "util/concurrency/thread_pool.cpp", "util/password.cpp", "util/version.cpp", - "util/histogram.cpp"] + "util/histogram.cpp", "util/concurrency/spin_lock.cpp" ] commonFiles += Glob( "util/*.c" ) commonFiles += Split( "client/connpool.cpp client/dbclient.cpp client/dbclientcursor.cpp client/model.cpp client/syncclusterconnection.cpp s/shardconnection.cpp" ) diff --git a/db/stats/fine_clock.h b/db/stats/fine_clock.h new file mode 100644 index 00000000000..1f23175cd51 --- /dev/null +++ b/db/stats/fine_clock.h @@ -0,0 +1,66 @@ +// fine_clock.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef DB_STATS_FINE_CLOCK_HEADER +#define DB_STATS_FINE_CLOCK_HEADER + +#include // struct timespec + +namespace mongo { + + /** + * This is a nano-second precision clock. We're skipping the + * harware TSC in favor of clock_gettime() which in some systems + * does not involve a trip to the OS (VDSO). + * + * We're exporting a type WallTime that is and should remain + * opaque. The business of getting accurate time is still ongoing + * and we may change the internal representation of this class. + * (http://lwn.net/Articles/388188/) + * + * Really, you shouldn't be using this class in hot code paths for + * platforms you're not sure whether the overhead is low. + */ + class FineClock{ + public: + + typedef timespec WallTime; + + static WallTime now(){ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return ts; + } + + static uint64_t diffInNanos( WallTime end, WallTime start ){ + uint64_t diff; + if ( end.tv_nsec < start.tv_nsec ){ + diff = 1000000000 * ( end.tv_sec - start.tv_sec - 1); + diff += 1000000000 + end.tv_nsec - start.tv_nsec; + } else { + diff = 1000000000 * ( end.tv_sec - start.tv_sec ); + diff += end.tv_nsec - start.tv_nsec; + } + return diff; + } + + }; +} + +#endif // DB_STATS_FINE_CLOCK_HEADER + diff --git a/db/stats/service_stats.cpp b/db/stats/service_stats.cpp new file mode 100644 index 00000000000..0d66abb74ac --- /dev/null +++ b/db/stats/service_stats.cpp @@ -0,0 +1,69 @@ +// service_stats.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include + +#include "../../util/histogram.h" +#include "service_stats.h" + +namespace mongo { + + using std::ostringstream; + + ServiceStats::ServiceStats(){ + // TODO exponentially increasing buckets perhaps would be + // better for the following histograms + + // Time histogram covers up to 2.5msec in 250usec intervals + // (and lumps anything higher in last bucket) + Histogram::Options timeOpts; + timeOpts.numBuckets = 10; + timeOpts.bucketSize = 250; + _timeHistogram = new Histogram( timeOpts ); + + // Space histogram covers up to 4MB in 256k intervals (and + // lumps anything higher in last bucket) + Histogram::Options spaceOpts; + spaceOpts.numBuckets = 16; + spaceOpts.bucketSize = 2 << 18; + _spaceHistogram = new Histogram( spaceOpts ); + } + + ServiceStats::~ServiceStats(){ + delete _timeHistogram; + delete _spaceHistogram; + } + + void ServiceStats::logResponse( uint64_t duration, uint64_t bytes ){ + _spinLock.lock(); + _timeHistogram->insert( duration / 1000 /* in usecs */ ); + _spaceHistogram->insert( bytes ); + _spinLock.unlock(); + } + + string ServiceStats::toHTML() const { + ostringstream res ; + res << "Cumulative wire stats\n" + << "Response times\n" << _timeHistogram->toHTML() + << "Response sizes\n" << _spaceHistogram->toHTML() + << '\n'; + + return res.str(); + } + +} // mongo diff --git a/db/stats/service_stats.h b/db/stats/service_stats.h new file mode 100644 index 00000000000..5b0e75fdcb9 --- /dev/null +++ b/db/stats/service_stats.h @@ -0,0 +1,66 @@ +// service_stats.h + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef DB_STATS_SERVICE_STATS_HEADER +#define DB_STATS_SERVICE_STATS_HEADER + +#include + +#include "../../util/concurrency/spin_lock.h" + +namespace mongo { + + using std::string; + + class Histogram; + + /** + * ServiceStats keeps track of the time a request/response message + * took inside a service as well as the size of the response + * generated. + */ + class ServiceStats { + public: + ServiceStats(); + ~ServiceStats(); + + /** + * Record the 'duration' in microseconds a request/response + * message took and the size in bytes of the generated + * response. + */ + void logResponse( uint64_t duration, uint64_t bytes ); + + /** + * Render the histogram as string that can be used inside an + * HTML doc. + */ + string toHTML() const; + + private: + SpinLock _spinLock; // protects state below + Histogram* _timeHistogram; + Histogram* _spaceHistogram; + + ServiceStats( const ServiceStats& ); + ServiceStats operator=( const ServiceStats& ); + }; + +} // namespace mongo + +#endif // DB_STATS_SERVICE_STATS_HEADER diff --git a/dbtests/spin_lock_test.cpp b/dbtests/spin_lock_test.cpp new file mode 100644 index 00000000000..7ac29921564 --- /dev/null +++ b/dbtests/spin_lock_test.cpp @@ -0,0 +1,110 @@ +// spin_lock_test.cpp : spin_lcok.{h, cpp} unit test + +/** + * Copyright (C) 2010 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +#include + +#include "dbtests.h" +#include "../util/concurrency/spin_lock.h" + +namespace { + + using mongo::SpinLock; + + class LockTester{ + public: + LockTester( SpinLock* spin, int* counter ) + : _spin(spin), _counter(counter), _requests(0){} + + ~LockTester(){} + + void start( int increments ){ + _t = boost::thread( boost::bind(&LockTester::test, this, increments) ); + } + + void join(){ + _t.join(); + } + + int requests() const{ + return _requests; + } + + private: + SpinLock* _spin; // not owned here + int* _counter; // not owned here + int _requests; + boost::thread _t; + + void test( int increments ){ + while ( increments-- > 0 ) { + _spin->lock(); + ++(*_counter); + ++_requests; + _spin->unlock(); + } + } + + LockTester( LockTester& ); + LockTester& operator=( LockTester& ); + }; + + class ConcurrentIncs{ + public: + void run(){ + +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + SpinLock spin; + int counter = 0; + + const int threads = 64; + const int incs = 10000; + LockTester* testers[threads]; + + for ( int i = 0; i < threads; i++ ){ + testers[i] = new LockTester( &spin, &counter ); + } + for ( int i = 0; i < threads; i++ ){ + testers[i]->start( incs ); + } + for ( int i = 0; i < threads; i++ ){ + testers[i]->join(); + ASSERT_EQUALS( testers[i]->requests(), incs ); + delete testers[i]; + } + + ASSERT_EQUALS( counter, threads*incs ); +#else + +#warning "TODO Missing spin lock in this platform." + +#endif + + } + }; + + class SpinLockSuite : public Suite{ + public: + SpinLockSuite() : Suite( "spinlock" ){} + + void setupTests(){ + add< ConcurrentIncs >(); + } + } spinLockSuite; + +} // anonymous namespace diff --git a/util/concurrency/spin_lock.cpp b/util/concurrency/spin_lock.cpp new file mode 100644 index 00000000000..36a5e329b38 --- /dev/null +++ b/util/concurrency/spin_lock.cpp @@ -0,0 +1,66 @@ +// spin_lock.cpp + +/** +* Copyright (C) 2010 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#include +#include "spin_lock.h" + +namespace mongo { + + SpinLock::SpinLock() : _locked( false ){} + + SpinLock::~SpinLock(){} + + void SpinLock::lock(){ +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + // fast path + if (!_locked && !__sync_lock_test_and_set(&_locked, true)) { + return; + } + + // wait for lock + int wait = 1000; + while ((wait-- > 0) && (_locked)) {} + + // if failed to grab lock, sleep + struct timespec t; + t.tv_sec = 0; + t.tv_nsec = 5000000; + while (__sync_lock_test_and_set(&_locked, true)) { + nanosleep(&t, NULL); + } +#else + +#warning "TODO Missing spin lock in this platform." + +#endif + } + + void SpinLock::unlock(){ +#if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) + + __sync_lock_release(&_locked); + +#else + +#warning "TODO Missing spin lock in this platform." + +#endif + } + +} // namespace mongo diff --git a/util/concurrency/spin_lock.h b/util/concurrency/spin_lock.h new file mode 100644 index 00000000000..110290dd547 --- /dev/null +++ b/util/concurrency/spin_lock.h @@ -0,0 +1,48 @@ +// spin_lock.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see . +*/ + +#ifndef CONCURRENCY_SPINLOCK_HEADER +#define CONCURRENCY_SPINLOCK_HEADER + +namespace mongo { + + /** + * BIG WARNING - COMPILES, BUT NOT READY FOR USE - BIG WARNING + * + * The spinlock currently requires late GCC support + * routines. Support for other platforms will be added soon. + */ + class SpinLock{ + public: + SpinLock(); + ~SpinLock(); + + void lock(); + void unlock(); + + private: + bool _locked; + + // Non-copyable, non-assignable + SpinLock(SpinLock&); + SpinLock& operator=(SpinLock&); + }; + +} // namespace mongo + +#endif // CONCURRENCY_SPINLOCK_HEADER From 46d835d6dc542a3c83925f38c782a50e34b408ec Mon Sep 17 00:00:00 2001 From: Alberto Lerner Date: Wed, 2 Jun 2010 14:13:42 -0400 Subject: [PATCH 24/29] Build fix (SunOS 32bit) --- dbtests/spin_lock_test.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/dbtests/spin_lock_test.cpp b/dbtests/spin_lock_test.cpp index 7ac29921564..952ec1a12db 100644 --- a/dbtests/spin_lock_test.cpp +++ b/dbtests/spin_lock_test.cpp @@ -30,14 +30,16 @@ namespace { LockTester( SpinLock* spin, int* counter ) : _spin(spin), _counter(counter), _requests(0){} - ~LockTester(){} + ~LockTester(){ + delete _t; + } void start( int increments ){ - _t = boost::thread( boost::bind(&LockTester::test, this, increments) ); + _t = new boost::thread( boost::bind(&LockTester::test, this, increments) ); } void join(){ - _t.join(); + if ( _t ) _t->join(); } int requests() const{ @@ -45,10 +47,10 @@ namespace { } private: - SpinLock* _spin; // not owned here - int* _counter; // not owned here - int _requests; - boost::thread _t; + SpinLock* _spin; // not owned here + int* _counter; // not owned here + int _requests; + boost::thread* _t; void test( int increments ){ while ( increments-- > 0 ) { From 4d201c39db21a9201313d421bcbcd91eff6ca3cf Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 14:15:54 -0400 Subject: [PATCH 25/29] don't look at module links --- buildscripts/errorcodes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py index cce94f394c7..a9243128c5a 100755 --- a/buildscripts/errorcodes.py +++ b/buildscripts/errorcodes.py @@ -12,7 +12,7 @@ def getAllSourceFiles( arr=None , prefix="." ): if x.startswith( "." ) or x.startswith( "pcre-" ) or x.startswith( "32bit" ) or x.startswith( "mongodb-" ) or x.startswith("debian") or x.startswith( "mongo-cxx-driver" ): continue full = prefix + "/" + x - if os.path.isdir( full ): + if os.path.isdir( full ) and not os.path.islink( full ): getAllSourceFiles( arr , full ) else: if full.endswith( ".cpp" ) or full.endswith( ".h" ) or full.endswith( ".c" ): From 11e1c574e2a834ff9ed7fb76ce8ccbb2df43ee70 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 14:16:01 -0400 Subject: [PATCH 26/29] some debugging --- util/concurrency/mutex.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/util/concurrency/mutex.h b/util/concurrency/mutex.h index d8efcad2550..fac91bcd51c 100644 --- a/util/concurrency/mutex.h +++ b/util/concurrency/mutex.h @@ -39,14 +39,27 @@ namespace mongo { set *preceeding = us.get(); if( preceeding == 0 ) us.reset( preceeding = new set() ); + + bool failed = false; + string err; { boost::mutex::scoped_lock lk(x); followers[m]; for( set::iterator i = preceeding->begin(); i != preceeding->end(); i++ ) { followers[*i].insert(m); - assert( followers[m].count(*i) == 0 ); + if ( followers[m].count(*i) != 0 ){ + failed = true; + stringstream ss; + ss << "mid: " << m << " followers[m] first: " << *(followers[m].begin()); + err = ss.str(); + break; + } } } + if ( failed ){ + cout << "ERROR with mutex: " << err << endl; + assert( 0 ); + } preceeding->insert(m); } void leaving(mid m) { From ade56fd7297f976cf750f81b0a5d9330cd70bed7 Mon Sep 17 00:00:00 2001 From: Alberto Lerner Date: Wed, 2 Jun 2010 14:25:02 -0400 Subject: [PATCH 27/29] Build fix (Win 64bit) --- dbtests/spin_lock_test.cpp | 4 +++- util/concurrency/spin_lock.cpp | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dbtests/spin_lock_test.cpp b/dbtests/spin_lock_test.cpp index 952ec1a12db..acaf2d39744 100644 --- a/dbtests/spin_lock_test.cpp +++ b/dbtests/spin_lock_test.cpp @@ -93,8 +93,10 @@ namespace { ASSERT_EQUALS( counter, threads*incs ); #else -#warning "TODO Missing spin lock in this platform." + // WARNING "TODO Missing spin lock in this platform." + ASSERT( true ); + #endif } diff --git a/util/concurrency/spin_lock.cpp b/util/concurrency/spin_lock.cpp index 36a5e329b38..b3e689a1f13 100644 --- a/util/concurrency/spin_lock.cpp +++ b/util/concurrency/spin_lock.cpp @@ -46,7 +46,7 @@ namespace mongo { } #else -#warning "TODO Missing spin lock in this platform." + // WARNING "TODO Missing spin lock in this platform." #endif } @@ -58,7 +58,7 @@ namespace mongo { #else -#warning "TODO Missing spin lock in this platform." + // WARNING "TODO Missing spin lock in this platform." #endif } From 41132cdd1ae4487bfc5a5a8605dd0e9cf9d32a1e Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 14:54:42 -0400 Subject: [PATCH 28/29] debugging --- jstests/disk/directoryperdb.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jstests/disk/directoryperdb.js b/jstests/disk/directoryperdb.js index a5fd18eca36..062602d2344 100644 --- a/jstests/disk/directoryperdb.js +++ b/jstests/disk/directoryperdb.js @@ -6,7 +6,7 @@ dbpath = "/data/db/" + baseDir + "/"; var m = startMongod( "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); db = m.getDB( baseName ); db[ baseName ].save( {} ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "A" ); checkDir = function( dir ) { db.runCommand( {fsync:1} ); @@ -22,7 +22,7 @@ checkDir = function( dir ) { files = listFiles( dir + baseName ); for( f in files ) { - assert( new RegExp( baseName + "/" + baseName + "." ).test( files[ f ].name ) ); + assert( new RegExp( baseName + "/" + baseName + "." ).test( files[ f ].name ) , "B dir:" + dir + " f: " + f ); } } checkDir( dbpath ); @@ -40,7 +40,7 @@ for( f in files ) { } } checkDir( backupDir ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "C" ); // tool test stopMongod( port ); @@ -53,7 +53,7 @@ runMongoProgram( "mongorestore", "--dbpath", dbpath, "--directoryperdb", "--dir" m = startMongoProgram( "mongod", "--directoryperdb", "--port", port, "--dbpath", dbpath, "--nohttpinterface", "--bind_ip", "127.0.0.1" ); db = m.getDB( baseName ); checkDir( dbpath ); -assert.eq( 1, db[ baseName ].count() ); +assert.eq( 1, db[ baseName ].count() , "C" ); assert( m.getDBs().totalSize > 0, "bad size calc" ); // drop db test From ed66c6013e7f6274061ce99a774018c51b3eb2f4 Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Wed, 2 Jun 2010 15:03:03 -0400 Subject: [PATCH 29/29] debugging --- buildscripts/cleanbb.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/buildscripts/cleanbb.py b/buildscripts/cleanbb.py index e7df61028e4..dac08d4634e 100644 --- a/buildscripts/cleanbb.py +++ b/buildscripts/cleanbb.py @@ -23,7 +23,16 @@ def killprocs( signal="" ): killed = 0 - for x in utils.getprocesslist(): + l = utils.getprocesslist() + print( "num procs:" + str( len( l ) ) ) + if len(l) == 0: + print( "no procs" ) + try: + print( execsys( "/sbin/ifconfig -a" ) ) + except Exception,e: + print( "can't get interfaces" + str( e ) ) + + for x in l: x = x.lstrip() if not shouldKill( x ): continue