301 lines
10 KiB
C++
301 lines
10 KiB
C++
/**
|
|
* Copyright (C) 2008 10gen Inc.
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3,
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Affero General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "pch.h"
|
|
#include "replset.h"
|
|
#include "health.h"
|
|
#include "../../util/background.h"
|
|
#include "../../client/dbclient.h"
|
|
#include "../commands.h"
|
|
#include "../../util/concurrency/value.h"
|
|
#include "../../util/mongoutils/html.h"
|
|
#include "../../util/goodies.h"
|
|
#include "../../util/ramlog.h"
|
|
#include "../helpers/dblogger.h"
|
|
#include "connections.h"
|
|
|
|
namespace mongo {
|
|
/* decls for connections.h */
|
|
ScopedConn::M& ScopedConn::_map = *(new ScopedConn::M());
|
|
mutex ScopedConn::mapMutex;
|
|
}
|
|
|
|
namespace mongo {
|
|
|
|
using namespace mongoutils::html;
|
|
|
|
static RamLog _rsLog;
|
|
Tee *rsLog = &_rsLog;
|
|
|
|
/* { replSetHeartbeat : <setname> } */
|
|
class CmdReplSetHeartbeat : public Command {
|
|
public:
|
|
virtual bool slaveOk() const { return true; }
|
|
virtual bool adminOnly() const { return false; }
|
|
virtual bool logTheOp() { return false; }
|
|
virtual LockType locktype() const { return NONE; }
|
|
virtual void help( stringstream &help ) const { help<<"internal"; }
|
|
CmdReplSetHeartbeat() : Command("replSetHeartbeat") { }
|
|
virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
|
|
if( !replSet ) {
|
|
errmsg = "not a replset member";
|
|
return false;
|
|
}
|
|
result.append("rs", true);
|
|
if( !startsWith(cmdLine.replSet, cmdObj.getStringField("replSetHeartbeat")+'/' ) ) {
|
|
errmsg = "repl set names do not match";
|
|
cout << cmdLine.replSet << endl;
|
|
cout << cmdObj.getStringField("replSetHeartbeat") << endl;
|
|
result.append("mismatch", true);
|
|
return false;
|
|
}
|
|
if( theReplSet == 0 ) {
|
|
errmsg = "still initializing";
|
|
return false;
|
|
}
|
|
if( theReplSet->getName() != cmdObj.getStringField("replSetHeartbeat") ) {
|
|
errmsg = "repl set names do not match (2)";
|
|
result.append("mismatch", true);
|
|
return false;
|
|
}
|
|
/* todo: send our state*/
|
|
result.append("set", theReplSet->getName());
|
|
return true;
|
|
}
|
|
} cmdReplSetHeartbeat;
|
|
|
|
/* throws dbexception */
|
|
bool requestHeartbeat(string setName, string memberFullName, BSONObj& result) {
|
|
BSONObj cmd = BSON( "replSetHeartbeat" << setName );
|
|
ScopedConn conn(memberFullName);
|
|
return conn->runCommand("admin", cmd, result);
|
|
}
|
|
|
|
/* poll every other set member to check its status */
|
|
class FeedbackThread : public BackgroundJob {
|
|
public:
|
|
ReplSet::Member *m;
|
|
|
|
private:
|
|
void down() {
|
|
m->_health = 0.0;
|
|
if( m->_upSince ) {
|
|
m->_upSince = 0;
|
|
log() << "replSet " << m->fullName() << " is now down" << rsLog;
|
|
}
|
|
}
|
|
|
|
public:
|
|
void run() {
|
|
mongo::lastError.reset( new LastError() );
|
|
while( 1 ) {
|
|
try {
|
|
BSONObj info;
|
|
bool ok = requestHeartbeat(theReplSet->getName(), m->fullName(), info);
|
|
m->_lastHeartbeat = time(0); // we set this on any response - we don't get this far if couldn't connect because exception is thrown
|
|
if( ok ) {
|
|
if( m->_upSince == 0 ) {
|
|
log() << "replSet " << m->fullName() << " is now up" << rsLog;
|
|
m->_upSince = m->_lastHeartbeat;
|
|
}
|
|
m->_health = 1.0;
|
|
m->_lastHeartbeatErrMsg.set("");
|
|
}
|
|
else {
|
|
down();
|
|
m->_lastHeartbeatErrMsg.set(info.getStringField("errmsg"));
|
|
}
|
|
}
|
|
catch(...) {
|
|
down();
|
|
m->_lastHeartbeatErrMsg.set("connect/transport error");
|
|
}
|
|
sleepsecs(2);
|
|
}
|
|
}
|
|
};
|
|
|
|
/* Writes one HTML table row describing this member to s.

   Cells, in order: a link built from the member's host and (port + 1000)
   with path /_replSet, health, upSince, age of the last heartbeat,
   configured votes, last heartbeat error message.
*/
void ReplSet::Member::summarizeAsHtml(stringstream& s) const {
    s << tr();

    // member name, linked to http://<host>:<port+1000>/_replSet
    stringstream link;
    link << "http://" << _h.host() << ':' << (_h.port() + 1000) << "/_replSet";
    s << td( a(link.str(), "", fullName()) );

    s << td(health());
    s << td(upSince());

    // seconds since the last heartbeat; a heartbeat stamped in the future
    // (now <= hb) is shown as 0 rather than a negative number
    stringstream age;
    const time_t hb = lastHeartbeat();
    const time_t now = time(0);
    if( hb != 0 ) {
        if( now > hb ) age << now-hb;
        else age << 0;
        age << " secs ago";
    }
    else {
        age << "never";
    }
    s << td(age.str());

    s << td(config().votes);
    s << td(_lastHeartbeatErrMsg.get());
    s << _tr();
}
|
|
|
|
/* Returns the state name wrapped by a("", <description>, <name>), i.e. the
   name with a short explanatory text attached.  Any state not listed below
   yields "???".
*/
string ReplSet::stateAsHtml(State s) {
    switch( s ) {
    case STARTUP:
        return a("", "serving still starting up, or still trying to initiate the set", "STARTUP");
    case PRIMARY:
        return a("", "this server thinks it is primary", "PRIMARY");
    case SECONDARY:
        return a("", "this server thinks it is a secondary (slave mode)", "SECONDARY");
    case RECOVERING:
        return a("", "recovering/resyncing; after recovery usually auto-transitions to secondary", "RECOVERING");
    case FATAL:
        return a("", "something bad has occurred and server is not completely offline with regard to the replica set. fatal error.", "FATAL");
    default:
        break;
    }
    return "???";
}
|
|
|
|
/* Returns the plain-text name of a state, or "???" for a state that is not
   one of STARTUP, PRIMARY, SECONDARY, RECOVERING, FATAL.
*/
string ReplSet::stateAsStr(State s) {
    switch( s ) {
    case STARTUP:    return "STARTUP";
    case PRIMARY:    return "PRIMARY";
    case SECONDARY:  return "SECONDARY";
    case RECOVERING: return "RECOVERING";
    case FATAL:      return "FATAL";
    default:         break;
    }
    return "???";
}
|
|
|
|
/* Writes two HTML tables to s: a short overview of the set (name, this
   server's state, whether a majority seems to be up) followed by a member
   table with one hard-coded row for this server and one row per member
   reached by walking head()/next().
*/
void ReplSet::summarizeAsHtml(stringstream& s) const {
    // --- overview ---
    s << table(0, false);
    s << tr("Set name:", _name);
    s << tr("My state:", stateAsHtml(_myState));
    const char *majorityUp = elect.aMajoritySeemsToBeUp() ? "yes" : "no";
    s << tr("Majority up:", majorityUp);
    s << _table();

    // --- members ---
    // column headings; the array is terminated by a 0 entry
    const char *h[] = {
        "Member",
        "Up",
        "Uptime",
        "<a title=\"when this server last received a heartbeat response - includes error code responses\">Last heartbeat</a>",
        "Votes",
        "Status",
        0
    };
    s << table(h);

    // our own row: shown as up ("1"), with uptime and last-heartbeat left blank
    s << tr();
    s << td(_self->fullName());
    s << td("1");
    s << td("");
    s << td("");
    s << td(ToString(_self->config().votes));
    s << td("self");
    s << _tr();

    for( Member *m = head(); m; m = m->next() )
        m->summarizeAsHtml(s);

    s << _table();
}
|
|
|
|
/* Returns the part of a log line that follows its 20-character prefix.
   A line shorter than 20 characters is returned unchanged, so the caller
   never receives a pointer beyond the terminating NUL. */
static const char *skipLogPrefix(const char *line) {
    for( int k = 0; k < 20; k++ ) {
        if( line[k] == '\0' )
            return line;
    }
    return line + 20;
}

/* Detects whether the lines starting at v[i] repeat the lines immediately
   before them, comparing lines with their first 20 characters skipped.

   Looks back at most 7 lines for the nearest j < i whose text matches v[i].
   If the whole run v[j..i-1] is then repeated as v[i..i+(i-j)-1], j is
   returned.  If that nearest match does not extend to a full repeat
   (mismatch, or v ends first) the result is -1; further-back candidates are
   not tried.

   @param v  log lines
   @param i  index of the line to test; out-of-range values yield -1
   @return   index j where the repeated run starts, or -1 if there is none
*/
static int repeats(const std::vector<const char *>& v, int i) {
    const int n = static_cast<int>(v.size());
    if( i < 0 || i >= n )
        return -1;

    for( int j = i-1; j >= 0 && j+8 > i; j-- ) {
        if( strcmp(skipLogPrefix(v[i]), skipLogPrefix(v[j])) != 0 )
            continue;

        // v[j] matches v[i]; the remaining i-j-1 lines must line up as well.
        for( int x = 1; j+x != i; x++ ) {
            if( i+x >= n )
                return -1;
            if( strcmp(skipLogPrefix(v[i+x]), skipLogPrefix(v[j+x])) != 0 )
                return -1;
        }
        return j;
    }
    return -1;
}
|
|
|
|
/* Produces the display text for log line i.

   @param v     log lines
   @param i     index of the line being displayed
   @param line  text to display; when empty, v[i] itself is used
   @return      if the first 11 characters of v[i] equal those of v[i-1], the
                text with its first 11 characters replaced by the padding
                string; otherwise the text unchanged

   The supplied `line` is returned as-is when the leading 11 characters differ
   from the previous entry (it is no longer discarded in favour of v[i]), and
   a text shorter than 11 characters is returned unchanged instead of making
   substr() throw.
*/
static std::string clean(const std::vector<const char *>& v, int i, std::string line="") {
    if( line.empty() )
        line = v[i];

    const bool samePrefixAsPrevious = i > 0 && strncmp(v[i], v[i-1], 11) == 0;
    if( samePrefixAsPrevious && line.size() >= 11 )
        return std::string(" ") + line.substr(11);

    return line;
}
|
|
|
|
static bool isWarning(const char *line) {
|
|
const char *p = strstr(line, "replSet ");
|
|
if( p ) {
|
|
p += 8;
|
|
return startsWith(p, "warning") || startsWith(p, "error");
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Writes the contents of _rsLog to s inside <pre> ... </pre>.

   Each line is checked with repeats().  A line that does not start a repeat
   is written through clean() and red(), with isWarning() supplying red()'s
   second argument.  When lines i.. repeat the nr lines just before them, the
   whole repeated run is collapsed into a single entry: the first 20
   characters of line i followed by nr dots, wrapped by a() with a text saying
   how many lines were repeated and, for nr > 1, 15 characters taken from
   offset 4 of the run's last line.

   Every line is asserted to be longer than 20 characters.
*/
void fillRsLog(stringstream& s) {
    s << "<pre>\n";
    vector<const char *> v = _rsLog.get();
    const int n = static_cast<int>(v.size());

    for( int i = 0; i < n; i++ ) {
        assert( strlen(v[i]) > 20 );

        const int r = repeats(v, i);
        if( r < 0 ) {
            s << red( clean(v,i), isWarning(v[i]) );
            continue;
        }

        const int nr = i - r;          // length of the repeated run
        const int last = i + nr - 1;   // index of the final line of the repeat

        // placeholder text: 20-character prefix of line i plus one dot per repeated line
        stringstream x;
        x << string(v[i], 0, 20);
        x << string(static_cast<size_t>(nr), '.');

        stringstream note;
        if( nr == 1 ) note << "repeat last line";
        else note << "repeats last " << nr << " lines; ends " << string(v[last]+4,0,15);

        s << a("", note.str(), clean(v,i,x.str()));
        s << '\n';

        i = last;   // skip the lines that were just collapsed
    }

    s << "</pre>\n";
}
|
|
|
|
/* Appends a status summary of the set to b:
     set     : the set name
     date    : time(0)
     myState : _myState
     members : array with one document for this server ({name, self:true})
               followed by one document per member in _members
               ({name, health, uptime, lastHeartbeat, errmsg})
   "uptime" is seconds since upSince(), or 0 when upSince() is 0.

   NOTE(review): appendDate() is handed time_t values (seconds); confirm that
   this is the unit BSONObjBuilder::appendDate expects.
*/
void ReplSet::summarizeStatus(BSONObjBuilder& b) const {
    vector<BSONObj> memberDocs;

    // add self
    {
        HostAndPort me(getHostName(), cmdLine.port);
        memberDocs.push_back( BSON( "name" << me.toString() << "self" << true ) );
    }

    for( Member *m = _members.head(); m; m = m->next() ) {
        BSONObjBuilder bb;
        bb.append("name", m->fullName());
        bb.append("health", m->health());
        bb.append("uptime", (unsigned) (m->upSince() ? (time(0)-m->upSince()) : 0));
        bb.appendDate("lastHeartbeat", m->lastHeartbeat());
        bb.append("errmsg", m->_lastHeartbeatErrMsg.get());
        memberDocs.push_back(bb.obj());
    }

    b.append("set", getName());
    b.appendDate("date", time(0));
    b.append("myState", _myState);
    b.append("members", memberDocs);
}
|
|
|
|
void ReplSet::startHealthThreads() {
|
|
Member* m = _members.head();
|
|
while( m ) {
|
|
FeedbackThread *f = new FeedbackThread();
|
|
f->m = m;
|
|
f->go();
|
|
m = m->next();
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
/* todo:
|
|
stop bg job and delete on removefromset
|
|
*/
|