425 lines
14 KiB
C++
425 lines
14 KiB
C++
// @file dur_journal.cpp writing to the writeahead logging journal
|
|
|
|
/**
|
|
* Copyright (C) 2010 10gen Inc.
|
|
*
|
|
* This program is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License, version 3,
|
|
* as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Affero General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Affero General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "pch.h"
|
|
#include "client.h"
|
|
#include "namespace.h"
|
|
#include "dur_journal.h"
|
|
#include "dur_journalformat.h"
|
|
#include "dur_stats.h"
|
|
#include "../util/logfile.h"
|
|
#include "../util/timer.h"
|
|
#include "../util/alignedbuilder.h"
|
|
#include "../util/message.h" // getelapsedtimemillis
|
|
#include <boost/static_assert.hpp>
|
|
#undef assert
|
|
#define assert MONGO_assert
|
|
#include "../util/mongoutils/str.h"
|
|
#include "dur_journalimpl.h"
|
|
|
|
using namespace mongoutils;
|
|
|
|
namespace mongo {
|
|
|
|
class AlignedBuilder;
|
|
|
|
namespace dur {
|
|
BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 );
|
|
BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 12 );
|
|
BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 );
|
|
BOOST_STATIC_ASSERT( sizeof(JEntry) == 12 );
|
|
BOOST_STATIC_ASSERT( sizeof(LSNFile) == 88 );
|
|
|
|
filesystem::path getJournalDir() {
|
|
filesystem::path p(dbpath);
|
|
p /= "journal";
|
|
return p;
|
|
}
|
|
|
|
path lsnPath() {
|
|
return getJournalDir()/"lsn";
|
|
}
|
|
|
|
/** this should be called when something really bad happens so that we can flag appropriately
|
|
*/
|
|
void journalingFailure(const char *msg) {
|
|
/** todo:
|
|
(1) don't log too much
|
|
(2) make an indicator in the journal dir that something bad happened.
|
|
(2b) refuse to do a recovery startup if that is there without manual override.
|
|
*/
|
|
log() << "journaling error " << msg << endl;
|
|
assert(false);
|
|
}
|
|
|
|
JHeader::JHeader(string fname) {
|
|
magic[0] = 'j'; magic[1] = '\n';
|
|
_version = CurrentVersion;
|
|
memset(ts, 0, sizeof(ts));
|
|
strncpy(ts, time_t_to_String_short(time(0)).c_str(), sizeof(ts)-1);
|
|
memset(dbpath, 0, sizeof(dbpath));
|
|
strncpy(dbpath, fname.c_str(), sizeof(dbpath)-1);
|
|
memset(reserved3, 0, sizeof(reserved3));
|
|
txt2[0] = txt2[1] = '\n';
|
|
n1 = n2 = n3 = n4 = '\n';
|
|
}
|
|
|
|
// class Journal
|
|
|
|
Journal j;
|
|
|
|
Journal::Journal() :
|
|
toUnlink(*(new MVar<path>)), /* freeing MVar at program termination would be problematic */
|
|
toStoreLastSeqNum(*(new MVar<unsigned long long>)),
|
|
_curLogFileMutex("JournalLfMutex")
|
|
{
|
|
_written = 0;
|
|
_nextFileNumber = 0;
|
|
_curLogFile = 0;
|
|
}
|
|
|
|
path Journal::getFilePathFor(int filenumber) const {
|
|
filesystem::path p(dir);
|
|
p /= string(str::stream() << "j._" << filenumber);
|
|
return p;
|
|
}
|
|
|
|
bool Journal::tryToCloseCurJournalFile() {
|
|
mutex::try_lock lk(_curLogFileMutex, 2000);
|
|
if( lk.ok ) {
|
|
closeCurrentJournalFile();
|
|
}
|
|
return lk.ok;
|
|
}
|
|
|
|
/** never throws
|
|
@return true if journal dir is not emptya
|
|
*/
|
|
bool haveJournalFiles() {
|
|
try {
|
|
for ( boost::filesystem::directory_iterator i( getJournalDir() );
|
|
i != boost::filesystem::directory_iterator();
|
|
++i ) {
|
|
string fileName = boost::filesystem::path(*i).leaf();
|
|
if( str::startsWith(fileName, "j._") )
|
|
return true;
|
|
}
|
|
}
|
|
catch(...) { }
|
|
return false;
|
|
}
|
|
|
|
/** throws */
|
|
void removeJournalFiles() {
|
|
log() << "removeJournalFiles" << endl;
|
|
try {
|
|
for ( boost::filesystem::directory_iterator i( getJournalDir() );
|
|
i != boost::filesystem::directory_iterator();
|
|
++i ) {
|
|
string fileName = boost::filesystem::path(*i).leaf();
|
|
if( str::startsWith(fileName, "j._") ) {
|
|
try {
|
|
boost::filesystem::remove(*i);
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
|
|
}
|
|
}
|
|
}
|
|
try {
|
|
boost::filesystem::remove(lsnPath());
|
|
}
|
|
catch(...) {
|
|
log() << "couldn't remove " << lsnPath().string() << endl;
|
|
}
|
|
}
|
|
catch( std::exception& e ) {
|
|
log() << "error removing journal files " << e.what() << endl;
|
|
throw;
|
|
}
|
|
log() << "removeJournalFiles end" << endl; // temp
|
|
}
|
|
|
|
/** at clean shutdown */
|
|
bool okToCleanUp = false; // failed recovery would set this to false
|
|
void journalCleanup() {
|
|
if( testIntent )
|
|
return;
|
|
if( !okToCleanUp )
|
|
return;
|
|
if( !j.tryToCloseCurJournalFile() ) {
|
|
return;
|
|
}
|
|
try {
|
|
removeJournalFiles();
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "error couldn't remove journal file during shutdown " << e.what() << endl;
|
|
}
|
|
}
|
|
|
|
/** assure journal/ dir exists. throws. call during startup. */
|
|
void journalMakeDir() {
|
|
j.init();
|
|
|
|
filesystem::path p = getJournalDir();
|
|
j.dir = p.string();
|
|
DEV log() << "dev journalMakeDir() " << j.dir << endl;
|
|
if( !exists(j.dir) ) {
|
|
try {
|
|
create_directory(j.dir);
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
|
|
throw;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Journal::_open() {
|
|
assert( _curLogFile == 0 );
|
|
string fname = getFilePathFor(_nextFileNumber).string();
|
|
_curLogFile = new LogFile(fname);
|
|
_nextFileNumber++;
|
|
{
|
|
JHeader h(fname);
|
|
AlignedBuilder b(8192);
|
|
b.appendStruct(h);
|
|
_curLogFile->synchronousAppend(b.buf(), b.len());
|
|
}
|
|
}
|
|
|
|
void Journal::init() {
|
|
assert( _curLogFile == 0 );
|
|
MongoFile::notifyPreFlush = preFlush;
|
|
MongoFile::notifyPostFlush = postFlush;
|
|
}
|
|
|
|
void Journal::open() {
|
|
assert( MongoFile::notifyPreFlush == preFlush );
|
|
mutex::scoped_lock lk(_curLogFileMutex);
|
|
_open();
|
|
}
|
|
|
|
/** background removal of old journal files */
|
|
void unlinkThread() {
|
|
Client::initThread("unlink");
|
|
while( 1 ) {
|
|
path p = j.toUnlink.take();
|
|
try {
|
|
remove(p);
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "error unlink of journal file " << p.string() << " failed " << e.what() << endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void LSNFile::set(unsigned long long x) {
|
|
lsn = x;
|
|
checkbytes = ~x;
|
|
}
|
|
|
|
/** logs details of the situation, and returns 0, if anything surprising in the LSNFile
|
|
if something highly surprising, throws to abort
|
|
*/
|
|
unsigned long long LSNFile::get() {
|
|
uassert(13614, "unexpected version number of lsn file in journal/ directory", ver == 0);
|
|
if( ~lsn != checkbytes ) {
|
|
log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn << " checkbytes: " << hex << checkbytes << endl;
|
|
return 0;
|
|
}
|
|
return lsn;
|
|
}
|
|
|
|
/** called during recovery (the error message text below assumes that)
|
|
*/
|
|
unsigned long long journalReadLSN() {
|
|
if( !debug ) {
|
|
// in nondebug build, for now, be conservative until more tests written, and apply the whole journal.
|
|
// however we will still write the lsn file to exercise that code, and use in _DEBUG build.
|
|
return 0;
|
|
}
|
|
|
|
if( !MemoryMappedFile::exists(lsnPath()) ) {
|
|
log() << "info no lsn file in journal/ directory" << endl;
|
|
return 0;
|
|
}
|
|
|
|
try {
|
|
// os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
|
|
// however, given we actually close the file when writing, that seems unlikely.
|
|
MemoryMappedFile f;
|
|
LSNFile *L = static_cast<LSNFile*>(f.map(lsnPath().string().c_str()));
|
|
assert(L);
|
|
unsigned long long lsn = L->get();
|
|
return lsn;
|
|
}
|
|
catch(std::exception& e) {
|
|
uasserted(13611, str::stream() << "can't read lsn file in journal directory : " << e.what());
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/** remember "last sequence number" to speed recoveries */
|
|
void lsnThread() {
|
|
Client::initThread("lsn");
|
|
|
|
time_t last = 0;
|
|
while( 1 ) {
|
|
unsigned long long lsn = j.toStoreLastSeqNum.take();
|
|
|
|
// if you are on a really fast fsync interval, we don't write this as often
|
|
if( time(0) - last < 5 )
|
|
continue;
|
|
|
|
last = time(0);
|
|
|
|
try {
|
|
// os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
|
|
// however, given we actually close the file, that seems unlikely.
|
|
MemoryMappedFile f;
|
|
unsigned long long length = sizeof(LSNFile);
|
|
LSNFile *lsnf = static_cast<LSNFile*>( f.map(lsnPath().string().c_str(), length) );
|
|
assert(lsnf);
|
|
lsnf->set(lsn);
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "write to lsn file fails " << e.what() << endl;
|
|
}
|
|
}
|
|
}
|
|
|
|
void Journal::preFlush() {
|
|
j._preFlushTime = Listener::getElapsedTimeMillis();
|
|
}
|
|
|
|
void Journal::postFlush() {
|
|
j._lastFlushTime = j._preFlushTime;
|
|
j.toStoreLastSeqNum.tryPut( j._lastFlushTime );
|
|
}
|
|
|
|
// call from within _curLogFileMutex
|
|
void Journal::closeCurrentJournalFile() {
|
|
assert(_curLogFile);
|
|
|
|
JFile jf;
|
|
jf.filename = _curLogFile->_name;
|
|
jf.lastEventTimeMs = Listener::getElapsedTimeMillis();
|
|
|
|
delete _curLogFile; // close
|
|
_curLogFile = 0;
|
|
_written = 0;
|
|
}
|
|
|
|
/** remove older journal files.
|
|
be in mutex when calling
|
|
*/
|
|
void Journal::removeUnneededJournalFiles() {
|
|
while( !_oldJournalFiles.empty() ) {
|
|
JFile f = _oldJournalFiles.front();
|
|
|
|
if( f.lastEventTimeMs < _lastFlushTime + ExtraKeepTimeMs ) {
|
|
// eligible for deletion
|
|
path p( f.filename );
|
|
log() << "old journal file will be removed: " << f.filename << endl;
|
|
|
|
// we do the unlink in a separate thread unless for some reason unlinks are backlogging
|
|
if( !j.toUnlink.tryPut(p) ) {
|
|
/* DR___ for durability error and warning codes
|
|
Compare to RS___ for replica sets
|
|
*/
|
|
log() << "DR100 latency warning on journal unlink " << endl;
|
|
Timer t;
|
|
j.toUnlink.put(p);
|
|
log() << "toUnlink.put(" << f.filename << ") " << t.millis() << "ms" << endl;
|
|
}
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
|
|
_oldJournalFiles.pop_front();
|
|
}
|
|
}
|
|
|
|
/** check if time to rotate files. assure a file is open.
|
|
done separately from the journal() call as we can do this part
|
|
outside of lock.
|
|
thread: durThread()
|
|
*/
|
|
void journalRotate() {
|
|
j.rotate();
|
|
}
|
|
void Journal::rotate() {
|
|
if( _curLogFile && _written < DataLimit )
|
|
return;
|
|
|
|
scoped_lock lk(_curLogFileMutex);
|
|
if( _curLogFile && _written < DataLimit )
|
|
return;
|
|
|
|
if( _curLogFile ) {
|
|
|
|
closeCurrentJournalFile();
|
|
|
|
removeUnneededJournalFiles();
|
|
}
|
|
|
|
try {
|
|
Timer t;
|
|
_open();
|
|
int ms = t.millis();
|
|
if( ms >= 200 ) {
|
|
log() << "DR101 latency warning on journal file open " << ms << "ms" << endl;
|
|
}
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "warning exception opening journal file " << e.what() << endl;
|
|
}
|
|
}
|
|
|
|
/** write to journal
|
|
thread: durThread()
|
|
*/
|
|
void journal(const AlignedBuilder& b) {
|
|
j.journal(b);
|
|
}
|
|
void Journal::journal(const AlignedBuilder& b) {
|
|
try {
|
|
mutex::scoped_lock lk(_curLogFileMutex);
|
|
if( _curLogFile == 0 )
|
|
open();
|
|
stats.curr._journaledBytes += b.len();
|
|
_written += b.len();
|
|
_curLogFile->synchronousAppend((void *) b.buf(), b.len());
|
|
}
|
|
catch(std::exception& e) {
|
|
log() << "warning exception in dur::journal " << e.what() << endl;
|
|
}
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
/* todo
|
|
test (and handle) disk full on journal append. best quick thing to do is to terminate.
|
|
if we roll back operations, there are nuances such as is ReplSetImpl::lastOpTimeWritten too new in ram then?
|
|
*/
|