Files
mongo/db/dur_journal.cpp

425 lines
14 KiB
C++
Raw Normal View History

// @file dur_journal.cpp writing to the writeahead logging journal
2010-11-13 13:04:48 -05:00
2010-11-04 17:43:02 -04:00
/**
* Copyright (C) 2010 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "pch.h"
2010-11-14 22:28:04 -05:00
#include "client.h"
#include "namespace.h"
2010-11-13 17:42:41 -05:00
#include "dur_journal.h"
2010-11-26 18:18:24 -05:00
#include "dur_journalformat.h"
2010-12-17 12:13:50 -05:00
#include "dur_stats.h"
2010-11-08 15:22:03 -05:00
#include "../util/logfile.h"
2010-11-14 22:28:04 -05:00
#include "../util/timer.h"
2010-12-20 09:28:19 -05:00
#include "../util/alignedbuilder.h"
#include "../util/message.h" // getelapsedtimemillis
2010-11-13 17:42:41 -05:00
#include <boost/static_assert.hpp>
#undef assert
#define assert MONGO_assert
2010-11-14 22:28:04 -05:00
#include "../util/mongoutils/str.h"
2010-12-19 14:15:32 -05:00
#include "dur_journalimpl.h"
2010-11-08 15:22:03 -05:00
using namespace mongoutils;
2010-11-04 17:43:02 -04:00
namespace mongo {
2010-11-14 22:28:04 -05:00
2010-12-20 09:28:19 -05:00
class AlignedBuilder;
2010-11-15 22:13:48 -05:00
2010-11-04 17:43:02 -04:00
namespace dur {
2010-11-14 22:28:04 -05:00
// Compile-time size checks for the journal format structures: a change to any of
// these sizes must be a deliberate one.
BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 );
BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 12 );
BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 );
BOOST_STATIC_ASSERT( sizeof(JEntry) == 12 );
BOOST_STATIC_ASSERT( sizeof(LSNFile) == 88 );
2010-11-14 22:28:04 -05:00
2010-11-23 14:13:46 -05:00
/** @return the journal directory: the "journal" entry directly under dbpath */
filesystem::path getJournalDir() {
    return filesystem::path(dbpath) / "journal";
}
2010-12-20 22:49:37 -05:00
/** @return path of the "lsn" file, which lives inside the journal directory */
path lsnPath() {
    path lsn = getJournalDir();
    lsn /= "lsn";
    return lsn;
}
2010-11-23 14:13:46 -05:00
/** this should be called when something really bad happens so that we can flag appropriately
*/
2010-11-14 22:28:04 -05:00
void journalingFailure(const char *msg) {
/** todo:
(1) don't log too much
(2) make an indicator in the journal dir that something bad happened.
2010-11-26 18:18:24 -05:00
(2b) refuse to do a recovery startup if that is there without manual override.
2010-11-14 22:28:04 -05:00
*/
log() << "journaling error " << msg << endl;
2010-11-26 18:18:24 -05:00
assert(false);
2010-11-14 22:28:04 -05:00
}
2010-11-24 22:19:31 -05:00
/** Fill in a journal file header.
    @param fname stored (truncated if necessary) in the header's dbpath field
*/
JHeader::JHeader(string fname) {
    magic[0] = 'j';
    magic[1] = '\n';
    _version = CurrentVersion;

    // creation time as text. the buffer is zeroed first and at most sizeof-1 bytes
    // are copied, so the field is always NUL terminated.
    const string stamp = time_t_to_String_short(time(0));
    memset(ts, 0, sizeof(ts));
    strncpy(ts, stamp.c_str(), sizeof(ts)-1);

    // same zero-then-copy pattern for the name
    memset(dbpath, 0, sizeof(dbpath));
    strncpy(dbpath, fname.c_str(), sizeof(dbpath)-1);

    memset(reserved3, 0, sizeof(reserved3));

    // newline filler fields
    txt2[0] = '\n';
    txt2[1] = '\n';
    n1 = n2 = n3 = n4 = '\n';
}
2010-12-19 14:15:32 -05:00
// class Journal
2010-11-14 22:28:04 -05:00
Journal j;
2010-11-14 22:28:04 -05:00
2010-12-19 14:15:32 -05:00
Journal::Journal() :
toUnlink(*(new MVar<path>)), /* freeing MVar at program termination would be problematic */
2010-12-20 22:49:37 -05:00
toStoreLastSeqNum(*(new MVar<unsigned long long>)),
2010-12-19 14:15:32 -05:00
_curLogFileMutex("JournalLfMutex")
{
_written = 0;
_nextFileNumber = 0;
_curLogFile = 0;
}
2010-11-14 22:28:04 -05:00
/** @param filenumber sequence number of the journal file
    @return path of that journal file: the entry named "j._" followed by filenumber, inside dir
*/
path Journal::getFilePathFor(int filenumber) const {
    const string leaf( str::stream() << "j._" << filenumber );
    return filesystem::path(dir) / leaf;
}
bool Journal::tryToCloseCurJournalFile() {
2010-12-19 14:15:32 -05:00
mutex::try_lock lk(_curLogFileMutex, 2000);
if( lk.ok ) {
closeCurrentJournalFile();
2010-12-19 14:15:32 -05:00
}
return lk.ok;
}
/** never throws
@return true if journal dir is not emptya
*/
bool haveJournalFiles() {
try {
for ( boost::filesystem::directory_iterator i( getJournalDir() );
i != boost::filesystem::directory_iterator();
++i ) {
string fileName = boost::filesystem::path(*i).leaf();
if( str::startsWith(fileName, "j._") )
return true;
}
}
catch(...) { }
return false;
}
2010-11-23 10:12:31 -05:00
/** throws */
void removeJournalFiles() {
2010-12-24 12:56:25 -05:00
log() << "removeJournalFiles" << endl;
2010-11-23 14:13:46 -05:00
try {
for ( boost::filesystem::directory_iterator i( getJournalDir() );
i != boost::filesystem::directory_iterator();
++i ) {
string fileName = boost::filesystem::path(*i).leaf();
if( str::startsWith(fileName, "j._") ) {
try {
boost::filesystem::remove(*i);
}
catch(std::exception& e) {
log() << "couldn't remove " << fileName << ' ' << e.what() << endl;
}
2010-11-17 22:31:38 -05:00
}
}
2010-12-24 12:56:25 -05:00
try {
boost::filesystem::remove(lsnPath());
}
catch(...) {
log() << "couldn't remove " << lsnPath().string() << endl;
}
2010-11-17 22:31:38 -05:00
}
2010-11-23 14:13:46 -05:00
catch( std::exception& e ) {
log() << "error removing journal files " << e.what() << endl;
throw;
}
2010-12-24 12:56:25 -05:00
log() << "removeJournalFiles end" << endl; // temp
2010-11-17 22:31:38 -05:00
}
2010-11-23 10:12:31 -05:00
/** at clean shutdown */
2010-11-25 11:09:18 -05:00
bool okToCleanUp = false; // failed recovery would set this to false
2010-11-23 10:12:31 -05:00
void journalCleanup() {
2010-11-27 15:37:16 -05:00
if( testIntent )
return;
2010-11-25 11:09:18 -05:00
if( !okToCleanUp )
return;
if( !j.tryToCloseCurJournalFile() ) {
2010-11-23 10:12:31 -05:00
return;
}
try {
removeJournalFiles();
}
catch(std::exception& e) {
log() << "error couldn't remove journal file during shutdown " << e.what() << endl;
}
}
/** assure journal/ dir exists. throws. call during startup. */
2010-11-23 10:12:31 -05:00
void journalMakeDir() {
j.init();
2010-11-23 10:12:31 -05:00
filesystem::path p = getJournalDir();
2010-11-14 22:28:04 -05:00
j.dir = p.string();
DEV log() << "dev journalMakeDir() " << j.dir << endl;
2010-11-14 22:28:04 -05:00
if( !exists(j.dir) ) {
try {
create_directory(j.dir);
}
catch(std::exception& e) {
log() << "error creating directory " << j.dir << ' ' << e.what() << endl;
throw;
}
}
}
2010-11-14 22:28:04 -05:00
2010-11-24 00:42:19 -05:00
void Journal::_open() {
2010-12-19 14:05:08 -05:00
assert( _curLogFile == 0 );
2010-11-24 22:19:31 -05:00
string fname = getFilePathFor(_nextFileNumber).string();
2010-12-19 14:05:08 -05:00
_curLogFile = new LogFile(fname);
2010-11-24 22:19:31 -05:00
_nextFileNumber++;
2010-11-14 22:28:04 -05:00
{
JHeader h(fname);
2010-11-15 23:05:39 -05:00
AlignedBuilder b(8192);
b.appendStruct(h);
2010-12-19 14:05:08 -05:00
_curLogFile->synchronousAppend(b.buf(), b.len());
2010-11-14 22:28:04 -05:00
}
}
/** One-time setup of the journal: installs preFlush / postFlush as the MongoFile
    flush notification hooks. Asserts that no journal file is open yet.
    (open() asserts that the preFlush hook installed here is in place.)
*/
void Journal::init() {
assert( _curLogFile == 0 );
MongoFile::notifyPreFlush = preFlush;
MongoFile::notifyPostFlush = postFlush;
}
2010-11-27 15:25:08 -05:00
void Journal::open() {
assert( MongoFile::notifyPreFlush == preFlush );
2010-12-19 14:05:08 -05:00
mutex::scoped_lock lk(_curLogFileMutex);
2010-11-27 15:25:08 -05:00
_open();
}
/** background removal of old journal files */
2010-11-15 16:03:56 -05:00
void unlinkThread() {
Client::initThread("unlink");
2010-11-14 22:28:04 -05:00
while( 1 ) {
2010-11-16 10:22:26 -05:00
path p = j.toUnlink.take();
2010-11-14 22:28:04 -05:00
try {
remove(p);
}
catch(std::exception& e) {
log() << "error unlink of journal file " << p.string() << " failed " << e.what() << endl;
}
}
}
2010-12-23 14:51:18 -05:00
/** Store x as the last sequence number together with its bitwise complement in
    checkbytes; get() uses the complement to verify the stored value.
*/
void LSNFile::set(unsigned long long x) {
lsn = x;
checkbytes = ~x;
}
/** logs details of the situation, and returns 0, if anything surprising in the LSNFile
if something highly surprising, throws to abort
*/
unsigned long long LSNFile::get() {
2010-12-23 15:15:40 -05:00
uassert(13614, "unexpected version number of lsn file in journal/ directory", ver == 0);
2010-12-23 14:51:18 -05:00
if( ~lsn != checkbytes ) {
log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn << " checkbytes: " << hex << checkbytes << endl;
return 0;
}
return lsn;
}
/** called during recovery (the error message text below assumes that)
2010-12-22 14:23:36 -05:00
*/
2010-12-20 22:49:37 -05:00
unsigned long long journalReadLSN() {
if( !debug ) {
// in nondebug build, for now, be conservative until more tests written, and apply the whole journal.
// however we will still write the lsn file to exercise that code, and use in _DEBUG build.
return 0;
}
2010-12-22 14:23:36 -05:00
if( !MemoryMappedFile::exists(lsnPath()) ) {
log() << "info no lsn file in journal/ directory" << endl;
return 0;
}
2010-12-20 22:49:37 -05:00
try {
// os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
2010-12-23 14:51:18 -05:00
// however, given we actually close the file when writing, that seems unlikely.
2010-12-20 22:49:37 -05:00
MemoryMappedFile f;
2010-12-23 14:51:18 -05:00
LSNFile *L = static_cast<LSNFile*>(f.map(lsnPath().string().c_str()));
2010-12-20 22:49:37 -05:00
assert(L);
unsigned long long lsn = L->get();
return lsn;
2010-12-20 22:49:37 -05:00
}
catch(std::exception& e) {
2010-12-22 14:23:36 -05:00
uasserted(13611, str::stream() << "can't read lsn file in journal directory : " << e.what());
2010-12-20 22:49:37 -05:00
}
return 0;
}
/** remember "last sequence number" to speed recoveries */
void lsnThread() {
Client::initThread("lsn");
time_t last = 0;
while( 1 ) {
unsigned long long lsn = j.toStoreLastSeqNum.take();
// if you are on a really fast fsync interval, we don't write this as often
if( time(0) - last < 5 )
continue;
last = time(0);
try {
// os can flush as it likes. if it flushes slowly, we will just do extra work on recovery.
// however, given we actually close the file, that seems unlikely.
MemoryMappedFile f;
unsigned long long length = sizeof(LSNFile);
2010-12-23 14:51:18 -05:00
LSNFile *lsnf = static_cast<LSNFile*>( f.map(lsnPath().string().c_str(), length) );
assert(lsnf);
lsnf->set(lsn);
2010-12-20 22:49:37 -05:00
}
catch(std::exception& e) {
log() << "write to lsn file fails " << e.what() << endl;
}
}
}
/** Flush hook, installed as MongoFile::notifyPreFlush by init() (presumably invoked
    just before data files are flushed - confirm in MongoFile).
    Records the current elapsed-time reading in j._preFlushTime.
*/
void Journal::preFlush() {
j._preFlushTime = Listener::getElapsedTimeMillis();
}
/** Flush hook, installed as MongoFile::notifyPostFlush by init().
    Promotes the time recorded by preFlush() to j._lastFlushTime and offers that
    value to the lsn thread. tryPut's result is ignored, so the value is not
    guaranteed to be handed over.
*/
void Journal::postFlush() {
    j._lastFlushTime = j._preFlushTime;
    j.toStoreLastSeqNum.tryPut( j._lastFlushTime );
}
// call from within _curLogFileMutex
void Journal::closeCurrentJournalFile() {
assert(_curLogFile);
JFile jf;
jf.filename = _curLogFile->_name;
jf.lastEventTimeMs = Listener::getElapsedTimeMillis();
delete _curLogFile; // close
_curLogFile = 0;
_written = 0;
}
/** remove older journal files.
    be in mutex when calling.
    Walks _oldJournalFiles from the front and stops at the first file that is not
    yet eligible; each eligible file is handed to the unlink thread.
*/
void Journal::removeUnneededJournalFiles() {
    while( !_oldJournalFiles.empty() ) {
        JFile f = _oldJournalFiles.front();

        // NOTE(review): as written, ExtraKeepTimeMs is added on the flush-time side,
        // which makes files eligible earlier rather than keeping them longer -
        // confirm this is the intended direction.
        if( f.lastEventTimeMs >= _lastFlushTime + ExtraKeepTimeMs )
            break; // not eligible yet; stop at the first such file

        // eligible for deletion
        path p( f.filename );
        log() << "old journal file will be removed: " << f.filename << endl;

        // we do the unlink in a separate thread unless for some reason unlinks are backlogging
        if( !j.toUnlink.tryPut(p) ) {
            /* DR___ for durability error and warning codes
               Compare to RS___ for replica sets
            */
            log() << "DR100 latency warning on journal unlink " << endl;
            Timer t;
            j.toUnlink.put(p);
            log() << "toUnlink.put(" << f.filename << ") " << t.millis() << "ms" << endl;
        }

        _oldJournalFiles.pop_front();
    }
}
2010-11-14 22:28:04 -05:00
/** check if time to rotate files. assure a file is open.
    done separately from the journal() call as we can do this part
    outside of lock.
    thread: durThread()
    Simply forwards to rotate() on the global journal j.
*/
void journalRotate() {
j.rotate();
}
void Journal::rotate() {
2010-12-19 14:05:08 -05:00
if( _curLogFile && _written < DataLimit )
2010-11-17 22:31:38 -05:00
return;
2010-12-19 14:05:08 -05:00
scoped_lock lk(_curLogFileMutex);
if( _curLogFile && _written < DataLimit )
2010-11-14 22:28:04 -05:00
return;
2010-12-19 14:05:08 -05:00
if( _curLogFile ) {
closeCurrentJournalFile();
removeUnneededJournalFiles();
2010-11-14 22:28:04 -05:00
}
try {
Timer t;
2010-11-24 00:42:19 -05:00
_open();
2010-11-14 22:28:04 -05:00
int ms = t.millis();
if( ms >= 200 ) {
log() << "DR101 latency warning on journal file open " << ms << "ms" << endl;
}
}
catch(std::exception& e) {
2010-11-17 13:59:29 -05:00
log() << "warning exception opening journal file " << e.what() << endl;
2010-11-14 22:28:04 -05:00
}
}
2010-11-17 22:31:38 -05:00
/** write to journal
thread: durThread()
2010-11-14 22:28:04 -05:00
*/
2010-12-20 09:28:19 -05:00
void journal(const AlignedBuilder& b) {
2010-11-14 22:28:04 -05:00
j.journal(b);
}
2010-12-20 09:28:19 -05:00
void Journal::journal(const AlignedBuilder& b) {
2010-11-14 22:28:04 -05:00
try {
2010-12-19 14:05:08 -05:00
mutex::scoped_lock lk(_curLogFileMutex);
if( _curLogFile == 0 )
2010-11-14 22:28:04 -05:00
open();
2010-12-17 12:13:50 -05:00
stats.curr._journaledBytes += b.len();
2010-11-24 22:19:31 -05:00
_written += b.len();
2010-12-19 14:05:08 -05:00
_curLogFile->synchronousAppend((void *) b.buf(), b.len());
2010-11-14 22:28:04 -05:00
}
catch(std::exception& e) {
log() << "warning exception in dur::journal " << e.what() << endl;
}
}
2010-11-04 17:43:02 -04:00
}
}
2010-11-14 22:28:04 -05:00
/* todo
test (and handle) disk full on journal append. best quick thing to do is to terminate.
if we roll back operations, there are nuances such as is ReplSetImpl::lastOpTimeWritten too new in ram then?
2010-11-14 22:28:04 -05:00
*/