// @file dur_journal.cpp writing to the writeahead logging journal /** * Copyright (C) 2010 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "pch.h" #include "client.h" #include "namespace.h" #include "dur_journal.h" #include "dur_journalformat.h" #include "dur_stats.h" #include "../util/logfile.h" #include "../util/timer.h" #include "../util/alignedbuilder.h" #include "../util/message.h" // getelapsedtimemillis #include #undef assert #define assert MONGO_assert #include "../util/mongoutils/str.h" #include "dur_journalimpl.h" using namespace mongoutils; namespace mongo { class AlignedBuilder; namespace dur { BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 ); BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 12 ); BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 ); BOOST_STATIC_ASSERT( sizeof(JEntry) == 12 ); BOOST_STATIC_ASSERT( sizeof(LSNFile) == 88 ); filesystem::path getJournalDir() { filesystem::path p(dbpath); p /= "journal"; return p; } path lsnPath() { return getJournalDir()/"lsn"; } /** this should be called when something really bad happens so that we can flag appropriately */ void journalingFailure(const char *msg) { /** todo: (1) don't log too much (2) make an indicator in the journal dir that something bad happened. (2b) refuse to do a recovery startup if that is there without manual override. */ log() << "journaling error " << msg << endl; assert(false); } JHeader::JHeader(string fname) { magic[0] = 'j'; magic[1] = '\n'; _version = CurrentVersion; memset(ts, 0, sizeof(ts)); strncpy(ts, time_t_to_String_short(time(0)).c_str(), sizeof(ts)-1); memset(dbpath, 0, sizeof(dbpath)); strncpy(dbpath, fname.c_str(), sizeof(dbpath)-1); memset(reserved3, 0, sizeof(reserved3)); txt2[0] = txt2[1] = '\n'; n1 = n2 = n3 = n4 = '\n'; } // class Journal Journal j; Journal::Journal() : toUnlink(*(new MVar)), /* freeing MVar at program termination would be problematic */ toStoreLastSeqNum(*(new MVar)), _curLogFileMutex("JournalLfMutex") { _written = 0; _nextFileNumber = 0; _curLogFile = 0; } path Journal::getFilePathFor(int filenumber) const { filesystem::path p(dir); p /= string(str::stream() << "j._" << filenumber); return p; } bool Journal::tryToCloseCurJournalFile() { mutex::try_lock lk(_curLogFileMutex, 2000); if( lk.ok ) { closeCurrentJournalFile(); } return lk.ok; } /** never throws @return true if journal dir is not emptya */ bool haveJournalFiles() { try { for ( boost::filesystem::directory_iterator i( getJournalDir() ); i != boost::filesystem::directory_iterator(); ++i ) { string fileName = boost::filesystem::path(*i).leaf(); if( str::startsWith(fileName, "j._") ) return true; } } catch(...) { } return false; } /** throws */ void removeJournalFiles() { log() << "removeJournalFiles" << endl; try { for ( boost::filesystem::directory_iterator i( getJournalDir() ); i != boost::filesystem::directory_iterator(); ++i ) { string fileName = boost::filesystem::path(*i).leaf(); if( str::startsWith(fileName, "j._") ) { try { boost::filesystem::remove(*i); } catch(std::exception& e) { log() << "couldn't remove " << fileName << ' ' << e.what() << endl; } } } try { boost::filesystem::remove(lsnPath()); } catch(...) { log() << "couldn't remove " << lsnPath().string() << endl; } } catch( std::exception& e ) { log() << "error removing journal files " << e.what() << endl; throw; } log() << "removeJournalFiles end" << endl; // temp } /** at clean shutdown */ bool okToCleanUp = false; // failed recovery would set this to false void journalCleanup() { if( testIntent ) return; if( !okToCleanUp ) return; if( !j.tryToCloseCurJournalFile() ) { return; } try { removeJournalFiles(); } catch(std::exception& e) { log() << "error couldn't remove journal file during shutdown " << e.what() << endl; } } /** assure journal/ dir exists. throws. call during startup. */ void journalMakeDir() { j.init(); filesystem::path p = getJournalDir(); j.dir = p.string(); DEV log() << "dev journalMakeDir() " << j.dir << endl; if( !exists(j.dir) ) { try { create_directory(j.dir); } catch(std::exception& e) { log() << "error creating directory " << j.dir << ' ' << e.what() << endl; throw; } } } void Journal::_open() { assert( _curLogFile == 0 ); string fname = getFilePathFor(_nextFileNumber).string(); _curLogFile = new LogFile(fname); _nextFileNumber++; { JHeader h(fname); AlignedBuilder b(8192); b.appendStruct(h); _curLogFile->synchronousAppend(b.buf(), b.len()); } } void Journal::init() { assert( _curLogFile == 0 ); MongoFile::notifyPreFlush = preFlush; MongoFile::notifyPostFlush = postFlush; } void Journal::open() { assert( MongoFile::notifyPreFlush == preFlush ); mutex::scoped_lock lk(_curLogFileMutex); _open(); } /** background removal of old journal files */ void unlinkThread() { Client::initThread("unlink"); while( 1 ) { path p = j.toUnlink.take(); try { remove(p); } catch(std::exception& e) { log() << "error unlink of journal file " << p.string() << " failed " << e.what() << endl; } } } void LSNFile::set(unsigned long long x) { lsn = x; checkbytes = ~x; } /** logs details of the situation, and returns 0, if anything surprising in the LSNFile if something highly surprising, throws to abort */ unsigned long long LSNFile::get() { uassert(13614, "unexpected version number of lsn file in journal/ directory", ver == 0); if( ~lsn != checkbytes ) { log() << "lsnfile not valid. recovery will be from log start. lsn: " << hex << lsn << " checkbytes: " << hex << checkbytes << endl; return 0; } return lsn; } /** called during recovery (the error message text below assumes that) */ unsigned long long journalReadLSN() { if( !debug ) { // in nondebug build, for now, be conservative until more tests written, and apply the whole journal. // however we will still write the lsn file to exercise that code, and use in _DEBUG build. return 0; } if( !MemoryMappedFile::exists(lsnPath()) ) { log() << "info no lsn file in journal/ directory" << endl; return 0; } try { // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery. // however, given we actually close the file when writing, that seems unlikely. MemoryMappedFile f; LSNFile *L = static_cast(f.map(lsnPath().string().c_str())); assert(L); unsigned long long lsn = L->get(); return lsn; } catch(std::exception& e) { uasserted(13611, str::stream() << "can't read lsn file in journal directory : " << e.what()); } return 0; } /** remember "last sequence number" to speed recoveries */ void lsnThread() { Client::initThread("lsn"); time_t last = 0; while( 1 ) { unsigned long long lsn = j.toStoreLastSeqNum.take(); // if you are on a really fast fsync interval, we don't write this as often if( time(0) - last < 5 ) continue; last = time(0); try { // os can flush as it likes. if it flushes slowly, we will just do extra work on recovery. // however, given we actually close the file, that seems unlikely. MemoryMappedFile f; unsigned long long length = sizeof(LSNFile); LSNFile *lsnf = static_cast( f.map(lsnPath().string().c_str(), length) ); assert(lsnf); lsnf->set(lsn); } catch(std::exception& e) { log() << "write to lsn file fails " << e.what() << endl; } } } void Journal::preFlush() { j._preFlushTime = Listener::getElapsedTimeMillis(); } void Journal::postFlush() { j._lastFlushTime = j._preFlushTime; j.toStoreLastSeqNum.tryPut( j._lastFlushTime ); } // call from within _curLogFileMutex void Journal::closeCurrentJournalFile() { assert(_curLogFile); JFile jf; jf.filename = _curLogFile->_name; jf.lastEventTimeMs = Listener::getElapsedTimeMillis(); delete _curLogFile; // close _curLogFile = 0; _written = 0; } /** remove older journal files. be in mutex when calling */ void Journal::removeUnneededJournalFiles() { while( !_oldJournalFiles.empty() ) { JFile f = _oldJournalFiles.front(); if( f.lastEventTimeMs < _lastFlushTime + ExtraKeepTimeMs ) { // eligible for deletion path p( f.filename ); log() << "old journal file will be removed: " << f.filename << endl; // we do the unlink in a separate thread unless for some reason unlinks are backlogging if( !j.toUnlink.tryPut(p) ) { /* DR___ for durability error and warning codes Compare to RS___ for replica sets */ log() << "DR100 latency warning on journal unlink " << endl; Timer t; j.toUnlink.put(p); log() << "toUnlink.put(" << f.filename << ") " << t.millis() << "ms" << endl; } } else { break; } _oldJournalFiles.pop_front(); } } /** check if time to rotate files. assure a file is open. done separately from the journal() call as we can do this part outside of lock. thread: durThread() */ void journalRotate() { j.rotate(); } void Journal::rotate() { if( _curLogFile && _written < DataLimit ) return; scoped_lock lk(_curLogFileMutex); if( _curLogFile && _written < DataLimit ) return; if( _curLogFile ) { closeCurrentJournalFile(); removeUnneededJournalFiles(); } try { Timer t; _open(); int ms = t.millis(); if( ms >= 200 ) { log() << "DR101 latency warning on journal file open " << ms << "ms" << endl; } } catch(std::exception& e) { log() << "warning exception opening journal file " << e.what() << endl; } } /** write to journal thread: durThread() */ void journal(const AlignedBuilder& b) { j.journal(b); } void Journal::journal(const AlignedBuilder& b) { try { mutex::scoped_lock lk(_curLogFileMutex); if( _curLogFile == 0 ) open(); stats.curr._journaledBytes += b.len(); _written += b.len(); _curLogFile->synchronousAppend((void *) b.buf(), b.len()); } catch(std::exception& e) { log() << "warning exception in dur::journal " << e.what() << endl; } } } } /* todo test (and handle) disk full on journal append. best quick thing to do is to terminate. if we roll back operations, there are nuances such as is ReplSetImpl::lastOpTimeWritten too new in ram then? */