From 6113b4cd262bb6068141c5eefe60dafaea59beef Mon Sep 17 00:00:00 2001 From: dwight Date: Thu, 24 Feb 2011 13:34:42 -0500 Subject: [PATCH] change to a faster checksum (old was md5). new is less discriminating but fast. also, now includes the JSectHeader in the checksum. --- db/dur_journal.cpp | 26 +++++++++++------------ db/dur_journalformat.h | 2 +- db/dur_preplogbuffer.cpp | 14 ++++++------ db/dur_recover.cpp | 7 ++---- dbtests/perftests.cpp | 41 +++++++++++++++++++++++++++++++++++- dbtests/test.vcxproj | 1 + dbtests/test.vcxproj.filters | 3 +++ util/checksum.h | 37 ++++++++++++++++++++++++++++++++ 8 files changed, 103 insertions(+), 28 deletions(-) create mode 100644 util/checksum.h diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp index 96973c17310..9335868ca4a 100644 --- a/db/dur_journal.cpp +++ b/db/dur_journal.cpp @@ -33,7 +33,7 @@ #include "../util/mongoutils/str.h" #include "dur_journalimpl.h" #include "../util/file.h" -#include "../util/md5.hpp" +#include "../util/checksum.h" using namespace mongoutils; @@ -42,6 +42,7 @@ namespace mongo { class AlignedBuilder; namespace dur { + BOOST_STATIC_ASSERT( sizeof(Checksum) == 16 ); /* Checksum::bytes is copied into the footer's hash field and printed as 16 bytes further down */ BOOST_STATIC_ASSERT( sizeof(JHeader) == 8192 ); BOOST_STATIC_ASSERT( sizeof(JSectHeader) == 20 ); BOOST_STATIC_ASSERT( sizeof(JSectFooter) == 32 ); @@ -81,22 +82,19 @@ namespace mongo { reserved = 0; magic[0] = magic[1] = magic[2] = magic[3] = '\n'; - // skip section header since size modified after hashing - (const char*&)begin += sizeof(JSectHeader); - len -= sizeof(JSectHeader); - - md5(begin, len, hash); + Checksum c; + c.gen(begin, (unsigned) len); /* hashes from the start of the section: the JSectHeader is no longer skipped */ + memcpy(hash, c.bytes, sizeof(hash)); } bool JSectFooter::checkHash(const void* begin, int len) const { - // skip section header since size modified after hashing - // todo: skipping the header must be fixed, as we won't catch corruption of it then... 
- (const char*&)begin += sizeof(JSectHeader); - len -= sizeof(JSectHeader); - md5digest current; - md5(begin, len, current); - DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(current, 16) << endl; - return (memcmp(hash, current, sizeof(hash)) == 0); + Checksum c; + c.gen(begin, len); /* recomputed over the same span the constructor hashed; the int len converts implicitly to gen()'s unsigned parameter (the constructor casts explicitly) */ + DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl; + if( memcmp(hash, c.bytes, sizeof(hash)) == 0 ) + return true; + log() << "dur checkHash mismatch, got: " << toHex(c.bytes, 16) << " expected: " << toHex(hash,16) << endl; /* computed and stored values are logged here via plain log() before failure is reported */ + return false; } JHeader::JHeader(string fname) { diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h index 4b60afa0854..72587ccd7b6 100644 --- a/db/dur_journalformat.h +++ b/db/dur_journalformat.h @@ -34,7 +34,7 @@ namespace mongo { // x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near // that. simply incrementing the version # is safe on a fwd basis. 
- enum { CurrentVersion = 0x4147 }; + enum { CurrentVersion = 0x4148 }; /* bumped from 0x4147 in the same patch that replaces the md5 section hash */ unsigned short _version; // these are just for diagnostic ease (make header more useful as plain text) diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp index 1648e899cb8..c1f6903c088 100644 --- a/db/dur_preplogbuffer.cpp +++ b/db/dur_preplogbuffer.cpp @@ -161,19 +161,19 @@ namespace mongo { prepBasicWrites(bb); } + // pad to alignment, and set the total section length in the JSectHeader + assert( 0xffffe000 == (~(Alignment-1)) ); /* holds only when Alignment is 0x2000 (8192) */ + unsigned lenWillBe = bb.len() + sizeof(JSectFooter); /* section size once the footer has been appended */ + unsigned L = (lenWillBe + Alignment-1) & (~(Alignment-1)); /* lenWillBe rounded up to a multiple of Alignment */ + dassert( L >= lenWillBe ); + *((unsigned*)bb.atOfs(0)) = L; /* stored before the footer is built: the footer is constructed from bb.buf(), so the hash sees the final length */ + { JSectFooter f(bb.buf(), bb.len()); bb.appendStruct(f); } { - // pad to alignment, and set the total section length in the JSectHeader - assert( 0xffffe000 == (~(Alignment-1)) ); - unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); - dassert( L >= (unsigned) bb.len() ); - - *((unsigned*)bb.atOfs(0)) = L; - unsigned padding = L - bb.len(); bb.skip(padding); dassert( bb.len() % Alignment == 0 ); diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp index ae19d7ad871..a42c38b09a0 100644 --- a/db/dur_recover.cpp +++ b/db/dur_recover.cpp @@ -30,10 +30,10 @@ #include "database.h" #include "db.h" #include "../util/unittest.h" +#include "../util/checksum.h" #include "cmdline.h" #include "curop.h" #include "mongommf.h" -#include "../util/md5.hpp" #include #include @@ -120,10 +120,7 @@ namespace mongo { const JSectFooter& footer = *(const JSectFooter*)pos; int len = pos - (char*)_sectHead; if (!footer.checkHash(_sectHead, len)) { - massert(13594, str::stream() << "Journal checksum doesn't match. 
recorded: " - << toHex(footer.hash, sizeof(footer.hash)) - << " actual: " << md5simpledigest(_sectHead, len) - , false); + massert(13594, "dur journal checksum doesn't match", false); /* the recorded and computed values are now logged inside checkHash() instead of being put in this message */ } } return false; // false return value denotes end of section diff --git a/dbtests/perftests.cpp b/dbtests/perftests.cpp index 2ac7d4671cf..7ede0371ca0 100644 --- a/dbtests/perftests.cpp +++ b/dbtests/perftests.cpp @@ -33,6 +33,7 @@ #include "../util/timer.h" #include "dbtests.h" #include "../db/dur_stats.h" +#include "../util/checksum.h" namespace PerfTests { typedef DBDirectClient DBClientType; @@ -64,6 +65,43 @@ namespace PerfTests { }; DBClientType ClientBase::_client; + class Checksum { /* timing plus sanity test for mongo::Checksum::gen over a ~100MB buffer */ + public: + void run() { + { + // the checksum code assumes 'standard' rollover on addition overflows. let's check that: + unsigned long long x = 0xffffffffffffffffUL; + ASSERT( x+2 == 1 ); + } + + unsigned sz = 1024 * 1024 * 100 + 3; /* the +3 leaves a 3-byte tail after the 16-byte groups */ + void *p = calloc(1, sz); /* zero-filled: gen() reads every byte, so the buffer must not be left uninitialised */ + mongo::Checksum last; + for( int i = 0; i < 4; i++ ) { + Timer t; + mongo::Checksum c; + c.gen(p, sz); + cout << "checksum " << t.millis() << "ms" << endl; + ASSERT( i == 0 || c == last ); /* same input must give the same checksum on every pass */ + last = c; + } + { + mongo::Checksum c; + c.gen(p, sz-1); /* one byte shorter must not match */ + ASSERT( c != last ); + ((char *&)p)[0]++; // check same data, different order, doesn't give same checksum + ((char *&)p)[1]--; + c.gen(p, sz); + ASSERT( c != last ); + ((char *&)p)[1]++; // check same data, different order, doesn't give same checksum (different longwords case) + ((char *&)p)[8]--; + c.gen(p, sz); + ASSERT( c != last ); + } + free(p); + } + }; + // todo: use a couple threads. not a very good test yet. 
class TaskQueueTest { static int tot; @@ -110,7 +148,7 @@ namespace PerfTests { virtual void post() { } virtual string name() = 0; virtual unsigned long long expectation() = 0; - virtual int howLongMillis() { return 5000; } + virtual int howLongMillis() { return 5000; } // how long to run test public: void say(unsigned long long n, int ms, string s) { //cout << setw(36) << left << s << ' ' << right << setw(7) << n*1000/ms << "/sec " << setw(4) << ms << "ms" << endl; @@ -325,6 +363,7 @@ namespace PerfTests { } void setupTests() { + add< Checksum >(); /* registers the PerfTests::Checksum class introduced earlier in this file's diff, ahead of TaskQueueTest */ add< TaskQueueTest >(); cout << "stats\t" << "test\trps\ttime\t" diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj index 8ff5aabcd07..dc55dce0c89 100644 --- a/dbtests/test.vcxproj +++ b/dbtests/test.vcxproj @@ -242,6 +242,7 @@ + diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters index 6bbc15918d8..f3b41fb7031 100755 --- a/dbtests/test.vcxproj.filters +++ b/dbtests/test.vcxproj.filters @@ -247,6 +247,9 @@ util\h + + util + diff --git a/util/checksum.h b/util/checksum.h new file mode 100644 index 00000000000..009ab56fbeb --- /dev/null +++ b/util/checksum.h @@ -0,0 +1,37 @@ +#pragma once +#include "../pch.h" +namespace mongo { + /** a simple, rather dumb, but very fast checksum. see perftests.cpp for unit tests. the 16-byte result is built from wrapping 64-bit sums, so it is not collision resistant. 
*/ + struct Checksum { + union { + unsigned char bytes[16]; /* byte view of the same 16 bytes as words[] */ + unsigned long long words[2]; + }; + + // if you change this you must bump dur::CurrentVersion + void gen(const void *buf, unsigned len) { /* fills words[0..1] from len bytes at buf */ + wassert( ((size_t)buf) % 8 == 0 ); // performance warning + unsigned n = len / 8 / 2; /* 8-byte words per half; the two loops below consume the first n*16 bytes */ + const unsigned long long *p = (const unsigned long long *) buf; + unsigned long long a = 0; + for( unsigned i = 0; i < n; i++ ) { + a += (*p ^ i); /* xor with the index makes the sum depend on word order */ + p++; + } + unsigned long long b = 0; + for( unsigned i = 0; i < n; i++ ) { + b += (*p ^ i); + p++; + } + unsigned long long c = 0; + for( unsigned i = n * 2 * 8; i < len; i++ ) { // 0-15 bytes left (len % 16) + c = (c << 8) | ((const signed char *)buf)[i]; /* explicit signed char: plain char has implementation-defined signedness, which made the result platform dependent; this keeps the sign-extending values produced where plain char is signed */ + } + words[0] = a ^ len; /* folding in len makes buffers that differ only in length hash differently */ + words[1] = b ^ c; + } + + bool operator==(const Checksum& rhs) const { return words[0]==rhs.words[0] && words[1]==rhs.words[1]; } + bool operator!=(const Checksum& rhs) const { return words[0]!=rhs.words[0] || words[1]!=rhs.words[1]; } + }; +}