From 2418da510eaf2fc5d1661ba816942086dfc9090e Mon Sep 17 00:00:00 2001 From: Eliot Horowitz Date: Sun, 12 Sep 2010 18:23:27 -0400 Subject: [PATCH] use splitVector instead of datasize and mediankey. 1 command vs. 2 and should be faster and more extensible. still some code cleanup to do probably SERVER-1550 SERVER-1779 --- s/chunk.cpp | 37 +++++++++++++++++++++++++++---------- s/chunk.h | 10 +++++++++- s/d_split.cpp | 25 ++++++++++++++++++------- 3 files changed, 54 insertions(+), 18 deletions(-) diff --git a/s/chunk.cpp b/s/chunk.cpp index dafa42509fb..3f980ba6575 100644 --- a/s/chunk.cpp +++ b/s/chunk.cpp @@ -83,7 +83,7 @@ namespace mongo { return _manager->getShardKey().globalMax().woCompare( getMax() ) == 0; } - BSONObj Chunk::pickSplitPoint() const{ + BSONObj Chunk::pickSplitPoint( const vector * possibleSplitPoints ) const{ int sort = 0; if ( minIsInf() ){ @@ -117,6 +117,9 @@ namespace mongo { if ( ! end.isEmpty() ) return _manager->getShardKey().extractKey( end ); } + + if ( possibleSplitPoints && possibleSplitPoints->size() ) + return possibleSplitPoints->at(0); BSONObj cmd = BSON( "medianKey" << _manager->getns() << "keyPattern" << _manager->getShardKey().key() @@ -164,7 +167,7 @@ namespace mongo { cmd.append( "keyPattern" , _manager->getShardKey().key() ); cmd.append( "min" , getMin() ); cmd.append( "max" , getMax() ); - cmd.append( "maxChunkSize" , chunkSize / (1<<20) /* in MBs */ ); + cmd.append( "maxChunkSizeBytes" , chunkSize ); BSONObj cmdObj = cmd.obj(); if ( ! 
conn->runCommand( "admin" , cmdObj , result )){ @@ -377,21 +380,35 @@ namespace mongo { log(3) << "\t splitIfShould entering decision area : " << *this << endl; - _dataWritten = 0; + _dataWritten = 0; // reset so we check often enough - BSONObj splitPoint = pickSplitPoint(); + // TODO: add a max number of split points to find + // that way if we get a mega chunk for some reason - + // this won't take an inordinate amount of time + vector possibleSplitPoints; + pickSplitVector( possibleSplitPoints , splitThreshold ); + + if ( possibleSplitPoints.size() <= 1 ) { + // no split points means there isn't enough data to split on + // 1 split point means we have between half the chunk size to full chunk size + // so we shouldn't split + return false; + } + + BSONObj splitPoint = pickSplitPoint( &possibleSplitPoints ); if ( splitPoint.isEmpty() || _min == splitPoint || _max == splitPoint) { + // TODO: this check might be redundant, but probably not that bad error() << "want to split chunk, but can't find split point " << " chunk: " << toString() << " got: " << splitPoint << endl; return false; } - long size = getPhysicalSize(); - if ( size < splitThreshold ) - return false; - - log() << "autosplitting " << _manager->getns() << " size: " << size << " shard: " << toString() - << " on: " << splitPoint << "(splitThreshold " << splitThreshold << ")" << endl; + log() << "autosplitting " << _manager->getns() << " shard: " << toString() + << " on: " << splitPoint << "(splitThreshold " << splitThreshold << ")" +#ifdef _DEBUG + << " size: " << getPhysicalSize() // slow - but can be useful when debugging +#endif + << endl; vector splitPoints; splitPoints.push_back( splitPoint ); diff --git a/s/chunk.h b/s/chunk.h index 0f23a8ed23e..6e5ef591d12 100644 --- a/s/chunk.h +++ b/s/chunk.h @@ -88,9 +88,17 @@ namespace mongo { bool minIsInf() const; bool maxIsInf() const; - BSONObj pickSplitPoint() const; + /** + * @param possibleSplitPoints - a vector of possible split points + * used as a hint only + */ + 
BSONObj pickSplitPoint( const vector * possibleSplitPoints = 0 ) const; ChunkPtr split(); + /** + * @param splitPoints - vector to be filled in + * @param chunkSize - chunk size to target in bytes + */ void pickSplitVector( vector& splitPoints , int chunkSize ) const; ChunkPtr multiSplit( const vector& splitPoints ); diff --git a/s/d_split.cpp b/s/d_split.cpp index 41c05c8d622..61238ad7650 100644 --- a/s/d_split.cpp +++ b/s/d_split.cpp @@ -134,15 +134,26 @@ namespace mongo { errmsg = "either provide both min and max or leave both empty"; return false; } - + long long maxChunkSize = 0; - BSONElement maxSizeElem = jsobj[ "maxChunkSize" ]; - if ( maxSizeElem.eoo() ){ - errmsg = "need to specify the desired max chunk size"; - return false; + { + BSONElement maxSizeElem = jsobj[ "maxChunkSize" ]; + if ( maxSizeElem.isNumber() ){ + maxChunkSize = maxSizeElem.numberLong() * 1<<20; + } + else { + maxSizeElem = jsobj["maxChunkSizeBytes"]; + if ( maxSizeElem.isNumber() ){ + maxChunkSize = maxSizeElem.numberLong(); + } + } + + if ( maxChunkSize <= 0 ){ + errmsg = "need to specify the desired max chunk size (maxChunkSize or maxChunkSizeBytes)"; + return false; + } } - maxChunkSize = maxSizeElem.numberLong() * 1<<20; - + Client::Context ctx( ns ); NamespaceDetails *d = nsdetails( ns );