Files
mongo/s/chunk.cpp

1137 lines
38 KiB
C++
Raw Normal View History

2008-11-09 17:49:37 -05:00
// chunk.cpp
2008-09-15 09:14:42 -04:00
/**
2009-02-18 10:10:39 -05:00
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2008-09-15 09:14:42 -04:00
2010-04-27 15:27:52 -04:00
#include "pch.h"
#include "chunk.h"
2009-02-12 21:09:06 -05:00
#include "config.h"
2009-02-18 10:10:39 -05:00
#include "../util/unittest.h"
#include "../client/connpool.h"
#include "../client/distlock.h"
#include "../db/queryutil.h"
#include "cursors.h"
2009-04-03 14:21:00 -04:00
#include "strategy.h"
2008-09-15 09:14:42 -04:00
2009-01-14 17:09:51 -05:00
namespace mongo {
/** Returns true when every element of 'o' has BSON type 'type' (vacuously true for an empty object). */
inline bool allOfType(BSONType type, const BSONObj& o){
    for (BSONObjIterator elems(o); elems.more(); ){
        const bool matches = ( elems.next().type() == type );
        if ( ! matches )
            return false;
    }
    return true;
}
2010-05-27 15:21:10 -04:00
// File-level lock that Chunk::splitIfShould() tries to grab (lock_try) before attempting an auto-split.
RWLock chunkSplitLock("rw:chunkSplitLock");
2010-04-22 13:32:22 -04:00
// ------- Chunk --------
2010-06-14 10:33:34 -04:00
// Size threshold in bytes (200 * 1024 * 1024). splitIfShould() compares against it and
// getPhysicalSize() sends it (plus one) as the "maxSize" of the datasize command.
int Chunk::MaxChunkSize = 1024 * 1024 * 200;
2009-02-03 17:10:44 -05:00
2010-06-30 14:01:47 -04:00
/**
 * Creates a chunk attached to 'manager' with version 0, not modified and no data written.
 * Bounds and shard are not set here; unserialize() fills them in.
 */
Chunk::Chunk( ChunkManager * manager )
    : _manager( manager ) ,
      _lastmod( 0 ) ,
      _modified( false ) ,
      _dataWritten( 0 )
{
}
/**
 * Creates a chunk attached to 'info' that covers the keys from 'min' to 'max' and lives on 'shard'.
 * The chunk starts with version 0, not modified and no data written.
 */
Chunk::Chunk(ChunkManager * info , const BSONObj& min, const BSONObj& max, const Shard& shard)
    : _manager( info ) ,
      _min( min ) ,
      _max( max ) ,
      _shard( shard ) ,
      _lastmod( 0 ) ,
      _modified( false ) ,
      _dataWritten( 0 )
{
}
/** Returns the namespace reported by the owning manager; asserts that a manager is attached. */
string Chunk::getns() const {
    assert( _manager );

    return _manager->getns();
}
void Chunk::setShard( const Shard& s ){
_shard = s;
_manager->_migrationNotification(this);
2009-02-27 12:51:49 -05:00
_markModified();
}
/** True when 'obj' falls inside this chunk: min <= obj < max under the shard key ordering. */
bool Chunk::contains( const BSONObj& obj ) const{
    // reject anything that sorts before the (inclusive) lower bound
    if ( _manager->getShardKey().compare( getMin() , obj ) > 0 )
        return false;

    // the upper bound is exclusive
    return _manager->getShardKey().compare( obj , getMax() ) < 0;
}
/** True when 'obj' falls inside this range: min <= obj < max (same test as Chunk::contains). */
bool ChunkRange::contains(const BSONObj& obj) const {
    // reject anything that sorts before the (inclusive) lower bound
    if ( _manager->getShardKey().compare( getMin() , obj ) > 0 )
        return false;

    // the upper bound is exclusive
    return _manager->getShardKey().compare( obj , getMax() ) < 0;
}
/** True when this chunk's lower bound equals the shard key's global minimum. */
bool Chunk::minIsInf() const {
    const int cmp = _manager->getShardKey().globalMin().woCompare( getMin() );
    return cmp == 0;
}
/** True when this chunk's upper bound equals the shard key's global maximum. */
bool Chunk::maxIsInf() const {
    const int cmp = _manager->getShardKey().globalMax().woCompare( getMax() );
    return cmp == 0;
}
/**
 * Chooses the key at which this chunk should be split.
 *
 * For a chunk open at the global min (or max), the shard key of the first document found when
 * sorting ascending (or descending) by the shard key is used, if any document is found.
 * Otherwise the shard's "medianKey" command is asked for the median of [min, max); when the
 * median equals the chunk's min, the first document with a key greater than min is used instead.
 *
 * uasserts 10163 if a shard key field is not numeric while building the descending sort,
 * and 10164 if the medianKey command fails.
 */
BSONObj Chunk::pickSplitPoint() const{
    // +1 : ascending scan, -1 : descending scan, 0 : use the median
    int direction = 0;
    if ( minIsInf() )
        direction = 1;
    else if ( maxIsInf() )
        direction = -1;

    if ( direction != 0 ){
        ShardConnection conn( getShard().getConnString() , _manager->getns() );

        Query q;
        if ( direction == 1 ){
            q.sort( _manager->getShardKey().key() );
        }
        else {
            // negate every field of the key pattern to sort in the opposite direction
            BSONObj pattern = _manager->getShardKey().key();
            BSONObjBuilder reversed;
            for ( BSONObjIterator fields( pattern ); fields.more(); ){
                BSONElement field = fields.next();
                uassert( 10163 , "can only handle numbers here - which i think is correct" , field.isNumber() );
                reversed.append( field.fieldName() , -1 * field.number() );
            }
            q.sort( reversed.obj() );
        }

        BSONObj end = conn->findOne( _manager->getns() , q );
        conn.done();

        if ( ! end.isEmpty() )
            return _manager->getShardKey().extractKey( end );
    }

    ScopedDbConnection conn( getShard().getConnString() );

    BSONObj medianCmd = BSON( "medianKey" << _manager->getns()
                              << "keyPattern" << _manager->getShardKey().key()
                              << "min" << getMin()
                              << "max" << getMax()
                            );
    BSONObj result;
    const bool ok = conn->runCommand( "admin" , medianCmd , result );
    if ( ! ok ){
        stringstream ss;
        ss << "medianKey command failed: " << result;
        uassert( 10164 , ss.str() , 0 );
    }

    BSONObj median = result.getObjectField( "median" );
    if (median == getMin()){
        //TODO compound support
        // the median is the lower bound itself: take the first key strictly above it
        BSONElement key = getMin().firstElement();
        BSONObjBuilder b;
        b.appendAs(key, "$gt");

        Query q = QUERY(key.fieldName() << b.obj());
        q.sort(_manager->getShardKey().key());

        median = conn->findOne(_manager->getns(), q);
        median = _manager->getShardKey().extractKey( median );
        PRINT(median);
    }

    conn.done();

    return median.getOwned();
}
/** Splits this chunk at the key chosen by pickSplitPoint() and returns the new chunk. */
ChunkPtr Chunk::split(){
    const BSONObj splitKey = pickSplitPoint();
    return split( splitKey );
}
2009-02-27 14:23:52 -05:00
/**
 * Splits this chunk at every split point listed in 'm'.
 *
 * This chunk keeps the keys below the first split point. One new chunk is created per split
 * point, covering from that point up to the next one; the last new chunk ends at the old _max.
 * The manager's chunk map is updated, the change is saved to the config DB and a "split"
 * entry is written to the config change log.
 *
 * Bounds are built from single elements (wrap() / _max.firstElement()), so only the first
 * field of _max is used for the last chunk's upper bound.
 *
 * @param m  object whose elements are the split points (presumably in ascending key order);
 *           must be non-empty and have fewer than 256 fields
 * @return the first newly created chunk
 *
 * uasserts 10165 / 13332 / 13333 / 13003 on invalid preconditions and 10166 when the
 * distributed lock for the namespace cannot be taken.
 */
ChunkPtr Chunk::split( const BSONObj& m ){
    const int maxSplitPoints = 256;

    uassert( 10165 , "can't split as shard doesn't have a manager" , _manager );
    uassert( 13332 , "need a split key to split chunk" , !m.isEmpty() );
    uassert( 13333 , "can't split a chunk in that many parts", m.nFields() < maxSplitPoints );
    uassert( 13003 , "can't split a chunk with only one distinct value" , _min.woCompare(_max) );

    DistributedLock lockSetup( ConnectionString( modelServer() , ConnectionString::SYNC ) , getns() );
    dist_lock_try dlk( &lockSetup , string("split-") + toString() );
    uassert( 10166 , "locking namespace failed" , dlk.got() );

    BSONObjBuilder detail;
    appendShortVersion( "before" , detail );
    log(1) << " before split on " << m.nFields() << " points\n" << "\t self : " << toString() << endl;

    // Iterate over the split points in 'm', splitting off a new chunk per entry. That chunk's range
    // covers until the next entry in 'm' or _max .
    vector<ChunkPtr> newChunks;
    BSONObjIterator i( m );
    BSONElement nextPoint = i.next();
    _markModified();

    // The loop must run once more after the iterator is exhausted so that the chunk
    // [last split point, _max) is created too.
    bool reachedMax = false;
    do {
        BSONElement splitPoint = nextPoint;

        // Assign to the outer 'nextPoint' (do not redeclare it) so the next iteration
        // starts where this chunk ends.
        if ( i.more() ){
            nextPoint = i.next();
        }
        else {
            nextPoint = _max.firstElement();
            reachedMax = true;
        }

        ChunkPtr s( new Chunk( _manager, splitPoint.wrap().getOwned() , nextPoint.wrap().getOwned() , _shard) );
        s->_markModified();
        newChunks.push_back(s);
    } while ( ! reachedMax );

    // Have the chunk manager reflect the key change for the first chunk and create an entry for every
    // new chunk spawned by it.
    {
        rwlock lk( _manager->_lock , true );

        setMax(m.firstElement().wrap().getOwned());
        DEV assert( shared_from_this() );
        _manager->_chunkMap[_max] = shared_from_this();

        for ( vector<ChunkPtr>::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){
            ChunkPtr s = *it;
            _manager->_chunkMap[s->getMax()] = s;
        }
    }

    log(1) << " after split:\n" << toString() << endl;
    for ( vector<ChunkPtr>::const_iterator it = newChunks.begin(); it != newChunks.end(); ++it ){
        ChunkPtr s = *it;
        log(1) << "\t new chunk" << s->toString() << endl;
    }

    // Save the new key boundaries in the configDB.
    _manager->save();

    // Log all these changes in the configDB's log.
    appendShortVersion( "left" , detail );
    if ( newChunks.size() == 1 ){
        newChunks[0]->appendShortVersion( "right" , detail );
    } else {
        for ( size_t idx=0; idx < newChunks.size(); idx++ ){
            ChunkPtr s = newChunks[idx];
            ostringstream os;
            os << "right" << idx;
            s->appendShortVersion( os.str().c_str() , detail );
        }
    }
    configServer.logChange( "split", _manager->getns() , detail.obj() );

    return newChunks[0];
}
2009-04-03 13:52:06 -04:00
bool Chunk::moveAndCommit( const Shard& to , string& errmsg ){
uassert( 10167 , "can't move shard to its current location!" , getShard() != to );
2010-04-20 17:07:10 -04:00
2010-06-30 13:32:01 -04:00
log() << "moving chunk ns: " << _manager->getns() << " moving ( " << toString() << ") " << _shard.toString() << " -> " << to.toString() << endl;
2009-04-03 13:52:06 -04:00
Shard from = _shard;
2009-04-03 13:52:06 -04:00
BSONObj filter;
{
BSONObjBuilder b;
getFilter( b );
filter = b.obj();
}
ScopedDbConnection fromconn( from);
BSONObj res;
bool worked = fromconn->runCommand( "admin" ,
BSON( "moveChunk" << _manager->getns() <<
"from" << from.getConnString() <<
"to" << to.getConnString() <<
"filter" << filter <<
"shardId" << genID() <<
"configdb" << configServer.modelServer()
) ,
res
);
2009-04-03 13:52:06 -04:00
fromconn.done();
if ( worked ){
_manager->_reload();
return true;
}
2009-04-03 13:52:06 -04:00
errmsg = res["errmsg"].String();
errmsg += " " + res.toString();
return false;
2009-04-03 13:52:06 -04:00
}
bool Chunk::splitIfShould( long dataWritten ){
_dataWritten += dataWritten;
int myMax = MaxChunkSize;
if ( minIsInf() || maxIsInf() ){
myMax = (int)( (double)myMax * .9 );
}
if ( _dataWritten < myMax / 5 )
return false;
2010-03-20 23:46:19 -04:00
2010-04-22 13:32:22 -04:00
if ( ! chunkSplitLock.lock_try(0) )
return false;
rwlock lk( chunkSplitLock , 1 , true );
log(1) << "\t splitIfShould : " << this << endl;
_dataWritten = 0;
BSONObj split_point = pickSplitPoint();
if ( split_point.isEmpty() || _min == split_point || _max == split_point) {
log() << "SHARD PROBLEM** shard is too big, but can't split: " << toString() << endl;
return false;
}
long size = getPhysicalSize();
if ( size < myMax )
return false;
2010-06-30 13:32:01 -04:00
log() << "autosplitting " << _manager->getns() << " size: " << size << " shard: " << toString() << endl;
ChunkPtr newShard = split(split_point);
2009-04-20 17:42:01 -04:00
moveIfShould( newShard );
return true;
}
/**
 * After a split, moves whichever of the two chunks holds at most one document to the shard
 * returned by Shard::pick(), unless that is the shard this chunk already lives on.
 *
 * @param newChunk  the chunk produced by the split
 * @return true if a chunk was moved, false if no move was attempted;
 *         masserts 10412 when the move itself fails
 */
bool Chunk::moveIfShould( ChunkPtr newChunk ){
    ChunkPtr candidate;

    if ( newChunk->countObjects() <= 1 ){
        candidate = newChunk;
    }
    else if ( this->countObjects() <= 1 ){
        DEV assert( shared_from_this() );
        candidate = shared_from_this();
    }
    else {
        log(1) << "don't know how to decide if i should move inner shard" << endl;
    }

    if ( ! candidate )
        return false;

    Shard newLocation = Shard::pick();
    if ( getShard() == newLocation ){
        // if this is the best server, then we shouldn't do anything!
        log(1) << "not moving chunk: " << toString() << " b/c would move to same place " << newLocation.toString() << " -> " << getShard().toString() << endl;
        return false;
    }

    log() << "moving chunk (auto): " << candidate->toString() << " to: " << newLocation.toString() << " #objects: " << candidate->countObjects() << endl;

    string errmsg;
    massert( 10412 , (string)"moveAndCommit failed: " + errmsg ,
             candidate->moveAndCommit( newLocation , errmsg ) );

    return true;
}
long Chunk::getPhysicalSize() const{
ScopedDbConnection conn( getShard().getConnString() );
2009-04-15 22:26:19 -04:00
BSONObj result;
2010-04-28 12:19:46 -04:00
uassert( 10169 , "datasize failed!" , conn->runCommand( "admin" ,
2010-06-30 13:32:01 -04:00
BSON( "datasize" << _manager->getns()
2010-04-28 12:19:46 -04:00
<< "keyPattern" << _manager->getShardKey().key()
<< "min" << getMin()
<< "max" << getMax()
<< "maxSize" << ( MaxChunkSize + 1 )
) , result ) );
2009-04-15 22:26:19 -04:00
conn.done();
return (long)result["size"].number();
}
2009-04-20 17:42:01 -04:00
template <typename ChunkType>
inline long countObjectsHelper(const ChunkType* chunk, const BSONObj& filter){
2010-05-20 13:36:29 -04:00
ShardConnection conn( chunk->getShard().getConnString() , chunk->getManager()->getns() );
2009-04-20 17:42:01 -04:00
BSONObj f = chunk->getFilter();
if ( ! filter.isEmpty() )
2009-11-03 10:35:48 -05:00
f = ClusteredCursor::concatQuery( f , filter );
2009-04-20 17:42:01 -04:00
BSONObj result;
unsigned long long n = conn->count( chunk->getManager()->getns() , f );
2009-04-20 17:42:01 -04:00
conn.done();
return (long)n;
2009-04-20 17:42:01 -04:00
}
/** Number of documents in this chunk that also match 'filter' (see countObjectsHelper). */
long Chunk::countObjects( const BSONObj& filter ) const {
    return countObjectsHelper( this , filter );
}
/** Number of documents in this range that also match 'filter' (see countObjectsHelper). */
long ChunkRange::countObjects( const BSONObj& filter ) const {
    return countObjectsHelper( this , filter );
}
2010-04-20 17:07:10 -04:00
void Chunk::appendShortVersion( const char * name , BSONObjBuilder& b ){
BSONObjBuilder bb( b.subobjStart( name ) );
bb.append( "min" , _min );
bb.append( "max" , _max );
bb.done();
}
2009-04-15 22:26:19 -04:00
/** Two chunks are equal when both their min and max bounds compare equal under the shard key. */
bool Chunk::operator==( const Chunk& s ) const{
    if ( _manager->getShardKey().compare( _min , s._min ) != 0 )
        return false;

    return _manager->getShardKey().compare( _max , s._max ) == 0;
}
/** Has the shard key pattern write into 'b' the filter for this chunk's [_min, _max) bounds. */
void Chunk::getFilter( BSONObjBuilder& b ) const{
    _manager->getShardKey().getFilter( b , _min , _max );
}
/** Has the shard key pattern write into 'b' the filter for this range's [_min, _max) bounds. */
void ChunkRange::getFilter( BSONObjBuilder& b ) const{
    _manager->getShardKey().getFilter( b , _min , _max );
}
2009-02-27 12:51:49 -05:00
/**
 * Writes this chunk's config document into 'to': _id, lastmod, ns, min, max and shard.
 *
 * @param to         builder receiving the fields
 * @param myLastMod  version to store as "lastmod" when set; otherwise the chunk's own
 *                   _lastmod is used. Asserts if neither is set.
 */
void Chunk::serialize(BSONObjBuilder& to,ShardChunkVersion myLastMod){
    to.append( "_id" , genID( _manager->getns() , _min ) );

    if ( myLastMod.isSet() ){
        // an explicitly supplied version wins
        to.appendTimestamp( "lastmod" , myLastMod );
    }
    else if ( _lastmod.isSet() ){
        assert( _lastmod > 0 && _lastmod < 1000 );
        to.appendTimestamp( "lastmod" , _lastmod );
    }
    else {
        assert(0);
    }

    to << "ns" << _manager->getns();
    to << "min" << _min;
    to << "max" << _max;
    to << "shard" << _shard.getName();
}
2010-04-22 10:29:05 -04:00
/**
 * Builds a chunk id string: 'ns', a dash, then for every element of 'o' its field name,
 * an underscore and the element printed without its field name.
 */
string Chunk::genID( const string& ns , const BSONObj& o ){
    StringBuilder buf( ns.size() + o.objsize() + 16 );
    buf << ns << "-";

    for ( BSONObjIterator fields( o ); fields.more(); ){
        BSONElement field = fields.next();
        buf << field.fieldName() << "_" << field.toString( false );
    }

    return buf.str();
}
2009-02-27 12:51:49 -05:00
void Chunk::unserialize(const BSONObj& from){
2010-06-30 13:32:01 -04:00
string ns = from.getStringField( "ns" );
_shard.reset( from.getStringField( "shard" ) );
_lastmod = from["lastmod"];
assert( _lastmod > 0 );
BSONElement e = from["minDotted"];
if (e.eoo()){
_min = from.getObjectField( "min" ).getOwned();
_max = from.getObjectField( "max" ).getOwned();
}
else { // TODO delete this case after giving people a chance to migrate
_min = e.embeddedObject().getOwned();
_max = from.getObjectField( "maxDotted" ).getOwned();
}
2010-06-30 13:32:01 -04:00
uassert( 10170 , "Chunk needs a ns" , ! ns.empty() );
uassert( 13327 , "Chunk ns must match server ns" , ns == _manager->getns() );
uassert( 10171 , "Chunk needs a server" , _shard.ok() );
2009-02-20 13:46:57 -05:00
uassert( 10172 , "Chunk needs a min" , ! _min.isEmpty() );
uassert( 10173 , "Chunk needs a max" , ! _max.isEmpty() );
}
/** Returns configServer.modelServer(), the server chunk metadata is read from and written to. */
string Chunk::modelServer() {
    // TODO: this could move around?
    const string server = configServer.modelServer();
    return server;
}
void Chunk::_markModified(){
2009-02-27 12:51:49 -05:00
_modified = true;
}
void Chunk::ensureIndex(){
ScopedDbConnection conn( getShard().getConnString() );
2010-06-30 13:32:01 -04:00
conn->ensureIndex( _manager->getns() , _manager->getShardKey().key() , _manager->_unique );
conn.done();
}
2009-03-27 16:55:26 -04:00
/** One-line description of the chunk: namespace, shard, version and bounds. */
string Chunk::toString() const {
    stringstream out;
    out << "shard ns:" << _manager->getns()
        << " shard: " << _shard.toString()
        << " lastmod: " << _lastmod.toString()
        << " min: " << _min
        << " max: " << _max;
    return out.str();
}
2009-04-20 17:42:01 -04:00
/** Returns a copy of the manager's shard key pattern. */
ShardKeyPattern Chunk::skey() const{
    return _manager->getShardKey();
}
// ------- ChunkManager --------
2010-04-27 21:39:58 -04:00
// Shared counter; pre-incremented to obtain a _sequenceNumber in the constructor and in save_inlock().
AtomicUInt ChunkManager::NextSequenceNumber = 1;
2009-03-30 10:50:10 -04:00
2009-09-03 16:48:34 -04:00
/**
 * Loads the chunks of 'ns' from the config server. When none exist yet, creates a single
 * chunk spanning the whole key space on the database's primary shard and saves it.
 *
 * @param config   owning database config (source of the primary shard)
 * @param ns       sharded namespace
 * @param pattern  shard key pattern
 * @param unique   stored in _unique (passed on to ensureIndex by Chunk::ensureIndex)
 */
ChunkManager::ChunkManager( DBConfig * config , string ns , ShardKeyPattern pattern , bool unique ) :
    _config( config ) , _ns( ns ) ,
    _key( pattern ) , _unique( unique ) ,
    _sequenceNumber( ++NextSequenceNumber ), _lock("rw:ChunkManager")
{
    _reload_inlock();

    if ( ! _chunkMap.empty() )
        return;

    // nothing stored yet: start with one chunk covering [globalMin, globalMax)
    ChunkPtr c( new Chunk(this, _key.globalMin(), _key.globalMax(), config->getPrimary()) );
    c->_markModified();

    _chunkMap[c->getMax()] = c;
    _chunkRanges.reloadAll(_chunkMap);

    _shards.insert(c->getShard());

    save_inlock();
    log() << "no chunks for:" << ns << " so creating first: " << c->toString() << endl;
}
/** Empties the chunk map, the chunk ranges and the shard set. */
ChunkManager::~ChunkManager(){
    _chunkMap.clear();

    _chunkRanges.clear();

    _shards.clear();
}
2010-04-29 21:04:54 -04:00
void ChunkManager::_reload(){
rwlock lk( _lock , true );
2010-06-02 17:01:28 -04:00
_reload_inlock();
}
2010-06-02 17:01:28 -04:00
void ChunkManager::_reload_inlock(){
int tries = 3;
while (tries--){
_chunkMap.clear();
_chunkRanges.clear();
2010-06-29 20:40:29 -04:00
_shards.clear();
_load();
if (_isValid()){
_chunkRanges.reloadAll(_chunkMap);
return;
}
_printChunks();
sleepmillis(10 * (3-tries));
sleepsecs(10);
}
msgasserted(13282, "Couldn't load a valid config for " + _ns + " after 3 tries. Giving up");
2010-04-29 21:04:54 -04:00
}
void ChunkManager::_load(){
2010-06-30 14:01:47 -04:00
static Chunk temp(0);
2010-04-29 21:04:54 -04:00
ScopedDbConnection conn( temp.modelServer() );
2010-04-29 21:04:54 -04:00
auto_ptr<DBClientCursor> cursor = conn->query(temp.getNS(), QUERY("ns" << _ns).sort("lastmod",1), 0, 0, 0, 0,
(DEBUG_BUILD ? 2 : 1000000)); // batch size. Try to induce potential race conditions in debug builds
2010-04-29 21:04:54 -04:00
while ( cursor->more() ){
BSONObj d = cursor->next();
if ( d["isMaxMarker"].trueValue() ){
continue;
}
ChunkPtr c( new Chunk( this ) );
2010-04-29 21:04:54 -04:00
c->unserialize( d );
2010-04-29 21:04:54 -04:00
_chunkMap[c->getMax()] = c;
2010-06-29 20:40:29 -04:00
_shards.insert(c->getShard());
2010-04-29 21:04:54 -04:00
}
conn.done();
}
bool ChunkManager::_isValid() const {
#define ENSURE(x) do { if(!(x)) { log() << "ChunkManager::_isValid failed: " #x << endl; return false; } } while(0)
2010-06-30 14:35:23 -04:00
if (_chunkMap.empty())
return true;
// Check endpoints
ENSURE(allOfType(MinKey, _chunkMap.begin()->second->getMin()));
ENSURE(allOfType(MaxKey, prior(_chunkMap.end())->second->getMax()));
// Make sure there are no gaps or overlaps
for (ChunkMap::const_iterator it=boost::next(_chunkMap.begin()), end=_chunkMap.end(); it != end; ++it){
ChunkMap::const_iterator last = prior(it);
ENSURE(it->second->getMin() == last->second->getMax());
}
return true;
#undef ENSURE
}
void ChunkManager::_printChunks() const {
for (ChunkMap::const_iterator it=_chunkMap.begin(), end=_chunkMap.end(); it != end; ++it) {
log() << *it->second << endl;
}
}
bool ChunkManager::hasShardKey( const BSONObj& obj ){
2009-02-20 10:46:42 -05:00
return _key.hasShardKey( obj );
}
/**
 * Finds the chunk that owns 'obj'.
 *
 * Looks up the first chunk whose max is greater than the shard key extracted from 'obj'.
 * If that chunk does not actually contain 'obj', the chunk data is reloaded and massert 13141
 * is raised. If no chunk is found at all, the data is reloaded and the lookup repeated once.
 *
 * @param obj    document (or key) to locate
 * @param retry  true when this call is already the second attempt; a miss then throws
 *               UserException 8070
 */
ChunkPtr ChunkManager::findChunk( const BSONObj & obj , bool retry ){
    BSONObj key = _key.extractKey(obj);

    BSONObj foo;
    ChunkPtr c;
    {
        // only the map lookup is done under the lock
        rwlock lk( _lock , false );

        ChunkMap::iterator it = _chunkMap.upper_bound(key);
        if (it != _chunkMap.end()){
            foo = it->first;
            c = it->second;
        }
    }

    if ( c ){
        if ( c->contains( obj ) )
            return c;

        PRINT(foo);
        PRINT(*c);
        PRINT(key);

        _reload();
        massert(13141, "Chunk map pointed to incorrect chunk", false);
    }

    if ( retry ){
        stringstream ss;
        ss << "couldn't find a chunk aftry retry which should be impossible extracted: " << key;
        throw UserException( 8070 , ss.str() );
    }

    log() << "ChunkManager: couldn't find chunk for: " << key << " going to retry" << endl;
    _reload();
    return findChunk( obj , true );
}
/** Returns the first chunk (in map order) that lives on 'shard', or an empty ChunkPtr if there is none. */
ChunkPtr ChunkManager::findChunkOnServer( const Shard& shard ) const {
    rwlock lk( _lock , false );

    ChunkMap::const_iterator it = _chunkMap.begin();
    while ( it != _chunkMap.end() ){
        ChunkPtr candidate = it->second;
        if ( candidate->getShard() == shard )
            return candidate;
        ++it;
    }

    return ChunkPtr();
}
/**
 * Works out which chunk ranges may hold documents matching 'query', looking only at the
 * first field of the shard key.
 *
 * @param chunks  output. Left untouched when 0 or -1 is returned; gets one range appended for
 *                an equality match; otherwise its contents are replaced by the matching ranges.
 * @param query   the query to analyze
 * @return 0 when the key range is empty, 1 for an equality match, -1 when the query does not
 *         constrain the key (meaning "all chunks"), otherwise the number of ranges in 'chunks'
 *
 * uasserts 13088 for "special" queries.
 */
int ChunkManager::_getChunksForQuery( vector<shared_ptr<ChunkRange> >& chunks , const BSONObj& query ){
    rwlock lk( _lock , false );

    FieldRangeSet ranges(_ns.c_str(), query, false);
    BSONObjIterator fields(_key.key());
    BSONElement field = fields.next();
    FieldRange range = ranges.range(field.fieldName());

    uassert(13088, "no support for special queries yet", range.getSpecial().empty());

    if (range.empty()) {
        return 0;
    }

    if (range.equality()) {
        chunks.push_back( _chunkRanges.upper_bound(BSON(field.fieldName() << range.min()))->second );
        return 1;
    }

    if (!range.nontrivial()) {
        return -1; // all chunks
    }

    set<shared_ptr<ChunkRange>, ChunkCmp> chunkSet;

    for (vector<FieldInterval>::const_iterator it=range.intervals().begin(), end=range.intervals().end();
         it != end;
         ++it)
    {
        const FieldInterval& fi = *it;
        assert(fi.valid());

        BSONObj minObj = BSON(field.fieldName() << fi._lower._bound);
        BSONObj maxObj = BSON(field.fieldName() << fi._upper._bound);

        ChunkRangeMap::const_iterator min, max;

        // Ranges are keyed by their exclusive max, so the first range that can hold a key
        // >= or > the lower bound is always the first one whose max is greater than the bound.
        // lower_bound() would also pick a range ending exactly at the bound, which cannot
        // hold any key above it.
        min = _chunkRanges.upper_bound(minObj);
        max = (fi._upper._inclusive ? _chunkRanges.upper_bound(maxObj) : _chunkRanges.lower_bound(maxObj));

        assert(min != _chunkRanges.ranges().end());

        // make max non-inclusive like end iterators
        if(max != _chunkRanges.ranges().end())
            ++max;

        for (ChunkRangeMap::const_iterator r=min; r != max; ++r){
            chunkSet.insert(r->second);
        }
    }

    chunks.assign(chunkSet.begin(), chunkSet.end());
    return chunks.size();
}
2009-12-02 16:36:46 -05:00
/**
 * Fills 'chunks' with the chunk ranges relevant to 'query'; when the query does not constrain
 * the shard key (-1 from _getChunksForQuery), every range is appended.
 * @return the resulting size of 'chunks'
 */
int ChunkManager::getChunksForQuery( vector<shared_ptr<ChunkRange> >& chunks , const BSONObj& query ){
    const int ret = _getChunksForQuery(chunks, query);

    if (ret == -1){
        // "all chunks": hand back every known range
        ChunkRangeMap::const_iterator it = _chunkRanges.ranges().begin();
        const ChunkRangeMap::const_iterator end = _chunkRanges.ranges().end();
        while ( it != end ){
            chunks.push_back(it->second);
            ++it;
        }
    }

    return chunks.size();
    //return ret;
}
/**
 * Adds to 'shards' the shards that may hold documents matching 'query': all known shards when
 * the query does not constrain the shard key, else the shards of the matching chunk ranges.
 * @return the resulting size of 'shards'
 */
int ChunkManager::getShardsForQuery( set<Shard>& shards , const BSONObj& query ){
    vector<shared_ptr<ChunkRange> > chunks;
    const int ret = _getChunksForQuery(chunks, query);

    if (ret == -1){
        getAllShards(shards);
        return shards.size();
    }

    for ( size_t idx = 0; idx < chunks.size(); ++idx ){
        shared_ptr<ChunkRange> range = chunks[idx];
        shards.insert(range->getShard());
    }
    return shards.size();
}
/** Overwrites 'all' with a copy of the shard set, taken under _lock (non-exclusive). */
void ChunkManager::getAllShards( set<Shard>& all ){
    rwlock lk( _lock , false );

    all = _shards;
}
2009-03-25 17:35:38 -04:00
void ChunkManager::ensureIndex(){
ensureIndex_inlock();
}
void ChunkManager::ensureIndex_inlock(){
set<Shard> seen;
2010-06-30 14:35:23 -04:00
for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){
ChunkPtr c = i->second;
if ( seen.count( c->getShard() ) )
continue;
seen.insert( c->getShard() );
c->ensureIndex();
}
}
/**
 * Drops the sharded collection: clears the cached metadata, drops the collection on every
 * shard that held a chunk, removes the sharding entry from the database config, deletes the
 * chunk documents from the config server and finally resets the shard version to 0 on each shard.
 *
 * Holds _lock exclusively and a distributed lock on the namespace for the duration.
 * uasserts 13331 (dist lock not obtained), 10174 (config servers not all up), 10176
 * (removeSharding failed); throws UserException 8071 if resetting a shard version fails.
 *
 * @param me  not used in this function body
 */
void ChunkManager::drop( ChunkManagerPtr me ){
rwlock lk( _lock , true );
2010-07-02 16:48:21 -04:00
DistributedLock lockSetup( ConnectionString( configServer.modelServer() , ConnectionString::SYNC ) , getns() );
dist_lock_try dlk( &lockSetup , "drop" );
uassert( 13331 , "locking namespace failed" , dlk.got() );
uassert( 10174 , "config servers not all up" , configServer.allUp() );
2010-07-02 16:48:21 -04:00
// distinct shards holding at least one chunk; remembered before the metadata is wiped
set<Shard> seen;
log(1) << "ChunkManager::drop : " << _ns << endl;
// lock all shards so no one can do a split/migrate
// NOTE(review): the loop below only collects the shards into 'seen'; no per-shard locking is visible here.
2010-06-30 14:35:23 -04:00
for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){
ChunkPtr c = i->second;
2010-07-02 16:48:21 -04:00
seen.insert( c->getShard() );
}
log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl;
// wipe my meta-data
_chunkMap.clear();
_chunkRanges.clear();
2010-06-29 20:40:29 -04:00
_shards.clear();
// delete data from mongod
2010-07-02 16:48:21 -04:00
for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ){
ScopedDbConnection conn( *i );
conn->dropCollection( _ns );
conn.done();
}
log(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl;
// clean up database meta-data
uassert( 10176 , "no sharding data?" , _config->removeSharding( _ns ) );
_config->save();
// remove chunk data
2010-06-30 14:01:47 -04:00
// throwaway Chunk used only to reach modelServer() and getNS()
static Chunk temp(0);
2010-05-20 13:36:29 -04:00
ScopedDbConnection conn( temp.modelServer() );
conn->remove( temp.getNS() , BSON( "ns" << _ns ) );
conn.done();
log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl;
2010-07-02 16:48:21 -04:00
// reset the version to 0 on every shard that held data (authoritative = true)
for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ){
ScopedDbConnection conn( *i );
BSONObj res;
if ( ! setShardVersion( conn.conn() , _ns , 0 , true , res ) )
throw UserException( 8071 , (string)"OH KNOW, cleaning up after drop failed: " + res.toString() );
conn.done();
}
log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl;
}
/** Takes _lock exclusively and writes the modified chunks to the config server (see save_inlock). */
void ChunkManager::save(){
    rwlock lk( _lock , true );

    save_inlock();
}
/**
 * Writes every chunk flagged _modified to the config server in a single "applyOps" command.
 *
 * Each modified chunk gets its own new version, starting at the current highest version with
 * the major part incremented. Unless this is the very first save (version 0 and at most one
 * chunk), a "preCondition" is added requiring that the newest "lastmod" stored for this
 * namespace still equals the version read at the start. On success the new versions are
 * copied into the chunks and the shard key index is ensured on all shards.
 *
 * Returns without doing anything when no chunk is modified. msgasserts 13327 when the command
 * fails. The caller is expected to hold _lock (see save()).
 */
void ChunkManager::save_inlock(){
// highest version currently known; doubles as the expected value in the preCondition
ShardChunkVersion a = getVersion_inlock();
2010-06-30 14:35:23 -04:00
assert( a > 0 || _chunkMap.size() <= 1 );
2010-06-30 00:03:10 -04:00
ShardChunkVersion nextChunkVersion = a.incMajor();
// chunks written by this call and, at the same index, the version each one was given
vector<ChunkPtr> toFix;
vector<ShardChunkVersion> newVersions;
2009-03-27 16:55:26 -04:00
BSONObjBuilder cmdBuilder;
BSONArrayBuilder updates( cmdBuilder.subarrayStart( "applyOps" ) );
int numOps = 0;
2010-06-30 14:35:23 -04:00
for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){
ChunkPtr c = i->second;
if ( ! c->_modified )
continue;
numOps++;
2009-03-30 10:50:10 -04:00
_sequenceNumber = ++NextSequenceNumber;
2010-06-30 00:03:10 -04:00
ShardChunkVersion myVersion = nextChunkVersion;
++nextChunkVersion;
toFix.push_back( c );
newVersions.push_back( myVersion );
// one applyOps entry per chunk: "o" is the serialized chunk, "o2" selects it by _id
BSONObjBuilder op;
op.append( "op" , "u" );
op.appendBool( "b" , true );
op.append( "ns" , ShardNS::chunk );
BSONObjBuilder n( op.subobjStart( "o" ) );
c->serialize( n , myVersion );
n.done();
BSONObjBuilder q( op.subobjStart( "o2" ) );
q.append( "_id" , c->genID() );
q.done();
updates.append( op.obj() );
}
if ( numOps == 0 )
return;
updates.done();
2010-06-30 14:35:23 -04:00
// skip the preCondition only on the very first save (nothing stored yet to compare against)
if ( a > 0 || _chunkMap.size() > 1 ){
BSONArrayBuilder temp( cmdBuilder.subarrayStart( "preCondition" ) );
BSONObjBuilder b;
b.append( "ns" , ShardNS::chunk );
b.append( "q" , BSON( "query" << BSON( "ns" << _ns ) << "orderby" << BSON( "lastmod" << -1 ) ) );
{
BSONObjBuilder bb( b.subobjStart( "res" ) );
2010-06-30 00:03:10 -04:00
bb.appendTimestamp( "lastmod" , a );
bb.done();
}
temp.append( b.obj() );
temp.done();
2009-02-17 15:34:52 -05:00
}
BSONObj cmd = cmdBuilder.obj();
2010-07-03 22:06:44 -04:00
log(7) << "ChunkManager::save update: " << cmd << endl;
2010-07-02 16:48:21 -04:00
ScopedDbConnection conn( Chunk(0).modelServer() );
BSONObj res;
bool ok = conn->runCommand( "config" , cmd , res );
conn.done();
if ( ! ok ){
stringstream ss;
ss << "saving chunks failed. cmd: " << cmd << " result: " << res;
log( LL_ERROR ) << ss.str() << endl;
msgasserted( 13327 , ss.str() );
}
// the write went through: record the new versions on the in-memory chunks
for ( unsigned i=0; i<toFix.size(); i++ ){
toFix[i]->_lastmod = newVersions[i];
}
massert( 10417 , "how did version get smalled" , getVersion_inlock() >= a );
ensureIndex_inlock(); // TODO: this is too aggressive - but not really sooo bad
2009-03-27 16:55:26 -04:00
}
/** Returns the highest _lastmod among the chunks living on 'shard' (0 when it holds none). */
ShardChunkVersion ChunkManager::getVersion( const Shard& shard ) const{
    rwlock lk( _lock , false );
    // TODO: cache or something?

    ShardChunkVersion max = 0;

    for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ){
        ChunkPtr c = i->second;
        DEV assert( c );

        // only chunks on the requested shard count
        if ( c->getShard() == shard ){
            if ( c->_lastmod > max )
                max = c->_lastmod;
        }
    }

    return max;
}
/** Returns the highest _lastmod across all chunks, read under _lock (non-exclusive). */
ShardChunkVersion ChunkManager::getVersion() const{
    rwlock lk( _lock , false );

    return getVersion_inlock();
}
/** Returns the highest _lastmod across all chunks (0 for an empty map); takes no lock itself. */
ShardChunkVersion ChunkManager::getVersion_inlock() const{
    ShardChunkVersion max = 0;

    ChunkMap::const_iterator it = _chunkMap.begin();
    while ( it != _chunkMap.end() ){
        ChunkPtr chunk = it->second;
        if ( chunk->_lastmod > max )
            max = chunk->_lastmod;
        ++it;
    }

    return max;
}
/** Multi-line description: a header with namespace and key, then one tab-indented line per chunk. */
string ChunkManager::toString() const {
    rwlock lk( _lock , false );

    stringstream out;
    out << "ChunkManager: " << _ns << " key:" << _key.toString() << '\n';

    ChunkMap::const_iterator it = _chunkMap.begin();
    while ( it != _chunkMap.end() ){
        const ChunkPtr chunk = it->second;
        out << "\t" << chunk->toString() << '\n';
        ++it;
    }

    return out.str();
}
/**
 * Called by Chunk::setShard() after chunk 'c' changed shard: rebuilds the chunk ranges around
 * the chunk's bounds and records the chunk's shard in the shard set.
 */
void ChunkManager::_migrationNotification(Chunk* c){
    _chunkRanges.reloadRange( _chunkMap , c->getMin() , c->getMax() );

    _shards.insert( c->getShard() );
}
/**
 * Consistency check of the range map; an empty map passes.
 *
 * Asserts that no entry is null, that the ranges run from MinKey to MaxKey without gaps or
 * overlaps, that every map key equals its range's max, and that every chunk of the owning
 * manager falls within exactly one range living on the chunk's shard.
 * On any failure the ranges are printed and the exception is rethrown.
 */
void ChunkRangeManager::assertValid() const{
if (_ranges.empty())
return;
try {
// No Nulls
for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){
assert(it->second);
}
// Check endpoints
assert(allOfType(MinKey, _ranges.begin()->second->getMin()));
assert(allOfType(MaxKey, prior(_ranges.end())->second->getMax()));
// Make sure there are no gaps or overlaps
2010-05-04 17:43:53 -04:00
for (ChunkRangeMap::const_iterator it=boost::next(_ranges.begin()), end=_ranges.end(); it != end; ++it){
ChunkRangeMap::const_iterator last = prior(it);
assert(it->second->getMin() == last->second->getMax());
}
// Check Map keys
for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it){
assert(it->first == it->second->getMax());
}
// Make sure we match the original chunks
2010-06-30 14:35:23 -04:00
// note: this takes a copy of the manager's chunk map
const ChunkMap chunks = _ranges.begin()->second->getManager()->_chunkMap;
for ( ChunkMap::const_iterator i=chunks.begin(); i!=chunks.end(); ++i ){
const ChunkPtr chunk = i->second;
// range containing the chunk's min, and range whose max is >= the chunk's max: must be the same one
ChunkRangeMap::const_iterator min = _ranges.upper_bound(chunk->getMin());
ChunkRangeMap::const_iterator max = _ranges.lower_bound(chunk->getMax());
assert(min != _ranges.end());
assert(max != _ranges.end());
assert(min == max);
assert(min->second->getShard() == chunk->getShard());
assert(min->second->contains( chunk->getMin() ));
// a range's max is exclusive, so a chunk ending exactly at the range's max is accepted too
assert(min->second->contains( chunk->getMax() ) || (min->second->getMax() == chunk->getMax()));
}
} catch (...) {
// dump the ranges to help diagnose, then let the failure propagate
log( LL_ERROR ) << "\t invalid ChunkRangeMap! printing ranges:" << endl;
for (ChunkRangeMap::const_iterator it=_ranges.begin(), end=_ranges.end(); it != end; ++it)
cout << it->first << ": " << *it->second << endl;
throw;
}
}
/**
 * Rebuilds the ranges that overlap [min, max) from 'chunks', then merges the rebuilt part
 * with its neighbour on either side when both live on the same shard.
 * Falls back to reloadAll() when no ranges exist yet.
 *
 * @param chunks  the manager's chunk map (keyed by chunk max)
 * @param min     lower bound of the key span that changed
 * @param max     upper bound of the key span that changed
 */
void ChunkRangeManager::reloadRange(const ChunkMap& chunks, const BSONObj& min, const BSONObj& max){
if (_ranges.empty()){
reloadAll(chunks);
return;
}
// first and last existing ranges touched by [min, max)
ChunkRangeMap::iterator low = _ranges.upper_bound(min);
ChunkRangeMap::iterator high = _ranges.lower_bound(max);
assert(low != _ranges.end());
assert(high != _ranges.end());
assert(low->second);
assert(high->second);
// chunks spanning from the start of 'low' to the end of 'high'
ChunkMap::const_iterator begin = chunks.upper_bound(low->second->getMin());
ChunkMap::const_iterator end = chunks.lower_bound(high->second->getMax());
assert(begin != chunks.end());
assert(end != chunks.end());
// C++ end iterators are one-past-last
++high;
++end;
// update ranges
_ranges.erase(low, high); // invalidates low
_insertRange(begin, end);
assert(!_ranges.empty());
DEV assertValid();
// merge low-end if possible
low = _ranges.upper_bound(min);
assert(low != _ranges.end());
if (low != _ranges.begin()){
shared_ptr<ChunkRange> a = prior(low)->second;
shared_ptr<ChunkRange> b = low->second;
if (a->getShard() == b->getShard()){
// build the merged range before erasing; 'a' and 'b' keep the old ranges alive meanwhile
shared_ptr<ChunkRange> cr (new ChunkRange(*a, *b));
_ranges.erase(prior(low));
_ranges.erase(low); // invalidates low
_ranges[cr->getMax()] = cr;
}
}
DEV assertValid();
// merge high-end if possible
high = _ranges.lower_bound(max);
if (high != prior(_ranges.end())){
shared_ptr<ChunkRange> a = high->second;
2010-05-04 17:43:53 -04:00
shared_ptr<ChunkRange> b = boost::next(high)->second;
if (a->getShard() == b->getShard()){
shared_ptr<ChunkRange> cr (new ChunkRange(*a, *b));
2010-05-04 17:43:53 -04:00
_ranges.erase(boost::next(high));
_ranges.erase(high); //invalidates high
_ranges[cr->getMax()] = cr;
}
}
DEV assertValid();
}
/** Throws away all ranges and rebuilds them from the complete chunk map 'chunks'. */
void ChunkRangeManager::reloadAll(const ChunkMap& chunks){
    _ranges.clear();

    _insertRange( chunks.begin() , chunks.end() );

    DEV assertValid();
}
/**
 * Walks the chunks in [begin, end) and inserts one ChunkRange per run of consecutive chunks
 * that live on the same shard, keyed by the range's max.
 */
void ChunkRangeManager::_insertRange(ChunkMap::const_iterator begin, const ChunkMap::const_iterator end){
    while (begin != end){
        // remember where this run starts and which shard it belongs to
        const ChunkMap::const_iterator first = begin;
        const Shard shard = first->second->getShard();

        // advance to the first chunk on another shard (or to 'end')
        for ( ; begin != end; ++begin ){
            if ( !(begin->second->getShard() == shard) )
                break;
        }

        shared_ptr<ChunkRange> cr (new ChunkRange(first, begin));
        _ranges[cr->getMax()] = cr;
    }
}
2009-02-18 10:10:39 -05:00
class ChunkObjUnitTest : public UnitTest {
2009-02-18 10:10:39 -05:00
public:
void runShard(){
ChunkPtr c;
assert( ! c );
c.reset( new Chunk( 0 ) );
assert( c );
}
2010-06-30 00:03:10 -04:00
void runShardChunkVersion(){
vector<ShardChunkVersion> all;
all.push_back( ShardChunkVersion(1,1) );
all.push_back( ShardChunkVersion(1,2) );
all.push_back( ShardChunkVersion(2,1) );
all.push_back( ShardChunkVersion(2,2) );
for ( unsigned i=0; i<all.size(); i++ ){
for ( unsigned j=i+1; j<all.size(); j++ ){
assert( all[i] < all[j] );
}
}
}
void run(){
runShard();
2010-06-30 00:03:10 -04:00
runShardChunkVersion();
2009-02-18 10:10:39 -05:00
log(1) << "shardObjTest passed" << endl;
}
} shardObjTest;
2009-01-14 17:09:51 -05:00
// ----- to be removed ---
// Defined elsewhere; sent as the "serverID" field by setShardVersion() below.
extern OID serverID;
/**
 * Sends the "setShardVersion" admin command for 'ns' over 'conn'.
 *
 * @param conn           connection to the shard
 * @param ns             namespace whose version is being set
 * @param version        version to send (as a timestamp field)
 * @param authoritative  when true, "authoritative: true" is added to the command
 * @param result         receives the command's reply
 * @return whatever runCommand returns (true on success)
 */
bool setShardVersion( DBClientBase & conn , const string& ns , ShardChunkVersion version , bool authoritative , BSONObj& result ){
    BSONObjBuilder cmdBuilder;

    cmdBuilder.append( "setShardVersion" , ns.c_str() );
    cmdBuilder.append( "configdb" , configServer.modelServer() );
    cmdBuilder.appendTimestamp( "version" , version.toLong() );
    cmdBuilder.appendOID( "serverID" , &serverID );
    if ( authoritative ){
        cmdBuilder.appendBool( "authoritative" , 1 );
    }

    // identify the target shard by name and host
    Shard s = Shard::make( conn.getServerAddress() );
    cmdBuilder.append( "shard" , s.getName() );
    cmdBuilder.append( "shardHost" , s.getConnString() );

    BSONObj cmd = cmdBuilder.obj();

    log(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl;

    const bool ok = conn.runCommand( "admin" , cmd , result );
    return ok;
}
2009-01-14 17:09:51 -05:00
} // namespace mongo