Files
mongo/db/queryoptimizer.h

285 lines
12 KiB
C
Raw Normal View History

2008-11-15 19:04:01 -05:00
/* queryoptimizer.h */
2008-11-14 16:19:47 -05:00
/**
* Copyright (C) 2008 10gen Inc.
2008-12-28 20:28:49 -05:00
*
2008-11-14 16:19:47 -05:00
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
2008-12-28 20:28:49 -05:00
*
2008-11-14 16:19:47 -05:00
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
2008-12-28 20:28:49 -05:00
*
2008-11-14 16:19:47 -05:00
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2008-12-28 20:28:49 -05:00
#pragma once
2008-11-14 16:19:47 -05:00
2009-02-17 15:53:19 -05:00
#include "cursor.h"
#include "jsobj.h"
2009-02-24 14:14:45 -05:00
#include "queryutil.h"
2009-02-17 15:53:19 -05:00
2009-01-14 17:09:51 -05:00
namespace mongo {
2009-02-17 15:53:19 -05:00
2009-02-20 11:08:22 -05:00
class IndexDetails;
2010-03-10 22:44:51 -05:00
class IndexType;
2009-07-07 14:12:53 -04:00
class QueryPlan : boost::noncopyable {
public:
2009-07-07 14:12:53 -04:00
QueryPlan(NamespaceDetails *_d,
int _idxNo, // -1 = no index
const FieldRangeSet &fbs,
const BSONObj &order,
const BSONObj &startKey = BSONObj(),
2010-02-25 16:24:34 -05:00
const BSONObj &endKey = BSONObj() ,
string special="" );
2009-07-07 14:12:53 -04:00
/* If true, no other index can do better. */
2009-02-17 20:57:36 -05:00
bool optimal() const { return optimal_; }
/* ScanAndOrder processing will be required if true */
2009-02-17 20:57:36 -05:00
bool scanAndOrderRequired() const { return scanAndOrderRequired_; }
/* When true, the index we are using has keys such that it can completely resolve the
2009-02-17 20:57:36 -05:00
query expression to match by itself without ever checking the main object.
*/
bool exactKeyMatch() const { return exactKeyMatch_; }
/* If true, the startKey and endKey are unhelpful and the index order doesn't match the
requested sort order */
bool unhelpful() const { return unhelpful_; }
2009-02-19 13:58:22 -05:00
int direction() const { return direction_; }
shared_ptr<Cursor> newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const;
shared_ptr<Cursor> newReverseCursor() const;
BSONObj indexKey() const;
const char *ns() const { return fbs_.ns(); }
NamespaceDetails *nsd() const { return d; }
BSONObj query() const { return fbs_.query(); }
2009-05-13 11:10:29 -04:00
BSONObj simplifiedQuery( const BSONObj& fields = BSONObj() ) const { return fbs_.simplifiedQuery( fields ); }
const FieldRange &range( const char *fieldName ) const { return fbs_.range( fieldName ); }
2009-03-24 12:12:04 -04:00
void registerSelf( long long nScanned ) const;
// just for testing
BoundList indexBounds() const { return indexBounds_; }
2009-02-17 20:57:36 -05:00
private:
NamespaceDetails *d;
int idxNo;
const FieldRangeSet &fbs_;
2009-02-20 11:08:22 -05:00
const BSONObj &order_;
const IndexDetails *index_;
2009-02-17 20:57:36 -05:00
bool optimal_;
bool scanAndOrderRequired_;
bool exactKeyMatch_;
2009-02-19 13:58:22 -05:00
int direction_;
BoundList indexBounds_;
bool endKeyInclusive_;
bool unhelpful_;
2010-02-25 16:24:34 -05:00
string _special;
IndexType * _type;
};
// Inherit from this interface to implement a new query operation.
2009-12-21 09:26:16 -08:00
// The query optimizer will clone the QueryOp that is provided, giving
// each clone its own query plan.
2009-02-20 11:08:22 -05:00
class QueryOp {
public:
2010-05-12 11:03:51 -07:00
QueryOp() : _complete(), _stopRequested(), _qp(), _error() {}
2009-02-20 11:08:22 -05:00
virtual ~QueryOp() {}
2010-02-16 11:13:31 -05:00
/** this gets called after a query plan is set? ERH 2/16/10 */
virtual void init() = 0;
virtual void next() = 0;
2009-02-24 18:23:11 -05:00
virtual bool mayRecordPlan() const = 0;
2010-02-16 11:13:31 -05:00
/** @return a copy of the inheriting class, which will be run with its own
2010-05-12 10:38:58 -07:00
query plan. If multiple plan sets are required for an $or query,
the QueryOp of the winning plan from a given set will be cloned
to generate QueryOps for the subsequent plan set.
2010-02-16 11:13:31 -05:00
*/
2009-02-20 11:08:22 -05:00
virtual QueryOp *clone() const = 0;
2010-05-12 11:03:51 -07:00
bool complete() const { return _complete; }
bool error() const { return _error; }
bool stopRequested() const { return _stopRequested; }
2010-05-12 11:03:51 -07:00
string exceptionMessage() const { return _exceptionMessage; }
const QueryPlan &qp() const { return *_qp; }
// To be called by QueryPlanSet::Runner only.
2010-05-12 11:03:51 -07:00
void setQueryPlan( const QueryPlan *qp ) { _qp = qp; }
2009-02-24 15:45:18 -05:00
void setExceptionMessage( const string &exceptionMessage ) {
2010-05-12 11:03:51 -07:00
_error = true;
_exceptionMessage = exceptionMessage;
2009-02-24 15:45:18 -05:00
}
protected:
2010-05-12 11:03:51 -07:00
void setComplete() { _complete = true; }
void setStop() { setComplete(); _stopRequested = true; }
2009-02-20 11:08:22 -05:00
private:
2010-05-12 11:03:51 -07:00
bool _complete;
bool _stopRequested;
string _exceptionMessage;
const QueryPlan *_qp;
bool _error;
2009-02-20 11:08:22 -05:00
};
2009-05-28 15:22:24 -04:00
// Set of candidate query plans for a particular query. Used for running
// a QueryOp on these plans.
class QueryPlanSet {
public:
2010-03-29 12:55:44 -04:00
typedef boost::shared_ptr< QueryPlan > PlanPtr;
typedef vector< PlanPtr > PlanSet;
QueryPlanSet( const char *ns,
const BSONObj &query,
const BSONObj &order,
const BSONElement *hint = 0,
bool honorRecordedPlan = true,
const BSONObj &min = BSONObj(),
const BSONObj &max = BSONObj() );
int nPlans() const { return plans_.size(); }
2009-02-20 15:37:24 -05:00
shared_ptr< QueryOp > runOp( QueryOp &op );
template< class T >
shared_ptr< T > runOp( T &op ) {
return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) );
}
BSONObj explain() const;
2009-02-26 15:53:33 -05:00
bool usingPrerecordedPlan() const { return usingPrerecordedPlan_; }
2010-03-29 12:55:44 -04:00
PlanPtr getBestGuess() const;
2010-05-12 10:58:16 -07:00
//for testing
const FieldRangeSet &fbs() const { return fbs_; }
private:
void addOtherPlans( bool checkFirst );
void addPlan( PlanPtr plan, bool checkFirst ) {
if ( checkFirst && plan->indexKey().woCompare( plans_[ 0 ]->indexKey() ) == 0 )
return;
plans_.push_back( plan );
}
2009-02-25 10:48:41 -05:00
void init();
void addHint( IndexDetails &id );
struct Runner {
Runner( QueryPlanSet &plans, QueryOp &op );
2009-02-20 15:37:24 -05:00
shared_ptr< QueryOp > run();
2009-02-20 11:08:22 -05:00
QueryOp &op_;
QueryPlanSet &plans_;
2009-02-26 10:28:27 -05:00
static void initOp( QueryOp &op );
static void nextOp( QueryOp &op );
2009-02-20 11:08:22 -05:00
};
const char *ns;
2010-02-05 12:02:01 -05:00
BSONObj query_;
FieldRangeSet fbs_;
2009-02-20 11:08:22 -05:00
PlanSet plans_;
2009-02-25 10:48:41 -05:00
bool mayRecordPlan_;
bool usingPrerecordedPlan_;
BSONObj hint_;
BSONObj order_;
2009-03-24 12:12:04 -04:00
long long oldNScanned_;
2009-02-26 15:53:33 -05:00
bool honorRecordedPlan_;
BSONObj min_;
BSONObj max_;
string _special;
};
2009-01-14 17:09:51 -05:00
2010-05-12 10:38:58 -07:00
// Handles $or type queries by generating a QueryPlanSet for each $or clause
// NOTE on our $or implementation: In our current qo implementation we don't
// keep statistics on our data, but we can conceptualize the problem of
// selecting an index when statistics exist for all index ranges. The
// d-hitting set problem on k sets and n elements can be reduced to the
// problem of index selection on k $or clauses and n index ranges (where
// d is the max number of indexes, and the number of ranges n is unbounded).
// In light of the fact that d-hitting set is np complete, and we don't even
// track statistics (so cost calculations are expensive) our first
// implementation uses the following greedy approach: We take one $or clause
// at a time and treat each as a separate query for index selection purposes.
// But if an index range is scanned for a particular $or clause, we eliminate
// that range from all subsequent clauses. One could imagine an opposite
// implementation where we select indexes based on the union of index ranges
// for all $or clauses, but this can have much poorer worst case behavior.
// (An index range that suits one $or clause may not suit another, and this
// is worse than the typical case of index range choice staleness because
// with $or the clauses may likely be logically distinct.) The greedy
// implementation won't do any worse than all the $or clauses individually,
// and it can often do better. In the first cut we are intentionally using
// QueryPattern tracking to record successful plans on $or queries for use by
// subsequent $or queries, even though there may be a significant aggregate
// $nor component that would not be represented in QueryPattern.
class MultiPlanScanner {
public:
MultiPlanScanner( const char *ns,
const BSONObj &query,
const BSONObj &order,
const BSONElement *hint = 0,
bool honorRecordedPlan = true,
const BSONObj &min = BSONObj(),
const BSONObj &max = BSONObj() );
shared_ptr< QueryOp > runOp( QueryOp &op );
template< class T >
shared_ptr< T > runOp( T &op ) {
return dynamic_pointer_cast< T >( runOp( static_cast< QueryOp& >( op ) ) );
}
shared_ptr< QueryOp > runOpOnce( QueryOp &op );
template< class T >
shared_ptr< T > runOpOnce( T &op ) {
return dynamic_pointer_cast< T >( runOpOnce( static_cast< QueryOp& >( op ) ) );
}
bool mayRunMore() const { return _i < _n; }
BSONObj explain() const { assertNotOr(); return _currentQps->explain(); }
bool usingPrerecordedPlan() const { assertNotOr(); return _currentQps->usingPrerecordedPlan(); }
QueryPlanSet::PlanPtr getBestGuess() const { assertNotOr(); return _currentQps->getBestGuess(); }
private:
//temp
void assertNotOr() const {
massert( 13266, "not implemented for $or query", !_or );
}
// temp (and yucky)
BSONObj nextSimpleQuery() {
massert( 13267, "only generate simple query if $or", _or );
massert( 13270, "no more simple queries", mayRunMore() );
BSONObjBuilder b;
BSONArrayBuilder norb;
BSONObjIterator i( _query );
while( i.more() ) {
BSONElement e = i.next();
if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
massert( 13269, "$nor must be array", e.type() == Array );
BSONObjIterator j( e.embeddedObject() );
while( j.more() ) {
norb << j.next();
}
} else if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
BSONObjIterator j( e.embeddedObject() );
for( int k = 0; k < _i; ++k ) {
norb << j.next();
}
b << "$or" << BSON_ARRAY( j.next() );
} else {
b.append( e );
}
}
BSONArray nor = norb.arr();
if ( !nor.isEmpty() ) {
b << "$nor" << nor;
}
++_i;
BSONObj ret = b.obj();
return ret;
}
const char * _ns;
bool _or;
BSONObj _query;
// FieldRangeOrSet _fros;
auto_ptr< QueryPlanSet > _currentQps;
int _i;
int _n;
bool _honorRecordedPlan;
2010-05-12 10:38:58 -07:00
};
// NOTE min, max, and keyPattern will be updated to be consistent with the selected index.
IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern );
2010-02-05 12:02:01 -05:00
inline bool isSimpleIdQuery( const BSONObj& query ){
return
strcmp( query.firstElement().fieldName() , "_id" ) == 0 &&
query.nFields() == 1 &&
query.firstElement().isSimpleType();
}
2009-01-14 17:09:51 -05:00
} // namespace mongo