2010-04-13 15:23:57 -04:00
// index_key.cpp
/**
* Copyright ( C ) 2008 10 gen Inc .
*
* This program is free software : you can redistribute it and / or modify
* it under the terms of the GNU Affero General Public License , version 3 ,
* as published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Affero General Public License for more details .
*
* You should have received a copy of the GNU Affero General Public License
* along with this program . If not , see < http : //www.gnu.org/licenses/>.
*/
2010-04-27 15:27:52 -04:00
# include "pch.h"
2010-11-04 09:07:28 -04:00
# include "namespace-inl.h"
2010-04-13 15:23:57 -04:00
# include "index.h"
# include "btree.h"
2011-06-22 15:51:08 -04:00
# include "ops/query.h"
2010-04-13 15:23:57 -04:00
# include "background.h"
2011-07-31 08:36:17 -07:00
# include "../util/text.h"
2010-04-13 15:23:57 -04:00
namespace mongo {
2011-07-31 09:34:04 -07:00
/** old (<= v1.8) : 0
1 is new version
*/
const int DefaultIndexVersionNumber = 1 ;
2010-04-13 15:23:57 -04:00
map < string , IndexPlugin * > * IndexPlugin : : _plugins ;
IndexType : : IndexType ( const IndexPlugin * plugin , const IndexSpec * spec )
2011-01-04 00:40:41 -05:00
: _plugin ( plugin ) , _spec ( spec ) {
2010-04-13 15:23:57 -04:00
}
2011-01-04 00:40:41 -05:00
IndexType : : ~ IndexType ( ) {
2010-04-13 15:23:57 -04:00
}
2011-01-04 00:40:41 -05:00
const BSONObj & IndexType : : keyPattern ( ) const {
return _spec - > keyPattern ;
2010-04-13 15:23:57 -04:00
}
IndexPlugin : : IndexPlugin ( const string & name )
2011-01-04 00:40:41 -05:00
: _name ( name ) {
2010-04-13 15:23:57 -04:00
if ( ! _plugins )
_plugins = new map < string , IndexPlugin * > ( ) ;
( * _plugins ) [ name ] = this ;
}
2010-09-23 17:51:01 -04:00
2011-01-04 00:40:41 -05:00
string IndexPlugin : : findPluginName ( const BSONObj & keyPattern ) {
2010-09-23 17:51:01 -04:00
string pluginName = " " ;
2011-01-04 00:40:41 -05:00
2010-09-23 17:51:01 -04:00
BSONObjIterator i ( keyPattern ) ;
2011-01-04 00:40:41 -05:00
2010-09-23 17:51:01 -04:00
while ( i . more ( ) ) {
BSONElement e = i . next ( ) ;
if ( e . type ( ) ! = String )
continue ;
2011-01-04 00:40:41 -05:00
2010-09-24 01:13:24 -04:00
uassert ( 13007 , " can only have 1 index plugin / bad index key pattern " , pluginName . size ( ) = = 0 | | pluginName = = e . String ( ) ) ;
pluginName = e . String ( ) ;
2010-09-23 17:51:01 -04:00
}
2011-01-04 00:40:41 -05:00
2010-09-23 17:51:01 -04:00
return pluginName ;
}
2011-01-04 00:40:41 -05:00
2010-04-13 15:23:57 -04:00
int IndexType : : compare ( const BSONObj & l , const BSONObj & r ) const {
return l . woCompare ( r , _spec - > keyPattern ) ;
}
2011-01-04 00:40:41 -05:00
void IndexSpec : : _init ( ) {
2010-04-13 15:23:57 -04:00
assert ( keyPattern . objsize ( ) ) ;
2010-11-20 17:07:53 -05:00
2010-11-20 18:02:59 -05:00
// some basics
_nFields = keyPattern . nFields ( ) ;
_sparse = info [ " sparse " ] . trueValue ( ) ;
uassert ( 13529 , " sparse only works for single field keys " , ! _sparse | | _nFields ) ;
2011-01-04 00:40:41 -05:00
2010-11-20 17:07:53 -05:00
{
2011-01-04 00:40:41 -05:00
// build _nullKey
2010-11-20 17:07:53 -05:00
BSONObjBuilder b ;
BSONObjIterator i ( keyPattern ) ;
2011-01-04 00:40:41 -05:00
2010-11-20 17:07:53 -05:00
while ( i . more ( ) ) {
BSONElement e = i . next ( ) ;
_fieldNames . push_back ( e . fieldName ( ) ) ;
_fixed . push_back ( BSONElement ( ) ) ;
2011-01-04 00:40:41 -05:00
b . appendNull ( " " ) ;
2010-11-20 17:07:53 -05:00
}
_nullKey = b . obj ( ) ;
2010-04-13 15:23:57 -04:00
}
2011-01-04 00:40:41 -05:00
2010-11-20 17:07:53 -05:00
{
// _nullElt
BSONObjBuilder b ;
b . appendNull ( " " ) ;
_nullObj = b . obj ( ) ;
_nullElt = _nullObj . firstElement ( ) ;
}
2010-04-13 15:23:57 -04:00
2011-07-31 08:36:17 -07:00
{
// _undefinedElt
BSONObjBuilder b ;
b . appendUndefined ( " " ) ;
_undefinedObj = b . obj ( ) ;
_undefinedElt = _undefinedObj . firstElement ( ) ;
}
2011-01-04 00:40:41 -05:00
{
2010-11-20 17:07:53 -05:00
// handle plugins
2011-01-04 00:40:41 -05:00
string pluginName = IndexPlugin : : findPluginName ( keyPattern ) ;
if ( pluginName . size ( ) ) {
2010-11-20 17:07:53 -05:00
IndexPlugin * plugin = IndexPlugin : : get ( pluginName ) ;
2011-01-04 00:40:41 -05:00
if ( ! plugin ) {
2010-11-20 17:07:53 -05:00
log ( ) < < " warning: can't find plugin [ " < < pluginName < < " ] " < < endl ;
}
else {
2011-01-04 00:40:41 -05:00
_indexType . reset ( plugin - > generate ( this ) ) ;
2010-11-20 17:07:53 -05:00
}
2010-04-13 15:23:57 -04:00
}
}
2011-01-04 00:40:41 -05:00
2010-04-13 15:23:57 -04:00
_finishedInit = true ;
}
2011-07-31 08:36:17 -07:00
void assertParallelArrays ( const char * first , const char * second ) {
stringstream ss ;
ss < < " cannot index parallel arrays [ " < < first < < " ] [ " < < second < < " ] " ;
uasserted ( 10088 , ss . str ( ) ) ;
2010-04-13 15:23:57 -04:00
}
2011-07-31 08:36:17 -07:00
class KeyGeneratorV0 {
public :
KeyGeneratorV0 ( const IndexSpec & spec ) : _spec ( spec ) { }
void getKeys ( const BSONObj & obj , BSONObjSet & keys ) const {
if ( _spec . _indexType . get ( ) ) { //plugin (eg geo)
_spec . _indexType - > getKeys ( obj , keys ) ;
return ;
2010-11-20 18:02:59 -05:00
}
2011-07-31 08:36:17 -07:00
vector < const char * > fieldNames ( _spec . _fieldNames ) ;
vector < BSONElement > fixed ( _spec . _fixed ) ;
_getKeys ( fieldNames , fixed , obj , keys ) ;
if ( keys . empty ( ) & & ! _spec . _sparse )
keys . insert ( _spec . _nullKey ) ;
}
private :
void _getKeys ( vector < const char * > fieldNames , vector < BSONElement > fixed , const BSONObj & obj , BSONObjSet & keys ) const {
BSONElement arrElt ;
unsigned arrIdx = ~ 0 ;
int numNotFound = 0 ;
for ( unsigned i = 0 ; i < fieldNames . size ( ) ; + + i ) {
if ( * fieldNames [ i ] = = ' \0 ' )
continue ;
BSONElement e = obj . getFieldDottedOrArray ( fieldNames [ i ] ) ;
if ( e . eoo ( ) ) {
e = _spec . _nullElt ; // no matching field
numNotFound + + ;
}
if ( e . type ( ) ! = Array )
fieldNames [ i ] = " " ; // no matching field or non-array match
if ( * fieldNames [ i ] = = ' \0 ' )
fixed [ i ] = e ; // no need for further object expansion (though array expansion still possible)
if ( e . type ( ) = = Array & & arrElt . eoo ( ) ) { // we only expand arrays on a single path -- track the path here
arrIdx = i ;
arrElt = e ;
}
// enforce single array path here
if ( e . type ( ) = = Array & & e . rawdata ( ) ! = arrElt . rawdata ( ) ) {
assertParallelArrays ( e . fieldName ( ) , arrElt . fieldName ( ) ) ;
}
2010-04-13 15:23:57 -04:00
}
2011-07-31 08:36:17 -07:00
bool allFound = true ; // have we found elements for all field names in the key spec?
for ( vector < const char * > : : const_iterator i = fieldNames . begin ( ) ; i ! = fieldNames . end ( ) ; + + i ) {
if ( * * i ! = ' \0 ' ) {
allFound = false ;
break ;
}
2010-06-03 10:02:38 -04:00
}
2011-07-31 08:36:17 -07:00
if ( _spec . _sparse & & numNotFound = = _spec . _nFields ) {
// we didn't find any fields
// so we're not going to index this document
return ;
2010-04-13 15:23:57 -04:00
}
2011-07-31 08:36:17 -07:00
bool insertArrayNull = false ;
if ( allFound ) {
if ( arrElt . eoo ( ) ) {
// no terminal array element to expand
BSONObjBuilder b ( _spec . _sizeTracker ) ;
for ( vector < BSONElement > : : iterator i = fixed . begin ( ) ; i ! = fixed . end ( ) ; + + i )
b . appendAs ( * i , " " ) ;
keys . insert ( b . obj ( ) ) ;
}
else {
// terminal array element to expand, so generate all keys
BSONObjIterator i ( arrElt . embeddedObject ( ) ) ;
if ( i . more ( ) ) {
while ( i . more ( ) ) {
BSONObjBuilder b ( _spec . _sizeTracker ) ;
for ( unsigned j = 0 ; j < fixed . size ( ) ; + + j ) {
if ( j = = arrIdx )
b . appendAs ( i . next ( ) , " " ) ;
else
b . appendAs ( fixed [ j ] , " " ) ;
}
keys . insert ( b . obj ( ) ) ;
}
}
else if ( fixed . size ( ) > 1 ) {
insertArrayNull = true ;
}
}
2011-01-04 00:40:41 -05:00
}
2010-04-13 15:23:57 -04:00
else {
2011-07-31 08:36:17 -07:00
// nonterminal array element to expand, so recurse
assert ( ! arrElt . eoo ( ) ) ;
2010-04-13 15:23:57 -04:00
BSONObjIterator i ( arrElt . embeddedObject ( ) ) ;
2011-01-04 00:40:41 -05:00
if ( i . more ( ) ) {
2010-04-13 15:23:57 -04:00
while ( i . more ( ) ) {
2011-07-31 08:36:17 -07:00
BSONElement e = i . next ( ) ;
if ( e . type ( ) = = Object ) {
_getKeys ( fieldNames , fixed , e . embeddedObject ( ) , keys ) ;
2010-04-13 15:23:57 -04:00
}
}
}
2011-07-31 08:36:17 -07:00
else {
2010-05-04 00:12:36 -04:00
insertArrayNull = true ;
2010-04-13 15:23:57 -04:00
}
}
2011-07-31 08:36:17 -07:00
if ( insertArrayNull ) {
// x : [] - need to insert undefined
BSONObjBuilder b ( _spec . _sizeTracker ) ;
for ( unsigned j = 0 ; j < fixed . size ( ) ; + + j ) {
if ( j = = arrIdx ) {
b . appendUndefined ( " " ) ;
}
else {
BSONElement e = fixed [ j ] ;
if ( e . eoo ( ) )
b . appendNull ( " " ) ;
else
b . appendAs ( e , " " ) ;
2010-05-04 00:12:36 -04:00
}
}
2011-07-31 08:36:17 -07:00
keys . insert ( b . obj ( ) ) ;
2010-05-04 00:12:36 -04:00
}
2011-07-31 08:36:17 -07:00
}
const IndexSpec & _spec ;
} ;
class KeyGeneratorV1 {
public :
KeyGeneratorV1 ( const IndexSpec & spec ) : _spec ( spec ) { }
void getKeys ( const BSONObj & obj , BSONObjSet & keys ) const {
if ( _spec . _indexType . get ( ) ) { //plugin (eg geo)
_spec . _indexType - > getKeys ( obj , keys ) ;
return ;
}
vector < const char * > fieldNames ( _spec . _fieldNames ) ;
vector < BSONElement > fixed ( _spec . _fixed ) ;
_getKeys ( fieldNames , fixed , obj , keys ) ;
if ( keys . empty ( ) & & ! _spec . _sparse )
keys . insert ( _spec . _nullKey ) ;
}
private :
/**
* @ param arrayNestedArray - set if the returned element is an array nested directly within arr .
*/
BSONElement extractNextElement ( const BSONObj & obj , const BSONObj & arr , const char * & field , bool & arrayNestedArray ) const {
string firstField = mongoutils : : str : : before ( field , ' . ' ) ;
bool haveObjField = ! obj . getField ( firstField ) . eoo ( ) ;
BSONElement arrField = arr . getField ( firstField ) ;
bool haveArrField = ! arrField . eoo ( ) ;
// An index component field name cannot exist in both a document array and one of that array's children.
2011-09-28 11:51:46 -04:00
uassert ( 15855 , str : : stream ( ) < < " Ambiguous field name found in array (do not use numeric field names in embedded elements in an array), field: ' " < < arrField . fieldName ( ) < < " ' for array: " < < arr , ! haveObjField | | ! haveArrField ) ;
2011-07-31 08:36:17 -07:00
arrayNestedArray = false ;
if ( haveObjField ) {
return obj . getFieldDottedOrArray ( field ) ;
}
else if ( haveArrField ) {
if ( arrField . type ( ) = = Array ) {
arrayNestedArray = true ;
}
return arr . getFieldDottedOrArray ( field ) ;
2010-05-04 00:12:36 -04:00
}
2011-07-31 08:36:17 -07:00
return BSONElement ( ) ;
2010-05-04 00:12:36 -04:00
}
2011-07-31 08:36:17 -07:00
void _getKeysArrEltFixed ( vector < const char * > & fieldNames , vector < BSONElement > & fixed , const BSONElement & arrEntry , BSONObjSet & keys , int numNotFound , const BSONElement & arrObjElt , const set < unsigned > & arrIdxs , bool mayExpandArrayUnembedded ) const {
// set up any terminal array values
for ( set < unsigned > : : const_iterator j = arrIdxs . begin ( ) ; j ! = arrIdxs . end ( ) ; + + j ) {
if ( * fieldNames [ * j ] = = ' \0 ' ) {
fixed [ * j ] = mayExpandArrayUnembedded ? arrEntry : arrObjElt ;
}
}
// recurse
_getKeys ( fieldNames , fixed , ( arrEntry . type ( ) = = Object ) ? arrEntry . embeddedObject ( ) : BSONObj ( ) , keys , numNotFound , arrObjElt . embeddedObject ( ) ) ;
}
/**
* @ param fieldNames - fields to index , may be postfixes in recursive calls
* @ param fixed - values that have already been identified for their index fields
* @ param obj - object from which keys should be extracted , based on names in fieldNames
* @ param keys - set where index keys are written
* @ param numNotFound - number of index fields that have already been identified as missing
* @ param array - array from which keys should be extracted , based on names in fieldNames
* If obj and array are both nonempty , obj will be one of the elements of array .
*/
void _getKeys ( vector < const char * > fieldNames , vector < BSONElement > fixed , const BSONObj & obj , BSONObjSet & keys , int numNotFound = 0 , const BSONObj & array = BSONObj ( ) ) const {
BSONElement arrElt ;
set < unsigned > arrIdxs ;
bool mayExpandArrayUnembedded = true ;
for ( unsigned i = 0 ; i < fieldNames . size ( ) ; + + i ) {
if ( * fieldNames [ i ] = = ' \0 ' ) {
continue ;
}
bool arrayNestedArray ;
// Extract element matching fieldName[ i ] from object xor array.
BSONElement e = extractNextElement ( obj , array , fieldNames [ i ] , arrayNestedArray ) ;
if ( e . eoo ( ) ) {
// if field not present, set to null
fixed [ i ] = _spec . _nullElt ;
// done expanding this field name
fieldNames [ i ] = " " ;
numNotFound + + ;
}
else if ( e . type ( ) = = Array ) {
arrIdxs . insert ( i ) ;
if ( arrElt . eoo ( ) ) {
// we only expand arrays on a single path -- track the path here
arrElt = e ;
}
else if ( e . rawdata ( ) ! = arrElt . rawdata ( ) ) {
// enforce single array path here
assertParallelArrays ( e . fieldName ( ) , arrElt . fieldName ( ) ) ;
}
if ( arrayNestedArray ) {
mayExpandArrayUnembedded = false ;
}
2010-05-04 19:18:59 -04:00
}
else {
2011-07-31 08:36:17 -07:00
// not an array - no need for further expansion
fixed [ i ] = e ;
}
}
if ( arrElt . eoo ( ) ) {
// No array, so generate a single key.
if ( _spec . _sparse & & numNotFound = = _spec . _nFields ) {
return ;
}
BSONObjBuilder b ( _spec . _sizeTracker ) ;
for ( vector < BSONElement > : : iterator i = fixed . begin ( ) ; i ! = fixed . end ( ) ; + + i ) {
b . appendAs ( * i , " " ) ;
}
keys . insert ( b . obj ( ) ) ;
}
else if ( arrElt . embeddedObject ( ) . firstElement ( ) . eoo ( ) ) {
// Empty array, so set matching fields to undefined.
_getKeysArrEltFixed ( fieldNames , fixed , _spec . _undefinedElt , keys , numNotFound , arrElt , arrIdxs , true ) ;
}
else {
// Non empty array that can be expanded, so generate a key for each member.
BSONObj arrObj = arrElt . embeddedObject ( ) ;
BSONObjIterator i ( arrObj ) ;
while ( i . more ( ) ) {
_getKeysArrEltFixed ( fieldNames , fixed , i . next ( ) , keys , numNotFound , arrElt , arrIdxs , mayExpandArrayUnembedded ) ;
2010-05-04 19:18:59 -04:00
}
2010-04-13 15:23:57 -04:00
}
2011-07-31 08:36:17 -07:00
}
const IndexSpec & _spec ;
} ;
void IndexSpec : : getKeys ( const BSONObj & obj , BSONObjSet & keys ) const {
switch ( indexVersion ( ) ) {
case 0 : {
KeyGeneratorV0 g ( * this ) ;
g . getKeys ( obj , keys ) ;
break ;
}
case 1 : {
KeyGeneratorV1 g ( * this ) ;
g . getKeys ( obj , keys ) ;
break ;
}
default :
2011-07-31 09:34:04 -07:00
massert ( 15869 , " Invalid index version for key generation. " , false ) ;
2010-04-13 15:23:57 -04:00
}
}
2011-01-04 00:40:41 -05:00
bool anyElementNamesMatch ( const BSONObj & a , const BSONObj & b ) {
2010-04-13 15:23:57 -04:00
BSONObjIterator x ( a ) ;
2011-01-04 00:40:41 -05:00
while ( x . more ( ) ) {
2010-04-13 15:23:57 -04:00
BSONElement e = x . next ( ) ;
BSONObjIterator y ( b ) ;
2011-01-04 00:40:41 -05:00
while ( y . more ( ) ) {
2010-04-13 15:23:57 -04:00
BSONElement f = y . next ( ) ;
FieldCompareResult res = compareDottedFieldNames ( e . fieldName ( ) , f . fieldName ( ) ) ;
if ( res = = SAME | | res = = LEFT_SUBFIELD | | res = = RIGHT_SUBFIELD )
return true ;
}
}
return false ;
}
2011-01-04 00:40:41 -05:00
2010-04-13 15:23:57 -04:00
IndexSuitability IndexSpec : : suitability ( const BSONObj & query , const BSONObj & order ) const {
if ( _indexType . get ( ) )
return _indexType - > suitability ( query , order ) ;
return _suitability ( query , order ) ;
}
2011-01-04 00:40:41 -05:00
2010-04-13 15:23:57 -04:00
IndexSuitability IndexSpec : : _suitability ( const BSONObj & query , const BSONObj & order ) const {
// TODO: optimize
if ( anyElementNamesMatch ( keyPattern , query ) = = 0 & & anyElementNamesMatch ( keyPattern , order ) = = 0 )
return USELESS ;
return HELPFUL ;
}
IndexSuitability IndexType : : suitability ( const BSONObj & query , const BSONObj & order ) const {
return _spec - > _suitability ( query , order ) ;
}
2011-07-31 08:36:17 -07:00
int IndexSpec : : indexVersion ( ) const {
if ( ! info . hasField ( " v " ) ) {
return DefaultIndexVersionNumber ;
}
return IndexDetails : : versionForIndexObj ( info ) ;
}
2010-04-13 15:23:57 -04:00
bool IndexType : : scanAndOrderRequired ( const BSONObj & query , const BSONObj & order ) const {
return ! order . isEmpty ( ) ;
}
}