Files
mongo/db/btree.cpp

901 lines
24 KiB
C++
Raw Normal View History

2007-11-08 21:43:31 -05:00
// btree.cpp
#include "stdafx.h"
#include "btree.h"
#include "pdfile.h"
/* Size in bytes of one btree bucket; init() stores it in Size and allocTemp()
   allocates this many bytes.
   it is easy to do custom sizes for a namespace - all the same for now */
const int BucketSize = 8192;
/* Largest serialized key (key.objsize()) accepted; insert(), _insert() and
   unindex() reject keys larger than this. */
const int KeyMax = BucketSize / 10;
2007-11-08 21:43:31 -05:00
/* Number of top-level BtreeBucket::insert() calls so far; only used for the
   periodic progress message printed by insert(). */
int ninserts = 0;

/* Defined in another translation unit; the BtreeCursor constructor reads it to
   decide whether to dump or validate the index. */
extern int otherTraceLevel;

/* Non-zero turns on tracing to cout of bucket splits / of inserts. */
int split_debug = 0;
int insert_debug = 0;

/* Extreme record locations. The BtreeCursor constructor passes one of them to
   locate() depending on scan direction; BtreeUnitTest checks min < max. */
DiskLoc maxDiskLoc(0x7fffffff, 0x7fffffff);
DiskLoc minDiskLoc(0, 1);
2007-11-08 21:43:31 -05:00
/* Builds a KeyNode view of slot k in bucket bb: copies the slot's left-child
   pointer and record location, and constructs the key from the bytes stored at
   the slot's key-data offset inside bb.data. */
inline KeyNode::KeyNode(BucketBasics& bb, _KeyNode &k) :
    prevChildBucket(k.prevChildBucket),
    recordLoc(k.recordLoc),
    key(bb.data + k.keyDataOfs())
{
}
2007-11-08 21:43:31 -05:00
2007-12-08 15:50:47 -05:00
/* BucketBasics --------------------------------------------------- */
2007-11-08 21:43:31 -05:00
/* Clears the Packed flag; pack() will then do real work on its next call. */
inline void BucketBasics::setNotPacked() {
    flags = flags & ~Packed;
}
/* Sets the Packed flag; pack() returns immediately while it is set. */
inline void BucketBasics::setPacked() {
    flags = flags | Packed;
}
2007-12-06 17:04:20 -05:00
/* Appends an outline of the subtree rooted at this bucket to ss: one "*" line
   per bucket, indented by one space per level. Left children of each key are
   visited in key order, then nextChild. */
void BucketBasics::_shape(int level, stringstream& ss) {
    for( int pad = level; pad > 0; pad-- )
        ss << ' ';
    ss << "*\n";

    const int childLevel = level + 1;
    for( int pos = 0; pos < n; pos++ ) {
        if( k(pos).prevChildBucket.isNull() )
            continue;
        k(pos).prevChildBucket.btree()->_shape(childLevel, ss);
    }
    if( nextChild.isNull() )
        return;
    nextChild.btree()->_shape(childLevel, ss);
}
2007-12-16 20:45:25 -05:00
int bt_fv=0;
int bt_dmp=0;
2008-03-13 15:39:09 -04:00
void BucketBasics::dumpTree(DiskLoc thisLoc) {
bt_dmp=1;
fullValidate(thisLoc);
bt_dmp=0;
}
2007-12-16 20:45:25 -05:00
2008-03-13 15:39:09 -04:00
int BucketBasics::fullValidate(const DiskLoc& thisLoc) {
assertValid(true);
// if( bt_fv==0 )
// return;
2007-12-17 12:51:54 -05:00
2008-03-13 15:39:09 -04:00
if( bt_dmp ) {
cout << thisLoc.toString() << ' ';
2007-12-16 20:45:25 -05:00
((BtreeBucket *) this)->dump();
2008-03-13 15:39:09 -04:00
}
// keycount
int kc = 0;
2007-12-14 12:48:47 -05:00
2007-12-06 17:04:20 -05:00
for( int i = 0; i < n; i++ ) {
_KeyNode& kn = k(i);
2008-03-13 15:39:09 -04:00
if( kn.isUsed() ) kc++;
2007-12-06 17:04:20 -05:00
if( !kn.prevChildBucket.isNull() ) {
DiskLoc left = kn.prevChildBucket;
BtreeBucket *b = left.btree();
2008-02-14 12:59:10 -05:00
wassert( b->parent == thisLoc );
2008-03-13 15:39:09 -04:00
kc += b->fullValidate(kn.prevChildBucket);
2007-12-06 17:04:20 -05:00
}
}
if( !nextChild.isNull() ) {
BtreeBucket *b = nextChild.btree();
2008-02-14 12:59:10 -05:00
wassert( b->parent == thisLoc );
2008-03-13 15:39:09 -04:00
kc += b->fullValidate(nextChild);
2007-12-06 17:04:20 -05:00
}
2008-03-13 15:39:09 -04:00
return kc;
2007-12-06 17:04:20 -05:00
}
2008-02-20 23:01:45 -05:00
int nDumped = 0;
2008-03-13 15:39:09 -04:00
void BucketBasics::assertValid(bool force) {
if( !debug && !force )
2007-11-19 22:24:17 -05:00
return;
2008-02-20 23:01:45 -05:00
wassert( n >= 0 && n < BucketSize );
wassert( emptySize >= 0 && emptySize < BucketSize );
wassert( topSize >= n && topSize <= BucketSize );
wassert( Size == BucketSize );
2007-11-19 22:24:17 -05:00
if( 1 ) {
// slow:
for( int i = 0; i < n-1; i++ ) {
JSObj k1 = keyNode(i).key;
JSObj k2 = keyNode(i+1).key;
2008-03-13 15:39:09 -04:00
int z = k1.woCompare(k2); //OK
2007-12-02 11:33:59 -05:00
if( z > 0 ) {
cout << "ERROR: btree key order corrupt. Keys:" << endl;
2008-02-20 23:01:45 -05:00
if( ++nDumped < 5 ) {
for( int j = 0; j < n; j++ ) {
cout << " " << keyNode(j).key.toString() << endl;
}
((BtreeBucket *) this)->dump();
2007-12-02 11:33:59 -05:00
}
2008-02-20 23:01:45 -05:00
wassert(false);
break;
2007-12-02 11:33:59 -05:00
}
2008-03-13 15:39:09 -04:00
else if( z == 0 ) {
wassert( k(i).recordLoc < k(i+1).recordLoc );
}
2007-11-19 22:24:17 -05:00
}
}
else {
//faster:
if( n > 1 ) {
JSObj k1 = keyNode(0).key;
JSObj k2 = keyNode(n-1).key;
int z = k1.woCompare(k2);
2008-02-20 23:01:45 -05:00
wassert( z <= 0 );
2007-11-19 22:24:17 -05:00
}
}
}
2007-12-06 17:04:20 -05:00
inline void BucketBasics::markUnused(int keypos) {
assert( keypos >= 0 && keypos < n );
k(keypos).setUnused();
}
inline int BucketBasics::totalDataSize() const {
2007-11-08 21:43:31 -05:00
return Size - (data-(char*)this);
}
void BucketBasics::init(){
parent.Null(); nextChild.Null();
Size = BucketSize;
flags = Packed;
n = 0;
emptySize = totalDataSize(); topSize = 0;
2007-11-08 21:43:31 -05:00
reserved = 0;
}
/* we allocate space from the end of the buffer for data.
the keynodes grow from the front.
*/
inline int BucketBasics::_alloc(int bytes) {
topSize += bytes;
2007-11-08 21:43:31 -05:00
emptySize -= bytes;
int ofs = totalDataSize() - topSize;
assert( ofs > 0 );
2007-11-08 21:43:31 -05:00
return ofs;
}
2007-12-06 17:04:20 -05:00
void BucketBasics::_delKeyAtPos(int keypos) {
2007-11-08 21:43:31 -05:00
assert( keypos >= 0 && keypos <= n );
2007-12-03 11:52:46 -05:00
assert( childForPos(keypos).isNull() );
2007-11-08 21:43:31 -05:00
n--;
2007-12-06 17:04:20 -05:00
assert( n > 0 || nextChild.isNull() );
2007-11-08 21:43:31 -05:00
for( int j = keypos; j < n; j++ )
k(j) = k(j+1);
emptySize += sizeof(_KeyNode);
setNotPacked();
}
/* add a key. must be > all existing. be careful to set next ptr right. */
void BucketBasics::pushBack(const DiskLoc& recordLoc, JSObj& key, DiskLoc prevChild) {
2007-11-08 21:43:31 -05:00
int bytesNeeded = key.objsize() + sizeof(_KeyNode);
assert( bytesNeeded <= emptySize );
assert( n == 0 || keyNode(n-1).key.woCompare(key) <= 0 );
2007-11-08 21:43:31 -05:00
emptySize -= sizeof(_KeyNode);
_KeyNode& kn = k(n++);
2007-11-08 21:43:31 -05:00
kn.prevChildBucket = prevChild;
2007-12-06 17:04:20 -05:00
kn.recordLoc = recordLoc;
kn.setKeyDataOfs( (short) _alloc(key.objsize()) );
2008-03-13 15:39:09 -04:00
char *p = dataAt(kn.keyDataOfs());
2007-11-08 21:43:31 -05:00
memcpy(p, key.objdata(), key.objsize());
}
bool BucketBasics::basicInsert(int keypos, const DiskLoc& recordLoc, JSObj& key) {
assert( keypos >= 0 && keypos <= n );
int bytesNeeded = key.objsize() + sizeof(_KeyNode);
if( bytesNeeded > emptySize ) {
pack();
if( bytesNeeded > emptySize )
return false;
}
for( int j = n; j > keypos; j-- ) // make room
k(j) = k(j-1);
n++;
emptySize -= sizeof(_KeyNode);
_KeyNode& kn = k(keypos);
kn.prevChildBucket.Null();
kn.recordLoc = recordLoc;
2007-12-06 17:04:20 -05:00
kn.setKeyDataOfs((short) _alloc(key.objsize()) );
2008-03-13 15:39:09 -04:00
char *p = dataAt(kn.keyDataOfs());
2007-11-08 21:43:31 -05:00
memcpy(p, key.objdata(), key.objsize());
return true;
}
/* when we delete things we just leave empty space until the node is
full and then we repack it.
*/
void BucketBasics::pack() {
if( flags & Packed )
return;
int tdz = totalDataSize();
char temp[BucketSize];
int ofs = tdz;
2007-11-19 22:24:17 -05:00
topSize = 0;
for( int j = 0; j < n; j++ ) {
2008-03-13 15:39:09 -04:00
short ofsold = k(j).keyDataOfs();
2007-11-08 21:43:31 -05:00
int sz = keyNode(j).key.objsize();
2007-11-19 22:24:17 -05:00
ofs -= sz;
topSize += sz;
memcpy(temp+ofs, dataAt(ofsold), sz);
2008-03-13 15:39:09 -04:00
k(j).setKeyDataOfsSavingUse( ofs );
2007-11-08 21:43:31 -05:00
}
int dataUsed = tdz - ofs;
memcpy(data + ofs, temp + ofs, dataUsed);
emptySize = tdz - dataUsed - n * sizeof(_KeyNode);
assert( emptySize >= 0 );
2007-11-08 21:43:31 -05:00
setPacked();
assertValid();
2007-11-08 21:43:31 -05:00
}
/* Keeps only the first N keys and repacks so the space used by the dropped
   keys' data is reclaimed. */
inline void BucketBasics::truncateTo(int N) {
    n = N;
    setNotPacked();     // force pack() to actually run
    pack();
}
/* - BtreeBucket --------------------------------------------------- */
2007-12-16 20:45:25 -05:00
/* Return the largest key in the subtree rooted at thisLoc.
   Follows nextChild pointers down to the rightmost bucket.
   largestLoc - receives the location of that bucket.
   largestKey - receives the index of its last key. */
void BtreeBucket::findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey) {
    DiskLoc loc = thisLoc;
    BtreeBucket *b = loc.btree();
    while( !b->nextChild.isNull() ) {
        loc = b->nextChild;
        b = loc.btree();
    }
    assert(b->n>0);
    largestLoc = loc;
    largestKey = b->n-1;
}
2007-11-08 21:43:31 -05:00
/* pos: for existing keys k0...kn-1.
2007-11-19 22:24:17 -05:00
returns # it goes BEFORE. so key[pos-1] < key < key[pos]
2007-11-08 21:43:31 -05:00
returns n if it goes after the last existing key.
2007-12-06 17:04:20 -05:00
note result might be Unused!
2007-11-08 21:43:31 -05:00
*/
2008-03-13 15:39:09 -04:00
bool BtreeBucket::find(JSObj& key, DiskLoc recordLoc, int& pos) {
2007-11-08 21:43:31 -05:00
/* binary search for this key */
int l=0; int h=n-1;
while( l <= h ) {
int m = (l+h)/2;
KeyNode M = keyNode(m);
int x = key.woCompare(M.key);
2008-03-13 15:39:09 -04:00
if( x == 0 )
x = recordLoc.compare(M.recordLoc);
if( x < 0 ) // key < M.key
2007-11-08 21:43:31 -05:00
h = m-1;
else if( x > 0 )
l = m+1;
else {
2007-12-06 19:03:23 -05:00
// found it. however, if dup keys are here, be careful we might have
// found one in the middle. we want find() to return the leftmost instance.
2008-03-13 15:39:09 -04:00
/*
2007-12-06 19:03:23 -05:00
while( m >= 1 && keyNode(m-1).key.woEqual(key) )
m--;
2008-03-13 15:39:09 -04:00
*/
2007-12-16 20:45:25 -05:00
2007-11-08 21:43:31 -05:00
pos = m;
2007-12-16 20:45:25 -05:00
/*
2007-12-16 20:45:25 -05:00
DiskLoc ch = k(m).prevChildBucket;
if( !ch.isNull() ) {
// if dup keys, might be dups to the left.
DiskLoc largestLoc;
int largestKey;
ch.btree()->findLargestKey(ch, largestLoc, largestKey);
if( !largestLoc.isNull() ) {
if( largestLoc.btree()->keyAt(largestKey).woEqual(key) )
return false;
}
}
*/
2007-12-16 20:45:25 -05:00
2007-11-08 21:43:31 -05:00
return true;
}
2008-03-13 15:39:09 -04:00
//? x = key.woCompare(M.key);
2007-11-08 21:43:31 -05:00
}
// not found
pos = l;
if( pos != n ) {
JSObj keyatpos = keyNode(pos).key;
2008-02-20 23:01:45 -05:00
wassert( key.woCompare(keyatpos) <= 0 );
2007-11-19 22:24:17 -05:00
if( pos > 0 ) {
2008-02-20 23:01:45 -05:00
wassert( keyNode(pos-1).key.woCompare(key) <= 0 );
2007-11-19 22:24:17 -05:00
}
}
2007-12-17 12:51:54 -05:00
2007-11-08 21:43:31 -05:00
return false;
}
2007-12-06 17:04:20 -05:00
/* Unlinks this (non-head) bucket from its parent and deletes its record.
   thisLoc - location of this bucket; ns - namespace the record belongs to.
   If the parent holds no reference to thisLoc, both buckets are dumped and an
   assertion is raised. */
void BtreeBucket::delBucket(const DiskLoc& thisLoc, const char *ns) {
    assert( !isHead() );

    BtreeBucket *p = parent.btree();
    bool unlinked = false;
    if( p->nextChild == thisLoc ) {
        p->nextChild.Null();
        unlinked = true;
    }
    else {
        for( int slot = 0; slot < p->n; slot++ ) {
            if( p->k(slot).prevChildBucket == thisLoc ) {
                p->k(slot).prevChildBucket.Null();
                unlinked = true;
                break;
            }
        }
    }

    if( !unlinked ) {
        cout << "ERROR: can't find ref to deleted bucket.\n";
        cout << "To delete:\n";
        dump();
        cout << "Parent:\n";
        p->dump();
        assert(false);
    }

    //defensive:
    n = -1;
    parent.Null();
    theDataFileMgr.deleteRecord(ns, thisLoc.rec(), thisLoc);
}
/* Deletes the key at position p.
   note: may delete the entire bucket! `this` is invalid upon return sometimes.
   A key with a left child is only marked unused, as is the last remaining key
   of a bucket that still has any child. A childless key is physically removed.
   When that would empty a non-head bucket, the bucket itself is deleted. */
void BtreeBucket::delKeyAtPos(const DiskLoc& thisLoc, const char *ns, int p) {
    assert(n>0);

    DiskLoc left = childForPos(p);
    const bool hasLeftChild = !left.isNull();

    if( n == 1 ) {
        if( hasLeftChild || !nextChild.isNull() )
            markUnused(p);
        else if( isHead() )
            _delKeyAtPos(p); // we don't delete the top bucket ever
        else
            delBucket(thisLoc, ns);
        return;
    }

    if( hasLeftChild )
        markUnused(p);
    else
        _delKeyAtPos(p);
}
2007-12-08 15:50:47 -05:00
/* Debug globals; not referenced in this part of the file. */
int verbose = 0;
int qqq = 0;

/* Removes the index entry (key, recordLoc) from the tree rooted at thisLoc.
   Returns true if the entry was found and deleted; false if it was not found
   or the key is larger than KeyMax (which is reported via problem()). */
bool BtreeBucket::unindex(const DiskLoc& thisLoc, const char *ns, JSObj& key, const DiskLoc& recordLoc ) {
    if( key.objsize() > KeyMax ) {
        problem() << "unindex: key too large to index, skipping " << ns << ' ' << key.toString() << endl;
        return false;
    }

    int pos;
    bool found;
    DiskLoc loc = locate(thisLoc, key, pos, found, recordLoc, 1);
    if( !found )
        return false;

    loc.btree()->delKeyAtPos(loc, ns, pos);
    return true;
}
2007-11-08 21:43:31 -05:00
/* Allocates and initializes an empty, in-memory bucket of BucketSize bytes.
   The caller owns the result and must release it with free(). */
BtreeBucket* BtreeBucket::allocTemp() {
    BtreeBucket *b = (BtreeBucket*) malloc(BucketSize);
    // fail loudly instead of calling init() through a null pointer
    assert( b != 0 );
    b->init();
    return b;
}
2007-12-06 17:04:20 -05:00
inline void fix(const DiskLoc& thisLoc, const DiskLoc& child) {
2008-02-14 12:50:13 -05:00
if( !child.isNull() ) {
if( insert_debug )
cout << " " << child.toString() << ".parent=" << thisLoc.toString() << endl;
2007-12-06 17:04:20 -05:00
child.btree()->parent = thisLoc;
2008-02-14 12:50:13 -05:00
}
2007-12-06 17:04:20 -05:00
}
/* Re-points the parent pointer of every child of this bucket (nextChild first,
   then each key's left child) at thisLoc.
   this sucks. maybe get rid of parent ptrs. */
void BtreeBucket::fixParentPtrs(const DiskLoc& thisLoc) {
    fix(thisLoc, nextChild);
    int pos = 0;
    while( pos < n ) {
        fix(thisLoc, k(pos).prevChildBucket);
        pos++;
    }
}
/* keypos - where to insert the key i3n range 0..n. 0=make leftmost, n=make rightmost.
*/
2007-12-17 12:51:54 -05:00
void BtreeBucket::insertHere(DiskLoc thisLoc, const char *ns, int keypos,
DiskLoc recordLoc, JSObj& key,
DiskLoc lchild, DiskLoc rchild, IndexDetails& idx)
{
2008-02-14 12:50:13 -05:00
if( insert_debug )
cout << " " << thisLoc.toString() << ".insertHere " << key.toString() << '/' << recordLoc.toString() << ' '
<< lchild.toString() << ' ' << rchild.toString() << " keypos:" << keypos << endl;
2007-12-17 12:51:54 -05:00
DiskLoc oldLoc = thisLoc;
2007-11-08 21:43:31 -05:00
if( basicInsert(keypos, recordLoc, key) ) {
_KeyNode& kn = k(keypos);
if( keypos+1 == n ) { // last key
2008-02-14 00:19:21 -05:00
if( nextChild != lchild ) {
cout << "ERROR nextChild != lchild" << endl;
cout << " thisLoc: " << thisLoc.toString() << ' ' << ns << endl;
cout << " keyPos: " << keypos << " n:" << n << endl;
cout << " nextChild: " << nextChild.toString() << " lchild: " << lchild.toString() << endl;
cout << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
cout << " key: " << key.toString() << endl;
dump();
#if defined(_WIN32)
cout << "\n\nDUMPING FULL INDEX" << endl;
bt_dmp=1;
bt_fv=1;
idx.head.btree()->fullValidate(idx.head);
#endif
assert(false);
}
2007-11-08 21:43:31 -05:00
kn.prevChildBucket = nextChild;
assert( kn.prevChildBucket == lchild );
2008-02-14 12:50:13 -05:00
nextChild = rchild;
if( !rchild.isNull() )
rchild.btree()->parent = thisLoc;
2007-11-08 21:43:31 -05:00
}
else {
k(keypos).prevChildBucket = lchild;
if( k(keypos+1).prevChildBucket != lchild ) {
cout << "ERROR k(keypos+1).prevChildBucket != lchild" << endl;
cout << " thisLoc: " << thisLoc.toString() << ' ' << ns << endl;
cout << " keyPos: " << keypos << " n:" << n << endl;
cout << " k(keypos+1).pcb: " << k(keypos+1).prevChildBucket.toString() << " lchild: " << lchild.toString() << endl;
cout << " recordLoc: " << recordLoc.toString() << " rchild: " << rchild.toString() << endl;
cout << " key: " << key.toString() << endl;
dump();
2008-02-14 00:19:21 -05:00
#if defined(_WIN32)
cout << "\n\nDUMPING FULL INDEX" << endl;
bt_dmp=1;
bt_fv=1;
idx.head.btree()->fullValidate(idx.head);
2008-02-14 00:19:21 -05:00
#endif
assert(false);
}
2007-11-08 21:43:31 -05:00
k(keypos+1).prevChildBucket = rchild;
2008-02-14 12:50:13 -05:00
if( !rchild.isNull() )
rchild.btree()->parent = thisLoc;
2007-11-08 21:43:31 -05:00
}
return;
}
// split
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " " << thisLoc.toString() << ".split" << endl;
2008-02-24 20:16:36 -05:00
int mid = n / 2;
/* on duplicate key, we need to ensure that they all end up on the RHS */
if( 0 ) {
assert(mid>0);
while( 1 ) {
KeyNode mn = keyNode(mid);
KeyNode left = keyNode(mid-1);
if( left.key < mn.key )
break;
mid--;
if( mid < 3 ) {
2008-04-25 17:24:27 -04:00
problem() << "Assertion failure - mid<3: duplicate key bug not fixed yet" << endl;
2008-02-24 20:16:36 -05:00
cout << "Assertion failure - mid<3: duplicate key bug not fixed yet" << endl;
cout << " ns:" << ns << endl;
cout << " key:" << mn.key.toString() << endl;
break;
}
}
}
2007-11-08 21:43:31 -05:00
BtreeBucket *r = allocTemp();
DiskLoc rLoc;
2008-02-24 20:16:36 -05:00
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " mid:" << mid << ' ' << keyNode(mid).key.toString() << " n:" << n << endl;
2007-11-08 21:43:31 -05:00
for( int i = mid+1; i < n; i++ ) {
KeyNode kn = keyNode(i);
2007-11-19 22:24:17 -05:00
if( i == keypos ) {
// slip in the new one
r->pushBack(recordLoc, key, kn.prevChildBucket);
r->pushBack(kn.recordLoc, kn.key, rchild);
}
else
r->pushBack(kn.recordLoc, kn.key, kn.prevChildBucket);
2007-11-08 21:43:31 -05:00
}
r->nextChild = nextChild;
r->assertValid();
2007-12-17 12:51:54 -05:00
//r->dump();
2007-11-08 21:43:31 -05:00
rLoc = theDataFileMgr.insert(ns, r, r->Size, true);
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " new rLoc:" << rLoc.toString() << endl;
2007-11-08 21:43:31 -05:00
free(r); r = 0;
2007-12-06 17:04:20 -05:00
rLoc.btree()->fixParentPtrs(rLoc);
2007-11-08 21:43:31 -05:00
{
KeyNode middle = keyNode(mid);
nextChild = middle.prevChildBucket; // middle key gets promoted, its children will be thisLoc (l) and rLoc (r)
2008-02-14 12:50:13 -05:00
if( split_debug ) {
//rLoc.btree()->dump();
cout << " middle key:" << middle.key.toString() << endl;
}
2007-11-08 21:43:31 -05:00
// promote middle to a parent node
2007-11-08 21:43:31 -05:00
if( parent.isNull() ) {
// make a new parent if we were the root
BtreeBucket *p = allocTemp();
p->pushBack(middle.recordLoc, middle.key, thisLoc);
2007-11-08 21:43:31 -05:00
p->nextChild = rLoc;
p->assertValid();
2007-11-20 14:00:27 -05:00
parent = idx.head = theDataFileMgr.insert(ns, p, p->Size, true);
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " we were root, making new root:" << hex << parent.getOfs() << dec << endl;
2007-11-08 21:43:31 -05:00
free(p);
2007-12-17 12:51:54 -05:00
rLoc.btree()->parent = parent;
2007-11-08 21:43:31 -05:00
}
else {
2007-12-17 12:51:54 -05:00
/* set this before calling _insert - if it splits it will do fixParent() logic and fix the value,
so we don't want to overwrite that if it happens.
*/
rLoc.btree()->parent = parent;
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " promoting middle key " << middle.key.toString() << endl;
parent.btree()->_insert(parent, ns, middle.recordLoc, middle.key, false, thisLoc, rLoc, idx);
}
2007-12-17 12:51:54 -05:00
BtreeBucket *br = rLoc.btree();
//br->dump();
//parent.btree()->dump();
//idx.head.btree()->dump();
}
// mark on left that we no longer have anything from midpoint on.
2007-12-08 15:50:47 -05:00
bool highest = keypos == n;
truncateTo(mid); // note this may trash middle.key! thus we had to promote it before finishing up here.
// add our new key, there is room now
{
2008-02-25 18:22:10 -05:00
//dump();
if( keypos <= mid ) {
// if( keypos < mid ) {
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " keypos<mid, insertHere() the new key" << endl;
insertHere(thisLoc, ns, keypos, recordLoc, key, lchild, rchild, idx);
2008-02-25 18:22:10 -05:00
//dump();
2007-12-08 15:50:47 -05:00
} else if( highest ) {
// else handled above already.
int kp = keypos-mid-1; assert(kp>=0);
rLoc.btree()->insertHere(rLoc, ns, kp, recordLoc, key, lchild, rchild, idx);
2008-02-14 12:50:13 -05:00
// set a bp here.
// if( !lchild.isNull() ) cout << lchild.btree()->parent.toString() << endl;
// if( !rchild.isNull() ) cout << rchild.btree()->parent.toString() << endl;
// cout << "temp" << endl;
2007-11-08 21:43:31 -05:00
}
}
2007-12-06 17:04:20 -05:00
2008-02-14 12:50:13 -05:00
if( split_debug )
cout << " split end " << hex << thisLoc.getOfs() << dec << endl;
2007-11-08 21:43:31 -05:00
}
/* Creates a new, empty head bucket for an index in namespace ns and returns
   the location it was stored at. */
DiskLoc BtreeBucket::addHead(const char *ns) {
    BtreeBucket *p = allocTemp();
    DiskLoc loc = theDataFileMgr.insert(ns, p, p->Size, true);
    // the bucket was only needed as the source for insert(); release it,
    // as insertHere() does for its temporary buckets.
    free(p);
    return loc;
}
2007-11-11 14:21:02 -05:00
/* Follows parent pointers up from thisLoc and returns the location of the
   first bucket for which isHead() is true. */
DiskLoc BtreeBucket::getHead(const DiskLoc& thisLoc) {
    DiskLoc cur = thisLoc;
    for( BtreeBucket *b = cur.btree(); !b->isHead(); b = cur.btree() )
        cur = b->parent;
    return cur;
}
2008-02-01 13:56:41 -05:00
/* Steps from key keyOfs of this bucket to the adjacent key in the given
   direction (positive = forward, negative = backward).
   thisLoc - location of this bucket.
   keyOfs  - in: current key index (must be 0..n-1); out: key index within the
             returned bucket.
   caller  - name printed in the diagnostic when keyOfs is out of range.
   Returns the bucket holding the adjacent key, or a null DiskLoc when the end
   of the tree has been reached. */
DiskLoc BtreeBucket::advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) {
    if( keyOfs < 0 || keyOfs >= n ) {
        cout << "ASSERT failure BtreeBucket::advance, caller: " << caller << endl;
        cout << " thisLoc: " << thisLoc.toString() << endl;
        cout << " keyOfs: " << keyOfs << " n:" << n << " direction: " << direction << endl;
        cout << bucketSummary() << endl;
        assert( keyOfs >= 0 && keyOfs < n );
    }

    // going backwards, the child between two keys is indexed one higher
    const int childAdj = (direction < 0) ? 1 : 0;
    const int nextOfs = keyOfs + direction;

    DiskLoc nextDown = childForPos(nextOfs + childAdj);
    if( !nextDown.isNull() ) {
        // descend to the nearest key in the subtree between the two positions
        for( ;; ) {
            BtreeBucket *down = nextDown.btree();
            keyOfs = (direction > 0) ? 0 : down->n - 1;
            DiskLoc deeper = down->childForPos(keyOfs + childAdj);
            if( deeper.isNull() )
                return nextDown;
            nextDown = deeper;
        }
    }

    if( nextOfs >= 0 && nextOfs < n ) {
        keyOfs = nextOfs;
        return thisLoc;
    }

    // end of bucket. traverse back up.
    DiskLoc childLoc = thisLoc;
    for( DiskLoc ancestor = parent; !ancestor.isNull(); ) {
        BtreeBucket *an = ancestor.btree();
        for( int i = 0; i < an->n; i++ ) {
            if( an->childForPos(i + childAdj) == childLoc ) {
                keyOfs = i;
                return ancestor;
            }
        }
        assert( direction<0 || an->nextChild == childLoc );
        // parent exhausted also, keep going up
        childLoc = ancestor;
        ancestor = an->parent;
    }

    return DiskLoc();
}
2008-03-13 15:39:09 -04:00
/* Searches the subtree rooted at this bucket for (key, recordLoc).
   thisLoc   - location of this bucket.
   pos       - receives the key index within the returned bucket.
   found     - set to true only when the exact entry exists.
   direction - scan direction; with -1 a position past the last key of a
               non-empty bucket is pulled back onto that last key.
   Returns the bucket containing position pos, or a null DiskLoc when the
   position lies past the end of this subtree. */
DiskLoc BtreeBucket::locate(const DiskLoc& thisLoc, JSObj& key, int& pos, bool& found, DiskLoc recordLoc, int direction) {
    int here;
    found = find(key, recordLoc, here);
    if( found ) {
        pos = here;
        return thisLoc;
    }

    // not in this bucket: try the child covering that position
    DiskLoc child = childForPos(here);
    if( !child.isNull() ) {
        DiskLoc below = child.btree()->locate(child, key, pos, found, recordLoc, direction);
        if( !below.isNull() )
            return below;
    }

    if( direction == -1 && here == n && n )
        here--;

    pos = here;
    if( pos == n )
        return DiskLoc(); /*theend*/
    return thisLoc;
}
2007-11-08 21:43:31 -05:00
/* thisloc is the location of this bucket object. you must pass that in. */
2007-12-17 12:51:54 -05:00
int BtreeBucket::_insert(DiskLoc thisLoc, const char *ns, DiskLoc recordLoc,
2007-11-08 21:43:31 -05:00
JSObj& key, bool dupsAllowed,
DiskLoc lChild, DiskLoc rChild, IndexDetails& idx) {
2007-11-08 21:43:31 -05:00
if( key.objsize() > KeyMax ) {
problem() << "ERROR: key too large len:" << key.objsize() << " max:" << KeyMax << ' ' << ns << endl;
2007-11-08 21:43:31 -05:00
return 2;
}
2008-02-14 12:50:13 -05:00
assert( key.objsize() > 0 );
2008-03-13 15:39:09 -04:00
2007-11-08 21:43:31 -05:00
int pos;
2008-03-13 15:39:09 -04:00
bool found = find(key, recordLoc, pos);
2008-02-14 12:50:13 -05:00
if( insert_debug ) {
cout << " " << thisLoc.toString() << '.' << "_insert " <<
key.toString() << '/' << recordLoc.toString() <<
" l:" << lChild.toString() << " r:" << rChild.toString() << endl;
cout << " found:" << found << " pos:" << pos << " n:" << n << endl;
}
2007-11-08 21:43:31 -05:00
if( found ) {
2008-03-13 15:39:09 -04:00
if( k(pos).isUnused() ) {
cout << "an unused already occupying keyslot, write more code.\n";
cout << " index may be corrupt (missing data) now.\n";
}
cout << "_insert(): key already exists in index\n";
cout << " " << ns << " thisLoc:" << thisLoc.toString() << '\n';
cout << " " << key.toString() << '\n';
cout << " " << "recordLoc:" << recordLoc.toString() << " pos:" << pos << endl;
cout << " old l r: " << childForPos(pos).toString() << ' ' << childForPos(pos+1).toString() << endl;
cout << " new l r: " << lChild.toString() << ' ' << rChild.toString() << endl;
assert(false);
2007-12-06 19:03:23 -05:00
// on a dup key always insert on the right or else you will be broken.
2008-03-13 15:39:09 -04:00
// pos++;
// on a promotion, find the right point to update if dup keys.
/* not needed: we always insert right after the first key so we are ok with just pos++...
if( !rChild.isNull() ) {
while( pos < n && k(pos).prevChildBucket != lchild ) {
pos++;
cout << "looking for the right dup key" << endl;
}
}
2007-12-06 19:03:23 -05:00
*/
2007-11-08 21:43:31 -05:00
}
// cout << "TEMP: key: " << key.toString() << endl;
2007-11-08 21:43:31 -05:00
DiskLoc& child = getChild(pos);
2008-02-14 12:50:13 -05:00
if( insert_debug )
cout << " getChild(" << pos << "): " << child.toString() << endl;
2007-12-17 12:51:54 -05:00
if( child.isNull() || !rChild.isNull() /* means an 'internal' insert */ ) {
insertHere(thisLoc, ns, pos, recordLoc, key, lChild, rChild, idx);
2007-11-08 21:43:31 -05:00
return 0;
}
2007-12-17 12:51:54 -05:00
return child.btree()->insert(child, ns, recordLoc, key, dupsAllowed, idx, false);
2007-11-08 21:43:31 -05:00
}
2007-11-19 22:24:17 -05:00
void BtreeBucket::dump() {
2007-12-08 15:50:47 -05:00
cout << "DUMP btreebucket: ";
cout << " parent:" << hex << parent.getOfs() << dec;
2007-11-19 22:24:17 -05:00
for( int i = 0; i < n; i++ ) {
2007-12-08 15:50:47 -05:00
cout << '\n';
2007-11-19 22:24:17 -05:00
KeyNode k = keyNode(i);
2007-12-08 15:50:47 -05:00
cout << '\t' << i << '\t' << k.key.toString() << "\tleft:" << hex <<
k.prevChildBucket.getOfs() << "\trec:" << k.recordLoc.getOfs() << dec;
2007-12-06 17:04:20 -05:00
if( this->k(i).isUnused() )
cout << " UNUSED";
2007-11-19 22:24:17 -05:00
}
2007-12-08 15:50:47 -05:00
cout << " right:" << hex << nextChild.getOfs() << dec << endl;
2007-11-19 22:24:17 -05:00
}
2008-02-25 18:22:10 -05:00
/* Key searched for by tempMusic(); must be set by whoever calls it. */
JSObj *music = 0;

/* Temporary debugging aid: scans forward from the position of *music in the
   index rooted at thisLoc, over entries whose key equals *music, and reports
   whether one of them has the hard-coded record offset 0x4c8d7c0. */
void tempMusic(DiskLoc thisLoc)
{
    // music defaults to null; without a key there is nothing to look for
    if( music == 0 ) {
        cout << "*** NOT FOUND" << endl;
        return;
    }
    BtreeCursor c(thisLoc, *music, 1, true);
    while( c.ok() ) {
        KeyNode kn = c.currKeyNode();
        if( !kn.key.woEqual(*music) )
            break;
        if( kn.recordLoc.getOfs() == 0x4c8d7c0 ) {
            cout << "*** found it" << endl;
            // c.bucket.btree()->dump();
            return;
        }
        c.advance();
    }
    cout << "*** NOT FOUND" << endl;
}
/* todo: meaning of return code unclear clean up */
2007-12-17 12:51:54 -05:00
int BtreeBucket::insert(DiskLoc thisLoc, const char *ns, DiskLoc recordLoc,
JSObj& key, bool dupsAllowed, IndexDetails& idx, bool toplevel)
2007-11-08 21:43:31 -05:00
{
2008-02-14 00:19:21 -05:00
if( toplevel ) {
if( key.objsize() > KeyMax ) {
problem() << "Btree::insert: key too large to index, skipping " << ns << ' ' << key.toString() << '\n';
return 3;
}
++ninserts;
2008-02-14 12:50:13 -05:00
if( /*ninserts > 127250 || */ninserts % 1000 == 0 ) {
cout << "ninserts: " << ninserts << endl;
if( 0 && ninserts >= 127287 ) {
cout << "debug?" << endl;
split_debug = 1;
}
}
2008-02-14 00:19:21 -05:00
}
2007-12-17 12:51:54 -05:00
bool chk = false;
int x = _insert(thisLoc, ns, recordLoc, key, dupsAllowed, DiskLoc(), DiskLoc(), idx);
2007-12-06 17:04:20 -05:00
assertValid();
2008-02-25 18:22:10 -05:00
/* if( toplevel ) {
if( recordLoc.getOfs() == 0x4c8d7c0 ) {
if( key.toString() == "{ _searchIndex: \"music\" }" ) {
tempMusic(thisLoc);
}
}
}
*/
return x;
2007-11-08 21:43:31 -05:00
}
2007-11-10 16:46:30 -05:00
2007-12-06 17:04:20 -05:00
/* Writes an indented outline of the tree rooted at this bucket to ss. */
void BtreeBucket::shape(stringstream& ss) {
    const int rootLevel = 0;
    _shape(rootLevel, ss);
}
2007-11-10 16:46:30 -05:00
/* - BtreeCursor --------------------------------------------------- */
2007-12-06 19:03:23 -05:00
/* Positions a cursor at key k in the index rooted at head.
   _direction - scan direction; a positive value starts from minDiskLoc for
                that key, otherwise from maxDiskLoc.
   sm         - stored as stopmiss.
   With otherTraceLevel >= 200 the whole index is validated first; with
   otherTraceLevel >= 12 the head bucket is dumped. Unused keys at the starting
   position are skipped. */
BtreeCursor::BtreeCursor(DiskLoc head, JSObj& k, int _direction, bool sm) :
    direction(_direction), stopmiss(sm)
{
    //otherTraceLevel = 999;
    if( otherTraceLevel >= 200 ) {
        cout << "::BtreeCursor() qtl>200. validating entire index." << endl;
        head.btree()->fullValidate(head);
    }
    else if( otherTraceLevel >= 12 ) {
        cout << "BTreeCursor(). dumping head bucket" << endl;
        head.btree()->dump();
    }

    bool found;
    bucket = head.btree()->locate(head, k, keyOfs, found, direction > 0 ? minDiskLoc : maxDiskLoc, direction);
    checkUnused();
}
int zzz = 0;
2007-12-06 17:04:20 -05:00
/* skip unused keys. */
void BtreeCursor::checkUnused() {
int u = 0;
while( 1 ) {
if( !ok() )
break;
BtreeBucket *b = bucket.btree();
_KeyNode& kn = b->k(keyOfs);
if( kn.isUsed() )
break;
2008-02-01 13:56:41 -05:00
bucket = b->advance(bucket, keyOfs, direction, "checkUnused");
2007-12-06 17:04:20 -05:00
u++;
}
if( u > 10 && ++zzz % 16 == 0 )
2007-12-06 17:04:20 -05:00
cout << "btree unused skipped:" << u << endl;
}
2007-11-10 16:46:30 -05:00
2007-12-06 19:03:23 -05:00
/*DiskLoc BtreeCursor::currLoc() {
2007-11-10 16:46:30 -05:00
assert( !bucket.isNull() );
2007-12-06 17:04:20 -05:00
_KeyNode& kn = bucket.btree()->k(keyOfs);
assert( kn.isUsed() );
return kn.recordLoc;
2007-12-06 19:03:23 -05:00
}*/
2007-11-10 16:46:30 -05:00
bool BtreeCursor::advance() {
if( bucket.isNull() )
return false;
2008-02-01 13:56:41 -05:00
bucket = bucket.btree()->advance(bucket, keyOfs, direction, "BtreeCursor::advance");
2007-12-06 17:04:20 -05:00
checkUnused();
2007-11-10 16:46:30 -05:00
return !bucket.isNull();
}
2007-11-11 14:21:02 -05:00
void BtreeCursor::noteLocation() {
if( !eof() ) {
JSObj o = bucket.btree()->keyAt(keyOfs).copy();
keyAtKeyOfs = o;
2008-03-13 15:39:09 -04:00
locAtKeyOfs = bucket.btree()->k(keyOfs).recordLoc;
2007-11-11 14:21:02 -05:00
}
}
2008-02-14 12:50:13 -05:00
int clctr = 0;
2007-11-11 14:21:02 -05:00
/* see if things moved around (deletes, splits, inserts) */
void BtreeCursor::checkLocation() {
2008-02-12 10:12:07 -05:00
try {
if( eof() )
return;
BtreeBucket *b = bucket.btree();
if( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
2008-03-13 15:39:09 -04:00
b->k(keyOfs).recordLoc == locAtKeyOfs ) {
if( !b->k(keyOfs).isUsed() )
checkUnused();
2008-02-12 10:12:07 -05:00
return;
2008-03-13 15:39:09 -04:00
}
2008-02-12 10:12:07 -05:00
}
catch( AssertionException ) {
cout << "Caught exception in checkLocation(), that's maybe ok" << endl;
}
2007-11-11 14:21:02 -05:00
bool found;
DiskLoc bold = bucket;
2008-02-14 12:50:13 -05:00
/* TODO: Switch to keep indexdetails and do idx.head! */
2007-11-11 14:21:02 -05:00
/* didn't find, check from the top */
DiskLoc head = bold.btree()->getHead(bold);
2008-03-13 15:39:09 -04:00
bucket = head.btree()->locate(head, keyAtKeyOfs, keyOfs, found, locAtKeyOfs, direction);
2008-02-14 12:50:13 -05:00
if( clctr++ % 128 == 0 )
cout << " key seems to have moved in the index, refinding. found:" << found << endl;
2008-02-12 10:12:07 -05:00
if( found )
checkUnused();
2007-11-11 14:21:02 -05:00
}
2008-03-13 15:39:09 -04:00
/* ----------------------------------------------------------------------------- */
/* Start-up self check: constructing the file-level instance btut asserts that
   minDiskLoc orders before maxDiskLoc. */
struct BtreeUnitTest
{
    BtreeUnitTest()
    {
        assert( minDiskLoc.compare(maxDiskLoc) < 0 );
    }
} btut;