00001
00002
00019 #pragma once
00020
00021 #include "../pch.h"
00022 #include "jsobj.h"
00023 #include "diskloc.h"
00024 #include "pdfile.h"
00025
00026 namespace mongo {
00027
00028 const int BucketSize = 8192;
00029
00030 #pragma pack(1)
00031 struct _KeyNode {
00033 _KeyNode& writing() const;
00034 DiskLoc prevChildBucket;
00035 DiskLoc recordLoc;
00036 short keyDataOfs() const {
00037 return (short) _kdo;
00038 }
00039 unsigned short _kdo;
00040 void setKeyDataOfs(short s) {
00041 _kdo = s;
00042 assert(s>=0);
00043 }
00044 void setKeyDataOfsSavingUse(short s) {
00045 _kdo = s;
00046 assert(s>=0);
00047 }
00048 void setUsed() { recordLoc.GETOFS() &= ~1; }
00049 void setUnused() {
00050
00051
00052
00053
00054 recordLoc.GETOFS() |= 1;
00055 }
00056 int isUnused() const {
00057 return recordLoc.getOfs() & 1;
00058 }
00059 int isUsed() const {
00060 return !isUnused();
00061 }
00062 };
00063 #pragma pack()
00064
00065 class BucketBasics;
00066
00073 class KeyNode {
00074 public:
00075 KeyNode(const BucketBasics& bb, const _KeyNode &k);
00076 const DiskLoc& prevChildBucket;
00077 const DiskLoc& recordLoc;
00078 BSONObj key;
00079 };
00080
00081 #pragma pack(1)
00082 class BtreeData {
00083 protected:
00084 DiskLoc parent;
00085 DiskLoc nextChild;
00086 unsigned short _wasSize;
00087 unsigned short _reserved1;
00088 int flags;
00089
00090
00091 int emptySize;
00092 int topSize;
00093 int n;
00094
00095 int reserved;
00096 char data[4];
00097 };
00098
00121 class BucketBasics : public BtreeData {
00122 friend class BtreeBuilder;
00123 friend class KeyNode;
00124 public:
00126 void assertWritable();
00127
00128 void assertValid(const Ordering &order, bool force = false) const;
00129 void assertValid(const BSONObj &orderObj, bool force = false) const { return assertValid(Ordering::make(orderObj),force); }
00130
00135 const KeyNode keyNode(int i) const {
00136 if ( i >= n ) {
00137 massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << n ).jsonString() , i < n );
00138 }
00139 return KeyNode(*this, k(i));
00140 }
00141
00142 static int headerSize() {
00143 const BucketBasics *d = 0;
00144 return (char*)&(d->data) - (char*)&(d->parent);
00145 }
00146 static int bodySize() { return BucketSize - headerSize(); }
00147
00148
00149 int nKeys() const { return n; }
00150 const DiskLoc getNextChild() const { return nextChild; }
00151
00152 protected:
00153 char * dataAt(short ofs) { return data + ofs; }
00154
00155 void init();
00156
00164 bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const;
00165
00167 bool _pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild);
00168 void pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) {
00169 bool ok = _pushBack( recordLoc , key , order , prevChild );
00170 assert(ok);
00171 }
00172
00184 void popBack(DiskLoc& recLoc, BSONObj& key);
00185
00186 void _delKeyAtPos(int keypos, bool mayEmpty = false);
00187
00188
00189
00190
00191
00192 enum Flags { Packed=1 };
00193
00194 const DiskLoc& childForPos(int p) const { return p == n ? nextChild : k(p).prevChildBucket; }
00195 DiskLoc& childForPos(int p) { return p == n ? nextChild : k(p).prevChildBucket; }
00196
00197 int totalDataSize() const;
00199 bool mayDropKey( int index, int refPos ) const;
00200
00207 void _pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const;
00209 void _packReadyForMod(const Ordering &order, int &refPos);
00210
00215 int packedDataSize( int refPos ) const;
00216 void setNotPacked() { flags &= ~Packed; }
00217 void setPacked() { flags |= Packed; }
00218 int _alloc(int bytes);
00219 void _unalloc(int bytes);
00220 void truncateTo(int N, const Ordering &order, int &refPos);
00222 void dropFront(int nDrop, const Ordering &order, int &refPos);
00223 void markUnused(int keypos);
00224
00229 const DiskLoc& tempNext() const { return parent; }
00230 DiskLoc& tempNext() { return parent; }
00231
00232 void _shape(int level, stringstream&) const;
00233 int Size() const;
00234 const _KeyNode& k(int i) const { return ((const _KeyNode*)data)[i]; }
00235 _KeyNode& k(int i) { return ((_KeyNode*)data)[i]; }
00236
00238 int splitPos( int keypos ) const;
00239
00245 void reserveKeysFront( int nAdd );
00246
00251 void setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket );
00252 };
00253
00282 class BtreeBucket : public BucketBasics {
00283 friend class BtreeCursor;
00284 public:
00285 bool isHead() const { return parent.isNull(); }
00286 void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const;
00287 int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0, bool strict = false) const;
00288
00289 bool isUsed( int i ) const { return k(i).isUsed(); }
00290 string bucketSummary() const;
00291 void dump() const;
00292
00300 bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const;
00301
00302 bool wouldCreateDup(
00303 const IndexDetails& idx, const DiskLoc &thisLoc,
00304 const BSONObj& key, const Ordering& order,
00305 const DiskLoc &self) const;
00306
00307 static DiskLoc addBucket(const IndexDetails&);
00309 void deallocBucket(const DiskLoc thisLoc, const IndexDetails &id);
00310
00311 static void renameIndexNamespace(const char *oldNs, const char *newNs);
00312
00314 int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
00315 const BSONObj& key, const Ordering &order, bool dupsAllowed,
00316 IndexDetails& idx, bool toplevel = true) const;
00317
00319 bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const;
00320
00328 DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order,
00329 int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const;
00330
00337 DiskLoc findSingle( const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const;
00338
00340 DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const;
00341
00342 void advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const;
00343 void customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const;
00344
00345 const DiskLoc getHead(const DiskLoc& thisLoc) const;
00346
00348 void shape(stringstream&) const;
00349
00350 static void a_test(IndexDetails&);
00351
00352 static int getLowWaterMark();
00353 static int getKeyMax();
00354
00355 protected:
00361 void fixParentPtrs(const DiskLoc thisLoc, int firstIndex = 0, int lastIndex = -1) const;
00362
00364 void delBucket(const DiskLoc thisLoc, const IndexDetails&);
00366 void delKeyAtPos(const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order);
00367
00374 bool mayBalanceWithNeighbors(const DiskLoc thisLoc, IndexDetails &id, const Ordering &order) const;
00375
00377 bool tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const;
00378 void doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order );
00379 void doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split,
00380 BtreeBucket *l, const DiskLoc lchild,
00381 BtreeBucket *r, const DiskLoc rchild,
00382 IndexDetails &id, const Ordering &order );
00383 void doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split,
00384 BtreeBucket *l, const DiskLoc lchild,
00385 BtreeBucket *r, const DiskLoc rchild,
00386 IndexDetails &id, const Ordering &order );
00387
00389 void doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order);
00390
00392 void replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id );
00393
00395 bool canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const;
00396
00406 int rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const;
00407
00408 int indexInParent( const DiskLoc &thisLoc ) const;
00409 BSONObj keyAt(int keyOfs) const {
00410 return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key;
00411 }
00412 static BtreeBucket* allocTemp();
00413
00415 void split(const DiskLoc thisLoc, int keypos,
00416 const DiskLoc recordLoc, const BSONObj& key,
00417 const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx);
00418
00419 void insertHere(const DiskLoc thisLoc, int keypos,
00420 const DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
00421 const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx) const;
00422
00423 int _insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
00424 const BSONObj& key, const Ordering &order, bool dupsAllowed,
00425 const DiskLoc lChild, const DiskLoc rChild, IndexDetails &idx) const;
00426 bool find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const;
00427 bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const;
00428 static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey);
00429 static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction );
00430 static void fix(const DiskLoc thisLoc, const DiskLoc child);
00431
00433 void setInternalKey( const DiskLoc thisLoc, int keypos,
00434 const DiskLoc recordLoc, const BSONObj &key, const Ordering &order,
00435 const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx);
00436
00442 void deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order );
00443 public:
00445 static string dupKeyError( const IndexDetails& idx , const BSONObj& key );
00446 };
00447 #pragma pack()
00448
00449 class BtreeCursor : public Cursor {
00450 public:
00451 BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction );
00452 BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction );
00453 virtual bool ok() { return !bucket.isNull(); }
00454 virtual bool advance();
00455 virtual void noteLocation();
00456 virtual void checkLocation();
00457 virtual bool supportGetMore() { return true; }
00458 virtual bool supportYields() { return true; }
00459
00467 virtual bool getsetdup(DiskLoc loc) {
00468 if( _multikey ) {
00469 pair<set<DiskLoc>::iterator, bool> p = _dups.insert(loc);
00470 return !p.second;
00471 }
00472 return false;
00473 }
00474
00475 virtual bool modifiedKeys() const { return _multikey; }
00476 virtual bool isMultiKey() const { return _multikey; }
00477
00478 const _KeyNode& _currKeyNode() const {
00479 assert( !bucket.isNull() );
00480 const _KeyNode& kn = bucket.btree()->k(keyOfs);
00481 assert( kn.isUsed() );
00482 return kn;
00483 }
00484 const KeyNode currKeyNode() const {
00485 assert( !bucket.isNull() );
00486 return bucket.btree()->keyNode(keyOfs);
00487 }
00488
00489 virtual BSONObj currKey() const { return currKeyNode().key; }
00490 virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); }
00491
00492 virtual void aboutToDeleteBucket(const DiskLoc& b) {
00493 if ( bucket == b )
00494 keyOfs = -1;
00495 }
00496
00497 virtual DiskLoc currLoc() { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); }
00498 virtual DiskLoc refLoc() { return currLoc(); }
00499 virtual Record* _current() { return currLoc().rec(); }
00500 virtual BSONObj current() { return BSONObj(_current()); }
00501 virtual string toString() {
00502 string s = string("BtreeCursor ") + indexDetails.indexName();
00503 if ( _direction < 0 ) s += " reverse";
00504 if ( _bounds.get() && _bounds->size() > 1 ) s += " multi";
00505 return s;
00506 }
00507
00508 BSONObj prettyKey( const BSONObj &key ) const {
00509 return key.replaceFieldNames( indexDetails.keyPattern() ).clientReadable();
00510 }
00511
00512 virtual BSONObj prettyIndexBounds() const {
00513 if ( !_independentFieldRanges ) {
00514 return BSON( "start" << prettyKey( startKey ) << "end" << prettyKey( endKey ) );
00515 }
00516 else {
00517 return _bounds->obj();
00518 }
00519 }
00520
00521 void forgetEndKey() { endKey = BSONObj(); }
00522
00523 virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); }
00524
00525 virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { _matcher = matcher; }
00526
00527 virtual long long nscanned() { return _nscanned; }
00528
00530 const DiskLoc getBucket() const { return bucket; }
00531
00532 private:
00537 bool skipUnusedKeys( bool mayJump );
00538 bool skipOutOfRangeKeysAndCheckEnd();
00539 void skipAndCheck();
00540 void checkEnd();
00541
00543 void audit();
00544
00546 void init();
00547
00549 void advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive );
00550
00551 friend class BtreeBucket;
00552
00553 set<DiskLoc> _dups;
00554 NamespaceDetails * const d;
00555 const int idxNo;
00556 BSONObj startKey;
00557 BSONObj endKey;
00558 bool _endKeyInclusive;
00559 bool _multikey;
00560 const IndexDetails& indexDetails;
00561 const BSONObj _order;
00562 const Ordering _ordering;
00563 DiskLoc bucket;
00564 int keyOfs;
00565 const int _direction;
00566 BSONObj keyAtKeyOfs;
00567 DiskLoc locAtKeyOfs;
00568 const shared_ptr< FieldRangeVector > _bounds;
00569 auto_ptr< FieldRangeVector::Iterator > _boundsIterator;
00570 const IndexSpec& _spec;
00571 shared_ptr< CoveredIndexMatcher > _matcher;
00572 bool _independentFieldRanges;
00573 long long _nscanned;
00574 };
00575
00576
00577 inline bool IndexDetails::hasKey(const BSONObj& key) {
00578 return head.btree()->exists(*this, head, key, Ordering::make(keyPattern()));
00579 }
00580 inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) {
00581 return head.btree()->wouldCreateDup(*this, head, key, Ordering::make(keyPattern()), self);
00582 }
00583
00588 class BtreeBuilder {
00589 bool dupsAllowed;
00590 IndexDetails& idx;
00591 unsigned long long n;
00592 BSONObj keyLast;
00593 BSONObj order;
00594 Ordering ordering;
00595 bool committed;
00596
00597 DiskLoc cur, first;
00598 BtreeBucket *b;
00599
00600 void newBucket();
00601 void buildNextLevel(DiskLoc);
00602 void mayCommitProgressDurably();
00603
00604 public:
00605 ~BtreeBuilder();
00606
00607 BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx);
00608
00610 void addKey(BSONObj& key, DiskLoc loc);
00611
00616 void commit();
00617
00618 unsigned long long getn() { return n; }
00619 };
00620
00621 }