Compare commits
58 Commits
mongodb-3.
...
mongodb-3.
| Author | SHA1 | Date | |
|---|---|---|---|
| | 5cdd3e320c | | |
| | 563b7823f7 | | |
| | 5e3a56f0ab | | |
| | bc929dbcf1 | | |
| | 07966a492a | | |
| | 67e412d4c5 | | |
| | 3c2ad56b50 | | |
| | b1768d0d9f | | |
| | 2893117baa | | |
| | 4380cec93d | | |
| | 21b5f9951e | | |
| | decd9166cc | | |
| | d835a0c0a8 | | |
| | 48e1343e40 | | |
| | eb838c7f12 | | |
| | a6957512a4 | | |
| | 197eef00fd | | |
| | 7a4f3259b4 | | |
| | 8326df6b76 | | |
| | b65381f64c | | |
| | 0019262fed | | |
| | 4d72349b8a | | |
| | 4898aa408f | | |
| | 9d375e3416 | | |
| | d9ec1ff8ec | | |
| | 465dca8b46 | | |
| | f95877af13 | | |
| | 62c1a7aa36 | | |
| | 0dc3f20df6 | | |
| | 0537648e03 | | |
| | 3c856645c8 | | |
| | 10208e8284 | | |
| | 16e3e48d98 | | |
| | 5205bb1f0f | | |
| | dca63120b7 | | |
| | 0cccab30c0 | | |
| | 578a856c19 | | |
| | a85c5cda41 | | |
| | 6da2dc175b | | |
| | 7ffa315e39 | | |
| | 26d1ad271f | | |
| | fdedd3621c | | |
| | 4187f419f8 | | |
| | 42823c9682 | | |
| | fbaf1cf4f5 | | |
| | 3d845c98cb | | |
| | 1d2fe8a145 | | |
| | bdaaaec87d | | |
| | 35cc116acd | | |
| | cbe0fad3e9 | | |
| | 4f9aa1c548 | | |
| | 1f44c05f91 | | |
| | e31aa8cf29 | | |
| | c90bc747e1 | | |
| | 2c1b7aa80b | | |
| | 41762ae13c | | |
| | f7691f63a6 | | |
| | 9be5497753 | | |
2523
NEWS.MONGODB
Normal file
2523
NEWS.MONGODB
Normal file
File diff suppressed because it is too large
Load Diff
@@ -89,11 +89,11 @@ __ref_is_leaf(WT_REF *ref)
|
||||
}
|
||||
|
||||
/*
|
||||
* __page_ascend --
|
||||
* __ref_ascend --
|
||||
* Ascend the tree one level.
|
||||
*/
|
||||
static void
|
||||
__page_ascend(WT_SESSION_IMPL *session,
|
||||
static inline void
|
||||
__ref_ascend(WT_SESSION_IMPL *session,
|
||||
WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
|
||||
{
|
||||
WT_REF *parent_ref, *ref;
|
||||
@@ -163,12 +163,12 @@ __page_ascend(WT_SESSION_IMPL *session,
|
||||
}
|
||||
|
||||
/*
|
||||
* __page_descend --
|
||||
* Descend the tree one level.
|
||||
* __ref_descend_prev --
|
||||
* Descend the tree one level, during a previous-cursor walk.
|
||||
*/
|
||||
static void
|
||||
__page_descend(WT_SESSION_IMPL *session,
|
||||
WT_PAGE *page, WT_PAGE_INDEX **pindexp, uint32_t *slotp, bool prev)
|
||||
static inline void
|
||||
__ref_descend_prev(
|
||||
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
|
||||
{
|
||||
WT_PAGE_INDEX *pindex;
|
||||
|
||||
@@ -177,9 +177,6 @@ __page_descend(WT_SESSION_IMPL *session,
|
||||
* we have a hazard pointer.
|
||||
*/
|
||||
for (;; __wt_yield()) {
|
||||
WT_INTL_INDEX_GET(session, page, pindex);
|
||||
*slotp = prev ? pindex->entries - 1 : 0;
|
||||
|
||||
/*
|
||||
* There's a split race when a cursor moving backwards through
|
||||
* the tree descends the tree. If we're splitting an internal
|
||||
@@ -233,20 +230,40 @@ __page_descend(WT_SESSION_IMPL *session,
|
||||
* being split and part of its namespace moved. We have the
|
||||
* correct page and we don't have to move, all we have to do is
|
||||
* wait until the split page's page index is updated.
|
||||
*
|
||||
* No test is necessary for a next-cursor movement because we
|
||||
* do right-hand splits on internal pages and the initial part
|
||||
* of the page's namespace won't change as part of a split.
|
||||
* Instead of testing the direction boolean, do the test the
|
||||
* previous cursor movement requires in all cases, even though
|
||||
* it will always succeed for a next-cursor movement.
|
||||
*/
|
||||
if (pindex->index[*slotp]->home == page)
|
||||
WT_INTL_INDEX_GET(session, ref->page, pindex);
|
||||
if (pindex->index[pindex->entries - 1]->home == ref->page)
|
||||
break;
|
||||
}
|
||||
*pindexp = pindex;
|
||||
}
|
||||
|
||||
/*
|
||||
* __ref_initial_descent_prev --
|
||||
* Descend the tree one level, when setting up the initial cursor position
|
||||
* for a previous-cursor walk.
|
||||
*/
|
||||
static inline bool
|
||||
__ref_initial_descent_prev(
|
||||
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
|
||||
{
|
||||
WT_PAGE_INDEX *pindex;
|
||||
|
||||
/*
|
||||
* We're passed a child page into which we're descending, and on which
|
||||
* we have a hazard pointer.
|
||||
*
|
||||
* Acquire a page index for the child page and then confirm we haven't
|
||||
* raced with a parent split.
|
||||
*/
|
||||
WT_INTL_INDEX_GET(session, ref->page, pindex);
|
||||
if (__wt_split_descent_race(session, ref, *pindexp))
|
||||
return (false);
|
||||
|
||||
*pindexp = pindex;
|
||||
return (true);
|
||||
}
|
||||
|
||||
/*
|
||||
* __tree_walk_internal --
|
||||
* Move to the next/previous page in the tree.
|
||||
@@ -259,11 +276,12 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
|
||||
WT_DECL_RET;
|
||||
WT_PAGE_INDEX *pindex;
|
||||
WT_REF *couple, *couple_orig, *ref;
|
||||
bool empty_internal, prev, skip;
|
||||
bool empty_internal, initial_descent, prev, skip;
|
||||
uint32_t slot;
|
||||
|
||||
btree = S2BT(session);
|
||||
empty_internal = false;
|
||||
pindex = NULL;
|
||||
empty_internal = initial_descent = false;
|
||||
|
||||
/*
|
||||
* Tree walks are special: they look inside page structures that splits
|
||||
@@ -323,22 +341,30 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
|
||||
couple = couple_orig = ref = *refp;
|
||||
*refp = NULL;
|
||||
|
||||
/* If no page is active, begin a walk from the start of the tree. */
|
||||
/* If no page is active, begin a walk from the start/end of the tree. */
|
||||
if (ref == NULL) {
|
||||
ref = &btree->root;
|
||||
restart: /*
|
||||
* We can reach here with a NULL or root reference; the release
|
||||
* function handles them internally, don't complicate this code
|
||||
* by calling them out.
|
||||
*/
|
||||
WT_ERR(__wt_page_release(session, couple, flags));
|
||||
|
||||
couple = couple_orig = ref = &btree->root;
|
||||
if (ref->page == NULL)
|
||||
goto done;
|
||||
|
||||
initial_descent = true;
|
||||
goto descend;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the active page was the root, we've reached the walk's end.
|
||||
* Release any hazard-pointer we're holding.
|
||||
* If the active page was the root, we've reached the walk's end; we
|
||||
* only get here if we've returned the root to our caller, so we're
|
||||
* holding no hazard pointers.
|
||||
*/
|
||||
if (__wt_ref_is_root(ref)) {
|
||||
WT_ERR(__wt_page_release(session, couple, flags));
|
||||
if (__wt_ref_is_root(ref))
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Figure out the current slot in the WT_REF array. */
|
||||
__ref_index_slot(session, ref, &pindex, &slot);
|
||||
@@ -352,7 +378,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
|
||||
while ((prev && slot == 0) ||
|
||||
(!prev && slot == pindex->entries - 1)) {
|
||||
/* Ascend to the parent. */
|
||||
__page_ascend(session, &ref, &pindex, &slot);
|
||||
__ref_ascend(session, &ref, &pindex, &slot);
|
||||
|
||||
/*
|
||||
* If we got all the way through an internal page and
|
||||
@@ -520,17 +546,22 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
|
||||
if (ret == WT_RESTART) {
|
||||
ret = 0;
|
||||
|
||||
/*
|
||||
* If a cursor is setting up at the end of the
|
||||
* tree, we can't use our parent page's index,
|
||||
* because it may have already split; restart
|
||||
* the walk.
|
||||
*/
|
||||
if (prev && initial_descent)
|
||||
goto restart;
|
||||
|
||||
/*
|
||||
* If this is a new walk that never coupled from the
|
||||
* root to a new saved position in the tree,
|
||||
* restart the walk.
|
||||
*/
|
||||
if (couple == &btree->root) {
|
||||
ref = &btree->root;
|
||||
if (ref->page == NULL)
|
||||
goto done;
|
||||
goto descend;
|
||||
}
|
||||
if (couple == &btree->root)
|
||||
goto restart;
|
||||
|
||||
/*
|
||||
* If restarting from some original position,
|
||||
@@ -561,9 +592,55 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
|
||||
descend: couple = ref;
|
||||
empty_internal = true;
|
||||
|
||||
__page_descend(
|
||||
session, ref->page, &pindex, &slot, prev);
|
||||
/*
|
||||
* There's a split race when a cursor is setting
|
||||
* up at the end of the tree or moving backwards
|
||||
* through the tree and descending a level. When
|
||||
* splitting an internal page into its parent,
|
||||
* we move the WT_REF structures and update the
|
||||
* parent's page index before updating the split
|
||||
* page's page index, and it's not an atomic
|
||||
* update. A thread can read the parent page's
|
||||
* replacement page index, then read the split
|
||||
* page's original index, or the parent page's
|
||||
* original and the split page's replacement.
|
||||
*
|
||||
* This isn't a problem for a cursor setting up
|
||||
* at the start of the tree or moving forwards
|
||||
* through the tree because we do right-hand
|
||||
* splits on internal pages and the initial part
|
||||
* of the split page's namespace won't change as
|
||||
* part of a split. A thread reading the parent
|
||||
* page's and split page's indexes will move to
|
||||
* the same slot no matter what order of indexes
|
||||
* are read.
|
||||
*
|
||||
* Handle a cursor setting up at the end of the
|
||||
* tree or moving backwards through the tree.
|
||||
*/
|
||||
if (!prev) {
|
||||
WT_INTL_INDEX_GET(
|
||||
session, ref->page, pindex);
|
||||
slot = 0;
|
||||
} else if (initial_descent) {
|
||||
if (!__ref_initial_descent_prev(
|
||||
session, ref, &pindex))
|
||||
goto restart;
|
||||
slot = pindex->entries - 1;
|
||||
} else {
|
||||
__ref_descend_prev(
|
||||
session, ref, &pindex);
|
||||
slot = pindex->entries - 1;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* At the lowest tree level (considering a leaf
|
||||
* page), turn off the initial-descent state.
|
||||
* Descent race tests are different when moving
|
||||
* through the tree vs. the initial descent.
|
||||
*/
|
||||
initial_descent = false;
|
||||
|
||||
/*
|
||||
* Optionally skip leaf pages, the second half.
|
||||
* We didn't have an on-page cell to figure out
|
||||
@@ -605,7 +682,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
|
||||
/*
|
||||
* __wt_tree_walk_count --
|
||||
* Move to the next/previous page in the tree, tracking how many
|
||||
* references were visited to get there.
|
||||
* references were visited to get there.
|
||||
*/
|
||||
int
|
||||
__wt_tree_walk_count(WT_SESSION_IMPL *session,
|
||||
|
||||
@@ -137,12 +137,12 @@ restart_page: page = current->page;
|
||||
* If on the last slot (the key is larger than any key
|
||||
* on the page), check for an internal page split race.
|
||||
*/
|
||||
if (parent_pindex != NULL &&
|
||||
__wt_split_intl_race(
|
||||
session, current->home, parent_pindex)) {
|
||||
if (__wt_split_descent_race(
|
||||
session, current, parent_pindex)) {
|
||||
WT_RET(__wt_page_release(session, current, 0));
|
||||
goto restart_root;
|
||||
}
|
||||
|
||||
goto descend;
|
||||
}
|
||||
|
||||
|
||||
@@ -418,9 +418,8 @@ restart_page: page = current->page;
|
||||
* page), check for an internal page split race.
|
||||
*/
|
||||
if (pindex->entries == base) {
|
||||
append: if (parent_pindex != NULL &&
|
||||
__wt_split_intl_race(
|
||||
session, current->home, parent_pindex)) {
|
||||
append: if (__wt_split_descent_race(
|
||||
session, current, parent_pindex)) {
|
||||
if ((ret = __wt_page_release(
|
||||
session, current, 0)) != 0)
|
||||
return (ret);
|
||||
|
||||
@@ -1293,20 +1293,20 @@ __wt_page_swap_func(
|
||||
WT_DECL_RET;
|
||||
bool acquired;
|
||||
|
||||
/*
|
||||
* In rare cases when walking the tree, we try to swap to the same
|
||||
* page. Fast-path that to avoid thinking about error handling.
|
||||
*/
|
||||
if (held == want)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* This function is here to simplify the error handling during hazard
|
||||
* pointer coupling so we never leave a hazard pointer dangling. The
|
||||
* assumption is we're holding a hazard pointer on "held", and want to
|
||||
* acquire a hazard pointer on "want", releasing the hazard pointer on
|
||||
* "held" when we're done.
|
||||
*
|
||||
* When walking the tree, we sometimes swap to the same page. Fast-path
|
||||
* that to avoid thinking about error handling.
|
||||
*/
|
||||
if (held == want)
|
||||
return (0);
|
||||
|
||||
/* Get the wanted page. */
|
||||
ret = __wt_page_in_func(session, want, flags
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
, file, line
|
||||
@@ -1446,15 +1446,19 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize)
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_split_intl_race --
|
||||
* __wt_split_descent_race --
|
||||
* Return if we raced with an internal page split when descending the tree.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_split_intl_race(
|
||||
WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE_INDEX *saved_pindex)
|
||||
__wt_split_descent_race(
|
||||
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex)
|
||||
{
|
||||
WT_PAGE_INDEX *pindex;
|
||||
|
||||
/* No test when starting the descent (there's no home to check). */
|
||||
if (__wt_ref_is_root(ref))
|
||||
return (false);
|
||||
|
||||
/*
|
||||
* A place to hang this comment...
|
||||
*
|
||||
@@ -1509,6 +1513,6 @@ __wt_split_intl_race(
|
||||
* content the split page retains after the split, and we ignore this
|
||||
* race.
|
||||
*/
|
||||
WT_INTL_INDEX_GET(session, parent, pindex);
|
||||
WT_INTL_INDEX_GET(session, ref->home, pindex);
|
||||
return (pindex != saved_pindex);
|
||||
}
|
||||
|
||||
@@ -275,15 +275,11 @@ __create_colgroup(WT_SESSION_IMPL *session,
|
||||
WT_ERR(__wt_schema_create(session, source, sourceconf));
|
||||
|
||||
WT_ERR(__wt_config_collapse(session, cfg, &cgconf));
|
||||
if (exists) {
|
||||
if (strcmp(cgconf, origconf) != 0)
|
||||
WT_ERR_MSG(session, EINVAL,
|
||||
"%s: does not match existing configuration", name);
|
||||
goto err;
|
||||
}
|
||||
WT_ERR(__wt_metadata_insert(session, name, cgconf));
|
||||
|
||||
WT_ERR(__wt_schema_open_colgroups(session, table));
|
||||
if (!exists) {
|
||||
WT_ERR(__wt_metadata_insert(session, name, cgconf));
|
||||
WT_ERR(__wt_schema_open_colgroups(session, table));
|
||||
}
|
||||
|
||||
err: __wt_free(session, cgconf);
|
||||
__wt_free(session, sourceconf);
|
||||
@@ -539,20 +535,17 @@ __create_index(WT_SESSION_IMPL *session,
|
||||
cfg[1] = sourceconf;
|
||||
cfg[2] = confbuf.data;
|
||||
WT_ERR(__wt_config_collapse(session, cfg, &idxconf));
|
||||
if (exists) {
|
||||
if (strcmp(idxconf, origconf) != 0)
|
||||
WT_ERR_MSG(session, EINVAL,
|
||||
"%s: does not match existing configuration", name);
|
||||
goto err;
|
||||
|
||||
if (!exists) {
|
||||
WT_ERR(__wt_metadata_insert(session, name, idxconf));
|
||||
|
||||
/* Make sure that the configuration is valid. */
|
||||
WT_ERR(__wt_schema_open_index(
|
||||
session, table, idxname, strlen(idxname), &idx));
|
||||
|
||||
/* If there is data in the table, fill the index. */
|
||||
WT_ERR(__fill_index(session, table, idx));
|
||||
}
|
||||
WT_ERR(__wt_metadata_insert(session, name, idxconf));
|
||||
|
||||
/* Make sure that the configuration is valid. */
|
||||
WT_ERR(__wt_schema_open_index(
|
||||
session, table, idxname, strlen(idxname), &idx));
|
||||
|
||||
/* If there is data in the table, fill the index. */
|
||||
WT_ERR(__fill_index(session, table, idx));
|
||||
|
||||
err: __wt_free(session, idxconf);
|
||||
__wt_free(session, origconf);
|
||||
@@ -612,23 +605,21 @@ __create_table(WT_SESSION_IMPL *session,
|
||||
WT_ERR_NOTFOUND_OK(ret);
|
||||
|
||||
WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
|
||||
if (exists) {
|
||||
if (strcmp(tableconf, table->config) != 0)
|
||||
WT_ERR_MSG(session, EINVAL,
|
||||
"%s: does not match existing configuration", name);
|
||||
goto err;
|
||||
}
|
||||
WT_ERR(__wt_metadata_insert(session, name, tableconf));
|
||||
|
||||
/* Attempt to open the table now to catch any errors. */
|
||||
WT_ERR(__wt_schema_get_table(
|
||||
session, tablename, strlen(tablename), true, &table));
|
||||
if (!exists) {
|
||||
WT_ERR(__wt_metadata_insert(session, name, tableconf));
|
||||
|
||||
if (ncolgroups == 0) {
|
||||
cgsize = strlen("colgroup:") + strlen(tablename) + 1;
|
||||
WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
|
||||
snprintf(cgname, cgsize, "colgroup:%s", tablename);
|
||||
WT_ERR(__create_colgroup(session, cgname, exclusive, config));
|
||||
/* Attempt to open the table now to catch any errors. */
|
||||
WT_ERR(__wt_schema_get_table(
|
||||
session, tablename, strlen(tablename), true, &table));
|
||||
|
||||
if (ncolgroups == 0) {
|
||||
cgsize = strlen("colgroup:") + strlen(tablename) + 1;
|
||||
WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
|
||||
snprintf(cgname, cgsize, "colgroup:%s", tablename);
|
||||
WT_ERR(__create_colgroup(
|
||||
session, cgname, exclusive, config));
|
||||
}
|
||||
}
|
||||
|
||||
if (0) {
|
||||
|
||||
@@ -226,10 +226,6 @@ class test_index01(wttest.WiredTigerTestCase):
|
||||
self.assertRaises(wiredtiger.WiredTigerError,
|
||||
lambda: self.session.create(self.index[0],
|
||||
'columns=(dept),exclusive'))
|
||||
# non-exclusive create with differing configuration
|
||||
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
|
||||
lambda: self.session.create(self.index[0],
|
||||
'columns=(salary)'), '/does not match existing configuration/')
|
||||
self.drop_table()
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -103,10 +103,6 @@ class test_schema02(wttest.WiredTigerTestCase):
|
||||
self.expect_failure_colgroup("main:c1", "columns=(S1,i2),exclusive",
|
||||
"")
|
||||
|
||||
# exists with different config
|
||||
self.expect_failure_colgroup("main:c1", "columns=(S1,i4)",
|
||||
"/does not match existing configuration/")
|
||||
|
||||
# colgroup not declared in initial create
|
||||
self.expect_failure_colgroup("main:c3", "columns=(S3,i4)",
|
||||
"/Column group 'c3' not found in"
|
||||
|
||||
Reference in New Issue
Block a user