Merge pull request #2271 from wiredtiger/reverse-split-fix
SERVER-21027 Fix reverse splits to keep the original child ref locked
This commit is contained in:
@@ -1010,8 +1010,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
* reading thread will restart. Include the ref we are splitting in
|
||||
* the count to be deleted.
|
||||
*/
|
||||
deleted_entries = ref_new != NULL ? 1 : 0;
|
||||
for (i = 0; i < parent_entries; ++i) {
|
||||
for (deleted_entries = 1, i = 0; i < parent_entries; ++i) {
|
||||
next_ref = pindex->index[i];
|
||||
WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
|
||||
if (next_ref->state == WT_REF_DELETED &&
|
||||
@@ -1033,7 +1032,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
*/
|
||||
if (result_entries == 0) {
|
||||
next_ref = pindex->index[0];
|
||||
WT_ASSERT(session, next_ref->state == WT_REF_SPLIT);
|
||||
WT_ASSERT(session, next_ref->state == WT_REF_SPLIT ||
|
||||
(next_ref == ref && ref->state == WT_REF_LOCKED));
|
||||
next_ref->state = WT_REF_DELETED;
|
||||
--deleted_entries;
|
||||
result_entries = 1;
|
||||
@@ -1119,9 +1119,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
|
||||
WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
|
||||
"%s split into parent %" PRIu32 " -> %" PRIu32
|
||||
" (%" PRIu32 ")",
|
||||
__wt_page_type_string(ref->page->type), parent_entries,
|
||||
result_entries, result_entries - parent_entries));
|
||||
" (%" PRIu32 ")", ref->page == NULL ?
|
||||
"reverse" : __wt_page_type_string(ref->page->type),
|
||||
parent_entries, result_entries, result_entries - parent_entries));
|
||||
|
||||
/*
|
||||
* The new page index is in place, free the WT_REF we were splitting
|
||||
@@ -1522,23 +1522,18 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
|
||||
/*
|
||||
* __wt_split_reverse --
|
||||
* Lock, then reverse split an internal page (remove deleted refs).
|
||||
* We have a locked ref that is empty and we want to rewrite the index in
|
||||
* its parent.
|
||||
*/
|
||||
int
|
||||
__wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *parent;
|
||||
WT_REF dummy_child;
|
||||
bool hazard;
|
||||
|
||||
WT_CLEAR(dummy_child);
|
||||
dummy_child.home = dummy_child.page = ref->page;
|
||||
dummy_child.state = WT_REF_MEM;
|
||||
|
||||
WT_RET(__split_parent_lock(session, &dummy_child, &parent, &hazard));
|
||||
WT_ASSERT(session, parent == ref->page);
|
||||
ret = __split_parent(session, &dummy_child, NULL, 0, 0, 0);
|
||||
WT_RET(__split_parent_lock(session, ref, &parent, &hazard));
|
||||
ret = __split_parent(session, ref, NULL, 0, 0, 0);
|
||||
WT_TRET(__split_parent_unlock(session, parent, hazard));
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -32,6 +32,9 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
|
||||
/* Make sure the oldest transaction ID is up-to-date. */
|
||||
__wt_txn_update_oldest(session, true);
|
||||
|
||||
if (txn->isolation == WT_ISO_READ_COMMITTED)
|
||||
__wt_txn_get_snapshot(session);
|
||||
|
||||
/* Walk the tree, discarding pages. */
|
||||
next_ref = NULL;
|
||||
WT_ERR(__wt_tree_walk(session, &next_ref, NULL,
|
||||
@@ -59,11 +62,12 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
|
||||
* and the write will fail with EBUSY. Our caller handles that
|
||||
* error, retrying later.
|
||||
*/
|
||||
if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page)) {
|
||||
if (txn->isolation == WT_ISO_READ_COMMITTED)
|
||||
__wt_txn_get_snapshot(session);
|
||||
if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
|
||||
WT_ERR(__wt_reconcile(session, ref, NULL, WT_EVICTING));
|
||||
}
|
||||
|
||||
/* Update our snapshot for each new page. */
|
||||
if (txn->isolation == WT_ISO_READ_COMMITTED)
|
||||
__wt_txn_get_snapshot(session);
|
||||
|
||||
/*
|
||||
* We can't evict the page just returned to us (it marks our
|
||||
|
||||
@@ -143,25 +143,47 @@ done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) &&
|
||||
return (ret);
|
||||
}
|
||||
/*
|
||||
* __evict_reverse_split_check --
|
||||
* Check if an internal page needs a reverse split.
|
||||
* __evict_delete_ref --
|
||||
* Mark a page reference deleted and check if the parent can reverse
|
||||
* split.
|
||||
*/
|
||||
static int
|
||||
__evict_reverse_split_check(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
__evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *parent;
|
||||
WT_PAGE_INDEX *pindex;
|
||||
uint32_t deleted_entries;
|
||||
uint32_t ndeleted;
|
||||
|
||||
if (__wt_ref_is_root(ref))
|
||||
return (0);
|
||||
|
||||
parent = ref->home;
|
||||
WT_INTL_INDEX_GET(session, parent, pindex);
|
||||
deleted_entries = __wt_atomic_addv32(&pindex->deleted_entries, 1);
|
||||
if (deleted_entries > pindex->entries / 10)
|
||||
WT_RET(__wt_split_reverse(session, parent->pg_intl_parent_ref));
|
||||
/*
|
||||
* Avoid doing reverse splits when closing the file, it is
|
||||
* wasted work and some structure may already have been freed.
|
||||
*/
|
||||
if (!closing) {
|
||||
parent = ref->home;
|
||||
WT_INTL_INDEX_GET(session, parent, pindex);
|
||||
ndeleted = __wt_atomic_addv32(&pindex->deleted_entries, 1);
|
||||
|
||||
/*
|
||||
* If more than 10% of the parent references are deleted, try a
|
||||
* reverse split. Don't bother if the parent has only a single
|
||||
* reference: the internal page is empty and we have to wait
|
||||
* for eviction to notice.
|
||||
*
|
||||
* This will consume the deleted ref (and eventually free it).
|
||||
* If the reverse split can't get the access it needs because
|
||||
* something is busy, be sure that the page still ends up
|
||||
* marked deleted.
|
||||
*/
|
||||
if (ndeleted > pindex->entries / 10 && pindex->entries > 1 &&
|
||||
(ret = __wt_split_reverse(session, ref)) != EBUSY)
|
||||
return (ret);
|
||||
}
|
||||
|
||||
WT_PUBLISH(ref->state, WT_REF_DELETED);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -189,16 +211,9 @@ __wt_evict_page_clean_update(
|
||||
*/
|
||||
__wt_ref_out(session, ref);
|
||||
if (ref->addr == NULL) {
|
||||
WT_PUBLISH(ref->state, WT_REF_DELETED);
|
||||
/*
|
||||
* Avoid doing reverse splits when closing the file, it is
|
||||
* wasted work and some structure may already have been freed.
|
||||
*/
|
||||
if (!closing) {
|
||||
WT_WITH_PAGE_INDEX(session,
|
||||
ret = __evict_reverse_split_check(session, ref));
|
||||
WT_RET_BUSY_OK(ret);
|
||||
}
|
||||
WT_WITH_PAGE_INDEX(session,
|
||||
ret = __evict_delete_ref(session, ref, closing));
|
||||
WT_RET_BUSY_OK(ret);
|
||||
} else
|
||||
WT_PUBLISH(ref->state, WT_REF_DISK);
|
||||
|
||||
@@ -242,9 +257,8 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
*/
|
||||
__wt_ref_out(session, ref);
|
||||
ref->addr = NULL;
|
||||
WT_PUBLISH(ref->state, WT_REF_DELETED);
|
||||
WT_WITH_PAGE_INDEX(session,
|
||||
ret = __evict_reverse_split_check(session, ref));
|
||||
ret = __evict_delete_ref(session, ref, closing));
|
||||
WT_RET_BUSY_OK(ret);
|
||||
break;
|
||||
case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
|
||||
|
||||
Reference in New Issue
Block a user