|
|
|
|
@@ -25,12 +25,25 @@ typedef struct {
|
|
|
|
|
WT_PAGE *page;
|
|
|
|
|
uint32_t flags; /* Caller's configuration */
|
|
|
|
|
|
|
|
|
|
WT_ITEM disk_image; /* Temporary disk-image buffer */
|
|
|
|
|
/*
|
|
|
|
|
* Temporary buffer used to write out a disk image when managing two
|
|
|
|
|
* chunks worth of data in memory
|
|
|
|
|
* Reconciliation can end up requiring two temporary disk image buffers
|
|
|
|
|
* if a page split is involved. These two disk images are pointed to by
|
|
|
|
|
* current and the previous image pointers. During initialization the
|
|
|
|
|
* first image is allocated and pointed to by the current image pointer.
|
|
|
|
|
* If and when a split is involved the second image gets allocated and
|
|
|
|
|
* is pointed to by the current image pointer. The previous image
|
|
|
|
|
* pointer is made to refer the first image at this point. Two images
|
|
|
|
|
* are kept in memory to redistribute data among them in case the last
|
|
|
|
|
* split chunk ends up being smaller than the minimum required. As
|
|
|
|
|
* reconciliation generates more split chunks, the image referred to by
|
|
|
|
|
* the previous image pointer is written to the disk, the current and
|
|
|
|
|
* the previous image pointers are swapped, making space for another
|
|
|
|
|
* split chunk to be reconciled in the buffer that was just written out
|
|
|
|
|
* to the disk.
|
|
|
|
|
*/
|
|
|
|
|
WT_ITEM *interim_buf;
|
|
|
|
|
WT_ITEM disk_image[2]; /* Temporary disk-image buffers */
|
|
|
|
|
WT_ITEM *cur_img_ptr;
|
|
|
|
|
WT_ITEM *prev_img_ptr;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Track start/stop write generation to decide if all changes to the
|
|
|
|
|
@@ -146,17 +159,6 @@ typedef struct {
|
|
|
|
|
* that references all of our split pages.
|
|
|
|
|
*/
|
|
|
|
|
struct __rec_boundary {
|
|
|
|
|
/*
|
|
|
|
|
* Offset is the byte offset in the initial split buffer of the
|
|
|
|
|
* first byte of the split chunk, recorded before we decide to
|
|
|
|
|
* split the page; the difference between chunk[1]'s offset and
|
|
|
|
|
* chunk[0]'s offset is chunk[0]'s length.
|
|
|
|
|
*
|
|
|
|
|
* Once we split a page, we stop filling in offset values, we're
|
|
|
|
|
* writing the split chunks as we find them.
|
|
|
|
|
*/
|
|
|
|
|
size_t offset; /* Split's first byte */
|
|
|
|
|
|
|
|
|
|
WT_ADDR addr; /* Split's written location */
|
|
|
|
|
uint32_t size; /* Split's size */
|
|
|
|
|
uint32_t checksum; /* Split's checksum */
|
|
|
|
|
@@ -338,7 +340,8 @@ static int __rec_split_write(WT_SESSION_IMPL *,
|
|
|
|
|
WT_RECONCILE *, WT_BOUNDARY *, WT_ITEM *, bool);
|
|
|
|
|
static int __rec_update_las(
|
|
|
|
|
WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_BOUNDARY *);
|
|
|
|
|
static int __rec_write_check_complete(WT_SESSION_IMPL *, WT_RECONCILE *);
|
|
|
|
|
static int __rec_write_check_complete(
|
|
|
|
|
WT_SESSION_IMPL *, WT_RECONCILE *, bool *);
|
|
|
|
|
static int __rec_write_init(WT_SESSION_IMPL *,
|
|
|
|
|
WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
|
|
|
|
|
static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
|
|
|
|
|
@@ -438,7 +441,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
|
|
|
|
|
|
|
|
|
|
/* Checks for a successful reconciliation. */
|
|
|
|
|
if (ret == 0)
|
|
|
|
|
ret = __rec_write_check_complete(session, r);
|
|
|
|
|
ret = __rec_write_check_complete(session, r, lookaside_retryp);
|
|
|
|
|
|
|
|
|
|
/* Wrap up the page reconciliation. */
|
|
|
|
|
if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
|
|
|
|
|
@@ -449,14 +452,6 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
|
|
|
|
|
/* Release the reconciliation lock. */
|
|
|
|
|
WT_PAGE_UNLOCK(session, page);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If our caller can configure lookaside table reconciliation, flag if
|
|
|
|
|
* that's worth trying. The lookaside table doesn't help if we skipped
|
|
|
|
|
* updates, it can only help with older readers preventing eviction.
|
|
|
|
|
*/
|
|
|
|
|
if (lookaside_retryp != NULL && r->update_mem_uncommitted == 0)
|
|
|
|
|
*lookaside_retryp = true;
|
|
|
|
|
|
|
|
|
|
/* Update statistics. */
|
|
|
|
|
WT_STAT_CONN_INCR(session, rec_pages);
|
|
|
|
|
WT_STAT_DATA_INCR(session, rec_pages);
|
|
|
|
|
@@ -561,7 +556,8 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
* Check that reconciliation should complete.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
__rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
__rec_write_check_complete(
|
|
|
|
|
WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *lookaside_retryp)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* Tests in this function are lookaside tests and tests to decide if
|
|
|
|
|
@@ -581,17 +577,62 @@ __rec_write_check_complete(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
return (EBUSY);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If when doing update/restore based eviction, we didn't split and
|
|
|
|
|
* didn't apply any updates, then give up.
|
|
|
|
|
* Eviction can configure lookaside table reconciliation, consider if
|
|
|
|
|
* it's worth giving up this reconciliation attempt and falling back to
|
|
|
|
|
* using the lookaside table. We continue with evict/restore if
|
|
|
|
|
* switching to the lookaside doesn't make sense for any reason: we
|
|
|
|
|
* won't retry an evict/restore reconciliation until/unless the
|
|
|
|
|
* transactional system moves forward, so at worst it's a single wasted
|
|
|
|
|
* effort.
|
|
|
|
|
*
|
|
|
|
|
* This may lead to saving the page to the lookaside table: that
|
|
|
|
|
* decision is made by eviction.
|
|
|
|
|
* First, check if the lookaside table is a possible alternative.
|
|
|
|
|
*/
|
|
|
|
|
if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->bnd_next == 1 &&
|
|
|
|
|
r->update_mem_all != 0 && r->update_mem_all == r->update_mem_saved)
|
|
|
|
|
return (EBUSY);
|
|
|
|
|
if (lookaside_retryp == NULL)
|
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
|
/*
|
|
|
|
|
* We only suggest lookaside if currently in an evict/restore attempt
|
|
|
|
|
* and some updates were saved. Our caller sets the evict/restore flag
|
|
|
|
|
* based on various conditions (like if this is a leaf page), which is
|
|
|
|
|
* why we're testing that flag instead of a set of other conditions.
|
|
|
|
|
* If no updates were saved, eviction will succeed without needing to
|
|
|
|
|
* restore anything.
|
|
|
|
|
*/
|
|
|
|
|
if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE) || r->bnd->supd == NULL)
|
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check if this reconciliation attempt is making progress. If there's
|
|
|
|
|
* any sign of progress, don't fall back to the lookaside table.
|
|
|
|
|
*
|
|
|
|
|
* Check if the current reconciliation split, in which case we'll
|
|
|
|
|
* likely get to write at least one of the blocks. If that page is
|
|
|
|
|
* empty, that's also progress.
|
|
|
|
|
*/
|
|
|
|
|
if (r->bnd_next != 1)
|
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check if the current reconciliation applied some updates, in which
|
|
|
|
|
* case evict/restore should gain us some space.
|
|
|
|
|
*/
|
|
|
|
|
if (r->update_mem_saved != r->update_mem_all)
|
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Check if lookaside eviction is possible. If any of the updates we
|
|
|
|
|
* saw were uncommitted, the lookaside table cannot be used: it only
|
|
|
|
|
* helps with older readers preventing eviction.
|
|
|
|
|
*/
|
|
|
|
|
if (r->update_mem_uncommitted != 0)
|
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* The current evict/restore approach shows no signs of being useful,
|
|
|
|
|
* lookaside is possible, suggest the lookaside table.
|
|
|
|
|
*/
|
|
|
|
|
*lookaside_retryp = true;
|
|
|
|
|
return (EBUSY);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
@@ -832,7 +873,8 @@ __rec_write_init(WT_SESSION_IMPL *session,
|
|
|
|
|
r->last = &r->_last;
|
|
|
|
|
|
|
|
|
|
/* Disk buffers need to be aligned for writing. */
|
|
|
|
|
F_SET(&r->disk_image, WT_ITEM_ALIGNED);
|
|
|
|
|
F_SET(&r->disk_image[0], WT_ITEM_ALIGNED);
|
|
|
|
|
F_SET(&r->disk_image[1], WT_ITEM_ALIGNED);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Reconciliation is not re-entrant, make sure that doesn't happen. */
|
|
|
|
|
@@ -977,8 +1019,8 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
|
|
|
|
|
return;
|
|
|
|
|
*(WT_RECONCILE **)reconcilep = NULL;
|
|
|
|
|
|
|
|
|
|
__wt_buf_free(session, &r->disk_image);
|
|
|
|
|
__wt_scr_free(session, &r->interim_buf);
|
|
|
|
|
__wt_buf_free(session, &r->disk_image[0]);
|
|
|
|
|
__wt_buf_free(session, &r->disk_image[1]);
|
|
|
|
|
|
|
|
|
|
__wt_free(session, r->raw_entries);
|
|
|
|
|
__wt_free(session, r->raw_offsets);
|
|
|
|
|
@@ -1766,7 +1808,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
|
|
|
|
|
*/
|
|
|
|
|
WT_ASSERT(session, r->space_avail >= size);
|
|
|
|
|
WT_ASSERT(session, WT_BLOCK_FITS(
|
|
|
|
|
r->first_free, size, r->disk_image.mem, r->disk_image.memsize));
|
|
|
|
|
r->first_free, size, r->cur_img_ptr->mem, r->cur_img_ptr->memsize));
|
|
|
|
|
|
|
|
|
|
r->entries += v;
|
|
|
|
|
r->space_avail -= size;
|
|
|
|
|
@@ -1853,7 +1895,7 @@ __rec_dict_replace(
|
|
|
|
|
* copy cell instead.
|
|
|
|
|
*/
|
|
|
|
|
if (dp->offset == 0)
|
|
|
|
|
dp->offset = WT_PTRDIFF32(r->first_free, r->disk_image.mem);
|
|
|
|
|
dp->offset = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem);
|
|
|
|
|
else {
|
|
|
|
|
/*
|
|
|
|
|
* The offset is the byte offset from this cell to the previous,
|
|
|
|
|
@@ -1861,7 +1903,7 @@ __rec_dict_replace(
|
|
|
|
|
* page.
|
|
|
|
|
*/
|
|
|
|
|
offset = (uint64_t)WT_PTRDIFF(r->first_free,
|
|
|
|
|
(uint8_t *)r->disk_image.mem + dp->offset);
|
|
|
|
|
(uint8_t *)r->cur_img_ptr->mem + dp->offset);
|
|
|
|
|
val->len = val->cell_len =
|
|
|
|
|
__wt_cell_pack_copy(&val->cell, rle, offset);
|
|
|
|
|
val->buf.data = NULL;
|
|
|
|
|
@@ -1997,7 +2039,6 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
static void
|
|
|
|
|
__rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
|
|
|
|
|
{
|
|
|
|
|
bnd->offset = 0;
|
|
|
|
|
bnd->max_bnd_recno = WT_RECNO_OOB;
|
|
|
|
|
bnd->max_bnd_entries = 0;
|
|
|
|
|
|
|
|
|
|
@@ -2210,15 +2251,14 @@ __rec_split_init(WT_SESSION_IMPL *session,
|
|
|
|
|
* Ensure the disk image buffer is large enough for the max object, as
|
|
|
|
|
* corrected by the underlying block manager.
|
|
|
|
|
*
|
|
|
|
|
* The buffer that we build disk image in, needs to hold two chunks
|
|
|
|
|
* worth of data. Since we want to support split_size more than the page
|
|
|
|
|
* size (to allow for adjustments based on the compression), this buffer
|
|
|
|
|
* should be greater of twice of split_size and page_size.
|
|
|
|
|
* Since we want to support split_size more than the page size (to allow
|
|
|
|
|
* for adjustments based on the compression), this buffer should be
|
|
|
|
|
* greater of split_size and page_size.
|
|
|
|
|
*/
|
|
|
|
|
corrected_page_size = r->page_size;
|
|
|
|
|
disk_img_buf_size = 2 * WT_MAX(corrected_page_size, r->split_size);
|
|
|
|
|
WT_RET(bm->write_size(bm, session, &corrected_page_size));
|
|
|
|
|
WT_RET(__wt_buf_init(session, &r->disk_image, disk_img_buf_size));
|
|
|
|
|
disk_img_buf_size = WT_MAX(corrected_page_size, r->split_size);
|
|
|
|
|
WT_RET(__wt_buf_init(session, &r->disk_image[0], disk_img_buf_size));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Clear the disk page header to ensure all of it is initialized, even
|
|
|
|
|
@@ -2228,15 +2268,17 @@ __rec_split_init(WT_SESSION_IMPL *session,
|
|
|
|
|
* fixed-length column-store sets bits in bytes, where the bytes are
|
|
|
|
|
* assumed to initially be 0.
|
|
|
|
|
*/
|
|
|
|
|
memset(r->disk_image.mem, 0, page->type == WT_PAGE_COL_FIX ?
|
|
|
|
|
memset(r->disk_image[0].mem, 0, page->type == WT_PAGE_COL_FIX ?
|
|
|
|
|
disk_img_buf_size : WT_PAGE_HEADER_SIZE);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set the page type (the type doesn't change, and setting it later
|
|
|
|
|
* would require additional code in a few different places).
|
|
|
|
|
*/
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
dsk = r->disk_image[0].mem;
|
|
|
|
|
dsk->type = page->type;
|
|
|
|
|
r->cur_img_ptr = &r->disk_image[0];
|
|
|
|
|
r->prev_img_ptr = NULL;
|
|
|
|
|
|
|
|
|
|
r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk);
|
|
|
|
|
|
|
|
|
|
@@ -2245,7 +2287,6 @@ __rec_split_init(WT_SESSION_IMPL *session,
|
|
|
|
|
WT_RET(__rec_split_bnd_grow(session, r));
|
|
|
|
|
__rec_split_bnd_init(session, &r->bnd[0]);
|
|
|
|
|
r->bnd[0].max_bnd_recno = recno;
|
|
|
|
|
r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
|
|
|
|
|
/* Initialize the entry counter. */
|
|
|
|
|
r->entries = 0;
|
|
|
|
|
@@ -2451,21 +2492,18 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
|
|
|
|
|
{
|
|
|
|
|
WT_BM *bm;
|
|
|
|
|
WT_BTREE *btree;
|
|
|
|
|
size_t corrected_page_size, inuse, len;
|
|
|
|
|
size_t corrected_page_size, inuse;
|
|
|
|
|
|
|
|
|
|
btree = S2BT(session);
|
|
|
|
|
bm = btree->bm;
|
|
|
|
|
|
|
|
|
|
len = WT_PTRDIFF(r->first_free, r->disk_image.mem);
|
|
|
|
|
inuse = (len - r->bnd[r->bnd_next].offset) +
|
|
|
|
|
WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
inuse = WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem);
|
|
|
|
|
corrected_page_size = inuse + add_len;
|
|
|
|
|
|
|
|
|
|
WT_RET(bm->write_size(bm, session, &corrected_page_size));
|
|
|
|
|
/* Need to account for buffer carrying two chunks worth of data */
|
|
|
|
|
WT_RET(__wt_buf_grow(session, &r->disk_image, 2 * corrected_page_size));
|
|
|
|
|
WT_RET(__wt_buf_grow(session, r->cur_img_ptr, corrected_page_size));
|
|
|
|
|
|
|
|
|
|
r->first_free = (uint8_t *)r->disk_image.mem + len;
|
|
|
|
|
r->first_free = (uint8_t *)r->cur_img_ptr->mem + inuse;
|
|
|
|
|
WT_ASSERT(session, corrected_page_size >= inuse);
|
|
|
|
|
r->space_avail = corrected_page_size - inuse;
|
|
|
|
|
WT_ASSERT(session, r->space_avail >= add_len);
|
|
|
|
|
@@ -2474,89 +2512,55 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* __rec_split_write_prev_and_shift_cur --
|
|
|
|
|
* Write the previous split chunk to the disk as a page. Shift the contents
|
|
|
|
|
* of the current chunk to the start of the buffer, making space for a new
|
|
|
|
|
* chunk to be written.
|
|
|
|
|
* If the caller asks for a chunk resizing, the boundary between the two
|
|
|
|
|
* chunks is readjusted to the minimum split size boundary details stored
|
|
|
|
|
* in the previous chunk, letting the current chunk grow at the cost of the
|
|
|
|
|
* previous chunk.
|
|
|
|
|
* __rec_split_write_prev_and_swap_buf --
|
|
|
|
|
* If there is a previous split chunk held in the memory, write it to the
|
|
|
|
|
* disk as a page. If there isn't one, this is the first time we are
|
|
|
|
|
* splitting and need to initialize a second buffer. Also, swap the
|
|
|
|
|
* previous and the current buffer pointers.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
__rec_split_write_prev_and_shift_cur(
|
|
|
|
|
WT_SESSION_IMPL *session, WT_RECONCILE *r, bool resize_chunks)
|
|
|
|
|
__rec_split_write_prev_and_swap_buf(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
{
|
|
|
|
|
WT_BM *bm;
|
|
|
|
|
WT_BOUNDARY *bnd_cur, *bnd_prev;
|
|
|
|
|
WT_BTREE *btree;
|
|
|
|
|
WT_PAGE_HEADER *dsk, *dsk_tmp;
|
|
|
|
|
size_t cur_len, len;
|
|
|
|
|
uint8_t *dsk_start;
|
|
|
|
|
WT_BOUNDARY *bnd_prev;
|
|
|
|
|
WT_ITEM *tmp_img_ptr;
|
|
|
|
|
WT_PAGE_HEADER *dsk;
|
|
|
|
|
size_t disk_img_size;
|
|
|
|
|
|
|
|
|
|
WT_ASSERT(session, r->bnd_next != 0);
|
|
|
|
|
WT_ASSERT(session, (r->prev_img_ptr == NULL && r->bnd_next == 0) ||
|
|
|
|
|
(r->prev_img_ptr != NULL && r->bnd_next != 0));
|
|
|
|
|
|
|
|
|
|
btree = S2BT(session);
|
|
|
|
|
bm = btree->bm;
|
|
|
|
|
bnd_cur = &r->bnd[r->bnd_next];
|
|
|
|
|
bnd_prev = bnd_cur - 1;
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Resize chunks if the current is smaller than the minimum, and there
|
|
|
|
|
* are details on the minimum split size boundary available in the
|
|
|
|
|
* previous boundary details.
|
|
|
|
|
*
|
|
|
|
|
* There is a possibility that we do not have a minimum boundary set, in
|
|
|
|
|
* such a case we skip chunk resizing. Such a condition is possible for
|
|
|
|
|
* instance when we are building the image in the buffer and the first
|
|
|
|
|
* K/V pair is large enough that it surpasses both the minimum split
|
|
|
|
|
* size and the split size the application has set. In such a case we
|
|
|
|
|
* split the chunk without saving any minimum boundary.
|
|
|
|
|
*/
|
|
|
|
|
if (resize_chunks &&
|
|
|
|
|
cur_len < r->min_split_size && bnd_prev->min_bnd_offset != 0) {
|
|
|
|
|
bnd_cur->offset = bnd_prev->min_bnd_offset;
|
|
|
|
|
bnd_cur->max_bnd_entries +=
|
|
|
|
|
bnd_prev->max_bnd_entries - bnd_prev->min_bnd_entries;
|
|
|
|
|
bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries;
|
|
|
|
|
bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno;
|
|
|
|
|
|
|
|
|
|
WT_RET(__wt_buf_set(session, &bnd_cur->max_bnd_key,
|
|
|
|
|
bnd_prev->min_bnd_key.data, bnd_prev->min_bnd_key.size));
|
|
|
|
|
|
|
|
|
|
/* Update current chunk's length */
|
|
|
|
|
cur_len = WT_PTRDIFF(r->first_free, dsk) - bnd_cur->offset;
|
|
|
|
|
/* Write previous chunk, if there is one */
|
|
|
|
|
if (r->prev_img_ptr != NULL) {
|
|
|
|
|
bnd_prev = &r->bnd[r->bnd_next - 1];
|
|
|
|
|
dsk = r->prev_img_ptr->mem;
|
|
|
|
|
dsk->recno = bnd_prev->max_bnd_recno;
|
|
|
|
|
dsk->u.entries = bnd_prev->max_bnd_entries;
|
|
|
|
|
dsk->mem_size = (uint32_t)bnd_prev->size;
|
|
|
|
|
r->prev_img_ptr->size = dsk->mem_size;
|
|
|
|
|
WT_RET(__rec_split_write(session,
|
|
|
|
|
r, bnd_prev, r->prev_img_ptr, false));
|
|
|
|
|
} else {
|
|
|
|
|
/*
|
|
|
|
|
* If we do not have a previous buffer, we should initialize the
|
|
|
|
|
* second buffer before proceeding. We will create the second
|
|
|
|
|
* buffer of the same size as the current buffer.
|
|
|
|
|
*/
|
|
|
|
|
disk_img_size = r->cur_img_ptr->memsize;
|
|
|
|
|
WT_RET(__wt_buf_init(session,
|
|
|
|
|
&r->disk_image[1], disk_img_size));
|
|
|
|
|
r->prev_img_ptr = &r->disk_image[1];
|
|
|
|
|
dsk = r->prev_img_ptr->mem;
|
|
|
|
|
memset(dsk, 0,
|
|
|
|
|
r->page->type == WT_PAGE_COL_FIX ?
|
|
|
|
|
disk_img_size : WT_PAGE_HEADER_SIZE);
|
|
|
|
|
dsk->type = r->page->type;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Create an interim buffer if not already done to prepare the previous
|
|
|
|
|
* chunk's disk image.
|
|
|
|
|
*/
|
|
|
|
|
len = bnd_cur->offset;
|
|
|
|
|
WT_RET(bm->write_size(bm, session, &len));
|
|
|
|
|
if (r->interim_buf == NULL)
|
|
|
|
|
WT_RET(__wt_scr_alloc(session, len, &r->interim_buf));
|
|
|
|
|
else
|
|
|
|
|
WT_RET(__wt_buf_init(session, r->interim_buf, len));
|
|
|
|
|
/* swap previous and current buffers */
|
|
|
|
|
tmp_img_ptr = r->prev_img_ptr;
|
|
|
|
|
r->prev_img_ptr = r->cur_img_ptr;
|
|
|
|
|
r->cur_img_ptr = tmp_img_ptr;
|
|
|
|
|
|
|
|
|
|
dsk_tmp = r->interim_buf->mem;
|
|
|
|
|
memcpy(dsk_tmp, dsk, bnd_cur->offset);
|
|
|
|
|
dsk_tmp->recno = bnd_prev->max_bnd_recno;
|
|
|
|
|
dsk_tmp->u.entries = bnd_prev->max_bnd_entries;
|
|
|
|
|
dsk_tmp->mem_size = WT_STORE_SIZE(bnd_cur->offset);
|
|
|
|
|
r->interim_buf->size = dsk_tmp->mem_size;
|
|
|
|
|
WT_RET(__rec_split_write(session, r, bnd_prev, r->interim_buf, false));
|
|
|
|
|
|
|
|
|
|
/* Shift the current chunk to the start of the buffer */
|
|
|
|
|
dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk);
|
|
|
|
|
(void)memmove(dsk_start, (uint8_t *)dsk + bnd_cur->offset, cur_len);
|
|
|
|
|
|
|
|
|
|
/* Fix boundary offset */
|
|
|
|
|
bnd_cur->offset = WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
/* Fix where free points */
|
|
|
|
|
r->first_free = dsk_start + cur_len;
|
|
|
|
|
return (0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -2574,7 +2578,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
size_t inuse;
|
|
|
|
|
|
|
|
|
|
btree = S2BT(session);
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
|
|
|
|
|
/* Fixed length col store can call with next_len 0 */
|
|
|
|
|
WT_ASSERT(session, next_len == 0 || r->space_avail < next_len);
|
|
|
|
|
@@ -2588,9 +2592,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
"%s page too large, attempted split during salvage",
|
|
|
|
|
__wt_page_type_string(r->page->type));
|
|
|
|
|
|
|
|
|
|
last = &r->bnd[r->bnd_next];
|
|
|
|
|
inuse = (WT_PTRDIFF(r->first_free, dsk) - last->offset) +
|
|
|
|
|
WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
inuse = WT_PTRDIFF(r->first_free, dsk);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We can get here if the first key/value pair won't fit.
|
|
|
|
|
@@ -2603,8 +2605,10 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
/* All page boundaries reset the dictionary. */
|
|
|
|
|
__rec_dictionary_reset(r);
|
|
|
|
|
|
|
|
|
|
/* Set the number of entries for the just finished chunk. */
|
|
|
|
|
/* Set the number of entries and size for the just finished chunk. */
|
|
|
|
|
last = &r->bnd[r->bnd_next];
|
|
|
|
|
last->max_bnd_entries = r->entries;
|
|
|
|
|
last->size = (uint32_t)inuse;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* In case of bulk load, write out chunks as we get them. Otherwise we
|
|
|
|
|
@@ -2616,19 +2620,22 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
dsk->recno = last->max_bnd_recno;
|
|
|
|
|
dsk->u.entries = last->max_bnd_entries;
|
|
|
|
|
dsk->mem_size = (uint32_t)inuse;
|
|
|
|
|
r->disk_image.size = dsk->mem_size;
|
|
|
|
|
WT_RET(__rec_split_write(
|
|
|
|
|
session, r, last, &r->disk_image, false));
|
|
|
|
|
/* Fix where free points */
|
|
|
|
|
r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk);
|
|
|
|
|
} else if (r->bnd_next != 0)
|
|
|
|
|
WT_RET(__rec_split_write_prev_and_shift_cur(session, r, false));
|
|
|
|
|
r->cur_img_ptr->size = dsk->mem_size;
|
|
|
|
|
WT_RET(__rec_split_write(session,
|
|
|
|
|
r, last, r->cur_img_ptr, false));
|
|
|
|
|
} else {
|
|
|
|
|
WT_RET(__rec_split_write_prev_and_swap_buf(session, r));
|
|
|
|
|
/* current image we are writing to has changed */
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Fix where free points */
|
|
|
|
|
r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk);
|
|
|
|
|
|
|
|
|
|
/* Prepare the next boundary */
|
|
|
|
|
WT_RET(__rec_split_bnd_grow(session, r));
|
|
|
|
|
r->bnd_next++;
|
|
|
|
|
next = &r->bnd[r->bnd_next];
|
|
|
|
|
next->offset = WT_PTRDIFF(r->first_free, dsk);
|
|
|
|
|
/* Set the key for the next chunk. */
|
|
|
|
|
next->max_bnd_recno = r->recno;
|
|
|
|
|
if (dsk->type == WT_PAGE_ROW_INT || dsk->type == WT_PAGE_ROW_LEAF)
|
|
|
|
|
@@ -2687,9 +2694,8 @@ __rec_split_crossing_bnd(
|
|
|
|
|
!WT_CROSSING_SPLIT_BND(r, next_len)) {
|
|
|
|
|
btree = S2BT(session);
|
|
|
|
|
bnd = &r->bnd[r->bnd_next];
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
min_bnd_offset = (WT_PTRDIFF(r->first_free, dsk) -
|
|
|
|
|
bnd->offset) + WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
min_bnd_offset = WT_PTRDIFF(r->first_free, dsk);
|
|
|
|
|
if (min_bnd_offset == WT_PAGE_HEADER_BYTE_SIZE(btree))
|
|
|
|
|
/*
|
|
|
|
|
* This is possible if the first record doesn't fit in
|
|
|
|
|
@@ -2750,7 +2756,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
|
|
|
|
|
unpack = &_unpack;
|
|
|
|
|
compressor = btree->compressor;
|
|
|
|
|
dst = &r->raw_destination;
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
|
|
|
|
|
WT_RET(__rec_split_bnd_grow(session, r));
|
|
|
|
|
last = &r->bnd[r->bnd_next];
|
|
|
|
|
@@ -3066,7 +3072,7 @@ no_slots:
|
|
|
|
|
r->first_free = dsk_start + len;
|
|
|
|
|
r->space_avail += r->raw_offsets[result_slots];
|
|
|
|
|
WT_ASSERT(session, r->first_free + r->space_avail <=
|
|
|
|
|
(uint8_t *)r->disk_image.mem + r->disk_image.memsize);
|
|
|
|
|
(uint8_t *)r->cur_img_ptr->mem + r->cur_img_ptr->memsize);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Set the key for the next block (before writing the block, a
|
|
|
|
|
@@ -3105,13 +3111,13 @@ no_slots:
|
|
|
|
|
dsk->recno = last->max_bnd_recno;
|
|
|
|
|
dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk);
|
|
|
|
|
dsk->u.entries = r->entries;
|
|
|
|
|
r->disk_image.size = dsk->mem_size;
|
|
|
|
|
r->cur_img_ptr->size = dsk->mem_size;
|
|
|
|
|
|
|
|
|
|
r->entries = 0;
|
|
|
|
|
r->first_free = WT_PAGE_HEADER_BYTE(btree, dsk);
|
|
|
|
|
r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
|
|
|
|
|
|
|
|
|
|
write_ref = &r->disk_image;
|
|
|
|
|
write_ref = r->cur_img_ptr;
|
|
|
|
|
last->already_compressed = false;
|
|
|
|
|
} else {
|
|
|
|
|
/*
|
|
|
|
|
@@ -3139,7 +3145,7 @@ no_slots:
|
|
|
|
|
last_block && __rec_is_checkpoint(session, r, last)) {
|
|
|
|
|
if (write_ref == dst)
|
|
|
|
|
WT_RET(__wt_buf_set(
|
|
|
|
|
session, &r->disk_image, dst->mem, dst->size));
|
|
|
|
|
session, r->cur_img_ptr, dst->mem, dst->size));
|
|
|
|
|
} else
|
|
|
|
|
WT_RET(
|
|
|
|
|
__rec_split_write(session, r, last, write_ref, last_block));
|
|
|
|
|
@@ -3172,6 +3178,111 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
return (__rec_split_raw_worker(session, r, next_len, false));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* __rec_split_finish_process_prev --
|
|
|
|
|
* If the two split chunks together fit in a single page, merge them into
|
|
|
|
|
* one. If they do not fit in a single page but the last is smaller than
|
|
|
|
|
* the minimum desired, move some data from the penultimate chunk to the
|
|
|
|
|
* last chunk and write out the previous/penultimate. Finally, update the
|
|
|
|
|
* pointer to the current image buffer. After this function exits, we will
|
|
|
|
|
* have one (last) buffer in memory, pointed to by the current image
|
|
|
|
|
* pointer.
|
|
|
|
|
*/
|
|
|
|
|
static int
|
|
|
|
|
__rec_split_finish_process_prev(
|
|
|
|
|
WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *chunks_merged)
|
|
|
|
|
{
|
|
|
|
|
WT_BOUNDARY *bnd_cur, *bnd_prev;
|
|
|
|
|
WT_BTREE *btree;
|
|
|
|
|
WT_PAGE_HEADER *dsk;
|
|
|
|
|
size_t len_to_move;
|
|
|
|
|
uint32_t combined_size;
|
|
|
|
|
uint8_t *cur_dsk_start;
|
|
|
|
|
|
|
|
|
|
WT_ASSERT(session, r->prev_img_ptr != NULL);
|
|
|
|
|
|
|
|
|
|
btree = S2BT(session);
|
|
|
|
|
bnd_cur = &r->bnd[r->bnd_next];
|
|
|
|
|
bnd_prev = bnd_cur - 1;
|
|
|
|
|
*chunks_merged = false;
|
|
|
|
|
/*
|
|
|
|
|
* The sizes referred to in the boundary structure include the header,
|
|
|
|
|
* so when calculating the combined size, make sure not to include the
|
|
|
|
|
* header twice.
|
|
|
|
|
*/
|
|
|
|
|
combined_size = bnd_prev->size +
|
|
|
|
|
(bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree));
|
|
|
|
|
|
|
|
|
|
if (combined_size <= r->page_size) {
|
|
|
|
|
/*
|
|
|
|
|
* We have two boundaries, but the data in the buffers can fit a
|
|
|
|
|
* single page. Merge the boundaries and create a single chunk.
|
|
|
|
|
*/
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
memcpy((uint8_t *)r->prev_img_ptr->mem + bnd_prev->size,
|
|
|
|
|
WT_PAGE_HEADER_BYTE(btree, dsk),
|
|
|
|
|
bnd_cur->size - WT_PAGE_HEADER_BYTE_SIZE(btree));
|
|
|
|
|
bnd_prev->size = combined_size;
|
|
|
|
|
bnd_prev->max_bnd_entries += bnd_cur->max_bnd_entries;
|
|
|
|
|
r->bnd_next--;
|
|
|
|
|
*chunks_merged = true;
|
|
|
|
|
} else {
|
|
|
|
|
if (bnd_cur->size < r->min_split_size &&
|
|
|
|
|
bnd_prev->min_bnd_offset != 0 ) {
|
|
|
|
|
/*
|
|
|
|
|
* The last chunk, pointed to by the current image
|
|
|
|
|
* pointer, has less than the minimum data. Let's move
|
|
|
|
|
* any data more than the minimum from the previous
|
|
|
|
|
* image into the current.
|
|
|
|
|
*/
|
|
|
|
|
len_to_move = bnd_prev->size - bnd_prev->min_bnd_offset;
|
|
|
|
|
/* Grow current buffer if it is not large enough */
|
|
|
|
|
if (r->space_avail < len_to_move)
|
|
|
|
|
WT_RET(__rec_split_grow(session,
|
|
|
|
|
r, len_to_move));
|
|
|
|
|
cur_dsk_start = WT_PAGE_HEADER_BYTE(btree,
|
|
|
|
|
r->cur_img_ptr->mem);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Shift the contents of the current buffer to make
|
|
|
|
|
* space for the data that will be prepended into the
|
|
|
|
|
* current buffer
|
|
|
|
|
*/
|
|
|
|
|
memmove(cur_dsk_start + len_to_move,
|
|
|
|
|
cur_dsk_start, bnd_cur->size -
|
|
|
|
|
WT_PAGE_HEADER_BYTE_SIZE(btree));
|
|
|
|
|
/*
|
|
|
|
|
* copy any data more than the minimum, from the
|
|
|
|
|
* previous buffer to the start of the current.
|
|
|
|
|
*/
|
|
|
|
|
memcpy(cur_dsk_start, (uint8_t *)r->prev_img_ptr->mem +
|
|
|
|
|
bnd_prev->min_bnd_offset, len_to_move);
|
|
|
|
|
|
|
|
|
|
/* Update boundary information */
|
|
|
|
|
bnd_cur->size += (uint32_t)len_to_move;
|
|
|
|
|
bnd_prev->size -= (uint32_t)len_to_move;
|
|
|
|
|
bnd_cur->max_bnd_entries += bnd_prev->max_bnd_entries -
|
|
|
|
|
bnd_prev->min_bnd_entries;
|
|
|
|
|
bnd_prev->max_bnd_entries = bnd_prev->min_bnd_entries;
|
|
|
|
|
bnd_cur->max_bnd_recno = bnd_prev->min_bnd_recno;
|
|
|
|
|
WT_RET(__wt_buf_set(session,
|
|
|
|
|
&bnd_cur->max_bnd_key, bnd_prev->min_bnd_key.data,
|
|
|
|
|
bnd_prev->min_bnd_key.size));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Write out the previous image */
|
|
|
|
|
WT_RET(__rec_split_write_prev_and_swap_buf(session, r));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* At this point, there is only one disk image in the memory, pointed to
|
|
|
|
|
* by the previous image pointer. Update the current image pointer to
|
|
|
|
|
* this image.
|
|
|
|
|
*/
|
|
|
|
|
r->cur_img_ptr = r->prev_img_ptr;
|
|
|
|
|
return (0);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* __rec_split_finish_std --
|
|
|
|
|
* Finish processing a page, standard version.
|
|
|
|
|
@@ -3179,9 +3290,9 @@ __rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
|
|
|
|
|
static int
|
|
|
|
|
__rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
{
|
|
|
|
|
WT_BOUNDARY *bnd_cur, *bnd_prev;
|
|
|
|
|
WT_BOUNDARY *bnd_cur;
|
|
|
|
|
WT_PAGE_HEADER *dsk;
|
|
|
|
|
bool grow_bnd;
|
|
|
|
|
bool chunks_merged;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We may arrive here with no entries to write if the page was entirely
|
|
|
|
|
@@ -3208,50 +3319,22 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
return (EBUSY);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dsk = r->disk_image.mem;
|
|
|
|
|
|
|
|
|
|
/* Set the number of entries for the just finished chunk. */
|
|
|
|
|
/* Set the number of entries and size for the just finished chunk. */
|
|
|
|
|
bnd_cur = &r->bnd[r->bnd_next];
|
|
|
|
|
bnd_cur->max_bnd_entries = r->entries;
|
|
|
|
|
bnd_cur->size = WT_PTRDIFF32(r->first_free, r->cur_img_ptr->mem);
|
|
|
|
|
|
|
|
|
|
grow_bnd = true;
|
|
|
|
|
/*
|
|
|
|
|
* We can reach here even with raw_compression when the last split chunk
|
|
|
|
|
* is too small to be sent for raw compression.
|
|
|
|
|
*/
|
|
|
|
|
if (!r->is_bulk_load && !r->raw_compression) {
|
|
|
|
|
if (WT_PTRDIFF(r->first_free, dsk) > r->page_size &&
|
|
|
|
|
r->bnd_next != 0) {
|
|
|
|
|
/*
|
|
|
|
|
* We hold two boundaries worth of data in the buffer,
|
|
|
|
|
* and this data doesn't fit in a single page. If the
|
|
|
|
|
* last chunk is too small, readjust the boundary to a
|
|
|
|
|
* pre-computed minimum.
|
|
|
|
|
* Write out the penultimate chunk to the disk as a page
|
|
|
|
|
*/
|
|
|
|
|
WT_RET(__rec_split_write_prev_and_shift_cur(
|
|
|
|
|
session, r, true));
|
|
|
|
|
} else
|
|
|
|
|
if (r->bnd_next != 0) {
|
|
|
|
|
/*
|
|
|
|
|
* We have two boundaries, but the data in the
|
|
|
|
|
* buffer can fit a single page. Merge the
|
|
|
|
|
* boundaries to create a single chunk.
|
|
|
|
|
*/
|
|
|
|
|
bnd_prev = bnd_cur - 1;
|
|
|
|
|
bnd_prev->max_bnd_entries +=
|
|
|
|
|
bnd_cur->max_bnd_entries;
|
|
|
|
|
r->bnd_next--;
|
|
|
|
|
grow_bnd = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
chunks_merged = false;
|
|
|
|
|
if (r->prev_img_ptr != NULL)
|
|
|
|
|
WT_RET(__rec_split_finish_process_prev(session,
|
|
|
|
|
r, &chunks_merged));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* We already have space for an extra boundary if we merged two
|
|
|
|
|
* boundaries above, in that case we do not need to grow the boundary
|
|
|
|
|
* structure.
|
|
|
|
|
*/
|
|
|
|
|
if (grow_bnd)
|
|
|
|
|
if (!chunks_merged)
|
|
|
|
|
WT_RET(__rec_split_bnd_grow(session, r));
|
|
|
|
|
bnd_cur = &r->bnd[r->bnd_next];
|
|
|
|
|
r->bnd_next++;
|
|
|
|
|
@@ -3260,14 +3343,15 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
* Current boundary now has all the remaining data/last page now.
|
|
|
|
|
* Let's write it to the disk
|
|
|
|
|
*/
|
|
|
|
|
dsk = r->cur_img_ptr->mem;
|
|
|
|
|
dsk->recno = bnd_cur->max_bnd_recno;
|
|
|
|
|
dsk->u.entries = bnd_cur->max_bnd_entries;
|
|
|
|
|
dsk->mem_size = WT_PTRDIFF32(r->first_free, dsk);
|
|
|
|
|
r->disk_image.size = dsk->mem_size;
|
|
|
|
|
dsk->mem_size = bnd_cur->size;
|
|
|
|
|
r->cur_img_ptr->size = dsk->mem_size;
|
|
|
|
|
|
|
|
|
|
/* If this is a checkpoint, we're done, otherwise write the page. */
|
|
|
|
|
return (__rec_is_checkpoint(session, r, bnd_cur) ?
|
|
|
|
|
0 : __rec_split_write(session, r, bnd_cur, &r->disk_image, true));
|
|
|
|
|
0 : __rec_split_write(session, r, bnd_cur, r->cur_img_ptr, true));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
@@ -3289,7 +3373,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
|
|
|
|
if (r->raw_compression && r->entries != 0) {
|
|
|
|
|
while (r->entries != 0) {
|
|
|
|
|
data_size =
|
|
|
|
|
WT_PTRDIFF(r->first_free, r->disk_image.mem);
|
|
|
|
|
WT_PTRDIFF(r->first_free, r->cur_img_ptr->mem);
|
|
|
|
|
if (data_size <= btree->allocsize)
|
|
|
|
|
break;
|
|
|
|
|
WT_RET(__rec_split_raw_worker(session, r, 0, true));
|
|
|
|
|
@@ -5882,7 +5966,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
|
|
|
|
* write the buffer so we know what to do here.
|
|
|
|
|
*/
|
|
|
|
|
if (bnd->addr.addr == NULL)
|
|
|
|
|
WT_RET(__wt_bt_write(session, &r->disk_image,
|
|
|
|
|
WT_RET(__wt_bt_write(session, r->cur_img_ptr,
|
|
|
|
|
NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING),
|
|
|
|
|
bnd->already_compressed));
|
|
|
|
|
else {
|
|
|
|
|
@@ -6546,7 +6630,7 @@ __rec_dictionary_lookup(
|
|
|
|
|
for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
|
|
|
|
|
dp != NULL && dp->hash == hash; dp = dp->next[0]) {
|
|
|
|
|
WT_RET(__wt_cell_pack_data_match(
|
|
|
|
|
(WT_CELL *)((uint8_t *)r->disk_image.mem + dp->offset),
|
|
|
|
|
(WT_CELL *)((uint8_t *)r->cur_img_ptr->mem + dp->offset),
|
|
|
|
|
&val->cell, val->buf.data, &match));
|
|
|
|
|
if (match) {
|
|
|
|
|
WT_STAT_DATA_INCR(session, rec_dictionary);
|
|
|
|
|
|