Compare commits

..

23 Commits
1.2.0 ... 1.2.2

Author SHA1 Message Date
Michael Cahill
ebcbbed09b Add documentation for 1.2.2 to the landing page. 2012-06-20 16:00:01 +10:00
Michael Cahill
d22c483d3c Cut bugfix release 1.2.2. 2012-06-20 15:51:53 +10:00
Michael Cahill
8896c8d450 Fix a case where checkpoints could self-deadlock trying to reenter the connection spinlock. 2012-06-20 15:32:48 +10:00
Michael Cahill
3c4adbdbb2 Fix two bugs with snapshot isolation:
1. reset the isolation level when the transaction completes;
2. when checking visibility, check item's ID against the maximum snapshot ID
   (not the transaction's ID).
2012-06-20 15:32:48 +10:00
Michael Cahill
41dce09ac6 When checking the value of the "isolation" key, don't assume it is NUL terminated. 2012-06-20 15:32:47 +10:00
Michael Cahill
f2984e06ff Handle checkpoints immediately during close operations: the btree handle will be cleared by the time we do meta tracking. 2012-06-20 15:32:47 +10:00
Michael Cahill
f74d5baa28 Defer making free pages available until the end of a checkpoint, in case it fails after processing some files. 2012-06-20 15:32:47 +10:00
Michael Cahill
ea63867d69 src/txn/txn.c:243:29: error: 'txn_global' may be used uninitialized in this function 2012-06-20 15:32:47 +10:00
Michael Cahill
f3ab36014f Fix __wt_conn_btree_apply to acquire handles correctly,
pass WT_BTREE_SNAPSHOT_OP from checkpoint code.
2012-06-20 15:32:47 +10:00
Michael Cahill
a7dc715a83 src/conn/conn_api.c:375:22: error: variable 'conn' set but not used.
Conflicts:
	src/conn/conn_api.c
2012-06-20 15:32:38 +10:00
Michael Cahill
257104e58b Added tag 1.2.1 for changeset 9046bcab74eb 2012-06-15 17:36:26 +10:00
Michael Cahill
110ba1f941 Bump version to 1.2.1. 2012-06-15 17:34:30 +10:00
Michael Cahill
eb06033573 Allocate "desc" buffers in heap memory so that they are correctly aligned. 2011-12-23 10:57:57 +11:00
Michael Cahill
97fd6ca26e Avoid a deadlock between eviction and checkpoint on the connection spinlock. 2012-06-08 13:56:24 +10:00
Keith Bostic
c6103a38b1 Initialize the snapshot-avail list after cleaning it out, else we'll try and
print a NULL pointer in VERBOSE mode.
2012-06-06 11:26:09 -04:00
Keith Bostic
a6883cdb29 The nlpo2 and ispo2 functions came from net postings, put the files into
the public domain so nobody complains.
2012-06-05 10:41:29 +00:00
Keith Bostic
f48952d2bd Add an upgrading page.
Make the main page capitalization consistent
2012-06-05 10:02:44 +00:00
Keith Bostic
697f5c48b4 lint (unused variable) 2012-06-05 09:46:05 +00:00
Keith Bostic
18bf130f25 __wt_block_off_remove_overlap() allocates memory and so can fail in
"interesting" ways, don't ignore its error return.
2012-06-05 09:13:58 -04:00
Keith Bostic
f5910ecb2f fix comment typo 2012-06-05 09:04:42 -04:00
Keith Bostic
6564203c37 Minor re-work of extent lists (no real change, just associate better
logical names with them); write explicit init/free functions for the
extent lists.

Fix bug where snapshot_avail extent list didn't have a name at all,
we attempted to print a NULL in VERBOSE mode.
2012-06-05 09:01:28 -04:00
Keith Bostic
b5ea6f767f Merge the verbose write-offset/cksum information with the page-type
information.
2012-06-05 09:01:28 -04:00
Michael Cahill
97cb94c0cc Added tag 1.2.0 for changeset 12cf1d5546df 2012-06-04 17:02:49 +10:00
34 changed files with 393 additions and 134 deletions

View File

@@ -7,3 +7,5 @@
a792d468bedd7b37be9cfff545582ae8ff54ff6f 1.1.3
8054de4cb42988cd54b395cc834a6f8ab25298f7 1.1.4
ef844093bec2ac38945fd04487dc3a051f4b9136 1.1.5
12cf1d5546df25ac323f0400d4764e67ad5802e2 1.2.0
9046bcab74eba90a2cb05af28026ec4a74e4fb9c 1.2.1

33
NEWS
View File

@@ -1,3 +1,36 @@
WiredTiger release 1.2.2, 2012-06-20
------------------------------------
This is a bugfix release. The changes are as follows:
* Defer making free pages available until the end of a checkpoint, in case
there is a failure after processing some files.
* When checking the value of the "isolation" key, don't assume it is NUL
terminated. This bug could cause transactions to run with incorrect
isolation.
* Fix two bugs with snapshot isolation:
1. reset the isolation level when the transaction completes;
2. when checking visibility, check item's ID against the maximum snapshot ID
(not the transaction's ID).
WiredTiger release 1.2.1, 2012-06-15
------------------------------------
This is a bugfix release. The changes are as follows:
* Avoid a deadlock between eviction and checkpoint on the connection spinlock.
* Allocate "desc" buffers in heap memory so that they are correctly aligned
(fixes direct_io support on Linux).
* Initialize the snapshot-avail list after cleaning it out, else we'll try and
print a NULL pointer in VERBOSE mode.
WiredTiger release 1.2.0, 2012-06-04
------------------------------------

4
README
View File

@@ -1,6 +1,6 @@
WiredTiger 1.2.0: (June 4, 2012)
WiredTiger 1.2.2: (June 20, 2012)
This is version 1.2.0 of WiredTiger.
This is version 1.2.2 of WiredTiger.
WiredTiger documentation can be found at:

View File

@@ -2,8 +2,8 @@ dnl build by dist/s_version
VERSION_MAJOR=1
VERSION_MINOR=2
VERSION_PATCH=0
VERSION_STRING='"WiredTiger 1.2.0: (June 4, 2012)"'
VERSION_PATCH=2
VERSION_STRING='"WiredTiger 1.2.2: (June 20, 2012)"'
AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)

View File

@@ -1,2 +1,2 @@
dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version
1.2.0
1.2.2

2
dist/RELEASE vendored
View File

@@ -1,6 +1,6 @@
WIREDTIGER_VERSION_MAJOR=1
WIREDTIGER_VERSION_MINOR=2
WIREDTIGER_VERSION_PATCH=0
WIREDTIGER_VERSION_PATCH=2
WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH"
WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"`

12
dist/api_data.py vendored
View File

@@ -425,6 +425,14 @@ flags = {
###################################################
# Structure flag declarations
###################################################
'conn' : [ 'CONN_NOSYNC', 'CONN_TRANSACTIONAL', 'SERVER_RUN' ],
'session' : [ 'SESSION_INTERNAL', 'SESSION_SALVAGE_QUIET_ERR' ],
'conn' : [
'CONN_NOSYNC',
'CONN_TRANSACTIONAL',
'SERVER_RUN'
],
'session' : [
'SESSION_HAS_CONNLOCK',
'SESSION_INTERNAL',
'SESSION_SALVAGE_QUIET_ERR'
],
}

View File

@@ -397,8 +397,8 @@ __wt_block_extend(
fh = block->fh;
/*
* Callers of this function are expected to be holding any locks
* required to extend the file.
* Callers of this function are expected to have already acquired any
* locks required to extend the file.
*
* We should never be allocating from an empty file.
*/
@@ -464,24 +464,26 @@ int
__wt_block_off_free(
WT_SESSION_IMPL *session, WT_BLOCK *block, off_t off, off_t size)
{
WT_EXTLIST *el;
WT_DECL_RET;
/*
* Callers of this function are expected to be holding any locks
* required to manipulate the extent lists.
* Callers of this function are expected to have already acquired any
* locks required to manipulate the extent lists.
*
* We can reuse this extent immediately if it was allocated during this
* snapshot, merge it into the avail list (which slows file growth in
* snapshot, merge it into the avail list (which slows file growth in
* workloads including repeated overflow record modification). If this
* extent is referenced in a previous snapshot, merge into the discard
* list.
*/
el = __wt_block_off_remove_overlap(
session, &block->live.alloc, off, size) == 0 ?
&block->live.avail : &block->live.discard;
WT_RET(__block_merge(session, el, off, (off_t)size));
return (0);
if ((ret = __wt_block_off_remove_overlap(
session, &block->live.alloc, off, size)) == 0)
ret = __block_merge(
session, &block->live.avail, off, (off_t)size);
else if (ret == WT_NOTFOUND)
ret = __block_merge(
session, &block->live.discard, off, (off_t)size);
return (ret);
}
#ifdef HAVE_DIAGNOSTIC
@@ -771,15 +773,15 @@ __wt_block_insert_ext(
WT_SESSION_IMPL *session, WT_EXTLIST *el, off_t off, off_t size)
{
/*
* There are currently two copies of this function (this code is a
* one-liner that calls the internal version of the function, which
* means the compiler should compress out the function call). It's
* that way because the interface is still fluid, I'm not convinced
* there won't be a need for a functional split between the internal
* and external versions in the future.
* There are currently two copies of this function (this code is a one-
* liner that calls the internal version of the function, which means
* the compiler should compress out the function call). It's that way
* because the interface is still fluid, I'm not convinced there won't
* be a need for a functional split between the internal and external
* versions in the future.
*
* Callers of this function are expected to be holding any locks
* required to manipulate the extent list.
* Callers of this function are expected to have already acquired any
* locks required to manipulate the extent list.
*/
return (__block_merge(session, el, off, size));
}
@@ -1052,6 +1054,25 @@ __wt_block_extlist_truncate(
return (0);
}
/*
 * __wt_block_extlist_init --
 *	Prepare an extent list for use: label it "<name>.<extname>" and mark
 *	its offset as invalid.
 *
 *	Either name component may be NULL, in which case it is treated as an
 *	empty string.  Returns 0 on success, or the error from __wt_strdup.
 */
int
__wt_block_extlist_init(WT_SESSION_IMPL *session,
    WT_EXTLIST *el, const char *name, const char *extname)
{
	const char *prefix, *suffix;
	char label[128];

	/* Substitute empty strings for missing name components. */
	prefix = name == NULL ? "" : name;
	suffix = extname == NULL ? "" : extname;

	/* Build the label on the stack; snprintf bounds it to the buffer. */
	(void)snprintf(label, sizeof(label), "%s.%s", prefix, suffix);

	/*
	 * Store a copy of the label in the list; __wt_block_extlist_free
	 * releases el->name.
	 */
	WT_RET(__wt_strdup(session, label, &el->name));

	el->offset = WT_BLOCK_INVALID_OFFSET;

	return (0);
}
/*
* __wt_block_extlist_free --
* Discard an extent list.
@@ -1062,19 +1083,19 @@ __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el)
WT_EXT *ext, *next;
WT_SIZE *szp, *nszp;
__wt_free(session, el->name);
for (ext = el->off[0]; ext != NULL; ext = next) {
next = ext->next[0];
__wt_free(session, ext);
}
memset(el->off, 0, sizeof(el->off));
for (szp = el->sz[0]; szp != NULL; szp = nszp) {
nszp = szp->next[0];
__wt_free(session, szp);
}
memset(el->sz, 0, sizeof(el->sz));
el->bytes = 0;
el->entries = 0;
/* Extent lists are re-used, clear them. */
memset(el, 0, sizeof(*el));
}
#ifdef HAVE_VERBOSE

View File

@@ -165,10 +165,14 @@ int
__wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh)
{
WT_BLOCK_DESC *desc;
uint8_t buf[WT_BLOCK_DESC_SECTOR];
WT_DECL_RET;
WT_ITEM *buf;
memset(buf, 0, sizeof(buf));
desc = (void *)buf;
/* Use a scratch buffer to get correct alignment for direct I/O. */
WT_RET(__wt_scr_alloc(session, WT_BLOCK_DESC_SECTOR, &buf));
memset(buf->mem, 0, WT_BLOCK_DESC_SECTOR);
desc = buf->mem;
desc->magic = WT_BLOCK_MAGIC;
desc->majorv = WT_BLOCK_MAJOR_VERSION;
desc->minorv = WT_BLOCK_MINOR_VERSION;
@@ -177,7 +181,10 @@ __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh)
desc->cksum = 0;
desc->cksum = __wt_cksum(desc, WT_BLOCK_DESC_SECTOR);
return (__wt_write(session, fh, (off_t)0, WT_BLOCK_DESC_SECTOR, desc));
ret = __wt_write(session, fh, (off_t)0, WT_BLOCK_DESC_SECTOR, desc);
__wt_scr_free(&buf);
return (ret);
}
/*
@@ -188,16 +195,19 @@ static int
__desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
{
WT_BLOCK_DESC *desc;
WT_DECL_RET;
WT_ITEM *buf;
uint32_t cksum;
uint8_t buf[WT_BLOCK_DESC_SECTOR];
/* Use a scratch buffer to get correct alignment for direct I/O. */
WT_RET(__wt_scr_alloc(session, WT_BLOCK_DESC_SECTOR, &buf));
/* Read the first sector and verify the file's format. */
memset(buf, 0, sizeof(buf));
WT_RET(__wt_read(
session, block->fh, (off_t)0, WT_BLOCK_DESC_SECTOR, buf));
WT_ERR(__wt_read(
session, block->fh, (off_t)0, WT_BLOCK_DESC_SECTOR, buf->mem));
desc = (void *)buf;
WT_VERBOSE_RET(session, block,
desc = buf->mem;
WT_VERBOSE_ERR(session, block,
"open: magic %" PRIu32
", major/minor: %" PRIu32 "/%" PRIu32
", checksum %#" PRIx32,
@@ -218,16 +228,17 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
desc->cksum = 0;
if (desc->magic != WT_BLOCK_MAGIC ||
cksum != __wt_cksum(desc, WT_BLOCK_DESC_SECTOR))
WT_RET_MSG(session, WT_ERROR,
WT_ERR_MSG(session, WT_ERROR,
"%s does not appear to be a WiredTiger file", block->name);
if (desc->majorv > WT_BLOCK_MAJOR_VERSION ||
(desc->majorv == WT_BLOCK_MAJOR_VERSION &&
desc->minorv > WT_BLOCK_MINOR_VERSION))
WT_RET_MSG(session, WT_ERROR,
WT_ERR_MSG(session, WT_ERROR,
"%s is an unsupported version of a WiredTiger file",
block->name);
err: __wt_scr_free(&buf);
return (0);
}

View File

@@ -24,7 +24,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
* Salvage creates a new snapshot when it's finished, set up for
* rolling an empty file forward.
*/
WT_RET(__wt_block_snap_init(session, block, &block->live, 1));
WT_RET(__wt_block_snap_init(session, block, &block->live, "live", 1));
/*
* Truncate the file to an initial sector plus N allocation size

View File

@@ -19,7 +19,7 @@ static int __snapshot_update(WT_SESSION_IMPL *,
*/
int
__wt_block_snap_init(WT_SESSION_IMPL *session,
WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si, int is_live)
WT_BLOCK *block, WT_BLOCK_SNAPSHOT *si, const char *name, int is_live)
{
WT_DECL_RET;
@@ -43,16 +43,13 @@ __wt_block_snap_init(WT_SESSION_IMPL *session,
si->root_offset = WT_BLOCK_INVALID_OFFSET;
si->alloc.name = "alloc";
si->alloc.offset = WT_BLOCK_INVALID_OFFSET;
si->avail.name = "avail";
si->avail.offset = WT_BLOCK_INVALID_OFFSET;
si->discard.name = "discard";
si->discard.offset = WT_BLOCK_INVALID_OFFSET;
WT_RET(__wt_block_extlist_init(session, &si->alloc, name, "alloc"));
WT_RET(__wt_block_extlist_init(session, &si->avail, name, "avail"));
WT_RET(__wt_block_extlist_init(session, &si->discard, name, "discard"));
si->file_size = WT_BLOCK_DESC_SECTOR;
WT_RET(__wt_block_extlist_init(
session, &si->snapshot_avail, name, "snapshot_avail"));
return (0);
}
@@ -80,7 +77,7 @@ __wt_block_snapshot_load(WT_SESSION_IMPL *session,
dsk->size = 0;
si = &block->live;
WT_RET(__wt_block_snap_init(session, block, si, 1));
WT_RET(__wt_block_snap_init(session, block, si, "live", 1));
if (WT_VERBOSE_ISSET(session, snapshot)) {
if (addr != NULL) {
@@ -163,18 +160,27 @@ __wt_block_snapshot_unload(WT_SESSION_IMPL *session, WT_BLOCK *block)
if (block->verify)
WT_TRET(__wt_verify_snap_unload(session, block, si));
/* Discard the extent lists. */
__wt_block_extlist_free(session, &si->alloc);
__wt_block_extlist_free(session, &si->avail);
__wt_block_extlist_free(session, &si->discard);
__wt_block_extlist_free(session, &si->snapshot_avail);
__wt_block_snap_destroy(session, si);
block->live_load = 0;
return (ret);
}
/*
* __wt_block_snap_destroy --
* Clear a snapshot structure: discard each of the snapshot's four extent
* lists (alloc, avail, discard and snapshot_avail) by passing it to
* __wt_block_extlist_free.
*
* The snapshot structure itself is not freed here; this function only
* releases the extent lists it contains.
*/
void
__wt_block_snap_destroy(WT_SESSION_IMPL *session, WT_BLOCK_SNAPSHOT *si)
{
/* Discard the extent lists. */
__wt_block_extlist_free(session, &si->alloc);
__wt_block_extlist_free(session, &si->avail);
__wt_block_extlist_free(session, &si->discard);
__wt_block_extlist_free(session, &si->snapshot_avail);
}
/*
* __wt_block_snapshot --
* Create a new snapshot.
@@ -267,8 +273,15 @@ __snapshot_process(
* but there's no explicit "free the snapshot information" call into the
* block manager; if there was an error in an upper level resulting in
* the snapshot never being "resolved", the list might not be empty.
*
* XXX
* This isn't sufficient, actually: we're going to leak all the blocks
* that were written as part of the last snapshot because it was never
* resolved.
*/
__wt_block_extlist_free(session, &si->snapshot_avail);
WT_RET(__wt_block_extlist_init(
session, &si->snapshot_avail, "live", "snapshot_avail"));
/*
* To delete a snapshot, we'll need snapshot information for it, and we
@@ -309,7 +322,7 @@ __snapshot_process(
WT_ERR(__wt_calloc(
session, 1, sizeof(WT_BLOCK_SNAPSHOT), &snap->bpriv));
si = snap->bpriv;
WT_ERR(__wt_block_snap_init(session, block, si, 0));
WT_ERR(__wt_block_snap_init(session, block, si, snap->name, 0));
WT_ERR(__wt_block_buffer_to_snapshot(
session, block, snap->raw.data, si));
WT_ERR(__wt_block_extlist_read(session, block, &si->alloc));
@@ -467,11 +480,14 @@ live_update:
}
/*
* Discard the live system's alloc and discard extent lists, leave the
* Reset the live system's alloc and discard extent lists, leave the
* avail list alone.
*/
__wt_block_extlist_free(session, &si->alloc);
WT_ERR(__wt_block_extlist_init(session, &si->alloc, "live", "alloc"));
__wt_block_extlist_free(session, &si->discard);
WT_ERR(
__wt_block_extlist_init(session, &si->discard, "live", "discard"));
#ifdef HAVE_DIAGNOSTIC
/*
@@ -638,7 +654,7 @@ __snapshot_string(WT_SESSION_IMPL *session,
/* Initialize the snapshot, crack the cookie. */
si = &_si;
WT_RET(__wt_block_snap_init(session, block, si, 0));
WT_RET(__wt_block_snap_init(session, block, si, "string", 0));
WT_RET(__wt_block_buffer_to_snapshot(session, block, addr, si));
WT_RET(__wt_buf_fmt(session, buf,

View File

@@ -32,10 +32,6 @@ __wt_block_verify_start(
{
off_t file_size;
memset(&block->verify_alloc, 0, sizeof(block->verify_alloc));
block->verify_alloc.name = "verify_alloc";
block->verify_alloc.offset = WT_BLOCK_INVALID_OFFSET;
/*
* We're done if the file has no data pages (this happens if we verify
* a file immediately after creation).
@@ -80,6 +76,13 @@ __wt_block_verify_start(
block->frags = (uint32_t)(file_size / block->allocsize);
WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
/*
* We maintain an allocation list that is rolled forward through the
* set of snapshots.
*/
WT_RET(__wt_block_extlist_init(
session, &block->verify_alloc, "verify", "alloc"));
/*
* The only snapshot avail list we care about is the last one written;
* get it now and initialize the list of file fragments.
@@ -160,20 +163,20 @@ __verify_start_avail(
--snap;
si = &_si;
WT_RET(__wt_block_snap_init(session, block, si, 0));
WT_RET(__wt_block_buffer_to_snapshot(
session, block, snap->raw.data, si));
WT_RET(__wt_block_snap_init(session, block, si, snap->name, 0));
WT_ERR(
__wt_block_buffer_to_snapshot(session, block, snap->raw.data, si));
el = &si->avail;
if (el->offset == WT_BLOCK_INVALID_OFFSET)
return (0);
if (el->offset != WT_BLOCK_INVALID_OFFSET) {
WT_ERR(__wt_block_extlist_read(session, block, el));
WT_EXT_FOREACH(ext, el->off)
if ((ret = __verify_filefrag_add(
session, block, ext->off, ext->size, 1)) != 0)
break;
}
WT_RET(__wt_block_extlist_read(session, block, el));
WT_EXT_FOREACH(ext, el->off)
if ((ret = __verify_filefrag_add(
session, block, ext->off, ext->size, 1)) != 0)
break;
__wt_block_extlist_free(session, el);
err: __wt_block_snap_destroy(session, si);
return (ret);
}

View File

@@ -254,7 +254,8 @@ not_compressed: /*
WT_CSTAT_INCR(session, block_write);
WT_VERBOSE_ERR(session, write,
"off %" PRIuMAX ", size %" PRIu32 ", cksum %" PRIu32,
"%s (off %" PRIuMAX ", size %" PRIu32 ", cksum %" PRIu32 ")",
__wt_page_type_string(dsk->type),
(uintmax_t)offset, align_size, blk->cksum);
*offsetp = offset;

View File

@@ -724,8 +724,12 @@ __evict_walk(WT_SESSION_IMPL *session)
* We hold a spinlock for the entire walk -- it's slow, but (1) how
* often do new files get added or removed to/from the system, and (2)
* it's all in-memory stuff, so it's not that slow.
*
* If the connection spinlock is not available, don't block: another
* thread may be holding it and waiting on eviction (e.g., checkpoint).
*/
__wt_spin_lock(session, &conn->spinlock);
if (__wt_spin_trylock(session, &conn->spinlock) != 0)
return (0);
/*
* Resize the array in which we're tracking pages, as necessary, then
@@ -734,8 +738,8 @@ __evict_walk(WT_SESSION_IMPL *session)
*/
elem = WT_EVICT_WALK_BASE + (conn->btqcnt * WT_EVICT_WALK_PER_TABLE);
if (elem > cache->evict_entries) {
/* Save the offset of the eviction point. */
__wt_spin_lock(session, &cache->evict_lock);
/* Save the offset of the eviction point. */
i = (u_int)(cache->evict_current - cache->evict);
WT_ERR(__wt_realloc(session, &cache->evict_allocated,
elem * sizeof(WT_EVICT_ENTRY), &cache->evict));

View File

@@ -133,8 +133,10 @@ __wt_btree_close(WT_SESSION_IMPL *session)
__wt_btree_huffman_close(session);
/* Snapshot lock. */
if (btree->snaplock != NULL)
if (btree->snaplock != NULL) {
(void)__wt_rwlock_destroy(session, btree->snaplock);
btree->snaplock = NULL;
}
/* Free allocated memory. */
__wt_free(session, btree->key_format);
@@ -169,7 +171,7 @@ __btree_conf(WT_SESSION_IMPL *session)
/* Validate file types and check the data format plan. */
WT_RET(__wt_config_getones(session, config, "key_format", &cval));
WT_RET(__wt_struct_check(session, cval.str, cval.len, NULL, NULL));
if (cval.len > 0 && strncmp(cval.str, "r", cval.len) == 0)
if (__wt_config_strcmp(&cval, "r") == 0)
btree->type = BTREE_COL_VAR;
else
btree->type = BTREE_ROW;
@@ -184,8 +186,8 @@ __btree_conf(WT_SESSION_IMPL *session)
session, config, "collator", &cval));
if (cval.len > 0) {
TAILQ_FOREACH(ncoll, &conn->collqh, q) {
if (strncmp(
ncoll->name, cval.str, cval.len) == 0) {
if (__wt_config_strcmp(
&cval, ncoll->name) == 0) {
btree->collator = ncoll->collator;
break;
}

View File

@@ -968,7 +968,6 @@ __rec_split_write(
uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
dsk = buf->mem;
WT_VERBOSE_RET(session, write, "%s", __wt_page_type_string(dsk->type));
/*
* We always write an additional byte on row-store leaf pages after the

View File

@@ -668,3 +668,28 @@ __wt_config_subgets(WT_SESSION_IMPL *session,
return (__wt_config_subgetraw(session, cfg, &key_item, value));
}
/*
 * __wt_config_strcmp --
 *	Compare a configuration string value with a given NUL-terminated
 *	string.
 *
 *	The configuration value is the cfg->len bytes starting at cfg->str; it
 *	is NOT assumed to be NUL terminated.  Returns 0 if the value is exactly
 *	equal to str, 1 if the value sorts after str (including the case where
 *	str is a proper prefix of the value), and -1 if it sorts before str
 *	(including the case where the value is a proper prefix of str).
 */
int
__wt_config_strcmp(WT_CONFIG_ITEM *cfg, const char *str)
{
	const char *cstr;
	size_t i;

	for (i = 0, cstr = cfg->str; i < cfg->len; i++, cstr++, str++) {
		/*
		 * These tests also cover hitting the NUL at the end of str
		 * while value bytes remain, as long as the value byte is
		 * itself non-NUL.
		 */
		if (*cstr > *str)
			return (1);
		if (*cstr < *str)
			return (-1);

		/*
		 * The bytes are equal.  If both are NUL, the value has an
		 * embedded NUL exactly where str ends: the value is longer
		 * than str, and continuing would read past str's terminator.
		 */
		if (*str == '\0')
			return (1);
	}

	/*
	 * All the characters in the value matched: the strings are equal
	 * only if we are also at the end of str.
	 */
	return ((*str == '\0') ? 0 : -1);
}

View File

@@ -564,7 +564,7 @@ __conn_home(WT_CONNECTION_IMPL *conn, const char *home, const char **cfg)
"WIREDTIGER_HOME environment variable set but process "
"lacks privileges to use that environment variable");
copy: return (__wt_strdup(session, home, &conn->home));
copy: return (__wt_strdup(session, home, &S2C(session)->home));
}
/*

View File

@@ -84,9 +84,15 @@ __conn_btree_get(WT_SESSION_IMPL *session,
conn = S2C(session);
/*
* If we aren't holding the connection spinlock at a higher level,
* acquire it now.
*/
if (!F_ISSET(session, WT_SESSION_HAS_CONNLOCK))
__wt_spin_lock(session, &conn->spinlock);
/* Increment the reference count if we already have the btree open. */
matched = 0;
__wt_spin_lock(session, &conn->spinlock);
TAILQ_FOREACH(btree, &conn->btqh, q) {
if (strcmp(name, btree->name) == 0 &&
((snapshot == NULL && btree->snapshot == NULL) ||
@@ -99,7 +105,8 @@ __conn_btree_get(WT_SESSION_IMPL *session,
}
}
if (matched) {
__wt_spin_unlock(session, &conn->spinlock);
if (!F_ISSET(session, WT_SESSION_HAS_CONNLOCK))
__wt_spin_unlock(session, &conn->spinlock);
__wt_conn_btree_open_lock(session, flags);
return (0);
}
@@ -124,7 +131,9 @@ __conn_btree_get(WT_SESSION_IMPL *session,
TAILQ_INSERT_TAIL(&conn->btqh, btree, q);
++conn->btqcnt;
}
__wt_spin_unlock(session, &conn->spinlock);
if (!F_ISSET(session, WT_SESSION_HAS_CONNLOCK))
__wt_spin_unlock(session, &conn->spinlock);
if (ret == 0)
session->btree = btree;
@@ -299,14 +308,23 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
saved_btree = session->btree;
__wt_spin_lock(session, &conn->spinlock);
F_SET(session, WT_SESSION_HAS_CONNLOCK);
TAILQ_FOREACH(btree, &conn->btqh, q)
if (btree->snapshot == NULL &&
if (F_ISSET(btree, WT_BTREE_OPEN) &&
btree->snapshot == NULL &&
strcmp(btree->name, WT_METADATA_URI) != 0) {
/*
* We have the connection spinlock, which prevents
* handles being opened or closed, so there is no need
* for additional handle locking here, or pulling every
* tree into this session's handle cache.
*/
session->btree = btree;
WT_ERR(func(session, cfg));
}
err: __wt_spin_unlock(session, &conn->spinlock);
err: F_CLR(session, WT_SESSION_HAS_CONNLOCK);
__wt_spin_unlock(session, &conn->spinlock);
session->btree = saved_btree;
return (ret);
}
@@ -329,6 +347,8 @@ __wt_conn_btree_close(WT_SESSION_IMPL *session, int locked)
if (F_ISSET(btree, WT_BTREE_OPEN))
WT_STAT_DECR(conn->stats, file_open);
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_HAS_CONNLOCK));
/*
* Decrement the reference count. If we really are the last reference,
* get an exclusive lock on the handle so that we can close it.
@@ -376,7 +396,10 @@ __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name)
conn = S2C(session);
saved_btree = session->btree;
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_HAS_CONNLOCK));
__wt_spin_lock(session, &conn->spinlock);
F_SET(session, WT_SESSION_HAS_CONNLOCK);
TAILQ_FOREACH(btree, &conn->btqh, q) {
if (strcmp(btree->name, name) != 0)
continue;
@@ -402,8 +425,6 @@ __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name)
* necessary.
*/
if (F_ISSET(btree, WT_BTREE_OPEN)) {
__wt_spin_unlock(session, &conn->spinlock);
ret = __wt_meta_track_sub_on(session);
if (ret == 0)
ret = __wt_conn_btree_sync_and_close(session);
@@ -416,8 +437,6 @@ __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name)
*/
if (ret == 0)
ret = __wt_meta_track_sub_off(session);
__wt_spin_lock(session, &conn->spinlock);
}
if (!WT_META_TRACKING(session))
@@ -427,7 +446,8 @@ __wt_conn_btree_close_all(WT_SESSION_IMPL *session, const char *name)
WT_ERR(ret);
}
err: __wt_spin_unlock(session, &conn->spinlock);
err: F_CLR(session, WT_SESSION_HAS_CONNLOCK);
__wt_spin_unlock(session, &conn->spinlock);
return (ret);
}

View File

@@ -411,7 +411,7 @@ __wt_cursor_init(WT_CURSOR *cursor,
WT_RET(__wt_config_gets(session, cfg, "dump", &cval));
if (cval.len != 0) {
F_SET(cursor, (strncmp(cval.str, "print", cval.len) == 0) ?
F_SET(cursor, (__wt_config_strcmp(&cval, "print") == 0) ?
WT_CURSTD_DUMP_PRINT : WT_CURSTD_DUMP_HEX);
WT_RET(__wt_curdump_create(cursor, owner, &cdump));
owner = cdump;

View File

@@ -1,4 +1,4 @@
/*! @page admin Managing a WiredTiger Database
/*! @page admin Managing WiredTiger databases
- @subpage home
- @subpage security

View File

@@ -29,6 +29,8 @@ For more information about using WiredTiger, see:
- @subpage install\n
- @subpage upgrading\n
- @subpage programming\n
- @ref wt "WiredTiger API reference manual"

View File

@@ -13,6 +13,6 @@ To ask questions or discuss issues related to using WiredTiger, visit our
View the documentation online:
- <a href="1.2.0/index.html"><b>WiredTiger 1.2.0 documentation (current)</b></a>
- <a href="1.2.2/index.html"><b>WiredTiger 1.2.2 documentation (current)</b></a>
- <a href="1.1.5/index.html"><b>WiredTiger 1.1.5 documentation</b></a>
*/

8
src/docs/upgrading.dox Normal file
View File

@@ -0,0 +1,8 @@
/*! @page upgrading Upgrading WiredTiger applications
@section version13 Version 1.3
- There were no API changes in release 1.3.
- There were no file format changes in release 1.3.
*/

View File

@@ -337,6 +337,7 @@ extern WT_PROCESS __wt_process;
#define WT_PAGE_FREE_IGNORE_DISK 0x00000001
#define WT_REC_SINGLE 0x00000001
#define WT_SERVER_RUN 0x00000001
#define WT_SESSION_HAS_CONNLOCK 0x00000004
#define WT_SESSION_INTERNAL 0x00000002
#define WT_SESSION_SALVAGE_QUIET_ERR 0x00000001
#define WT_VERB_block 0x00001000

View File

@@ -71,6 +71,10 @@ extern int __wt_block_extlist_write(WT_SESSION_IMPL *session,
extern int __wt_block_extlist_truncate( WT_SESSION_IMPL *session,
WT_BLOCK *block,
WT_EXTLIST *el);
extern int __wt_block_extlist_init(WT_SESSION_IMPL *session,
WT_EXTLIST *el,
const char *name,
const char *extname);
extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
extern int __wt_block_extlist_dump( WT_SESSION_IMPL *session,
const char *tag,
@@ -166,6 +170,7 @@ extern int __wt_block_salvage_next( WT_SESSION_IMPL *session,
extern int __wt_block_snap_init(WT_SESSION_IMPL *session,
WT_BLOCK *block,
WT_BLOCK_SNAPSHOT *si,
const char *name,
int is_live);
extern int __wt_block_snapshot_load(WT_SESSION_IMPL *session,
WT_BLOCK *block,
@@ -175,6 +180,8 @@ extern int __wt_block_snapshot_load(WT_SESSION_IMPL *session,
int readonly);
extern int __wt_block_snapshot_unload(WT_SESSION_IMPL *session,
WT_BLOCK *block);
extern void __wt_block_snap_destroy(WT_SESSION_IMPL *session,
WT_BLOCK_SNAPSHOT *si);
extern int __wt_block_snapshot(WT_SESSION_IMPL *session,
WT_BLOCK *block,
WT_ITEM *buf,
@@ -456,6 +463,7 @@ extern int __wt_config_subgets(WT_SESSION_IMPL *session,
WT_CONFIG_ITEM *cfg,
const char *key,
WT_CONFIG_ITEM *value);
extern int __wt_config_strcmp(WT_CONFIG_ITEM *cfg, const char *str);
extern int __wt_config_check(WT_SESSION_IMPL *session,
const char *checks,
const char *config);
@@ -653,6 +661,7 @@ extern int __wt_meta_track_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll);
extern int __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session);
extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session);
extern int __wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key);
extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key);
extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session,

View File

@@ -73,7 +73,7 @@ __wt_txn_visible(WT_SESSION_IMPL *session, wt_txnid_t id)
*/
if (TXNID_LT(id, txn->snap_min))
return (1);
if (TXNID_LT(txn->id, txn->snap_max))
if (TXNID_LT(txn->snap_max, id))
return (0);
/*

View File

@@ -15,6 +15,7 @@
typedef struct __wt_meta_track {
enum {
WT_ST_EMPTY, /* Unused slot */
WT_ST_CHECKPOINT, /* Complete a checkpoint */
WT_ST_FILEOP, /* File operation */
WT_ST_LOCK, /* Lock a handle */
WT_ST_REMOVE, /* Remove a metadata entry */
@@ -91,13 +92,25 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_DECL_RET;
int tret;
/* Unlock handles regardless of whether we are unrolling. */
if (!unroll && trk->op != WT_ST_LOCK)
/*
* Unlock handles and complete checkpoints regardless of whether we are
* unrolling.
*/
if (!unroll && trk->op != WT_ST_CHECKPOINT && trk->op != WT_ST_LOCK)
goto free;
switch (trk->op) {
case WT_ST_EMPTY: /* Unused slot */
break;
case WT_ST_CHECKPOINT: /* Checkpoint, see above */
saved_btree = session->btree;
session->btree = trk->btree;
if (!unroll)
WT_TRET(__wt_bm_snapshot_resolve(session, NULL));
/* Release the snapshot lock */
__wt_rwunlock(session, session->btree->snaplock);
session->btree = saved_btree;
break;
case WT_ST_LOCK: /* Handle lock, see above */
saved_btree = session->btree;
session->btree = trk->btree;
@@ -233,6 +246,23 @@ __wt_meta_track_sub_off(WT_SESSION_IMPL *session)
return (ret);
}
/*
* __wt_meta_track_checkpoint --
* Track a handle involved in a checkpoint.
*
* Records a WT_ST_CHECKPOINT tracking entry that references the session's
* current btree handle, which must be non-NULL. Returns 0 on success, or
* the error from __meta_track_next if no tracking entry could be obtained.
*/
int
__wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
{
WT_META_TRACK *trk;
/* The entry is tied to the current handle, so there must be one. */
WT_ASSERT(session, session->btree != NULL);
/* Presumably returns the next free tracking slot in trk -- confirm. */
WT_RET(__meta_track_next(session, &trk));
trk->op = WT_ST_CHECKPOINT;
/* Save the handle so it can be found when the entry is processed. */
trk->btree = session->btree;
return (0);
}
/*
* __wt_meta_track_insert --
* Track an insert operation.

View File

@@ -101,10 +101,10 @@ __snapshot_worker(
WT_BTREE *btree;
WT_DECL_RET;
WT_SNAPSHOT *deleted, *snap, *snapbase;
int force, matched;
int force, matched, tracked;
btree = session->btree;
matched = 0;
matched = tracked = 0;
snap = snapbase = NULL;
/* Snapshots are single-threaded. */
@@ -246,11 +246,22 @@ nomatch: WT_ERR_MSG(session,
EINVAL, "cache flush failed to create a snapshot");
} else {
WT_ERR(__wt_meta_snaplist_set(session, btree->name, snapbase));
WT_ERR(__wt_bm_snapshot_resolve(session, snapbase));
/*
* If tracking is enabled, defer making pages available until
* the end of the transaction. The exception is if the handle
* is being discarded: in that case, it will be gone by the
* time we try to apply or unroll the meta tracking event.
*/
if (WT_META_TRACKING(session) && !discard) {
WT_ERR(__wt_meta_track_checkpoint(session));
tracked = 1;
} else
WT_ERR(__wt_bm_snapshot_resolve(session, snapbase));
}
err: __wt_meta_snaplist_free(session, snapbase);
__wt_rwunlock(session, btree->snaplock);
if (!tracked)
__wt_rwunlock(session, btree->snaplock);
return (ret);
}

View File

@@ -1,8 +1,28 @@
/*-
* Copyright (c) 2008-2012 WiredTiger, Inc.
* All rights reserved.
*
* See the file LICENSE for redistribution information.
* This is free and unencumbered software released into the public domain.
*
* Anyone is free to copy, modify, publish, use, compile, sell, or
* distribute this software, either in source code form or as a compiled
* binary, for any purpose, commercial or non-commercial, and by any
* means.
*
* In jurisdictions that recognize copyright laws, the author or authors
* of this software dedicate any and all copyright interest in the
* software to the public domain. We make this dedication for the benefit
* of the public at large and to the detriment of our heirs and
* successors. We intend this dedication to be an overt act of
* relinquishment in perpetuity of all present and future rights to this
* software under copyright law.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "wt_internal.h"

View File

@@ -98,7 +98,7 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET_MSG(session, EINVAL, "Transaction already running");
WT_RET(__wt_config_gets(session, cfg, "isolation", &cval));
txn->isolation = (strcmp(cval.str, "snapshot") == 0) ?
txn->isolation = (__wt_config_strcmp(&cval, "snapshot") == 0) ?
TXN_ISO_SNAPSHOT : TXN_ISO_READ_UNCOMMITTED;
WT_ASSERT(session, txn->id == WT_TXN_NONE);
@@ -144,10 +144,14 @@ __txn_release(WT_SESSION_IMPL *session)
if (!F_ISSET(txn, TXN_RUNNING))
WT_RET_MSG(session, EINVAL, "No transaction is active");
/* Clear the transaction's ID from the global table. */
txn_global = &S2C(session)->txn_global;
WT_ASSERT(session, txn_global->ids[session->id] != WT_TXN_NONE &&
txn->id != WT_TXN_NONE);
WT_PUBLISH(txn_global->ids[session->id], txn->id = WT_TXN_NONE);
/* Reset the transaction state to not running. */
txn->isolation = TXN_ISO_READ_UNCOMMITTED;
F_CLR(txn, TXN_ERROR | TXN_RUNNING);
return (0);
@@ -193,13 +197,11 @@ int
__wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_TXN_GLOBAL *txn_global;
const char *snapshot;
const char *txn_cfg[] = { "isolation=snapshot", NULL };
cursor = NULL;
txn_global = &S2C(session)->txn_global;
if ((ret = __wt_config_gets(
@@ -212,12 +214,13 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Only one checkpoint can be active at a time. */
__wt_writelock(session, S2C(session)->ckpt_rwlock);
WT_ERR(__wt_txn_begin(session, txn_cfg));
/* Prevent eviction from evicting anything newer than this. */
txn_global->ckpt_txnid = session->txn.snap_min;
WT_ERR(__wt_meta_track_on(session));
/*
* If we're doing an ordinary unnamed checkpoint, we only need to flush
* open files. If we're creating a named snapshot, we need to walk the
@@ -225,16 +228,27 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_TRET((snapshot == NULL) ?
__wt_conn_btree_apply(session, __wt_snapshot, cfg) :
__wt_meta_btree_apply(session, __wt_snapshot, cfg, 0));
__wt_meta_btree_apply(session,
__wt_snapshot, cfg, WT_BTREE_SNAPSHOT_OP));
if (cursor != NULL)
WT_TRET(cursor->close(cursor));
/*
* XXX Rolling back the changes here is problematic.
*
* If we unroll here, we need a way to roll back changes to the avail
* list for each tree that was successfully synced before the error
* occurred. Otherwise, the next time we try this operation, we will
* try to free an old snapshot again.
*
* OTOH, if we commit the changes after a failure, we have partially
* overwritten the checkpoint, so what ends up on disk is not
* consistent.
*/
WT_TRET(__wt_meta_track_off(session, ret != 0));
txn_global->ckpt_txnid = WT_TXN_NONE;
WT_TRET(__txn_release(session));
err: __wt_rwunlock(session, S2C(session)->ckpt_rwlock);
err: txn_global->ckpt_txnid = WT_TXN_NONE;
if (F_ISSET(&session->txn, TXN_RUNNING))
WT_TRET(__txn_release(session));
__wt_rwunlock(session, S2C(session)->ckpt_rwlock);
__wt_free(session, snapshot);
return (ret);
}

View File

@@ -171,10 +171,10 @@ ops(void *arg)
WT_TABLENAME, sync_name);
sync_drop = 0;
} else {
if ((ret = session->sync(
session, WT_TABLENAME, sync_name)) != 0)
die(ret, "session.sync: %s: %s",
WT_TABLENAME, sync_name);
if ((ret = session->checkpoint(
session, sync_name)) != 0)
die(ret, "session.checkpoint: %s",
sync_name);
sync_drop = 1;
}

View File

@@ -69,8 +69,9 @@ class test_txn02(wttest.WiredTigerTestCase):
txn2s = [('t2c', dict(txn2='commit')), ('t2r', dict(txn2='rollback'))]
txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
scenarios = number_scenarios(multiply_scenarios('.', types,
op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s))
op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) # [:1]
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
@@ -78,12 +79,17 @@ class test_txn02(wttest.WiredTigerTestCase):
('error_prefix="%s: ",' % self.shortid()) +
'transactional,')
self.pr(`conn`)
self.session2 = conn.open_session()
return conn
def check(self, expected):
c = self.session.open_cursor(self.uri, None)
def check(self, session, txn_config, expected):
    """Assert that the rows visible through ``session`` match ``expected``.

    A cursor is opened on ``self.uri`` and every (key, value) pair whose
    value is not 0 is collected into a dict, which is then compared with
    ``expected``.  When ``txn_config`` is truthy the scan is wrapped in a
    transaction begun with that configuration and committed after the
    cursor is closed; otherwise no transaction is started here.
    """
    use_txn = bool(txn_config)
    if use_txn:
        session.begin_transaction(txn_config)

    cursor = session.open_cursor(self.uri, None)
    # Rows with a value of 0 are left out of the comparison.
    actual = {}
    for key, value in cursor:
        if value != 0:
            actual[key] = value
    cursor.close()

    if use_txn:
        session.commit_transaction()

    self.assertEqual(actual, expected)
def test_ops(self):
@@ -100,34 +106,47 @@ class test_txn02(wttest.WiredTigerTestCase):
ops = (self.op1, self.op2, self.op3, self.op4)
txns = (self.txn1, self.txn2, self.txn3, self.txn4)
# print ', '.join('%s(%d)[%s]' % (ok[0], ok[1], txn)
# for ok, txn in zip(ops, txns))
for i, ot in enumerate(zip(ops, txns)):
self.session.begin_transaction()
c = self.session.open_cursor(self.uri, None, 'overwrite')
ok, txn = ot
op, k = ok
# print '%s(%d)[%s]' % (ok[0], ok[1], txn)
# We use the overwrite config so insert can update as needed.
if op == 'insert' or op == 'update':
c.set_key(k)
c.set_value(i + 2)
c.insert()
# A snapshot transaction should not see the changes
self.check(self.session2, "isolation=snapshot", expected)
if txn == 'commit':
expected[k] = i + 2
elif op == 'remove':
c.set_key(k)
c.remove()
# A snapshot transaction should not see the changes
self.check(self.session2, "isolation=snapshot", expected)
if txn == 'commit' and k in expected:
del expected[k]
else:
print "UNKNOWN op", op
if txn == 'commit':
# The transaction should see its own changes
self.check(expected)
self.check(self.session, None, expected)
# A read-uncommitted transaction should see the changes already
self.check(self.session2, "isolation=read-uncommitted", expected)
self.session.commit_transaction()
elif txn == 'rollback':
self.session.rollback_transaction()
else:
print "UNKNOWN op", op
self.check(expected)
# The change should be (in)visible in the same session
self.check(self.session, None, expected)
# The change should be (in)visible to snapshot transactions
self.check(self.session2, "isolation=snapshot", expected)
self.session.drop(self.uri)
if __name__ == '__main__':

View File

@@ -163,8 +163,8 @@ wt_shutdown(void)
if ((ret = session->verify(session, FNAME, NULL)) != 0)
die("session.verify", ret);
if ((ret = session->sync(session, FNAME, NULL)) != 0)
die("session.sync", ret);
if ((ret = session->checkpoint(session, NULL)) != 0)
die("session.checkpoint", ret);
if ((ret = conn->close(conn, NULL)) != 0)
die("conn.close", ret);