Compare commits

...

69 Commits

Author SHA1 Message Date
David Hows
234b68b116 WT-2613 Add WT_UNUSED to a variable to fix Windows compilation. (#2717)
(cherry picked from commit 7deb9c213b)
2016-06-01 12:47:25 +10:00
Michael Cahill
5d215904c3 SERVER-24306 Fix stall in log_flush switching to new files. (#2761)
* SERVER-24306 Fix stall in log_flush switching to new files.

* Pass boolean false rather than 0.

(cherry picked from commit b89aaece7b)
2016-06-01 12:44:46 +10:00
Michael Cahill
18879587af WT-2629 Make the stack non-executable with GCC only. (#2742)
(cherry picked from commit f6f86961a4)
2016-06-01 12:41:58 +10:00
Michael Cahill
6bfcb1ca5b WT-2629 Don't make stacks executable in assembly source. (#2739)
(cherry picked from commit 0f7ae730d9)
2016-06-01 12:41:48 +10:00
Alex Gorrod
71c0588a77 Merge pull request #2677 from wiredtiger/wt-2560-spin
WT-2560 Spin on transaction locks.
(cherry picked from commit f498d8c1c1)
2016-06-01 10:48:12 +10:00
Michael Cahill
58765850aa Merge pull request #2660 from wiredtiger/wt-2560
WT-2560 Use a rwlock to protect transaction state, don't spin.
Conflicted on a whitespace cleanup.

(cherry picked from commit 76e286c7ba)
2016-06-01 10:46:47 +10:00
Keith Bostic
30d327f810 Merge pull request #2664 from wiredtiger/wt-2559
WT-2559 Open a local log file handle for sync.
(cherry picked from commit 6b3553003f)
2016-06-01 10:40:38 +10:00
Michael Cahill
88b898e7cb Merge pull request #2670 from wiredtiger/wt-2566
WT-2566 Lock/unlock operations should imply memory barriers.
(cherry picked from commit 05cfbc26c2)
2016-04-20 17:04:16 +10:00
Alex Gorrod
7ea2631de2 Merge branch 'mongodb-3.4' into mongodb-3.2 2016-04-08 16:48:45 +10:00
Michael Cahill
039fe06082 Merge branch 'mongodb-3.4' into mongodb-3.2 2016-04-07 17:29:23 +10:00
Michael Cahill
43e885a0f9 Merge branch 'mongodb-3.4' into mongodb-3.2 2016-03-25 08:19:12 +11:00
Michael Cahill
5cdd3e320c Merge branch 'mongodb-3.2.3' into mongodb-3.2 2016-02-18 14:00:00 +11:00
Michael Cahill
563b7823f7 Merge pull request #2495 from wiredtiger/wt-2397
WT-2397: Cursor traversal from end of the tree skips records.
(cherry picked from commit d915631b)

Manually resolved conflicts from pull request 2478.
2016-02-17 17:30:39 +11:00
Michael Cahill
5e3a56f0ab Merge pull request #2498 from wiredtiger/server-22676
SERVER-22676 Don't check duplicated create calls for matching configurations
(cherry picked from commit 98d6ce255d)
2016-02-17 17:17:20 +11:00
Michael Cahill
bc929dbcf1 Merge branch 'mongodb-3.4' into mongodb-3.2 2016-02-11 12:05:59 +11:00
Michael Cahill
07966a492a Fixup for merge vs backport. 2016-01-28 12:18:13 +11:00
Michael Cahill
67e412d4c5 Merge branch 'develop' into mongodb-3.2 2016-01-28 09:30:32 +11:00
Alex Gorrod
3c2ad56b50 Merge pull request #2415 from wiredtiger/wt-2307-fix
(cherry picked from commit 12aaeb6)

WT-2307: Fix for cursor iteration bug when pages are splitting
2015-12-29 05:42:26 +11:00
Alex Gorrod
b1768d0d9f Revert "Merge pull request #2394 from wiredtiger/SERVER-21887-sample"
The change wasn't ready for back port into 3.2.1

This reverts commit 21b5f9951e.
2015-12-22 08:15:10 +00:00
Alex Gorrod
2893117baa Revert "WT-2291: error: comparison of array 'ins->next' not equal to a null"
The change isn't ready for back port into 3.2.1

This reverts commit 4380cec93d.
2015-12-22 08:14:38 +00:00
Keith Bostic
4380cec93d WT-2291: error: comparison of array 'ins->next' not equal to a null
pointer is always true [-Werror,-Wtautological-pointer-compare]
2015-12-17 10:48:19 +11:00
Alex Gorrod
21b5f9951e Merge pull request #2394 from wiredtiger/SERVER-21887-sample
WT-2291: fix for sampling in newly created trees.
2015-12-17 10:47:55 +11:00
Keith Bostic
decd9166cc __wt_ref_info() and __ref_is_leaf() no longer need a WT_SESSION_IMPL
argument, remove it.
2015-12-16 15:54:52 +11:00
Keith Bostic
d835a0c0a8 gcc47 with [-Werror=maybe-uninitialized] reports we can end up with type
uninitialized in __ref_is_leaf() (based on a call to __wt_ref_info()).
It's not really possible because the path where type isn't set is a path
where we panic because the WT_ADDR structure has an impossible type.

We already ignore the __wt_ref_info() error return in one path, and
there are only two paths that care about the returned type; remove the
error check from __wt_ref_info() and set type to 0 in the failing case
(the same value we use when there's no WT_REF addr to check), the code
that calls this function already checks addr on return.

This simplifies __ref_is_leaf() slightly, it now returns a boolean
instead of an error code with a boolean pointer argument.
2015-12-16 15:29:31 +11:00
Keith Bostic
48e1343e40 Merge pull request #2363 from wiredtiger/WT-2262
WT-2262 Have random sampling walk the tree so it isn't biased in skewed trees.
2015-12-16 15:29:13 +11:00
Michael Cahill
eb838c7f12 Merge pull request #2361 from wiredtiger/wt-2260-dont-evict-internal
WT-2260 Avoid adding internal pages to the eviction queue.
2015-12-16 15:28:52 +11:00
Alex Gorrod
a6957512a4 Merge pull request #2358 from wiredtiger/wt-2258-preload-directio 2015-12-16 15:28:11 +11:00
Michael Cahill
197eef00fd Merge branch 'develop' into mongodb-3.2 2015-12-02 22:58:11 +11:00
Michael Cahill
7a4f3259b4 Merge branch 'develop' into mongodb-3.2 2015-12-01 17:12:04 +11:00
Michael Cahill
8326df6b76 Merge branch 'develop' into mongodb-3.2 2015-11-30 15:02:38 +11:00
Keith Bostic
b65381f64c Merge branch 'develop' into mongodb-3.2 2015-11-24 11:29:41 -05:00
Michael Cahill
0019262fed Merge branch 'develop' into mongodb-3.2 2015-11-24 21:59:04 +11:00
Keith Bostic
4d72349b8a Merge branch 'develop' into mongodb-3.2 2015-11-23 09:00:49 -05:00
Michael Cahill
4898aa408f Merge branch 'develop' into mongodb-3.2 2015-11-20 16:58:33 +11:00
Michael Cahill
9d375e3416 Merge branch 'develop' into mongodb-3.2 2015-11-19 17:18:28 +11:00
Michael Cahill
d9ec1ff8ec Merge branch 'develop' into mongodb-3.2 2015-11-11 16:27:42 +11:00
Keith Bostic
465dca8b46 Merge branch 'develop' into mongodb-3.2 2015-11-03 10:33:14 -05:00
Alex Gorrod
f95877af13 Merge branch 'develop' into mongodb-3.2 2015-11-03 22:18:00 +11:00
Alex Gorrod
62c1a7aa36 Merge branch 'develop' into mongodb-3.2 2015-11-02 03:59:53 +00:00
Michael Cahill
0dc3f20df6 Merge branch 'develop' into mongodb-3.2 2015-11-02 11:43:14 +11:00
Michael Cahill
0537648e03 Merge branch 'develop' into mongodb-3.2 2015-10-09 17:07:24 +11:00
Alex Gorrod
3c856645c8 Merge branch 'develop' into mongodb-3.2 2015-09-30 15:01:57 +10:00
Michael Cahill
10208e8284 Merge branch 'develop' into mongodb-3.2 2015-09-25 15:00:04 +10:00
Michael Cahill
16e3e48d98 Merge branch 'develop' into mongodb-3.2 2015-09-18 12:56:14 +10:00
Michael Cahill
5205bb1f0f Merge branch 'develop' into mongodb-3.2 2015-09-11 16:23:01 +10:00
Michael Cahill
dca63120b7 Merge branch 'develop' into mongodb-3.2 2015-09-11 12:21:49 +10:00
sueloverso
0cccab30c0 WT-2064 Don't spin indefinitely waiting for the handle list lock in eviction
Merge pull request #2155 from wiredtiger/WT-2064

(cherry picked from commit 66757f7247)
2015-08-28 11:48:29 +10:00
Alex Gorrod
578a856c19 Merge pull request #2156 from wiredtiger/WT-2066
WT-2066 - Update the oldest transaction ID from eviction
(cherry picked from commit 8f42f02d3c)
2015-08-28 11:47:56 +10:00
Alex Gorrod
a85c5cda41 Merge pull request #2152 from wiredtiger/WT-2062
WT-2062 Try harder to make progress on in-memory splits
(cherry picked from commit 3e0c7bfa3a)
2015-08-28 11:47:30 +10:00
Michael Cahill
6da2dc175b Merge commit '12044d22cce1a79804254ac9c80b1120701bd7c8' into mongodb-3.2 2015-08-28 11:45:48 +10:00
Michael Cahill
7ffa315e39 Merge branch 'develop' into mongodb-3.2 2015-08-18 10:25:36 +10:00
Michael Cahill
26d1ad271f Merge branch 'develop' into mongodb-3.2 2015-08-12 20:36:04 +10:00
Alex Gorrod
fdedd3621c Merge branch 'develop' into mongodb-3.2 2015-07-23 15:53:52 +10:00
Alex Gorrod
4187f419f8 Merge branch 'develop' into mongodb-3.2 2015-07-23 15:50:57 +10:00
Michael Cahill
42823c9682 Merge branch 'develop' into mongodb-3.2 2015-07-17 22:19:04 +10:00
Alex Gorrod
fbaf1cf4f5 Merge branch 'develop' into mongodb-3.2 2015-06-26 05:09:48 +00:00
Alex Gorrod
3d845c98cb Merge branch 'develop' into mongodb-3.2 2015-06-10 18:54:48 +00:00
Michael Cahill
1d2fe8a145 Merge branch 'develop' into mongodb-3.2 2015-06-09 01:26:55 +10:00
Alex Gorrod
bdaaaec87d Merge branch 'develop' into mongodb-3.2 2015-05-29 00:19:26 +00:00
Michael Cahill
35cc116acd Merge branch 'develop' into mongodb-3.2 2015-05-18 15:02:56 +10:00
Alex Gorrod
cbe0fad3e9 Merge branch 'develop' into mongodb-3.2 2015-05-15 06:07:13 +00:00
Michael Cahill
4f9aa1c548 Merge branch 'develop' into mongodb-3.2 2015-05-08 14:19:20 +10:00
Michael Cahill
1f44c05f91 Merge branch 'develop' into mongodb-3.2 2015-04-27 17:43:11 +10:00
Michael Cahill
e31aa8cf29 Merge branch 'develop' into mongodb-3.2 2015-04-27 17:23:58 +10:00
Michael Cahill
c90bc747e1 Merge branch 'develop' into mongodb-3.2 2015-04-24 17:17:50 +10:00
Alex Gorrod
2c1b7aa80b Update MongoDB changelog with latest merge commits. 2015-04-23 17:28:27 +10:00
Alex Gorrod
41762ae13c Merge branch 'develop' into mongodb-3.2 2015-04-23 17:25:06 +10:00
Alex Gorrod
f7691f63a6 Add long version of commit log to NEWS.MONGODB 2015-04-23 17:21:01 +10:00
Alex Gorrod
9be5497753 Add in MongoDB changelog from 3.0 2015-04-23 11:43:19 +10:00
22 changed files with 2753 additions and 158 deletions

2523
NEWS.MONGODB Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -325,7 +325,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
valid = false;
if (F_ISSET(cbt, WT_CBT_ACTIVE) &&
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
__wt_txn_cursor_op(session);
WT_ERR(__wt_txn_cursor_op(session));
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, cbt->ref, false) :
@@ -405,7 +405,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
if (btree->type == BTREE_ROW &&
F_ISSET(cbt, WT_CBT_ACTIVE) &&
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
__wt_txn_cursor_op(session);
WT_ERR(__wt_txn_cursor_op(session));
WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));

View File

@@ -326,7 +326,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_evict_soon(page);
/* Bump the oldest ID, we're about to do some visibility checks. */
__wt_txn_update_oldest(session, false);
WT_RET(__wt_txn_update_oldest(session, false));
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));

View File

@@ -81,7 +81,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
if (__wt_page_is_modified(page) &&
WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
if (txn->isolation == WT_ISO_READ_COMMITTED)
__wt_txn_get_snapshot(session);
WT_ERR(__wt_txn_get_snapshot(session));
leaf_bytes += page->memory_footprint;
++leaf_pages;
WT_ERR(__wt_reconcile(session, walk, NULL, 0));
@@ -100,7 +100,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* the metadata shouldn't be that big, and (b) if we do ever
*/
if (txn->isolation == WT_ISO_READ_COMMITTED)
__wt_txn_get_snapshot(session);
WT_ERR(__wt_txn_get_snapshot(session));
/*
* We cannot check the tree modified flag in the case of a

View File

@@ -93,7 +93,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
* transaction ID will catch up with the current ID.
*/
for (;;) {
__wt_txn_update_oldest(session, true);
WT_TRET(__wt_txn_update_oldest(session, true));
if (txn_global->oldest_id == txn_global->current)
break;
__wt_yield();

View File

@@ -16,7 +16,7 @@ static int
__curds_txn_enter(WT_SESSION_IMPL *session)
{
session->ncursors++; /* XXX */
__wt_txn_cursor_op(session);
WT_RET(__wt_txn_cursor_op(session));
return (0);
}

View File

@@ -26,7 +26,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_RET(__wt_evict_file_exclusive_on(session));
/* Make sure the oldest transaction ID is up-to-date. */
__wt_txn_update_oldest(session, true);
WT_RET(__wt_txn_update_oldest(session, true));
/* Walk the tree, discarding pages. */
next_ref = NULL;

View File

@@ -594,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session)
* of whether the cache is full, to prevent the oldest ID
* falling too far behind.
*/
__wt_txn_update_oldest(session, true);
WT_RET(__wt_txn_update_oldest(session, loop > 0));
if (!__evict_update_work(session))
break;

View File

@@ -420,7 +420,7 @@ __evict_review(
* fallen behind current.
*/
if (modified)
__wt_txn_update_oldest(session, true);
WT_RET(__wt_txn_update_oldest(session, false));
if (!__wt_page_can_evict(session, ref, inmem_splitp))
return (EBUSY);

View File

@@ -270,7 +270,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
* to read.
*/
if (!F_ISSET(cbt, WT_CBT_NO_TXN))
__wt_txn_cursor_op(session);
WT_RET(__wt_txn_cursor_op(session));
return (0);
}

View File

@@ -676,8 +676,8 @@ extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats);
extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats);
extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force);
extern int __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force);
extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]);
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]);

View File

@@ -306,6 +306,12 @@ __wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
__wt_sleep(0, 10);
}
/*
* Applications depend on a barrier here so that operations holding the
* lock see consistent data.
*/
WT_READ_BARRIER();
return (0);
}
@@ -318,6 +324,12 @@ __wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
{
WT_UNUSED(session);
/*
* Ensure that all updates made while the lock was held are visible to
* the next thread to acquire the lock.
*/
WT_WRITE_BARRIER();
/*
* We have exclusive access - the update does not need to be atomic.
*/

View File

@@ -306,7 +306,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
if (!__wt_txn_visible_all(session, txn)) {
/* Try to move the oldest ID forward and re-check. */
__wt_txn_update_oldest(session, false);
WT_RET(__wt_txn_update_oldest(session, false));
if (!__wt_txn_visible_all(session, txn))
return (0);

View File

@@ -74,7 +74,7 @@ struct __wt_txn_global {
volatile uint64_t current; /* Current transaction ID. */
/* The oldest running transaction ID (may race). */
uint64_t last_running;
volatile uint64_t last_running;
/*
* The oldest transaction ID that is not yet visible to some
@@ -82,8 +82,11 @@ struct __wt_txn_global {
*/
volatile uint64_t oldest_id;
/* Count of scanning threads, or -1 for exclusive access. */
volatile int32_t scan_count;
/*
* Prevents the oldest ID moving forwards while threads are scanning
* the global transaction state.
*/
WT_RWLOCK *scan_rwlock;
/*
* Track information about the running checkpoint. The transaction

View File

@@ -261,14 +261,14 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
* eviction, it's better to do it beforehand.
*/
WT_RET(__wt_cache_eviction_check(session, false, NULL));
__wt_txn_get_snapshot(session);
WT_RET(__wt_txn_get_snapshot(session));
}
F_SET(txn, WT_TXN_RUNNING);
if (F_ISSET(S2C(session), WT_CONN_READONLY))
F_SET(txn, WT_TXN_READONLY);
return (false);
return (0);
}
/*
@@ -450,7 +450,7 @@ __wt_txn_read_last(WT_SESSION_IMPL *session)
* __wt_txn_cursor_op --
* Called for each cursor operation.
*/
static inline void
static inline int
__wt_txn_cursor_op(WT_SESSION_IMPL *session)
{
WT_TXN *txn;
@@ -482,7 +482,9 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
if (txn_state->snap_min == WT_TXN_NONE)
txn_state->snap_min = txn_global->last_running;
} else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
__wt_txn_get_snapshot(session);
WT_RET(__wt_txn_get_snapshot(session));
return (0);
}
/*

View File

@@ -8,6 +8,8 @@
#include "wt_internal.h"
static int __log_openfile(
WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
static int __log_write_internal(
WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
@@ -93,8 +95,9 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn)
int
__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
{
WT_LOG *log;
WT_DECL_RET;
WT_FH *log_fh;
WT_LOG *log;
log = S2C(session)->log;
@@ -129,12 +132,21 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* Sync the log file if needed.
*/
if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
/*
* Get our own file handle to the log file. It is possible
* for the file handle in the log structure to change out
* from under us and either be NULL or point to a different
* file than we want.
*/
WT_ERR(__log_openfile(session,
false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log->log_fh->name, min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_fsync(session, log->log_fh, true));
log_fh->name, min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_fsync(session, log_fh, true));
log->sync_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync);
WT_ERR(__wt_close(session, &log_fh));
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
}
err:
@@ -2128,9 +2140,18 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
* We need to flush out the current slot first to get the real
* end of log LSN in log->alloc_lsn.
*/
WT_RET(__wt_log_flush_lsn(session, &lsn, 0));
WT_RET(__wt_log_flush_lsn(session, &lsn, false));
last_lsn = log->alloc_lsn;
/*
* If the last write caused a switch to a new log file, we should only
* wait for the last write to be flushed. Otherwise, if the workload
* is single-threaded we could wait here forever because the write LSN
* doesn't switch into the new file until it contains a record.
*/
if (last_lsn.l.offset == WT_LOG_FIRST_RECORD)
last_lsn = log->log_close_lsn;
/*
* Wait until all current outstanding writes have been written
* to the file system.

View File

@@ -210,7 +210,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
goto open;
if (txn->isolation == WT_ISO_SNAPSHOT)
__wt_txn_cursor_op(session);
WT_RET(__wt_txn_cursor_op(session));
/*
* Figure out how many updates are required for

View File

@@ -289,7 +289,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
}
/* Stop if a running transaction needs the chunk. */
__wt_txn_update_oldest(session, true);
WT_RET(__wt_txn_update_oldest(session, true));
if (chunk->switch_txn == WT_TXN_NONE ||
!__wt_txn_visible_all(session, chunk->switch_txn)) {
WT_RET(__wt_verbose(session, WT_VERB_LSM,

View File

@@ -183,6 +183,8 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
session, WT_VERB_MUTEX, "rwlock: readlock %s", rwlock->name));
WT_STAT_FAST_CONN_INCR(session, rwlock_read);
WT_DIAGNOSTIC_YIELD;
l = &rwlock->rwlock;
/*
@@ -213,6 +215,12 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
*/
++l->s.readers;
/*
* Applications depend on a barrier here so that operations holding the
* lock see consistent data.
*/
WT_READ_BARRIER();
return (0);
}
@@ -306,6 +314,12 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
__wt_sleep(0, 10);
}
/*
* Applications depend on a barrier here so that operations holding the
* lock see consistent data.
*/
WT_READ_BARRIER();
return (0);
}
@@ -316,31 +330,32 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
int
__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
{
wt_rwlock_t *l, copy;
wt_rwlock_t *l, new;
WT_RET(__wt_verbose(
session, WT_VERB_MUTEX, "rwlock: writeunlock %s", rwlock->name));
/*
* Ensure that all updates made while the lock was held are visible to
* the next thread to acquire the lock.
*/
WT_WRITE_BARRIER();
l = &rwlock->rwlock;
copy = *l;
new = *l;
/*
* We're the only writer of the writers/readers fields, so the update
* does not need to be atomic; we have to update both values at the
* same time though, otherwise we'd potentially race with the thread
* next granted the lock.
*
* Use a memory barrier to ensure the compiler doesn't mess with these
* instructions and rework the code in a way that avoids the update as
* a unit.
*/
WT_BARRIER();
++new.s.writers;
++new.s.readers;
l->i.wr = new.i.wr;
++copy.s.writers;
++copy.s.readers;
l->i.wr = copy.i.wr;
WT_DIAGNOSTIC_YIELD;
return (0);
}

View File

@@ -769,3 +769,10 @@ FUNC_START(__crc32_vpmsum)
FUNC_END(__crc32_vpmsum)
#endif
/*
* Make sure the stack isn't executable with GCC (regardless of platform).
*/
#ifndef __clang__
.section .note.GNU-stack,"",@progbits
#endif

View File

@@ -108,17 +108,17 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
* __wt_txn_get_snapshot --
* Allocate a snapshot.
*/
void
int
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s, *txn_state;
uint64_t current_id, id;
uint64_t prev_oldest_id, snap_min;
uint32_t i, n, session_cnt;
int32_t count;
conn = S2C(session);
txn = &session->txn;
@@ -126,15 +126,13 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
txn_state = WT_SESSION_TXN_STATE(session);
/*
* We're going to scan. Increment the count of scanners to prevent the
* oldest ID from moving forwards. Spin if the count is negative,
* which indicates that some thread is moving the oldest ID forwards.
* Spin waiting for the lock: the sleeps in our blocking readlock
* implementation are too slow for scanning the transaction table.
*/
do {
if ((count = txn_global->scan_count) < 0)
WT_PAUSE();
} while (count < 0 ||
!__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1));
while ((ret =
__wt_try_readlock(session, txn_global->scan_rwlock)) == EBUSY)
WT_PAUSE();
WT_RET(ret);
current_id = snap_min = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
@@ -145,11 +143,9 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
__txn_sort_snapshot(session, 0, current_id);
/* Check that the oldest ID has not moved in the meantime. */
if (prev_oldest_id == txn_global->oldest_id) {
WT_ASSERT(session, txn_global->scan_count > 0);
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
return;
}
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
return (0);
}
/* Walk the array of concurrent transactions. */
@@ -182,67 +178,35 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
txn_state->snap_min = snap_min;
WT_ASSERT(session, txn_global->scan_count > 0);
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
__txn_sort_snapshot(session, n, current_id);
return (0);
}
/*
* __wt_txn_update_oldest --
* Sweep the running transactions to update the oldest ID required.
* !!!
* If a data-source is calling the WT_EXTENSION_API.transaction_oldest
* method (for the oldest transaction ID not yet visible to a running
* transaction), and then comparing that oldest ID against committed
* transactions to see if updates for a committed transaction are still
* visible to running transactions, the oldest transaction ID may be
* the same as the last committed transaction ID, if the transaction
* state wasn't refreshed after the last transaction committed. Push
* past the last committed transaction.
*/
void
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
* __txn_oldest_scan --
* Sweep the running transactions to calculate the oldest ID required.
*/
static void
__txn_oldest_scan(WT_SESSION_IMPL *session,
uint64_t *oldest_idp, uint64_t *last_runningp,
WT_SESSION_IMPL **oldest_sessionp)
{
WT_CONNECTION_IMPL *conn;
WT_SESSION_IMPL *oldest_session;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s;
uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
uint64_t id, last_running, oldest_id, prev_oldest_id;
uint32_t i, session_cnt;
int32_t count;
bool last_running_moved;
conn = S2C(session);
txn_global = &conn->txn_global;
retry:
current_id = last_running = txn_global->current;
oldest_session = NULL;
/* The oldest ID cannot change while we are holding the scan lock. */
prev_oldest_id = txn_global->oldest_id;
/*
* For pure read-only workloads, or if the update isn't forced and the
* oldest ID isn't too far behind, avoid scanning.
*/
if (prev_oldest_id == current_id ||
(!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
return;
/*
* We're going to scan. Increment the count of scanners to prevent the
* oldest ID from moving forwards. Spin if the count is negative,
* which indicates that some thread is moving the oldest ID forwards.
*/
do {
if ((count = txn_global->scan_count) < 0)
WT_PAUSE();
} while (count < 0 ||
!__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1));
/* The oldest ID cannot change until the scan count goes to zero. */
prev_oldest_id = txn_global->oldest_id;
current_id = oldest_id = last_running = txn_global->current;
oldest_id = last_running = txn_global->current;
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
@@ -264,7 +228,7 @@ retry:
* !!!
* Note: Don't ignore snap_min values older than the previous
* oldest ID. Read-uncommitted operations publish snap_min
* values without incrementing scan_count to protect the global
* values without acquiring the scan lock to protect the global
* table. See the comment in __wt_txn_cursor_op for
* more details.
*/
@@ -283,76 +247,118 @@ retry:
WT_TXNID_LT(id, oldest_id))
oldest_id = id;
/* Update the last running ID. */
last_running_moved =
WT_TXNID_LT(txn_global->last_running, last_running);
*oldest_idp = oldest_id;
*oldest_sessionp = oldest_session;
*last_runningp = last_running;
}
/* Update the oldest ID. */
if (WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) {
/*
* We know we want to update. Check if we're racing.
*/
if (__wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) {
WT_ORDERED_READ(session_cnt, conn->session_cnt);
for (i = 0, s = txn_global->states;
i < session_cnt; i++, s++) {
if ((id = s->id) != WT_TXN_NONE &&
WT_TXNID_LT(id, last_running))
last_running = id;
if ((id = s->snap_min) != WT_TXN_NONE &&
WT_TXNID_LT(id, oldest_id))
oldest_id = id;
}
/*
* __wt_txn_update_oldest --
* Sweep the running transactions to update the oldest ID required.
*/
int
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *oldest_session;
WT_TXN_GLOBAL *txn_global;
uint64_t current_id, last_running, oldest_id;
uint64_t prev_last_running, prev_oldest_id;
if (WT_TXNID_LT(last_running, oldest_id))
oldest_id = last_running;
conn = S2C(session);
txn_global = &conn->txn_global;
current_id = last_running = txn_global->current;
prev_last_running = txn_global->last_running;
prev_oldest_id = txn_global->oldest_id;
/*
* For pure read-only workloads, or if the update isn't forced and the
* oldest ID isn't too far behind, avoid scanning.
*/
if (prev_oldest_id == current_id ||
(!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
return (0);
/* First do a read-only scan. */
if (force)
WT_RET(__wt_readlock(session, txn_global->scan_rwlock));
else if ((ret =
__wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
return (ret == EBUSY ? 0 : ret);
__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
/*
* If the state hasn't changed (or hasn't moved far enough for
* non-forced updates), give up.
*/
if ((oldest_id == prev_oldest_id ||
(!force && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
((last_running == prev_last_running) ||
(!force && WT_TXNID_LT(last_running, prev_last_running + 100))))
return (0);
/* It looks like an update is necessary, wait for exclusive access. */
if (force)
WT_RET(__wt_writelock(session, txn_global->scan_rwlock));
else if ((ret =
__wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
return (ret == EBUSY ? 0 : ret);
/*
* If the oldest ID has been updated while we waited, don't bother
* scanning.
*/
if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
WT_TXNID_LE(last_running, txn_global->last_running))
goto done;
/*
* Re-scan now that we have exclusive access. This is necessary because
* threads get transaction snapshots with read locks, and we have to be
* sure that there isn't a thread that has got a snapshot locally but
* not yet published its snap_min.
*/
__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
#ifdef HAVE_DIAGNOSTIC
/*
* Make sure the ID doesn't move past any named
* snapshots.
*
* Don't include the read/assignment in the assert
* statement. Coverity complains if there are
* assignments only done in diagnostic builds, and
* when the read is from a volatile.
*/
id = txn_global->nsnap_oldest_id;
WT_ASSERT(session,
id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
{
/*
* Make sure the ID doesn't move past any named snapshots.
*
* Don't include the read/assignment in the assert statement. Coverity
* complains if there are assignments only done in diagnostic builds,
* and when the read is from a volatile.
*/
uint64_t id = txn_global->nsnap_oldest_id;
WT_ASSERT(session,
id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
}
#endif
if (WT_TXNID_LT(txn_global->last_running, last_running))
txn_global->last_running = last_running;
if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
txn_global->oldest_id = oldest_id;
WT_ASSERT(session, txn_global->scan_count == -1);
txn_global->scan_count = 0;
} else {
/*
* We wanted to update the oldest ID but we're racing
* another thread. Retry if this is a forced update.
*/
WT_ASSERT(session, txn_global->scan_count > 0);
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
if (force) {
__wt_yield();
goto retry;
}
}
} else {
/* Update the oldest ID. */
if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
txn_global->oldest_id = oldest_id;
if (WT_TXNID_LT(txn_global->last_running, last_running)) {
txn_global->last_running = last_running;
/* Output a verbose message about long-running transactions,
* but only when some progress is being made. */
if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
current_id - oldest_id > 10000 && oldest_session != NULL) {
(void)__wt_verbose(session, WT_VERB_TRANSACTION,
WT_TRET(__wt_verbose(session, WT_VERB_TRANSACTION,
"old snapshot %" PRIu64
" pinned in session %" PRIu32 " [%s]"
" with snap_min %" PRIu64 "\n",
oldest_id, oldest_session->id,
oldest_session->lastop,
oldest_session->txn.snap_min);
oldest_session->txn.snap_min));
}
WT_ASSERT(session, txn_global->scan_count > 0);
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
}
done: WT_TRET(__wt_writeunlock(session, txn_global->scan_rwlock));
return (ret);
}
/*
@@ -735,6 +741,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(session,
&txn_global->id_lock, "transaction id lock"));
WT_RET(__wt_rwlock_alloc(session,
&txn_global->scan_rwlock, "transaction scan lock"));
WT_RET(__wt_rwlock_alloc(session,
&txn_global->nsnap_rwlock, "named snapshot lock"));
txn_global->nsnap_oldest_id = WT_TXN_NONE;
@@ -768,6 +776,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
return (0);
__wt_spin_destroy(session, &txn_global->id_lock);
WT_TRET(__wt_rwlock_destroy(session, &txn_global->scan_rwlock));
WT_TRET(__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock));
__wt_free(session, txn_global->states);

View File

@@ -404,7 +404,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* This is particularly important for compact, so that all dirty pages
* can be fully written.
*/
__wt_txn_update_oldest(session, true);
WT_ERR(__wt_txn_update_oldest(session, true));
/* Flush data-sources before we start the checkpoint. */
WT_ERR(__checkpoint_data_source(session, cfg));
@@ -792,6 +792,9 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
hot_backup_locked = false;
name_alloc = NULL;
/* Only referenced in diagnostic builds. */
WT_UNUSED(is_checkpoint);
/*
* Only referenced in diagnostic builds and gcc 5.1 isn't satisfied
* with wrapping the entire assert condition in the unused macro.
@@ -1281,7 +1284,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
* for active readers.
*/
if (!btree->modified && !bulk) {
__wt_txn_update_oldest(session, true);
WT_RET(__wt_txn_update_oldest(session, true));
return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
__wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY);
}