Compare commits
69 Commits
mongodb-3.
...
mongodb-3.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
234b68b116 | ||
|
|
5d215904c3 | ||
|
|
18879587af | ||
|
|
6bfcb1ca5b | ||
|
|
71c0588a77 | ||
|
|
58765850aa | ||
|
|
30d327f810 | ||
|
|
88b898e7cb | ||
|
|
7ea2631de2 | ||
|
|
039fe06082 | ||
|
|
43e885a0f9 | ||
|
|
5cdd3e320c | ||
|
|
563b7823f7 | ||
|
|
5e3a56f0ab | ||
|
|
bc929dbcf1 | ||
|
|
07966a492a | ||
|
|
67e412d4c5 | ||
|
|
3c2ad56b50 | ||
|
|
b1768d0d9f | ||
|
|
2893117baa | ||
|
|
4380cec93d | ||
|
|
21b5f9951e | ||
|
|
decd9166cc | ||
|
|
d835a0c0a8 | ||
|
|
48e1343e40 | ||
|
|
eb838c7f12 | ||
|
|
a6957512a4 | ||
|
|
197eef00fd | ||
|
|
7a4f3259b4 | ||
|
|
8326df6b76 | ||
|
|
b65381f64c | ||
|
|
0019262fed | ||
|
|
4d72349b8a | ||
|
|
4898aa408f | ||
|
|
9d375e3416 | ||
|
|
d9ec1ff8ec | ||
|
|
465dca8b46 | ||
|
|
f95877af13 | ||
|
|
62c1a7aa36 | ||
|
|
0dc3f20df6 | ||
|
|
0537648e03 | ||
|
|
3c856645c8 | ||
|
|
10208e8284 | ||
|
|
16e3e48d98 | ||
|
|
5205bb1f0f | ||
|
|
dca63120b7 | ||
|
|
0cccab30c0 | ||
|
|
578a856c19 | ||
|
|
a85c5cda41 | ||
|
|
6da2dc175b | ||
|
|
7ffa315e39 | ||
|
|
26d1ad271f | ||
|
|
fdedd3621c | ||
|
|
4187f419f8 | ||
|
|
42823c9682 | ||
|
|
fbaf1cf4f5 | ||
|
|
3d845c98cb | ||
|
|
1d2fe8a145 | ||
|
|
bdaaaec87d | ||
|
|
35cc116acd | ||
|
|
cbe0fad3e9 | ||
|
|
4f9aa1c548 | ||
|
|
1f44c05f91 | ||
|
|
e31aa8cf29 | ||
|
|
c90bc747e1 | ||
|
|
2c1b7aa80b | ||
|
|
41762ae13c | ||
|
|
f7691f63a6 | ||
|
|
9be5497753 |
2523
NEWS.MONGODB
Normal file
2523
NEWS.MONGODB
Normal file
File diff suppressed because it is too large
Load Diff
@@ -325,7 +325,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
|
||||
valid = false;
|
||||
if (F_ISSET(cbt, WT_CBT_ACTIVE) &&
|
||||
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
|
||||
__wt_txn_cursor_op(session);
|
||||
WT_ERR(__wt_txn_cursor_op(session));
|
||||
|
||||
WT_ERR(btree->type == BTREE_ROW ?
|
||||
__cursor_row_search(session, cbt, cbt->ref, false) :
|
||||
@@ -405,7 +405,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
|
||||
if (btree->type == BTREE_ROW &&
|
||||
F_ISSET(cbt, WT_CBT_ACTIVE) &&
|
||||
cbt->ref->page->read_gen != WT_READGEN_OLDEST) {
|
||||
__wt_txn_cursor_op(session);
|
||||
WT_ERR(__wt_txn_cursor_op(session));
|
||||
|
||||
WT_ERR(__cursor_row_search(session, cbt, cbt->ref, true));
|
||||
|
||||
|
||||
@@ -326,7 +326,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
__wt_page_evict_soon(page);
|
||||
|
||||
/* Bump the oldest ID, we're about to do some visibility checks. */
|
||||
__wt_txn_update_oldest(session, false);
|
||||
WT_RET(__wt_txn_update_oldest(session, false));
|
||||
|
||||
/* If eviction cannot succeed, don't try. */
|
||||
return (__wt_page_can_evict(session, ref, NULL));
|
||||
|
||||
@@ -81,7 +81,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
|
||||
if (__wt_page_is_modified(page) &&
|
||||
WT_TXNID_LT(page->modify->update_txn, oldest_id)) {
|
||||
if (txn->isolation == WT_ISO_READ_COMMITTED)
|
||||
__wt_txn_get_snapshot(session);
|
||||
WT_ERR(__wt_txn_get_snapshot(session));
|
||||
leaf_bytes += page->memory_footprint;
|
||||
++leaf_pages;
|
||||
WT_ERR(__wt_reconcile(session, walk, NULL, 0));
|
||||
@@ -100,7 +100,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
|
||||
* the metadata shouldn't be that big, and (b) if we do ever
|
||||
*/
|
||||
if (txn->isolation == WT_ISO_READ_COMMITTED)
|
||||
__wt_txn_get_snapshot(session);
|
||||
WT_ERR(__wt_txn_get_snapshot(session));
|
||||
|
||||
/*
|
||||
* We cannot check the tree modified flag in the case of a
|
||||
|
||||
@@ -93,7 +93,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
|
||||
* transaction ID will catch up with the current ID.
|
||||
*/
|
||||
for (;;) {
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_TRET(__wt_txn_update_oldest(session, true));
|
||||
if (txn_global->oldest_id == txn_global->current)
|
||||
break;
|
||||
__wt_yield();
|
||||
|
||||
@@ -16,7 +16,7 @@ static int
|
||||
__curds_txn_enter(WT_SESSION_IMPL *session)
|
||||
{
|
||||
session->ncursors++; /* XXX */
|
||||
__wt_txn_cursor_op(session);
|
||||
WT_RET(__wt_txn_cursor_op(session));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -26,7 +26,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
|
||||
WT_RET(__wt_evict_file_exclusive_on(session));
|
||||
|
||||
/* Make sure the oldest transaction ID is up-to-date. */
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_RET(__wt_txn_update_oldest(session, true));
|
||||
|
||||
/* Walk the tree, discarding pages. */
|
||||
next_ref = NULL;
|
||||
|
||||
@@ -594,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session)
|
||||
* of whether the cache is full, to prevent the oldest ID
|
||||
* falling too far behind.
|
||||
*/
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_RET(__wt_txn_update_oldest(session, loop > 0));
|
||||
|
||||
if (!__evict_update_work(session))
|
||||
break;
|
||||
|
||||
@@ -420,7 +420,7 @@ __evict_review(
|
||||
* fallen behind current.
|
||||
*/
|
||||
if (modified)
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_RET(__wt_txn_update_oldest(session, false));
|
||||
|
||||
if (!__wt_page_can_evict(session, ref, inmem_splitp))
|
||||
return (EBUSY);
|
||||
|
||||
@@ -270,7 +270,7 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
|
||||
* to read.
|
||||
*/
|
||||
if (!F_ISSET(cbt, WT_CBT_NO_TXN))
|
||||
__wt_txn_cursor_op(session);
|
||||
WT_RET(__wt_txn_cursor_op(session));
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
@@ -676,8 +676,8 @@ extern void __wt_stat_join_clear_single(WT_JOIN_STATS *stats);
|
||||
extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats);
|
||||
extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to);
|
||||
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
|
||||
extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
|
||||
extern void __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force);
|
||||
extern int __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
|
||||
extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force);
|
||||
extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]);
|
||||
extern void __wt_txn_release(WT_SESSION_IMPL *session);
|
||||
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]);
|
||||
|
||||
@@ -306,6 +306,12 @@ __wt_fair_lock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
|
||||
__wt_sleep(0, 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* Applications depend on a barrier here so that operations holding the
|
||||
* lock see consistent data.
|
||||
*/
|
||||
WT_READ_BARRIER();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -318,6 +324,12 @@ __wt_fair_unlock(WT_SESSION_IMPL *session, WT_FAIR_LOCK *lock)
|
||||
{
|
||||
WT_UNUSED(session);
|
||||
|
||||
/*
|
||||
* Ensure that all updates made while the lock was held are visible to
|
||||
* the next thread to acquire the lock.
|
||||
*/
|
||||
WT_WRITE_BARRIER();
|
||||
|
||||
/*
|
||||
* We have exclusive access - the update does not need to be atomic.
|
||||
*/
|
||||
|
||||
@@ -306,7 +306,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
|
||||
if (!__wt_txn_visible_all(session, txn)) {
|
||||
/* Try to move the oldest ID forward and re-check. */
|
||||
__wt_txn_update_oldest(session, false);
|
||||
WT_RET(__wt_txn_update_oldest(session, false));
|
||||
|
||||
if (!__wt_txn_visible_all(session, txn))
|
||||
return (0);
|
||||
|
||||
@@ -74,7 +74,7 @@ struct __wt_txn_global {
|
||||
volatile uint64_t current; /* Current transaction ID. */
|
||||
|
||||
/* The oldest running transaction ID (may race). */
|
||||
uint64_t last_running;
|
||||
volatile uint64_t last_running;
|
||||
|
||||
/*
|
||||
* The oldest transaction ID that is not yet visible to some
|
||||
@@ -82,8 +82,11 @@ struct __wt_txn_global {
|
||||
*/
|
||||
volatile uint64_t oldest_id;
|
||||
|
||||
/* Count of scanning threads, or -1 for exclusive access. */
|
||||
volatile int32_t scan_count;
|
||||
/*
|
||||
* Prevents the oldest ID moving forwards while threads are scanning
|
||||
* the global transaction state.
|
||||
*/
|
||||
WT_RWLOCK *scan_rwlock;
|
||||
|
||||
/*
|
||||
* Track information about the running checkpoint. The transaction
|
||||
|
||||
@@ -261,14 +261,14 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
* eviction, it's better to do it beforehand.
|
||||
*/
|
||||
WT_RET(__wt_cache_eviction_check(session, false, NULL));
|
||||
|
||||
__wt_txn_get_snapshot(session);
|
||||
WT_RET(__wt_txn_get_snapshot(session));
|
||||
}
|
||||
|
||||
F_SET(txn, WT_TXN_RUNNING);
|
||||
if (F_ISSET(S2C(session), WT_CONN_READONLY))
|
||||
F_SET(txn, WT_TXN_READONLY);
|
||||
return (false);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -450,7 +450,7 @@ __wt_txn_read_last(WT_SESSION_IMPL *session)
|
||||
* __wt_txn_cursor_op --
|
||||
* Called for each cursor operation.
|
||||
*/
|
||||
static inline void
|
||||
static inline int
|
||||
__wt_txn_cursor_op(WT_SESSION_IMPL *session)
|
||||
{
|
||||
WT_TXN *txn;
|
||||
@@ -482,7 +482,9 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
|
||||
if (txn_state->snap_min == WT_TXN_NONE)
|
||||
txn_state->snap_min = txn_global->last_running;
|
||||
} else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
|
||||
__wt_txn_get_snapshot(session);
|
||||
WT_RET(__wt_txn_get_snapshot(session));
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
|
||||
#include "wt_internal.h"
|
||||
|
||||
static int __log_openfile(
|
||||
WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
|
||||
static int __log_write_internal(
|
||||
WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
|
||||
|
||||
@@ -93,8 +95,9 @@ __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn)
|
||||
int
|
||||
__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
|
||||
{
|
||||
WT_LOG *log;
|
||||
WT_DECL_RET;
|
||||
WT_FH *log_fh;
|
||||
WT_LOG *log;
|
||||
|
||||
log = S2C(session)->log;
|
||||
|
||||
@@ -129,12 +132,21 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
|
||||
* Sync the log file if needed.
|
||||
*/
|
||||
if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
|
||||
/*
|
||||
* Get our own file handle to the log file. It is possible
|
||||
* for the file handle in the log structure to change out
|
||||
* from under us and either be NULL or point to a different
|
||||
* file than we want.
|
||||
*/
|
||||
WT_ERR(__log_openfile(session,
|
||||
false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file));
|
||||
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
|
||||
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
|
||||
log->log_fh->name, min_lsn->l.file, min_lsn->l.offset));
|
||||
WT_ERR(__wt_fsync(session, log->log_fh, true));
|
||||
log_fh->name, min_lsn->l.file, min_lsn->l.offset));
|
||||
WT_ERR(__wt_fsync(session, log_fh, true));
|
||||
log->sync_lsn = *min_lsn;
|
||||
WT_STAT_FAST_CONN_INCR(session, log_sync);
|
||||
WT_ERR(__wt_close(session, &log_fh));
|
||||
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
|
||||
}
|
||||
err:
|
||||
@@ -2128,9 +2140,18 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
|
||||
* We need to flush out the current slot first to get the real
|
||||
* end of log LSN in log->alloc_lsn.
|
||||
*/
|
||||
WT_RET(__wt_log_flush_lsn(session, &lsn, 0));
|
||||
WT_RET(__wt_log_flush_lsn(session, &lsn, false));
|
||||
last_lsn = log->alloc_lsn;
|
||||
|
||||
/*
|
||||
* If the last write caused a switch to a new log file, we should only
|
||||
* wait for the last write to be flushed. Otherwise, if the workload
|
||||
* is single-threaded we could wait here forever because the write LSN
|
||||
* doesn't switch into the new file until it contains a record.
|
||||
*/
|
||||
if (last_lsn.l.offset == WT_LOG_FIRST_RECORD)
|
||||
last_lsn = log->log_close_lsn;
|
||||
|
||||
/*
|
||||
* Wait until all current outstanding writes have been written
|
||||
* to the file system.
|
||||
|
||||
@@ -210,7 +210,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
|
||||
goto open;
|
||||
|
||||
if (txn->isolation == WT_ISO_SNAPSHOT)
|
||||
__wt_txn_cursor_op(session);
|
||||
WT_RET(__wt_txn_cursor_op(session));
|
||||
|
||||
/*
|
||||
* Figure out how many updates are required for
|
||||
|
||||
@@ -289,7 +289,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
|
||||
}
|
||||
|
||||
/* Stop if a running transaction needs the chunk. */
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_RET(__wt_txn_update_oldest(session, true));
|
||||
if (chunk->switch_txn == WT_TXN_NONE ||
|
||||
!__wt_txn_visible_all(session, chunk->switch_txn)) {
|
||||
WT_RET(__wt_verbose(session, WT_VERB_LSM,
|
||||
|
||||
@@ -183,6 +183,8 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
|
||||
session, WT_VERB_MUTEX, "rwlock: readlock %s", rwlock->name));
|
||||
WT_STAT_FAST_CONN_INCR(session, rwlock_read);
|
||||
|
||||
WT_DIAGNOSTIC_YIELD;
|
||||
|
||||
l = &rwlock->rwlock;
|
||||
|
||||
/*
|
||||
@@ -213,6 +215,12 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
|
||||
*/
|
||||
++l->s.readers;
|
||||
|
||||
/*
|
||||
* Applications depend on a barrier here so that operations holding the
|
||||
* lock see consistent data.
|
||||
*/
|
||||
WT_READ_BARRIER();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -306,6 +314,12 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
|
||||
__wt_sleep(0, 10);
|
||||
}
|
||||
|
||||
/*
|
||||
* Applications depend on a barrier here so that operations holding the
|
||||
* lock see consistent data.
|
||||
*/
|
||||
WT_READ_BARRIER();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -316,31 +330,32 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
|
||||
int
|
||||
__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
|
||||
{
|
||||
wt_rwlock_t *l, copy;
|
||||
wt_rwlock_t *l, new;
|
||||
|
||||
WT_RET(__wt_verbose(
|
||||
session, WT_VERB_MUTEX, "rwlock: writeunlock %s", rwlock->name));
|
||||
|
||||
/*
|
||||
* Ensure that all updates made while the lock was held are visible to
|
||||
* the next thread to acquire the lock.
|
||||
*/
|
||||
WT_WRITE_BARRIER();
|
||||
|
||||
l = &rwlock->rwlock;
|
||||
|
||||
copy = *l;
|
||||
new = *l;
|
||||
|
||||
/*
|
||||
* We're the only writer of the writers/readers fields, so the update
|
||||
* does not need to be atomic; we have to update both values at the
|
||||
* same time though, otherwise we'd potentially race with the thread
|
||||
* next granted the lock.
|
||||
*
|
||||
* Use a memory barrier to ensure the compiler doesn't mess with these
|
||||
* instructions and rework the code in a way that avoids the update as
|
||||
* a unit.
|
||||
*/
|
||||
WT_BARRIER();
|
||||
++new.s.writers;
|
||||
++new.s.readers;
|
||||
l->i.wr = new.i.wr;
|
||||
|
||||
++copy.s.writers;
|
||||
++copy.s.readers;
|
||||
|
||||
l->i.wr = copy.i.wr;
|
||||
WT_DIAGNOSTIC_YIELD;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -769,3 +769,10 @@ FUNC_START(__crc32_vpmsum)
|
||||
|
||||
FUNC_END(__crc32_vpmsum)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Make sure the stack isn't executable with GCC (regardless of platform).
|
||||
*/
|
||||
#ifndef __clang__
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
#endif
|
||||
|
||||
243
src/txn/txn.c
243
src/txn/txn.c
@@ -108,17 +108,17 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
|
||||
* __wt_txn_get_snapshot --
|
||||
* Allocate a snapshot.
|
||||
*/
|
||||
void
|
||||
int
|
||||
__wt_txn_get_snapshot(WT_SESSION_IMPL *session)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_DECL_RET;
|
||||
WT_TXN *txn;
|
||||
WT_TXN_GLOBAL *txn_global;
|
||||
WT_TXN_STATE *s, *txn_state;
|
||||
uint64_t current_id, id;
|
||||
uint64_t prev_oldest_id, snap_min;
|
||||
uint32_t i, n, session_cnt;
|
||||
int32_t count;
|
||||
|
||||
conn = S2C(session);
|
||||
txn = &session->txn;
|
||||
@@ -126,15 +126,13 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
|
||||
txn_state = WT_SESSION_TXN_STATE(session);
|
||||
|
||||
/*
|
||||
* We're going to scan. Increment the count of scanners to prevent the
|
||||
* oldest ID from moving forwards. Spin if the count is negative,
|
||||
* which indicates that some thread is moving the oldest ID forwards.
|
||||
* Spin waiting for the lock: the sleeps in our blocking readlock
|
||||
* implementation are too slow for scanning the transaction table.
|
||||
*/
|
||||
do {
|
||||
if ((count = txn_global->scan_count) < 0)
|
||||
WT_PAUSE();
|
||||
} while (count < 0 ||
|
||||
!__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1));
|
||||
while ((ret =
|
||||
__wt_try_readlock(session, txn_global->scan_rwlock)) == EBUSY)
|
||||
WT_PAUSE();
|
||||
WT_RET(ret);
|
||||
|
||||
current_id = snap_min = txn_global->current;
|
||||
prev_oldest_id = txn_global->oldest_id;
|
||||
@@ -145,11 +143,9 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
|
||||
__txn_sort_snapshot(session, 0, current_id);
|
||||
|
||||
/* Check that the oldest ID has not moved in the meantime. */
|
||||
if (prev_oldest_id == txn_global->oldest_id) {
|
||||
WT_ASSERT(session, txn_global->scan_count > 0);
|
||||
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
|
||||
return;
|
||||
}
|
||||
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
|
||||
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Walk the array of concurrent transactions. */
|
||||
@@ -182,67 +178,35 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
|
||||
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
|
||||
txn_state->snap_min = snap_min;
|
||||
|
||||
WT_ASSERT(session, txn_global->scan_count > 0);
|
||||
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
|
||||
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
|
||||
|
||||
__txn_sort_snapshot(session, n, current_id);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_txn_update_oldest --
|
||||
* Sweep the running transactions to update the oldest ID required.
|
||||
* !!!
|
||||
* If a data-source is calling the WT_EXTENSION_API.transaction_oldest
|
||||
* method (for the oldest transaction ID not yet visible to a running
|
||||
* transaction), and then comparing that oldest ID against committed
|
||||
* transactions to see if updates for a committed transaction are still
|
||||
* visible to running transactions, the oldest transaction ID may be
|
||||
* the same as the last committed transaction ID, if the transaction
|
||||
* state wasn't refreshed after the last transaction committed. Push
|
||||
* past the last committed transaction.
|
||||
*/
|
||||
void
|
||||
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
|
||||
* __txn_oldest_scan --
|
||||
* Sweep the running transactions to calculate the oldest ID required.
|
||||
*/
|
||||
static void
|
||||
__txn_oldest_scan(WT_SESSION_IMPL *session,
|
||||
uint64_t *oldest_idp, uint64_t *last_runningp,
|
||||
WT_SESSION_IMPL **oldest_sessionp)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_SESSION_IMPL *oldest_session;
|
||||
WT_TXN_GLOBAL *txn_global;
|
||||
WT_TXN_STATE *s;
|
||||
uint64_t current_id, id, last_running, oldest_id, prev_oldest_id;
|
||||
uint64_t id, last_running, oldest_id, prev_oldest_id;
|
||||
uint32_t i, session_cnt;
|
||||
int32_t count;
|
||||
bool last_running_moved;
|
||||
|
||||
conn = S2C(session);
|
||||
txn_global = &conn->txn_global;
|
||||
|
||||
retry:
|
||||
current_id = last_running = txn_global->current;
|
||||
oldest_session = NULL;
|
||||
|
||||
/* The oldest ID cannot change while we are holding the scan lock. */
|
||||
prev_oldest_id = txn_global->oldest_id;
|
||||
|
||||
/*
|
||||
* For pure read-only workloads, or if the update isn't forced and the
|
||||
* oldest ID isn't too far behind, avoid scanning.
|
||||
*/
|
||||
if (prev_oldest_id == current_id ||
|
||||
(!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* We're going to scan. Increment the count of scanners to prevent the
|
||||
* oldest ID from moving forwards. Spin if the count is negative,
|
||||
* which indicates that some thread is moving the oldest ID forwards.
|
||||
*/
|
||||
do {
|
||||
if ((count = txn_global->scan_count) < 0)
|
||||
WT_PAUSE();
|
||||
} while (count < 0 ||
|
||||
!__wt_atomic_casiv32(&txn_global->scan_count, count, count + 1));
|
||||
|
||||
/* The oldest ID cannot change until the scan count goes to zero. */
|
||||
prev_oldest_id = txn_global->oldest_id;
|
||||
current_id = oldest_id = last_running = txn_global->current;
|
||||
oldest_id = last_running = txn_global->current;
|
||||
|
||||
/* Walk the array of concurrent transactions. */
|
||||
WT_ORDERED_READ(session_cnt, conn->session_cnt);
|
||||
@@ -264,7 +228,7 @@ retry:
|
||||
* !!!
|
||||
* Note: Don't ignore snap_min values older than the previous
|
||||
* oldest ID. Read-uncommitted operations publish snap_min
|
||||
* values without incrementing scan_count to protect the global
|
||||
* values without acquiring the scan lock to protect the global
|
||||
* table. See the comment in __wt_txn_cursor_op for
|
||||
* more details.
|
||||
*/
|
||||
@@ -283,76 +247,118 @@ retry:
|
||||
WT_TXNID_LT(id, oldest_id))
|
||||
oldest_id = id;
|
||||
|
||||
/* Update the last running ID. */
|
||||
last_running_moved =
|
||||
WT_TXNID_LT(txn_global->last_running, last_running);
|
||||
*oldest_idp = oldest_id;
|
||||
*oldest_sessionp = oldest_session;
|
||||
*last_runningp = last_running;
|
||||
}
|
||||
|
||||
/* Update the oldest ID. */
|
||||
if (WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) {
|
||||
/*
|
||||
* We know we want to update. Check if we're racing.
|
||||
*/
|
||||
if (__wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) {
|
||||
WT_ORDERED_READ(session_cnt, conn->session_cnt);
|
||||
for (i = 0, s = txn_global->states;
|
||||
i < session_cnt; i++, s++) {
|
||||
if ((id = s->id) != WT_TXN_NONE &&
|
||||
WT_TXNID_LT(id, last_running))
|
||||
last_running = id;
|
||||
if ((id = s->snap_min) != WT_TXN_NONE &&
|
||||
WT_TXNID_LT(id, oldest_id))
|
||||
oldest_id = id;
|
||||
}
|
||||
/*
|
||||
* __wt_txn_update_oldest --
|
||||
* Sweep the running transactions to update the oldest ID required.
|
||||
*/
|
||||
int
|
||||
__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_DECL_RET;
|
||||
WT_SESSION_IMPL *oldest_session;
|
||||
WT_TXN_GLOBAL *txn_global;
|
||||
uint64_t current_id, last_running, oldest_id;
|
||||
uint64_t prev_last_running, prev_oldest_id;
|
||||
|
||||
if (WT_TXNID_LT(last_running, oldest_id))
|
||||
oldest_id = last_running;
|
||||
conn = S2C(session);
|
||||
txn_global = &conn->txn_global;
|
||||
|
||||
current_id = last_running = txn_global->current;
|
||||
prev_last_running = txn_global->last_running;
|
||||
prev_oldest_id = txn_global->oldest_id;
|
||||
|
||||
/*
|
||||
* For pure read-only workloads, or if the update isn't forced and the
|
||||
* oldest ID isn't too far behind, avoid scanning.
|
||||
*/
|
||||
if (prev_oldest_id == current_id ||
|
||||
(!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
|
||||
return (0);
|
||||
|
||||
/* First do a read-only scan. */
|
||||
if (force)
|
||||
WT_RET(__wt_readlock(session, txn_global->scan_rwlock));
|
||||
else if ((ret =
|
||||
__wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
|
||||
return (ret == EBUSY ? 0 : ret);
|
||||
__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
|
||||
WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
|
||||
|
||||
/*
|
||||
* If the state hasn't changed (or hasn't moved far enough for
|
||||
* non-forced updates), give up.
|
||||
*/
|
||||
if ((oldest_id == prev_oldest_id ||
|
||||
(!force && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
|
||||
((last_running == prev_last_running) ||
|
||||
(!force && WT_TXNID_LT(last_running, prev_last_running + 100))))
|
||||
return (0);
|
||||
|
||||
/* It looks like an update is necessary, wait for exclusive access. */
|
||||
if (force)
|
||||
WT_RET(__wt_writelock(session, txn_global->scan_rwlock));
|
||||
else if ((ret =
|
||||
__wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
|
||||
return (ret == EBUSY ? 0 : ret);
|
||||
|
||||
/*
|
||||
* If the oldest ID has been updated while we waited, don't bother
|
||||
* scanning.
|
||||
*/
|
||||
if (WT_TXNID_LE(oldest_id, txn_global->oldest_id) &&
|
||||
WT_TXNID_LE(last_running, txn_global->last_running))
|
||||
goto done;
|
||||
|
||||
/*
|
||||
* Re-scan now that we have exclusive access. This is necessary because
|
||||
* threads get transaction snapshots with read locks, and we have to be
|
||||
* sure that there isn't a thread that has got a snapshot locally but
|
||||
* not yet published its snap_min.
|
||||
*/
|
||||
__txn_oldest_scan(session, &oldest_id, &last_running, &oldest_session);
|
||||
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
/*
|
||||
* Make sure the ID doesn't move past any named
|
||||
* snapshots.
|
||||
*
|
||||
* Don't include the read/assignment in the assert
|
||||
* statement. Coverity complains if there are
|
||||
* assignments only done in diagnostic builds, and
|
||||
* when the read is from a volatile.
|
||||
*/
|
||||
id = txn_global->nsnap_oldest_id;
|
||||
WT_ASSERT(session,
|
||||
id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
|
||||
{
|
||||
/*
|
||||
* Make sure the ID doesn't move past any named snapshots.
|
||||
*
|
||||
* Don't include the read/assignment in the assert statement. Coverity
|
||||
* complains if there are assignments only done in diagnostic builds,
|
||||
* and when the read is from a volatile.
|
||||
*/
|
||||
uint64_t id = txn_global->nsnap_oldest_id;
|
||||
WT_ASSERT(session,
|
||||
id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id));
|
||||
}
|
||||
#endif
|
||||
if (WT_TXNID_LT(txn_global->last_running, last_running))
|
||||
txn_global->last_running = last_running;
|
||||
if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
|
||||
txn_global->oldest_id = oldest_id;
|
||||
WT_ASSERT(session, txn_global->scan_count == -1);
|
||||
txn_global->scan_count = 0;
|
||||
} else {
|
||||
/*
|
||||
* We wanted to update the oldest ID but we're racing
|
||||
* another thread. Retry if this is a forced update.
|
||||
*/
|
||||
WT_ASSERT(session, txn_global->scan_count > 0);
|
||||
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
|
||||
if (force) {
|
||||
__wt_yield();
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* Update the oldest ID. */
|
||||
if (WT_TXNID_LT(txn_global->oldest_id, oldest_id))
|
||||
txn_global->oldest_id = oldest_id;
|
||||
if (WT_TXNID_LT(txn_global->last_running, last_running)) {
|
||||
txn_global->last_running = last_running;
|
||||
|
||||
/* Output a verbose message about long-running transactions,
|
||||
* but only when some progress is being made. */
|
||||
if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) &&
|
||||
current_id - oldest_id > 10000 && oldest_session != NULL) {
|
||||
(void)__wt_verbose(session, WT_VERB_TRANSACTION,
|
||||
WT_TRET(__wt_verbose(session, WT_VERB_TRANSACTION,
|
||||
"old snapshot %" PRIu64
|
||||
" pinned in session %" PRIu32 " [%s]"
|
||||
" with snap_min %" PRIu64 "\n",
|
||||
oldest_id, oldest_session->id,
|
||||
oldest_session->lastop,
|
||||
oldest_session->txn.snap_min);
|
||||
oldest_session->txn.snap_min));
|
||||
}
|
||||
WT_ASSERT(session, txn_global->scan_count > 0);
|
||||
(void)__wt_atomic_subiv32(&txn_global->scan_count, 1);
|
||||
}
|
||||
|
||||
done: WT_TRET(__wt_writeunlock(session, txn_global->scan_rwlock));
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -735,6 +741,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
|
||||
WT_RET(__wt_spin_init(session,
|
||||
&txn_global->id_lock, "transaction id lock"));
|
||||
WT_RET(__wt_rwlock_alloc(session,
|
||||
&txn_global->scan_rwlock, "transaction scan lock"));
|
||||
WT_RET(__wt_rwlock_alloc(session,
|
||||
&txn_global->nsnap_rwlock, "named snapshot lock"));
|
||||
txn_global->nsnap_oldest_id = WT_TXN_NONE;
|
||||
@@ -768,6 +776,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
|
||||
return (0);
|
||||
|
||||
__wt_spin_destroy(session, &txn_global->id_lock);
|
||||
WT_TRET(__wt_rwlock_destroy(session, &txn_global->scan_rwlock));
|
||||
WT_TRET(__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock));
|
||||
__wt_free(session, txn_global->states);
|
||||
|
||||
|
||||
@@ -404,7 +404,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
* This is particularly important for compact, so that all dirty pages
|
||||
* can be fully written.
|
||||
*/
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_ERR(__wt_txn_update_oldest(session, true));
|
||||
|
||||
/* Flush data-sources before we start the checkpoint. */
|
||||
WT_ERR(__checkpoint_data_source(session, cfg));
|
||||
@@ -792,6 +792,9 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
|
||||
hot_backup_locked = false;
|
||||
name_alloc = NULL;
|
||||
|
||||
/* Only referenced in diagnostic builds. */
|
||||
WT_UNUSED(is_checkpoint);
|
||||
|
||||
/*
|
||||
* Only referenced in diagnostic builds and gcc 5.1 isn't satisfied
|
||||
* with wrapping the entire assert condition in the unused macro.
|
||||
@@ -1281,7 +1284,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
|
||||
* for active readers.
|
||||
*/
|
||||
if (!btree->modified && !bulk) {
|
||||
__wt_txn_update_oldest(session, true);
|
||||
WT_RET(__wt_txn_update_oldest(session, true));
|
||||
return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
|
||||
__wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user