Compare commits

...

4 Commits

Author SHA1 Message Date
Michael Cahill
31af5d70a8 WT-3329 Visit trees using a tiny fraction of cache. (#3442)
For workloads where no tree takes up a large enough fraction of cache,
we were using a randomized approach to deciding when eviction should
visit trees.  That led to slow performance for workloads with uniform
updates over thousands of trees.
(cherry picked from commit 2f1ec98512)

Signed-off-by: Alex Gorrod <alexander.gorrod@mongodb.com>
2017-09-07 08:52:53 +10:00
David Hows
3579adf6c8 WT-3438 Don't tune eviction thread count when the count is fixed (#3519)
(cherry picked from commit 6173a98979)
Signed-off-by: Alex Gorrod <alexander.gorrod@mongodb.com>
2017-09-07 08:50:54 +10:00
sueloverso
3166629c1d WT-3499 Add a visibility rwlock between transactions and checkpoints. (#3575)
* WT-3499 Add a visibility rwlock between transactions and checkpoints.

* Typo

* Just acquire/release the lock immediately for synchronization.

(cherry picked from commit 80c6cee91f)
2017-08-15 13:06:09 -04:00
Michael Cahill
74510affec WT-3471 Sweep the table cache after schema changes. (#3551)
During WT_SESSION::reset, if there has been a schema change (such as a WT_SESSION::drop operation) since the last sweep, do a pass through the table cache and remove any obsolete table handles.
2017-08-03 15:29:53 +10:00
9 changed files with 88 additions and 32 deletions

View File

@@ -941,6 +941,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
/*
* If we have a fixed number of eviction threads, there is no value in
* calculating if we should do any tuning.
*/
if (conn->evict_threads_max == conn->evict_threads_min)
return (0);
WT_ASSERT(session, conn->evict_threads.threads[0]->session == session);
pgs_evicted_cur = pgs_evicted_persec_cur = 0;
@@ -1640,26 +1647,16 @@ __evict_walk_file(WT_SESSION_IMPL *session,
QUEUE_FILLS_PER_PASS;
/*
* Randomly walk trees with a small fraction of the cache in case there
* are so many trees that none of them use enough of the cache to be
* allocated slots.
*
* The chance of walking a tree is equal to the chance that a random
* byte in cache belongs to the tree, weighted by how many times we
* want to fill queues during a pass through all the trees in cache.
* Walk trees with a small fraction of the cache in case there are so
* many trees that none of them use enough of the cache to be allocated
* slots. Only skip a tree if it has no bytes of interest.
*/
if (target_pages == 0) {
if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN)) {
btree_inuse = __wt_btree_bytes_evictable(session);
cache_inuse = __wt_cache_bytes_inuse(cache);
} else {
btree_inuse = __wt_btree_dirty_leaf_inuse(session);
cache_inuse = __wt_cache_dirty_leaf_inuse(cache);
}
if (btree_inuse == 0 || cache_inuse == 0)
return (0);
if (__wt_random64(&session->rnd) % cache_inuse >
btree_inuse * QUEUE_FILLS_PER_PASS)
btree_inuse = F_ISSET(cache, WT_CACHE_EVICT_CLEAN) ?
__wt_btree_bytes_evictable(session) :
__wt_btree_dirty_leaf_inuse(session);
if (btree_inuse == 0)
return (0);
}

View File

@@ -570,6 +570,7 @@ extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp)
extern int __wt_schema_destroy_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_remove_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_close_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_sweep_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -691,7 +692,6 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern uint64_t __wt_random64(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));

View File

@@ -98,6 +98,12 @@ struct __wt_session_impl {
*/
TAILQ_HEAD(__tables, __wt_table) tables;
/*
* Updated when the table cache is swept of all tables older than the
* current schema generation.
*/
uint64_t table_sweep_gen;
/* Current rwlock for callback. */
WT_RWLOCK *current_rwlock;
uint8_t current_rwticket;

View File

@@ -93,6 +93,8 @@ struct __wt_txn_global {
* the global transaction state.
*/
WT_RWLOCK scan_rwlock;
/* Protects logging, checkpoints and transaction visibility. */
WT_RWLOCK visibility_rwlock;
/*
* Track information about the running checkpoint. The transaction

View File

@@ -249,3 +249,34 @@ __wt_schema_close_tables(WT_SESSION_IMPL *session)
WT_TRET(__wt_schema_remove_table(session, table));
return (ret);
}
/*
 * __wt_schema_sweep_tables --
 *	Discard the session's cached table handles that are both stale (their
 *	schema generation differs from the connection's current one) and idle
 *	(reference count of zero).
 */
int
__wt_schema_sweep_tables(WT_SESSION_IMPL *session)
{
	WT_TABLE *table, *table_next;
	uint64_t current_gen;
	bool sweep_complete;

	/* Nothing to do if we already swept at this schema generation. */
	WT_ORDERED_READ(current_gen, S2C(session)->schema_gen);
	if (session->table_sweep_gen == current_gen)
		return (0);

	sweep_complete = true;
	TAILQ_FOREACH_SAFE(table, &session->tables, q, table_next) {
		/* Handles from the current generation are kept. */
		if (table->schema_gen == current_gen)
			continue;

		/* A stale handle still in use has to wait for a later sweep. */
		if (table->refcnt != 0) {
			sweep_complete = false;
			continue;
		}

		WT_RET(__wt_schema_remove_table(session, table));
	}

	/*
	 * Only record the generation once every stale handle is gone, so a
	 * busy handle left behind is retried on the next call.
	 */
	if (sweep_complete)
		session->table_sweep_gen = current_gen;
	return (0);
}

View File

@@ -818,6 +818,8 @@ __session_reset(WT_SESSION *wt_session)
WT_TRET(__wt_session_reset_cursors(session, true));
WT_TRET(__wt_schema_sweep_tables(session));
/* Release common session resources. */
WT_TRET(__wt_session_release_resources(session));

View File

@@ -120,15 +120,3 @@ __wt_random(WT_RAND_STATE volatile * rnd_state)
return ((z << 16) + (w & 65535));
}
/*
 * __wt_random64 --
 *	Return a 64-bit pseudo-random number assembled from two successive
 *	32-bit draws: the first draw fills the upper 32 bits, the second the
 *	lower 32 bits.
 */
uint64_t
__wt_random64(WT_RAND_STATE volatile * rnd_state)
    WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
	uint64_t high, low;

	/*
	 * Make the two draws in separate statements. As operands of a single
	 * expression their evaluation order is unspecified, so which draw
	 * ended up in the upper half could differ between compilers and the
	 * sequence produced from a given state would not be reproducible.
	 */
	high = __wt_random(rnd_state);
	low = __wt_random(rnd_state);

	return ((high << 32) + low);
}

View File

@@ -503,13 +503,17 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_OP *op;
u_int i;
bool did_update;
bool did_update, locked;
txn = &session->txn;
conn = S2C(session);
txn_global = &conn->txn_global;
did_update = txn->mod_count != 0;
locked = false;
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);
if (!F_ISSET(txn, WT_TXN_RUNNING))
@@ -580,6 +584,14 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* This is particularly important for checkpoints.
*/
__wt_txn_release_snapshot(session);
/*
* We hold the visibility lock for reading from the time
* we write our log record until the time we release our
* transaction so that the LSN any checkpoint gets will
* always reflect visible data.
*/
__wt_readlock(session, &txn_global->visibility_rwlock);
locked = true;
ret = __wt_txn_log_commit(session, cfg);
}
@@ -590,6 +602,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* Nothing can fail after this point.
*/
if (ret != 0) {
if (locked)
__wt_readunlock(session,
&txn_global->visibility_rwlock);
WT_TRET(__wt_txn_rollback(session, cfg));
return (ret);
}
@@ -600,6 +615,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
txn->mod_count = 0;
__wt_txn_release(session);
if (locked)
__wt_readunlock(session, &txn_global->visibility_rwlock);
return (0);
}
@@ -770,6 +787,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
&txn_global->id_lock, "transaction id lock"));
WT_RET(__wt_rwlock_init(session, &txn_global->scan_rwlock));
WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
txn_global->nsnap_oldest_id = WT_TXN_NONE;
TAILQ_INIT(&txn_global->nsnaph);
@@ -801,6 +819,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
__wt_spin_destroy(session, &txn_global->id_lock);
__wt_rwlock_destroy(session, &txn_global->scan_rwlock);
__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock);
__wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
__wt_free(session, txn_global->states);
}

View File

@@ -294,11 +294,13 @@ __wt_txn_checkpoint_log(
WT_ITEM *ckpt_snapshot, empty;
WT_LSN *ckpt_lsn;
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
uint8_t *end, *p;
size_t recsize;
uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
const char *fmt = WT_UNCHECKED_STRING(IIIIu);
txn_global = &S2C(session)->txn_global;
txn = &session->txn;
ckpt_lsn = &txn->ckpt_lsn;
@@ -319,6 +321,15 @@ __wt_txn_checkpoint_log(
case WT_TXN_LOG_CKPT_PREPARE:
txn->full_ckpt = true;
WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
/*
* We take and immediately release the visibility lock.
* Acquiring the write lock guarantees that any transaction
* that has written to the log has also made its transaction
* visible at this time.
*/
__wt_writelock(session, &txn_global->visibility_rwlock);
__wt_writeunlock(session, &txn_global->visibility_rwlock);
/*
* We need to make sure that the log records in the checkpoint
* LSN are on disk. In particular to make sure that the