Merge branch 'mongodb-3.4' into mongodb-3.2

WT-3438 Don't tune eviction thread count when the count is fixed (#3519)
(cherry picked from commit 6173a98979) Signed-off-by: Alex Gorrod <alexander.gorrod@mongodb.com>
2017-09-20 15:14:40 +10:00 · 2017-09-07 08:50:54 +10:00 · 2017-08-15 13:06:09 -04:00 · 2017-08-15 12:55:23 -04:00 · 2017-08-03 15:32:26 +10:00 · 2017-08-03 15:29:53 +10:00
10 changed files with 103 additions and 1 deletions
--- a/bench/wtperf/runners/checkpoint_schema_race.wtperf
+++ b/bench/wtperf/runners/checkpoint_schema_race.wtperf
@@ -0,0 +1,20 @@
+# Check create and drop behavior concurrent with checkpoints (WT-2798).
+# Set up multiple tables and a cache size large enough that checkpoints can
+# take a long time.
+conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)"
+table_config="leaf_page_max=4k,internal_page_max=16k,type=file"
+icount=10000000
+table_count=100
+table_count_idle=100
+# Turn on create/drop of idle tables, but don't worry if individual operations
+# take a long time.
+idle_table_cycle=120
+populate_threads=5
+checkpoint_threads=0
+report_interval=5
+# 10 million
+random_range=10000000
+run_time=300
+# Set up a workload that dirties a lot of the cache.
+threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1))
+value_sz=500
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -22,6 +22,8 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)

 	*startp = false;

+	*startp = false;
+
 	conn = S2C(session);

 	WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -941,6 +941,13 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
 	conn = S2C(session);
 	cache = conn->cache;

+	/*
+	 * If we have a fixed number of eviction threads, there is no value in
+	 * calculating if we should do any tuning.
+	 */
+       if (conn->evict_threads_max == conn->evict_threads_min)
+		return (0);
+
 	WT_ASSERT(session, conn->evict_threads.threads[0]->session == session);
 	pgs_evicted_cur = pgs_evicted_persec_cur = 0;

--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -570,6 +570,7 @@ extern int __wt_schema_destroy_index(WT_SESSION_IMPL *session, WT_INDEX **idxp)
 extern int __wt_schema_destroy_table(WT_SESSION_IMPL *session, WT_TABLE **tablep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_schema_remove_table(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_schema_close_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_sweep_tables(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_schema_colgroup_name(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, size_t len, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_schema_open_index(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, size_t len, WT_INDEX **indexp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -98,6 +98,12 @@ struct __wt_session_impl {
 	 */
 	TAILQ_HEAD(__tables, __wt_table) tables;

+	/*
+	 * Updated when the table cache is swept of all tables older than the
+	 * current schema generation.
+	 */
+	uint64_t table_sweep_gen;
+
 	/* Current rwlock for callback. */
 	WT_RWLOCK *current_rwlock;
 	uint8_t current_rwticket;
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -93,6 +93,8 @@ struct __wt_txn_global {
 	 * the global transaction state.
 	 */
 	WT_RWLOCK scan_rwlock;
+	/* Protects logging, checkpoints and transaction visibility. */
+	WT_RWLOCK visibility_rwlock;

 	/*
 	 * Track information about the running checkpoint. The transaction
--- a/src/schema/schema_list.c
+++ b/src/schema/schema_list.c
@@ -249,3 +249,34 @@ __wt_schema_close_tables(WT_SESSION_IMPL *session)
 		WT_TRET(__wt_schema_remove_table(session, table));
 	return (ret);
 }
+
+/*
+ * __wt_schema_sweep_tables --
+ *	Remove idle tables not at the current schema generation from a session.
+ */
+int
+__wt_schema_sweep_tables(WT_SESSION_IMPL *session)
+{
+	WT_TABLE *table, *table_tmp;
+	uint64_t current_gen;
+	bool stale_in_use;
+
+	WT_ORDERED_READ(current_gen, S2C(session)->schema_gen);
+	if (session->table_sweep_gen == current_gen)
+		return (0);
+
+	stale_in_use = false;
+	TAILQ_FOREACH_SAFE(table, &session->tables, q, table_tmp) {
+		if (table->schema_gen == current_gen)
+			continue;
+		if (table->refcnt != 0)
+			stale_in_use = true;
+		else
+			WT_RET(__wt_schema_remove_table(session, table));
+	}
+
+	/* Record the sweep only if no stale table was skipped as busy. */
+	if (!stale_in_use)
+		session->table_sweep_gen = current_gen;
+	return (0);
+}
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -818,6 +818,8 @@ __session_reset(WT_SESSION *wt_session)

 	WT_TRET(__wt_session_reset_cursors(session, true));

+	WT_TRET(__wt_schema_sweep_tables(session));
+
 	/* Release common session resources. */
 	WT_TRET(__wt_session_release_resources(session));

--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -503,13 +503,17 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
 	WT_CONNECTION_IMPL *conn;
 	WT_DECL_RET;
 	WT_TXN *txn;
+	WT_TXN_GLOBAL *txn_global;
 	WT_TXN_OP *op;
 	u_int i;
-	bool did_update;
+	bool did_update, locked;

 	txn = &session->txn;
 	conn = S2C(session);
+	txn_global = &conn->txn_global;
 	did_update = txn->mod_count != 0;
+	locked = false;
+
 	WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);

 	if (!F_ISSET(txn, WT_TXN_RUNNING))
@@ -580,6 +584,14 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
 		 * This is particularly important for checkpoints.
 		 */
 		__wt_txn_release_snapshot(session);
+		/*
+		 * We hold the visibility lock for reading from the time
+		 * we write our log record until the time we release our
+		 * transaction so that the LSN any checkpoint gets will
+		 * always reflect visible data.
+		 */
+		__wt_readlock(session, &txn_global->visibility_rwlock);
+		locked = true;
 		ret = __wt_txn_log_commit(session, cfg);
 	}

@@ -590,8 +602,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
 	 * Nothing can fail after this point.
 	 */
 	if (ret != 0) {
+		if (locked)
+			__wt_readunlock(session,
+			    &txn_global->visibility_rwlock);
 		WT_TRET(__wt_txn_rollback(session, cfg));
 		return (ret);
+
 	}

 	/* Free memory associated with updates. */
@@ -600,6 +616,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
 	txn->mod_count = 0;

 	__wt_txn_release(session);
+	if (locked)
+		__wt_readunlock(session, &txn_global->visibility_rwlock);
 	return (0);
 }

@@ -770,6 +788,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
 	    &txn_global->id_lock, "transaction id lock"));
 	WT_RET(__wt_rwlock_init(session, &txn_global->scan_rwlock));
 	WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
+	WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
 	txn_global->nsnap_oldest_id = WT_TXN_NONE;
 	TAILQ_INIT(&txn_global->nsnaph);

@@ -801,6 +820,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
 	__wt_spin_destroy(session, &txn_global->id_lock);
 	__wt_rwlock_destroy(session, &txn_global->scan_rwlock);
 	__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock);
+	__wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
 	__wt_free(session, txn_global->states);
 }

--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -294,11 +294,13 @@ __wt_txn_checkpoint_log(
 	WT_ITEM *ckpt_snapshot, empty;
 	WT_LSN *ckpt_lsn;
 	WT_TXN *txn;
+	WT_TXN_GLOBAL *txn_global;
 	uint8_t *end, *p;
 	size_t recsize;
 	uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
 	const char *fmt = WT_UNCHECKED_STRING(IIIIu);

+	txn_global = &S2C(session)->txn_global;
 	txn = &session->txn;
 	ckpt_lsn = &txn->ckpt_lsn;

@@ -319,6 +321,15 @@ __wt_txn_checkpoint_log(
 	case WT_TXN_LOG_CKPT_PREPARE:
 		txn->full_ckpt = true;
 		WT_ERR(__wt_log_flush_lsn(session, ckpt_lsn, true));
+		/*
+		 * We take and immediately release the visibility lock.
+		 * Acquiring the write lock guarantees that any transaction
+		 * that has written to the log has also made its transaction
+		 * visible at this time.
+		 */
+		__wt_writelock(session, &txn_global->visibility_rwlock);
+		__wt_writeunlock(session, &txn_global->visibility_rwlock);
+
 		/*
 		 * We need to make sure that the log records in the checkpoint
 		 * LSN are on disk.  In particular to make sure that the