From e45d3e32b3e5301ad1a0d109f931f2119b9a4719 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 2 Sep 2014 12:04:30 -0400 Subject: [PATCH 001/132] Remove loop to walk replay iterator from LiveBackup. #1106 --- api/leveldb/hyper_wt.cc | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/api/leveldb/hyper_wt.cc b/api/leveldb/hyper_wt.cc index b147ff6fe75..95c82289e18 100644 --- a/api/leveldb/hyper_wt.cc +++ b/api/leveldb/hyper_wt.cc @@ -338,25 +338,6 @@ DbImpl::LiveBackup(const Slice& name) if ((t_ret = cursor->close(cursor)) != 0 && ret == 0) ret = t_ret; - // We only copied file contents that are on-disk. - // At this point we want to use a ReplayIterator to - // apply any in-memory operations. - DB* db; - leveldb::Options options; - ReplayIteratorImpl *iter = new ReplayIteratorImpl(context); - Status s = Open(options, backup, &db); - assert(s.ok()); - - while (iter->Valid()) { - if (iter->HasValue()) - s = db->Put(leveldb::WriteOptions(), - iter->key(), iter->value()); - else - s = db->Delete(leveldb::WriteOptions(), iter->key()); - iter->Next(); - } - delete iter; - delete db; return (WiredTigerErrorToStatus(ret)); } From 309e93e110c635cb45d5c6d651e653ad47c060eb Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 2 Sep 2014 14:22:20 -0400 Subject: [PATCH 002/132] Make load time seconds an integer, instead of a double. Simplify wtperf_run script. --- bench/wtperf/runners/wtperf_run.sh | 26 ++++++-------------------- bench/wtperf/wtperf.c | 15 +++++++-------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/bench/wtperf/runners/wtperf_run.sh b/bench/wtperf/runners/wtperf_run.sh index 3296a4072b5..16470f35dfa 100755 --- a/bench/wtperf/runners/wtperf_run.sh +++ b/bench/wtperf/runners/wtperf_run.sh @@ -79,7 +79,7 @@ while test "$run" -le "$runmax"; do if test "$?" -ne "0"; then exit 1 fi - # Load is always using floating point, so handle separately + # Load uses different text. Handle separately. 
l=`grep "^Load time:" ./WT_TEST/test.stat` if test "$?" -eq "0"; then load=`echo $l | cut -d ' ' -f 3` @@ -87,7 +87,7 @@ while test "$run" -le "$runmax"; do load=0 fi cur[$loadindex]=$load - sum[$loadindex]=`echo "${sum[$loadindex]} + $load" | bc` + sum[$loadindex]=`expr $load + ${sum[$loadindex]}` echo "cur ${cur[$loadindex]} sum ${sum[$loadindex]}" >> $outfile for i in ${!ops[*]}; do l=`grep "Executed.*${ops[$i]} operations" ./WT_TEST/test.stat` @@ -109,17 +109,8 @@ while test "$run" -le "$runmax"; do done else for i in ${!cur[*]}; do - if test "$i" -eq "$loadindex"; then - if (($(bc <<< "${cur[$i]} < ${min[$i]}") )); then - min[$i]=${cur[$i]} - fi - if (($(bc <<< "${cur[$i]} > ${max[$i]}") )); then - max[$i]=${cur[$i]} - fi - else - min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]}) - max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]}) - fi + min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]}) + max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]}) done fi # @@ -154,13 +145,8 @@ fi # Average the remaining and write it out to the file. 
# for i in ${!min[*]}; do - if test "$i" -eq "$loadindex"; then - s=`echo "scale=3; ${sum[$i]} - ${min[$i]} - ${max[$i]}" | bc` - avg[$i]=`echo "scale=3; $s / $numruns" | bc` - else - s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}` - avg[$i]=`expr $s / $numruns` - fi + s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}` + avg[$i]=`expr $s / $numruns` done for i in ${!outp[*]}; do echo "${outp[$i]} ${avg[$i]}" >> $outfile diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 41f95799c57..ad645e19596 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1199,9 +1199,8 @@ execute_populate(CONFIG *cfg) struct timespec start, stop; CONFIG_THREAD *popth; WT_ASYNC_OP *asyncop; - double secs; size_t i; - uint64_t last_ops; + uint64_t last_ops, secs; uint32_t interval, tables; int elapsed, ret; void *(*pfunc)(void *); @@ -1279,12 +1278,12 @@ execute_populate(CONFIG *cfg) } lprintf(cfg, 0, 1, "Finished load of %" PRIu32 " items", cfg->icount); - secs = stop.tv_sec + stop.tv_nsec / (double)BILLION; - secs -= start.tv_sec + start.tv_nsec / (double)BILLION; + secs = WT_TIMEDIFF(stop, start) / BILLION; if (secs == 0) ++secs; lprintf(cfg, 0, 1, - "Load time: %.2f\n" "load ops/sec: %.2f", secs, cfg->icount / secs); + "Load time: %" PRIu64 "\n" "load ops/sec: %" PRIu64, + secs, cfg->icount / secs); /* * If configured, compact to allow LSM merging to complete. 
We @@ -1324,9 +1323,9 @@ execute_populate(CONFIG *cfg) lprintf(cfg, ret, 0, "Get time failed in populate."); return (ret); } - secs = stop.tv_sec + stop.tv_nsec / (double)BILLION; - secs -= start.tv_sec + start.tv_nsec / (double)BILLION; - lprintf(cfg, 0, 1, "Compact completed in %.2f seconds", secs); + secs = WT_TIMEDIFF(stop, start) / BILLION; + lprintf(cfg, 0, 1, + "Compact completed in %" PRIu64 " seconds", secs); assert(tables == 0); } return (0); From 69f2c38ea6cf47223494afa36542ad80e201a58b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 3 Sep 2014 11:48:07 +1000 Subject: [PATCH 003/132] Use a condition variable in LSM worker threads to sleep when there is no work to do. --- src/include/lsm.h | 5 +++-- src/lsm/lsm_cursor.c | 8 +------- src/lsm/lsm_manager.c | 7 +++++++ src/lsm/lsm_tree.c | 2 -- src/lsm/lsm_worker.c | 7 ++++--- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index c6c68d1f901..888d01822fc 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -121,6 +121,7 @@ struct __wt_lsm_manager { WT_SPINLOCK switch_lock; /* Lock for switch queue */ WT_SPINLOCK app_lock; /* Lock for application queue */ WT_SPINLOCK manager_lock; /* Lock for manager queue */ + WT_CONDVAR *work_cond; /* Used to notify worker of activity */ uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; WT_LSM_WORKER_ARGS *lsm_worker_cookies; @@ -141,7 +142,6 @@ struct __wt_lsm_tree { int refcnt; /* Number of users of the tree */ int queue_ref; WT_RWLOCK *rwlock; - WT_CONDVAR *work_cond; /* Used to notify worker of activity */ TAILQ_ENTRY(__wt_lsm_tree) q; WT_DSRC_STATS stats; /* LSM-level statistics */ @@ -227,7 +227,8 @@ struct __wt_lsm_worker_cookie { */ struct __wt_lsm_worker_args { WT_SESSION_IMPL *session; - pthread_t tid; + WT_CONDVAR *work_cond; + pthread_t tid; u_int id; uint32_t flags; }; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index b4af1c0abf2..bbc5de7f13f 
100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -50,7 +50,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) WT_LSM_CHUNK *primary_chunk; WT_LSM_TREE *lsm_tree; WT_SESSION_IMPL *session; - int have_primary, need_signal, ovfl; + int have_primary, ovfl; lsm_tree = clsm->lsm_tree; if (clsm->nchunks == 0 || @@ -88,19 +88,14 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) * when only one switch is required, creating very * small chunks. */ - need_signal = 0; WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 0)); if (clsm->dsk_gen == lsm_tree->dsk_gen && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_SWITCH, lsm_tree)); F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH); - need_signal = 1; } WT_RET(__wt_lsm_tree_unlock(session, lsm_tree)); - if (need_signal) - WT_RET(__wt_cond_signal( - session, lsm_tree->work_cond)); ovfl = 0; } } else if (have_primary) @@ -422,7 +417,6 @@ __clsm_open_cursors( F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH); locked = 0; WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - WT_ERR(__wt_cond_signal(session, lsm_tree->work_cond)); /* * Give the worker thread a chance to run before locking the diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index b888ff36bed..955e3729fa9 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -48,6 +48,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) S2C(session), "lsm-worker", 1, 0, &worker_session)); worker_session->isolation = TXN_ISO_READ_UNCOMMITTED; cookies[i].session = worker_session; + cookies[i].work_cond = manager->work_cond; } /* Start the LSM manager thread. 
*/ @@ -149,6 +150,7 @@ __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &manager->switch_lock); __wt_spin_destroy(session, &manager->app_lock); __wt_spin_destroy(session, &manager->manager_lock); + WT_TRET(__wt_cond_destroy(session, &manager->work_cond)); __wt_free(session, manager->lsm_worker_cookies); @@ -212,6 +214,8 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) session, &manager->manager_lock, "LSM manager queue lock")); WT_RET(__wt_spin_init( session, &manager->switch_lock, "LSM switch queue lock")); + WT_RET(__wt_cond_alloc( + session, "LSM worker cond", 0, &manager->work_cond)); worker_args = &manager->lsm_worker_cookies[1]; worker_args->id = manager->lsm_workers++; @@ -267,6 +271,7 @@ __lsm_manager_worker_shutdown(WT_SESSION_IMPL *session) */ for (i = 1; i < manager->lsm_workers; i++) { WT_ASSERT(session, manager->lsm_worker_cookies[i].tid != 0); + WT_TRET(__wt_cond_signal(session, manager->work_cond)); WT_TRET(__wt_thread_join( session, manager->lsm_worker_cookies[i].tid)); } @@ -544,5 +549,7 @@ __wt_lsm_manager_push_entry( WT_ILLEGAL_VALUE(session); } + WT_RET(__wt_cond_signal(session, manager->work_cond)); + return (0); } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index e0a977acf07..30f97821a8c 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -35,7 +35,6 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) __wt_free(session, lsm_tree->file_config); WT_TRET(__wt_rwlock_destroy(session, &lsm_tree->rwlock)); - WT_TRET(__wt_cond_destroy(session, &lsm_tree->work_cond)); for (i = 0; i < lsm_tree->nchunks; i++) { if ((chunk = lsm_tree->chunk[i]) == NULL) @@ -442,7 +441,6 @@ __lsm_tree_open( WT_RET(__wt_calloc_def(session, 1, &lsm_tree)); WT_ERR(__wt_rwlock_alloc(session, &lsm_tree->rwlock, "lsm tree")); - WT_ERR(__wt_cond_alloc(session, "lsm ckpt", 0, &lsm_tree->work_cond)); WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri)); WT_ERR(__wt_lsm_meta_read(session, lsm_tree)); diff --git 
a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index d3018e4362a..ed1764fa6a9 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -86,9 +86,10 @@ __lsm_worker(void *arg) entry = NULL; while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { - /* Don't busy wait if there aren't any LSM trees. */ - if (TAILQ_EMPTY(&conn->lsmqh)) { - __wt_sleep(0, 10000); + /* Don't busy wait if there isn't any work to do. */ + if (cookie->flags == 0) { + WT_ERR( + __wt_cond_wait(session, cookie->work_cond, 10000)); continue; } From 6e9ff974884b40a622b7584a665ac710b9c6f041 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 3 Sep 2014 12:23:15 +1000 Subject: [PATCH 004/132] Have the LSM worker track whether it found work to figure out whether to sleep or keep spinning. --- src/include/lsm.h | 2 +- src/lsm/lsm_worker.c | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index 888d01822fc..e0197e01af6 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -227,7 +227,7 @@ struct __wt_lsm_worker_cookie { */ struct __wt_lsm_worker_args { WT_SESSION_IMPL *session; - WT_CONDVAR *work_cond; + WT_CONDVAR *work_cond; /* Owned by the manager */ pthread_t tid; u_int id; uint32_t flags; diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index ed1764fa6a9..54b95221793 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -78,7 +78,7 @@ __lsm_worker(void *arg) WT_LSM_WORK_UNIT *entry; WT_LSM_WORKER_ARGS *cookie; WT_SESSION_IMPL *session; - int ran; + int progress, ran; cookie = (WT_LSM_WORKER_ARGS *)arg; session = cookie->session; @@ -86,12 +86,7 @@ __lsm_worker(void *arg) entry = NULL; while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { - /* Don't busy wait if there isn't any work to do. 
*/ - if (cookie->flags == 0) { - WT_ERR( - __wt_cond_wait(session, cookie->work_cond, 10000)); - continue; - } + progress = 0; /* Switches are always a high priority */ while (F_ISSET(cookie, WT_LSM_WORK_SWITCH) && @@ -112,6 +107,7 @@ __lsm_worker(void *arg) if (ret == EBUSY) ret = 0; WT_ERR(ret); + progress = 1; } /* Flag an error if the pop failed. */ WT_ERR(ret); @@ -120,6 +116,7 @@ __lsm_worker(void *arg) if (ret == EBUSY || ret == WT_NOTFOUND) ret = 0; WT_ERR(ret); + progress = progress || ran; if (F_ISSET(cookie, WT_LSM_WORK_MERGE) && (ret = __wt_lsm_manager_pop_entry( @@ -137,9 +134,17 @@ __lsm_worker(void *arg) WT_CLEAR_BTREE_IN_SESSION(session); __wt_lsm_manager_free_work_unit(session, entry); entry = NULL; + progress = 1; } /* Flag an error if the pop failed. */ WT_ERR(ret); + + /* Don't busy wait if there isn't any work to do. */ + if (!progress) { + WT_ERR( + __wt_cond_wait(session, cookie->work_cond, 10000)); + continue; + } } if (ret != 0) { From ccdec898c39270a97c7ca7ea64446fc419b48e95 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 3 Sep 2014 12:37:27 +1000 Subject: [PATCH 005/132] Set the condition variable for LSM workers after allocating it in the manager. --- src/lsm/lsm_manager.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 955e3729fa9..2a30e6ba7a6 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -48,7 +48,6 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) S2C(session), "lsm-worker", 1, 0, &worker_session)); worker_session->isolation = TXN_ISO_READ_UNCOMMITTED; cookies[i].session = worker_session; - cookies[i].work_cond = manager->work_cond; } /* Start the LSM manager thread. 
*/ @@ -218,6 +217,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) session, "LSM worker cond", 0, &manager->work_cond)); worker_args = &manager->lsm_worker_cookies[1]; + worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; worker_args->flags = WT_LSM_WORK_SWITCH; /* Start the switch thread. */ @@ -233,6 +233,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) /* Freed by the worker thread when it shuts down */ worker_args = &manager->lsm_worker_cookies[manager->lsm_workers]; + worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers; worker_args->flags = WT_LSM_WORK_BLOOM | From 66030f268eefffdf094b1a29edf6406f016c62dc Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 3 Sep 2014 14:16:06 +1000 Subject: [PATCH 006/132] When LSM worker threads fault in new files, make sure they don't block indefinitely due to a full cache. --- src/lsm/lsm_merge.c | 9 ++++++++- src/lsm/lsm_work_unit.c | 12 ++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 70b1503377e..bf758abd6b1 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -341,6 +341,13 @@ __wt_lsm_merge( F_CLR(session, WT_SESSION_NO_CACHE); + /* + * We're doing advisory reads to fault the new trees into cache. + * Don't block if the cache is full: our next unit of work may be to + * discard some trees to free space. 
+ */ + F_SET(session, WT_SESSION_NO_CACHE_CHECK); + if (create_bloom) { if (ret == 0) WT_TRET(__wt_bloom_finalize(bloom)); @@ -450,7 +457,7 @@ err: if (locked) else WT_TRET(__wt_verbose(session, WT_VERB_LSM, "Merge failed with %s", wiredtiger_strerror(ret))); - F_CLR(session, WT_SESSION_NO_CACHE); } + F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_CACHE_CHECK); return (ret); } diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index fd40c0dac1f..d4d435aee75 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -334,7 +334,15 @@ __lsm_bloom_create(WT_SESSION_IMPL *session, F_CLR(session, WT_SESSION_NO_CACHE); - /* Load the new Bloom filter into cache. */ + /* + * Load the new Bloom filter into cache. + * + * We're doing advisory reads to fault the new trees into cache. + * Don't block if the cache is full: our next unit of work may be to + * discard some trees to free space. + */ + F_SET(session, WT_SESSION_NO_CACHE_CHECK); + WT_CLEAR(key); WT_ERR_NOTFOUND_OK(__wt_bloom_get(bloom, &key)); @@ -355,7 +363,7 @@ __lsm_bloom_create(WT_SESSION_IMPL *session, err: if (bloom != NULL) WT_TRET(__wt_bloom_close(bloom)); - F_CLR(session, WT_SESSION_NO_CACHE); + F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_CACHE_CHECK); return (ret); } From 119db91970a4f8460d422da7c87affef2e7d4387 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 3 Sep 2014 14:21:15 +1000 Subject: [PATCH 007/132] Don't starve LSM worker threads. There was a bug in the LSM worker code that stopped some worker threads getting work units if there was a different work unit type scheduled ahead of them. While here allow the dedicated switch thread to also do deletes, to free up pinned cache. 
--- src/lsm/lsm_manager.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 2a30e6ba7a6..c2a7ae20cb4 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -219,7 +219,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) worker_args = &manager->lsm_worker_cookies[1]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; - worker_args->flags = WT_LSM_WORK_SWITCH; + worker_args->flags = WT_LSM_WORK_SWITCH | WT_LSM_WORK_DROP; /* Start the switch thread. */ WT_RET(__wt_lsm_worker_start(session, worker_args)); @@ -331,6 +331,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) pushms > fillms) { WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_SWITCH, lsm_tree)); + WT_RET(__wt_lsm_manager_push_entry( + session, WT_LSM_WORK_DROP, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_FLUSH, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( @@ -338,11 +340,6 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_MERGE, lsm_tree)); } - if (lsm_tree->queue_ref == 0 && - lsm_tree->nold_chunks != 0) { - WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_DROP, lsm_tree)); - } } } @@ -491,13 +488,17 @@ __wt_lsm_manager_pop_entry( return (0); __wt_spin_lock(session, &manager->app_lock); - if (!TAILQ_EMPTY(&manager->appqh)) { - entry = TAILQ_FIRST(&manager->appqh); - WT_ASSERT(session, entry != NULL); - if (FLD_ISSET(type, entry->flags)) + /* + * Find and remove the first entry in the queue that matches the + * request. 
+ */ + for (entry = TAILQ_FIRST(&manager->appqh); + entry != NULL; + entry = TAILQ_NEXT(entry, q)) { + if (FLD_ISSET(type, entry->flags)) { TAILQ_REMOVE(&manager->appqh, entry, q); - else - entry = NULL; + break; + } } __wt_spin_unlock(session, &manager->app_lock); break; From 3d40ecb61aa86a2507d41fc97cc759ba13989306 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 3 Sep 2014 14:54:20 +1000 Subject: [PATCH 008/132] Now that begin/commit_transaction isn't resetting cursors, make test/format more proactive about doing it. --- test/format/ops.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/test/format/ops.c b/test/format/ops.c index fdf762289c4..8c5a75e57a3 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -219,9 +219,10 @@ ops(void *arg) /* * We can't checkpoint or swap sessions/cursors while in a - * transaction, resolve any running transaction. Otherwise, - * reset the cursor: we may block waiting for a lock and there - * is no reason to keep pages pinned. + * transaction, resolve any running transaction. + * + * Reset the cursor regardless: we may block waiting for a lock + * and there is no reason to keep pages pinned. */ if (cnt == ckpt_op || cnt == session_op) { if (intxn) { @@ -231,7 +232,7 @@ ops(void *arg) ++tinfo->commit; intxn = 0; } - else if (cursor != NULL && + if (cursor != NULL && (ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); } @@ -313,7 +314,7 @@ ops(void *arg) /* * If we're not single-threaded and we're not in a transaction, - * start a transaction 80% of the time. + * start a transaction 20% of the time. 
*/ if (!SINGLETHREADED && !intxn && MMRAND(1, 10) >= 8) { if ((ret = @@ -449,7 +450,8 @@ deadlock: ++tinfo->deadlock; } if ((ret = session->rollback_transaction( session, NULL)) != 0) - die(ret, "session.commit_transaction"); + die(ret, + "session.rollback_transaction"); ++tinfo->rollback; intxn = 0; break; From 16943c655a9f4d19d2cc5fa9281b31fb66459eab Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 2 Sep 2014 12:27:32 -0400 Subject: [PATCH 009/132] Alex's suggestion, break apart a large statement into an inline subroutine. --- src/btree/rec_write.c | 68 ++++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index d5564247afa..4687505df11 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -529,6 +529,52 @@ err: __wt_page_out(session, &next); return (ret); } +/* + * __rec_raw_compression_config -- + * Configure raw compression. + */ +static inline int +__rec_raw_compression_config( + WT_SESSION_IMPL *session, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage) +{ + WT_BTREE *btree; + + btree = S2BT(session); + + /* Check if raw compression configured. */ + if (btree->compressor == NULL || + btree->compressor->compress_raw == NULL) + return (0); + + /* Only for row-store and variable-length column-store objects. */ + if (page->type == WT_PAGE_COL_FIX) + return (0); + + /* + * Raw compression cannot support dictionary compression. (Technically, + * we could still use the raw callback on column-store variable length + * internal pages with dictionary compression configured, because + * dictionary compression only applies to column-store leaf pages, but + * that seems an unlikely use case.) + */ + if (btree->dictionary != 0) + return (0); + + /* Raw compression cannot support prefix compression. 
*/ + if (btree->prefix_compression != 0) + return (0); + + /* + * Raw compression is also turned off during salvage: we can't allow + * pages to split during salvage, raw compression has no point if it + * can't manipulate the page size. + */ + if (salvage != NULL) + return (0); + + return (1); +} + /* * __rec_write_init -- * Initialize the reconciliation structure. @@ -566,27 +612,9 @@ __rec_write_init(WT_SESSION_IMPL *session, /* Track if the page can be marked clean. */ r->leave_dirty = 0; - /* - * Raw compression, the application builds disk images: applicable only - * to row-and variable-length column-store objects. Dictionary and - * prefix compression must be turned off or we ignore raw-compression, - * raw compression can't support either one. (Technically, we could - * still use the raw callback on column-store variable length internal - * pages with dictionary compression configured, because dictionary - * compression only applies to column-store leaf pages, but that seems - * an unlikely use case.) - * - * Raw compression is also turned off during salvage: we can't allow - * pages to split during salvage, raw compression has no point if it - * can't manipulate the page size. - */ + /* Raw compression. */ r->raw_compression = - btree->compressor != NULL && - btree->compressor->compress_raw != NULL && - page->type != WT_PAGE_COL_FIX && - btree->dictionary == 0 && - btree->prefix_compression == 0 && - salvage == NULL; + __rec_raw_compression_config(session, page, salvage); r->raw_destination.flags = WT_ITEM_ALIGNED; /* Track overflow items. */ From abd3adf129f72fbcbf5ca230c8c042f9e080c57b Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 3 Sep 2014 08:40:25 -0400 Subject: [PATCH 010/132] The short key/value encoding Btree code requires a size_t be at least 8B, meaning we won't run on a 32-bit machine without some changes. 
--- src/include/verify_build.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/include/verify_build.h b/src/include/verify_build.h index 811602be34a..53c59f90bbd 100644 --- a/src/include/verify_build.h +++ b/src/include/verify_build.h @@ -53,12 +53,10 @@ __wt_verify_build(void) SIZE_CHECK(WT_REF, WT_REF_SIZE); /* - * We mix-and-match 32-bit unsigned values and size_t's, mostly because - * we allocate and handle 32-bit objects, and lots of the underlying C - * library expects size_t values for the length of memory objects. We - * check, just to be sure. + * The btree code encodes key/value pairs in size_t's, and requires at + * least 8B size_t's. */ - STATIC_ASSERT(sizeof(size_t) >= sizeof(uint32_t)); + STATIC_ASSERT(sizeof(size_t) >= sizeof(int64_t)); /* * We require an off_t fit into an 8B chunk because 8B is the largest From 5620765191e00ebebdf591ea71bd62c90396c757 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 3 Sep 2014 10:12:23 -0400 Subject: [PATCH 011/132] Yield to make sure LSM manager is running and resources are available. 
--- src/include/lsm.h | 2 ++ src/lsm/lsm_manager.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/src/include/lsm.h b/src/include/lsm.h index e0197e01af6..a1374e694ed 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -125,6 +125,8 @@ struct __wt_lsm_manager { uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; WT_LSM_WORKER_ARGS *lsm_worker_cookies; +#define LSM_MANAGER_RUNNING 0x01 + uint32_t flags; }; /* diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index c2a7ae20cb4..3cb7441b7b4 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -54,6 +54,8 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_ERR(__wt_thread_create( session, &cookies[0].tid, __lsm_worker_manager, &cookies[0])); + while (!F_ISSET(manager, LSM_MANAGER_RUNNING)) + __wt_yield(); F_SET(S2C(session), WT_CONN_SERVER_LSM); if (0) { @@ -250,6 +252,12 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) F_SET(worker_args, WT_LSM_WORK_MERGE); WT_RET(__wt_lsm_worker_start(session, worker_args)); } + /* + * Yield to give new threads a chance to get started. Indicate that + * we have allocated resources and are running now. + */ + __wt_yield(); + F_SET(manager, LSM_MANAGER_RUNNING); return (0); } From 22c9453e24d8c0adb97cbd09314d4bd588d9de0a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 3 Sep 2014 10:14:55 -0400 Subject: [PATCH 012/132] Normal transaction boundaries no longer reset cursors, reference #1181. --- src/docs/tune-cursor-persist.dox | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/docs/tune-cursor-persist.dox b/src/docs/tune-cursor-persist.dox index b8420281c69..183a877434e 100644 --- a/src/docs/tune-cursor-persist.dox +++ b/src/docs/tune-cursor-persist.dox @@ -8,18 +8,11 @@ hold positions in objects, and therefore long-lived cursor positions can decrease performance by blocking page eviction or looking like a long-lived transaction. 
-One solution is to cache cursors, but use the WT_CURSOR::reset method +Best practices are to cache cursors, but use the WT_CURSOR::reset method to discard the cursor's position in the object when the position is no -longer needed. And, use the WT_CURSOR::insert method instead of the -WT_CURSOR::update method, when there's no reason to track a position in -the object, because the WT_CURSOR::insert method never maintains a cursor -position, so there's no need to call WT_CURSOR::reset. - -Additionally, cursors are automatically reset whenever a transaction -boundary is crossed; when a transaction is started with the -WT_SESSION::begin_transaction or ended with either -WT_SESSION::commit_transaction or WT_SESSION::rollback_transaction, all -open cursors are automatically reset, there is no need to call the -WT_CURSOR::reset method explicitly. +longer needed. Additionally, use the WT_CURSOR::insert method instead +of the WT_CURSOR::update method when there's no need to hold a position +in the object, because the WT_CURSOR::insert method never holds a cursor +position and there's no need to call WT_CURSOR::reset. */ From fdfe9fa2e96665a13926c446df66ddc96c2436d2 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 3 Sep 2014 12:38:55 -0400 Subject: [PATCH 013/132] Remove uses of WT_ERR_TIMEDOUT_OK. __wt_cond_wait already takes care of that return and a few other similar values. 
--- dist/s_define.list | 1 - src/async/async_api.c | 3 +-- src/async/async_worker.c | 3 +-- src/btree/bt_evict.c | 3 +-- src/conn/conn_cache_pool.c | 4 ++-- src/conn/conn_ckpt.c | 2 +- src/conn/conn_log.c | 5 ++--- src/conn/conn_stat.c | 2 +- src/conn/conn_sweep.c | 2 +- src/include/error.h | 14 -------------- 10 files changed, 10 insertions(+), 29 deletions(-) diff --git a/dist/s_define.list b/dist/s_define.list index 593deb6e672..653ba6c6a8b 100644 --- a/dist/s_define.list +++ b/dist/s_define.list @@ -24,7 +24,6 @@ WT_HANDLE_CLOSED WT_HANDLE_NULLABLE WT_READ_BARRIER WT_REF_SIZE -WT_RET_TIMEDOUT_OK WT_SPINLOCK_MAX WT_STAT_ATOMIC_DECR WT_STAT_ATOMIC_DECRV diff --git a/src/async/async_api.c b/src/async/async_api.c index 4f4958baf2a..ae567466e8e 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -523,8 +523,7 @@ retry: async->flush_op.state = WT_ASYNCOP_READY; WT_ERR(__wt_async_op_enqueue(conn, &async->flush_op)); while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE) - WT_ERR_TIMEDOUT_OK( - __wt_cond_wait(NULL, async->flush_cond, 100000)); + WT_ERR(__wt_cond_wait(NULL, async->flush_cond, 100000)); /* * Flush is done. Clear the flags. 
*/ diff --git a/src/async/async_worker.c b/src/async/async_worker.c index 192af09a6e1..4c7ba6036ab 100644 --- a/src/async/async_worker.c +++ b/src/async/async_worker.c @@ -108,8 +108,7 @@ __async_flush_wait(WT_SESSION_IMPL *session, WT_ASYNC *async, uint64_t my_gen) while (async->flush_state == WT_ASYNC_FLUSHING && async->flush_gen == my_gen) - WT_ERR_TIMEDOUT_OK( - __wt_cond_wait(session, async->flush_cond, 10000)); + WT_ERR(__wt_cond_wait(session, async->flush_cond, 10000)); err: return (ret); } diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index c402a1715d6..deab0f54312 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -191,8 +191,7 @@ __evict_server(void *arg) F_CLR(cache, WT_EVICT_ACTIVE); WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "sleeping")); /* Don't rely on signals: check periodically. */ - WT_ERR_TIMEDOUT_OK( - __wt_cond_wait(session, cache->evict_cond, 100000)); + WT_ERR(__wt_cond_wait(session, cache->evict_cond, 100000)); WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking")); } diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index cffb9af40f9..ab7e2cc48ee 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -562,8 +562,8 @@ __wt_cache_pool_server(void *arg) while (F_ISSET_ATOMIC(cp, WT_CACHE_POOL_ACTIVE) && F_ISSET(cache, WT_CACHE_POOL_RUN)) { if (cp->currently_used <= cp->size) - WT_ERR_TIMEDOUT_OK(__wt_cond_wait( - session, cp->cache_pool_cond, 1000000)); + WT_ERR(__wt_cond_wait(session, + cp->cache_pool_cond, 1000000)); /* * Re-check pool run flag - since we want to avoid getting the diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 90560842c07..d1ee647d08a 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -86,7 +86,7 @@ __ckpt_server(void *arg) * NOTE: If the user only configured logsize, then usecs * will be 0 and this wait won't return until signalled. 
*/ - WT_ERR_TIMEDOUT_OK( + WT_ERR( __wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs)); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index ee4a2dd6b70..0ecf48c6628 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -106,7 +106,7 @@ __log_archive_server(void *arg) "log_archive: Blocked due to open log " "cursor holding archive lock")); } - WT_ERR_TIMEDOUT_OK( + WT_ERR( __wt_cond_wait(session, conn->arch_cond, 1000000)); continue; } @@ -150,8 +150,7 @@ __log_archive_server(void *arg) WT_ERR(__wt_rwunlock(session, log->log_archive_lock)); /* Wait until the next event. */ - WT_ERR_TIMEDOUT_OK( - __wt_cond_wait(session, conn->arch_cond, 1000000)); + WT_ERR(__wt_cond_wait(session, conn->arch_cond, 1000000)); } if (0) { diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 9ad1cddeb6e..eaee410ad0a 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -385,7 +385,7 @@ __statlog_server(void *arg) WT_ERR(__statlog_log_one(session, &path, &tmp)); /* Wait until the next event. */ - WT_ERR_TIMEDOUT_OK( + WT_ERR( __wt_cond_wait(session, conn->stat_cond, conn->stat_usecs)); } diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 2d917f9117b..3f0641923ec 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -109,7 +109,7 @@ __sweep_server(void *arg) F_ISSET(conn, WT_CONN_SERVER_SWEEP)) { /* Wait until the next event. */ - WT_ERR_TIMEDOUT_OK( + WT_ERR( __wt_cond_wait(session, conn->sweep_cond, 30 * WT_MILLION)); /* Sweep the handles. 
*/ diff --git a/src/include/error.h b/src/include/error.h index fda63ef8442..8f4d5c9f2e5 100644 --- a/src/include/error.h +++ b/src/include/error.h @@ -35,14 +35,6 @@ goto err; \ } \ } while (0) -#define WT_ERR_TIMEDOUT_OK(a) do { \ - if ((ret = (a)) != 0) { \ - if (ret == ETIMEDOUT) \ - ret = 0; \ - else \ - goto err; \ - } \ -} while (0) #define WT_ERR_TEST(a, v) do { \ if (a) { \ ret = (v); \ @@ -70,12 +62,6 @@ if ((__ret = (a)) != 0 && __ret != WT_NOTFOUND) \ return (__ret); \ } while (0) -#define WT_RET_TIMEDOUT_OK(a) do { \ - int __ret; \ - if ((__ret = (a)) != 0 && __ret != ETIMEDOUT) \ - return (__ret); \ -} while (0) - /* Set "ret" if not already set. */ #define WT_TRET(a) do { \ int __ret; \ From fd6723a9f72382e7df6c4c4c1f568f8c36e5ba3f Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 4 Sep 2014 08:27:08 +1000 Subject: [PATCH 014/132] Lint. Have flag start with WT_ prefix. --- src/include/lsm.h | 2 +- src/lsm/lsm_manager.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index a1374e694ed..fceed7987a7 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -125,7 +125,7 @@ struct __wt_lsm_manager { uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; WT_LSM_WORKER_ARGS *lsm_worker_cookies; -#define LSM_MANAGER_RUNNING 0x01 +#define WT_LSM_MANAGER_RUNNING 0x01 uint32_t flags; }; diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 3cb7441b7b4..7052d16c451 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -54,7 +54,7 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_ERR(__wt_thread_create( session, &cookies[0].tid, __lsm_worker_manager, &cookies[0])); - while (!F_ISSET(manager, LSM_MANAGER_RUNNING)) + while (!F_ISSET(manager, WT_LSM_MANAGER_RUNNING)) __wt_yield(); F_SET(S2C(session), WT_CONN_SERVER_LSM); @@ -257,7 +257,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) * we have allocated resources and are running now. 
*/ __wt_yield(); - F_SET(manager, LSM_MANAGER_RUNNING); + F_SET(manager, WT_LSM_MANAGER_RUNNING); return (0); } From 1840cac773ef9ca1e0aba93efd72759684b15f8f Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 4 Sep 2014 10:02:47 +1000 Subject: [PATCH 015/132] Only run LSM switches if they are needed. Refs #1186 --- src/include/extern.h | 5 ++++- src/lsm/lsm_work_unit.c | 35 +++++++++++++++++++++++++++++++++-- src/lsm/lsm_worker.c | 21 +++------------------ 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index e235691b395..a213a0e4bfa 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1013,7 +1013,10 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, extern int __wt_lsm_get_chunk_to_flush( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK **chunkp); -extern int __wt_lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); +extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, + WT_LSM_WORK_UNIT **entryp, + int *ran); +extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index d4d435aee75..569d8f0d379 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -112,12 +112,43 @@ __lsm_unpin_chunks(WT_SESSION_IMPL *session, WT_LSM_WORKER_COOKIE *cookie) } /* - * __wt_lsm_bloom_work -- + * __wt_lsm_work_switch -- + * Do a switch if the LSM tree needs one. + */ +int +__wt_lsm_work_switch( + WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, int *ran) +{ + WT_DECL_RET; + WT_LSM_CHUNK *chunk; + WT_LSM_WORK_UNIT *entry; + + /* We've become responsible for freeing the work unit. 
*/ + entry = *entryp; + *ran = 0; + *entryp = NULL; + + if (F_ISSET(entry->lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_lsm_tree_switch(session, entry->lsm_tree)); + /* Failing to complete the switch is fine */ + if (ret == EBUSY) + ret = 0; + else + *ran = 1; + } + +err: __wt_lsm_manager_free_work_unit(session, entry); + + return (ret); +} +/* + * __wt_lsm_work_bloom -- * Try to create a Bloom filter for the newest on-disk chunk that doesn't * have one. */ int -__wt_lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +__wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { WT_DECL_RET; WT_LSM_CHUNK *chunk; diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 54b95221793..0bb6cfb9c08 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -56,7 +56,7 @@ __lsm_worker_general_op( } else if (entry->flags == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->flags == WT_LSM_WORK_BLOOM) { - WT_ERR(__wt_lsm_bloom_work(session, entry->lsm_tree)); + WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree)); WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_MERGE, entry->lsm_tree)); } @@ -92,23 +92,8 @@ __lsm_worker(void *arg) while (F_ISSET(cookie, WT_LSM_WORK_SWITCH) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_SWITCH, &entry)) == 0 && - entry != NULL) { - /* - * Don't exit the switch thread because a single - * switch fails. Keep trying until we are told to - * shut down. - */ - WT_WITH_SCHEMA_LOCK(session, ret = - __wt_lsm_tree_switch(session, entry->lsm_tree)); - - __wt_lsm_manager_free_work_unit(session, entry); - entry = NULL; - - if (ret == EBUSY) - ret = 0; - WT_ERR(ret); - progress = 1; - } + entry != NULL) + WT_ERR(__wt_lsm_work_switch(session, &entry, &progress)); /* Flag an error if the pop failed. 
*/ WT_ERR(ret); From 720a3763039525e741802910285133ede515d348 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 4 Sep 2014 10:10:40 +1000 Subject: [PATCH 016/132] Fix build warnings. --- src/lsm/lsm_work_unit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 569d8f0d379..5c96c82f84c 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -120,7 +120,6 @@ __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, int *ran) { WT_DECL_RET; - WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; /* We've become responsible for freeing the work unit. */ @@ -138,7 +137,7 @@ __wt_lsm_work_switch( *ran = 1; } -err: __wt_lsm_manager_free_work_unit(session, entry); + __wt_lsm_manager_free_work_unit(session, entry); return (ret); } From e099f346b351dcc94ff4e068a0519d88b5803663 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 4 Sep 2014 12:14:16 +1000 Subject: [PATCH 017/132] Move LSM manager initialization into main thread. --- src/conn/conn_handle.c | 10 ++++ src/include/lsm.h | 2 - src/lsm/lsm_manager.c | 102 ++++++++++++++++++----------------------- 3 files changed, 54 insertions(+), 60 deletions(-) diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 36c53133325..af1513b3014 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -51,6 +51,16 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET( __wt_spin_init(session, &conn->page_lock[i], "btree page")); + /* Setup the spin locks for the LSM manager queues. */ + WT_RET(__wt_spin_init( + session, &conn->lsm_manager.app_lock, "LSM application queue lock")); + WT_RET(__wt_spin_init( + session, &conn->lsm_manager.manager_lock, "LSM manager queue lock")); + WT_RET(__wt_spin_init( + session, &conn->lsm_manager.switch_lock, "LSM switch queue lock")); + WT_RET(__wt_cond_alloc( + session, "LSM worker cond", 0, &conn->lsm_manager.work_cond)); + /* * Generation numbers. 
* diff --git a/src/include/lsm.h b/src/include/lsm.h index fceed7987a7..e0197e01af6 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -125,8 +125,6 @@ struct __wt_lsm_manager { uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; WT_LSM_WORKER_ARGS *lsm_worker_cookies; -#define WT_LSM_MANAGER_RUNNING 0x01 - uint32_t flags; }; /* diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 7052d16c451..1c9240d6af6 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -15,7 +15,8 @@ static void * __lsm_worker_manager(void *); /* * __wt_lsm_manager_start -- - * Start the LSM management infrastructure. + * Start the LSM management infrastructure. Our queues and locks were + * initialized when the connection was intialized. */ int __wt_lsm_manager_start(WT_SESSION_IMPL *session) @@ -54,8 +55,6 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session) WT_ERR(__wt_thread_create( session, &cookies[0].tid, __lsm_worker_manager, &cookies[0])); - while (!F_ISSET(manager, WT_LSM_MANAGER_RUNNING)) - __wt_yield(); F_SET(S2C(session), WT_CONN_SERVER_LSM); if (0) { @@ -104,56 +103,59 @@ __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn) manager = &conn->lsm_manager; removed = 0; - if (manager->lsm_worker_cookies == NULL) - return (0); + if (manager->lsm_worker_cookies != NULL) { + /* Wait for the server to notice and wrap up. */ + while (F_ISSET(conn, WT_CONN_SERVER_LSM)) + __wt_yield(); - /* Wait for the server to notice and wrap up. */ - while (F_ISSET(conn, WT_CONN_SERVER_LSM)) - __wt_yield(); + /* Clean up open LSM handles. */ + ret = __wt_lsm_tree_close_all(conn->default_session); - /* Clean up open LSM handles. 
*/ - ret = __wt_lsm_tree_close_all(conn->default_session); + WT_TRET(__wt_thread_join( + session, manager->lsm_worker_cookies[0].tid)); + manager->lsm_worker_cookies[0].tid = 0; - WT_TRET(__wt_thread_join(session, manager->lsm_worker_cookies[0].tid)); - manager->lsm_worker_cookies[0].tid = 0; + /* Release memory from any operations left on the queue. */ + for (current = TAILQ_FIRST(&manager->switchqh); + current != NULL; current = next) { + next = TAILQ_NEXT(current, q); + TAILQ_REMOVE(&manager->switchqh, current, q); + ++removed; + __wt_lsm_manager_free_work_unit(session, current); + } + for (current = TAILQ_FIRST(&manager->appqh); + current != NULL; current = next) { + next = TAILQ_NEXT(current, q); + TAILQ_REMOVE(&manager->appqh, current, q); + ++removed; + __wt_lsm_manager_free_work_unit(session, current); + } + for (current = TAILQ_FIRST(&manager->managerqh); + current != NULL; current = next) { + next = TAILQ_NEXT(current, q); + TAILQ_REMOVE(&manager->managerqh, current, q); + ++removed; + __wt_lsm_manager_free_work_unit(session, current); + } - /* Release memory from any operations left on the queue. */ - for (current = TAILQ_FIRST(&manager->switchqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); - TAILQ_REMOVE(&manager->switchqh, current, q); - ++removed; - __wt_lsm_manager_free_work_unit(session, current); - } - for (current = TAILQ_FIRST(&manager->appqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); - TAILQ_REMOVE(&manager->appqh, current, q); - ++removed; - __wt_lsm_manager_free_work_unit(session, current); - } - for (current = TAILQ_FIRST(&manager->managerqh); - current != NULL; current = next) { - next = TAILQ_NEXT(current, q); - TAILQ_REMOVE(&manager->managerqh, current, q); - ++removed; - __wt_lsm_manager_free_work_unit(session, current); + /* Close all LSM worker sessions. 
*/ + for (i = 0; i < manager->lsm_workers_max; i++) { + wt_session = + &manager->lsm_worker_cookies[i].session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + } + + WT_STAT_FAST_CONN_INCRV(session, + lsm_work_units_discarded, removed); + __wt_free(session, manager->lsm_worker_cookies); } - /* Close all LSM worker sessions. */ - for (i = 0; i < manager->lsm_workers_max; i++) { - wt_session = &manager->lsm_worker_cookies[i].session->iface; - WT_TRET(wt_session->close(wt_session, NULL)); - } - - WT_STAT_FAST_CONN_INCRV(session, lsm_work_units_discarded, removed); - + /* Free resources that are allocated in connection initialize */ __wt_spin_destroy(session, &manager->switch_lock); __wt_spin_destroy(session, &manager->app_lock); __wt_spin_destroy(session, &manager->manager_lock); WT_TRET(__wt_cond_destroy(session, &manager->work_cond)); - __wt_free(session, manager->lsm_worker_cookies); return (ret); } @@ -208,16 +210,6 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) WT_ASSERT(session, manager->lsm_workers == 1); - /* Setup the spin locks for the queues. */ - WT_RET(__wt_spin_init( - session, &manager->app_lock, "LSM application queue lock")); - WT_RET(__wt_spin_init( - session, &manager->manager_lock, "LSM manager queue lock")); - WT_RET(__wt_spin_init( - session, &manager->switch_lock, "LSM switch queue lock")); - WT_RET(__wt_cond_alloc( - session, "LSM worker cond", 0, &manager->work_cond)); - worker_args = &manager->lsm_worker_cookies[1]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; @@ -252,12 +244,6 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) F_SET(worker_args, WT_LSM_WORK_MERGE); WT_RET(__wt_lsm_worker_start(session, worker_args)); } - /* - * Yield to give new threads a chance to get started. Indicate that - * we have allocated resources and are running now. 
- */ - __wt_yield(); - F_SET(manager, WT_LSM_MANAGER_RUNNING); return (0); } From d3eecc260fb9bf7ea35b28c997100c8265c3a650 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 4 Sep 2014 07:17:58 -0400 Subject: [PATCH 018/132] long lines, whitespace --- src/conn/conn_handle.c | 8 ++++---- src/lsm/lsm_manager.c | 3 +-- src/lsm/lsm_worker.c | 3 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index af1513b3014..66517f9c2de 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -52,10 +52,10 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) __wt_spin_init(session, &conn->page_lock[i], "btree page")); /* Setup the spin locks for the LSM manager queues. */ - WT_RET(__wt_spin_init( - session, &conn->lsm_manager.app_lock, "LSM application queue lock")); - WT_RET(__wt_spin_init( - session, &conn->lsm_manager.manager_lock, "LSM manager queue lock")); + WT_RET(__wt_spin_init(session, + &conn->lsm_manager.app_lock, "LSM application queue lock")); + WT_RET(__wt_spin_init(session, + &conn->lsm_manager.manager_lock, "LSM manager queue lock")); WT_RET(__wt_spin_init( session, &conn->lsm_manager.switch_lock, "LSM switch queue lock")); WT_RET(__wt_cond_alloc( diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 1c9240d6af6..34cd13554d1 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -16,7 +16,7 @@ static void * __lsm_worker_manager(void *); /* * __wt_lsm_manager_start -- * Start the LSM management infrastructure. Our queues and locks were - * initialized when the connection was intialized. + * initialized when the connection was initialized. 
*/ int __wt_lsm_manager_start(WT_SESSION_IMPL *session) @@ -156,7 +156,6 @@ __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &manager->manager_lock); WT_TRET(__wt_cond_destroy(session, &manager->work_cond)); - return (ret); } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 0bb6cfb9c08..257fda03f3b 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -93,7 +93,8 @@ __lsm_worker(void *arg) (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_SWITCH, &entry)) == 0 && entry != NULL) - WT_ERR(__wt_lsm_work_switch(session, &entry, &progress)); + WT_ERR( + __wt_lsm_work_switch(session, &entry, &progress)); /* Flag an error if the pop failed. */ WT_ERR(ret); From b2542b7800942e1c6ed3d8eb3a4b976a2310fee1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 4 Sep 2014 07:33:39 -0400 Subject: [PATCH 019/132] Don't remove the wiredtiger directory, it has source code, reference #1188. --- lang/python/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am index 0ac56138e29..ae4e6cc6441 100644 --- a/lang/python/Makefile.am +++ b/lang/python/Makefile.am @@ -19,6 +19,6 @@ install-exec-local: clean-local: $(PYTHON) $(PYSRC)/setup.py clean - rm -rf _wiredtiger.so WT_TEST build wiredtiger + rm -rf _wiredtiger.so WT_TEST build TESTS = run-ex_access From ed5c5d33892f1a8d044b4b009ca7c00eed3ce21e Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 4 Sep 2014 07:41:32 -0400 Subject: [PATCH 020/132] Add wiredtiger_wrap.o to the clean-local label. 
--- lang/python/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am index ae4e6cc6441..223d9695f76 100644 --- a/lang/python/Makefile.am +++ b/lang/python/Makefile.am @@ -19,6 +19,6 @@ install-exec-local: clean-local: $(PYTHON) $(PYSRC)/setup.py clean - rm -rf _wiredtiger.so WT_TEST build + rm -rf _wiredtiger.so wiredtiger_wrap.o WT_TEST build TESTS = run-ex_access From eece553c0be260e8e51012b29fa302807aa85e25 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 4 Sep 2014 07:58:20 -0400 Subject: [PATCH 021/132] As far as I can tell, there's no "build" directory, so quit trying to remove it, reference #1188. --- lang/python/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am index 223d9695f76..a6b44932d60 100644 --- a/lang/python/Makefile.am +++ b/lang/python/Makefile.am @@ -19,6 +19,6 @@ install-exec-local: clean-local: $(PYTHON) $(PYSRC)/setup.py clean - rm -rf _wiredtiger.so wiredtiger_wrap.o WT_TEST build + rm -rf _wiredtiger.so wiredtiger_wrap.o WT_TEST TESTS = run-ex_access From a5a290e95ab0209cef905637cbb563017745f200 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 14 Aug 2014 11:04:56 -0400 Subject: [PATCH 022/132] It's "PREDEFINED", not "PREDEFINE" --- dist/s_docs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dist/s_docs b/dist/s_docs index 815d27d7b11..c0c8885e1b5 100755 --- a/dist/s_docs +++ b/dist/s_docs @@ -113,7 +113,7 @@ valid_build() } classf=`ls ../docs/struct___* 2>/dev/null` for c in $classf; do - echo "$c: Need to add class to PREDEFINE in src/docs/Doxyfile" + echo "$c: Need to add class to PREDEFINED in src/docs/Doxyfile" done } From d4b42f0563b46c51bee85d60035724ef41412387 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 15 Aug 2014 12:37:09 -0400 Subject: [PATCH 023/132] Minor cleanups: "onpage_ovfl" is never interesting if the overflow key has already been 
removed, factor that into the original test and set of the variable. --- src/btree/rec_write.c | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 4687505df11..bae29929aa5 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -3859,7 +3859,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_REF *ref; size_t size; u_int vtype; - int hazard, onpage_ovfl, ovfl_key, state; + int hazard, key_onpage_ovfl, ovfl_key, state; const void *p; btree = S2BT(session); @@ -3907,11 +3907,12 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) ikey = __wt_ref_key_instantiated(ref); if (ikey == NULL || ikey->cell_offset == 0) { cell = NULL; - onpage_ovfl = 0; + key_onpage_ovfl = 0; } else { cell = WT_PAGE_REF_OFFSET(page, ikey->cell_offset); __wt_cell_unpack(cell, kpack); - onpage_ovfl = kpack->ovfl == 1 ? 1 : 0; + key_onpage_ovfl = + kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM; } WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state)); @@ -3928,7 +3929,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * always instantiated. Don't worry about reuse, * reusing this key in this reconciliation is unlikely. */ - if (onpage_ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM) + if (key_onpage_ovfl) WT_ERR(__wt_ovfl_discard_add( session, page, kpack->cell)); CHILD_RELEASE_ERR(session, hazard, ref); @@ -3954,8 +3955,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * worry about reuse, reusing this key in this * reconciliation is unlikely. 
*/ - if (onpage_ovfl && - kpack->raw != WT_CELL_KEY_OVFL_RM) + if (key_onpage_ovfl) WT_ERR(__wt_ovfl_discard_add( session, page, kpack->cell)); CHILD_RELEASE_ERR(session, hazard, ref); @@ -3970,8 +3970,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * worry about reuse, reusing this key in this * reconciliation is unlikely. */ - if (onpage_ovfl && - kpack->raw != WT_CELL_KEY_OVFL_RM) + if (key_onpage_ovfl) WT_ERR(__wt_ovfl_discard_add( session, page, kpack->cell)); @@ -4010,19 +4009,11 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) __rec_cell_build_addr(r, p, size, vtype, 0); CHILD_RELEASE_ERR(session, hazard, ref); - /* - * If the key is an overflow key, check to see if the backing - * blocks have been freed; in that case, we have to build a new - * key. - */ - if (onpage_ovfl && kpack->raw == WT_CELL_KEY_OVFL_RM) - onpage_ovfl = 0; - /* * Build key cell. * Truncate any 0th key, internal pages don't need 0th keys. */ - if (onpage_ovfl) { + if (key_onpage_ovfl) { key->buf.data = cell; key->buf.size = __wt_cell_total_len(kpack); key->cell_len = 0; @@ -4048,10 +4039,10 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * case, we have to build the actual key now because we * are about to promote it. */ - if (onpage_ovfl) { + if (key_onpage_ovfl) { WT_ERR(__wt_buf_set(session, r->cur, WT_IKEY_DATA(ikey), ikey->size)); - onpage_ovfl = 0; + key_onpage_ovfl = 0; } WT_ERR(__rec_split(session, r)); } From a012e4829e15461f172f78a1d1bfbb3fe9992b0f Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 4 Sep 2014 11:40:50 -0400 Subject: [PATCH 024/132] Send t_ret error value to lprintf. 
--- bench/wtperf/wtperf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index ad645e19596..38352fd3ef6 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1871,7 +1871,7 @@ err: if (ret == 0) if (cfg->conn != NULL && (t_ret = cfg->conn->close(cfg->conn, NULL)) != 0) { - lprintf(cfg, ret, 0, + lprintf(cfg, t_ret, 0, "Error closing connection to %s", cfg->home); if (ret == 0) ret = t_ret; From 5604615b1053ec20dfa37dfac66642423cd8cb18 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 5 Sep 2014 11:03:05 +1000 Subject: [PATCH 025/132] Document the autogen.sh script in the top level of the tree: that is common practise on GitHub. refs #1198 --- src/docs/install.dox | 7 +++---- src/docs/spell.ok | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/docs/install.dox b/src/docs/install.dox index f3f5094eb5d..eae566f4291 100644 --- a/src/docs/install.dox +++ b/src/docs/install.dox @@ -21,15 +21,14 @@ First, clone the repository: git clone git://github.com/wiredtiger/wiredtiger.git @endcode -Second, run the \c build_posix/reconf script: +Second, run \c autogen.sh to create the \c configure script: @code cd wiredtiger -sh build_posix/reconf +sh autogen.sh @endcode -This creates the \c configure script, and you can now proceed with @ref -building. +Now proceed with @ref building. @section building Building WiredTiger diff --git a/src/docs/spell.ok b/src/docs/spell.ok index 95bdf58fc06..857f89cef05 100644 --- a/src/docs/spell.ok +++ b/src/docs/spell.ok @@ -88,6 +88,7 @@ ar archiver arg async +autogen atomicity autoconf automake @@ -339,7 +340,6 @@ realloc'd recno recnoN recnum -reconf recoverability recs rectype From fedef3e83f881596e83a1df196fda233b251b021 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Fri, 5 Sep 2014 12:21:59 +1000 Subject: [PATCH 026/132] Rearrange the Python build so we generate object files under the build directory. 
Clean up manually: don't rely on setup.py. refs #1188 --- lang/python/Makefile.am | 18 +++++++++++------- lang/python/setup.py | 10 +++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/lang/python/Makefile.am b/lang/python/Makefile.am index a6b44932d60..03c65a57028 100644 --- a/lang/python/Makefile.am +++ b/lang/python/Makefile.am @@ -1,5 +1,5 @@ PYSRC = $(top_srcdir)/lang/python -PY_INCLUDE_DIRS = $(top_srcdir) +PYDIRS = -t $(abs_builddir) -I $(abs_top_srcdir):$(abs_top_builddir) -L $(abs_top_builddir)/.libs all-local: _wiredtiger.so # We keep generated Python sources under lang/python: that's where they live @@ -10,15 +10,19 @@ $(PYSRC)/wiredtiger_wrap.c: $(top_srcdir)/src/include/wiredtiger.in $(PYSRC)/wir mv wiredtiger.py wiredtiger/__init__.py) _wiredtiger.so: $(top_builddir)/libwiredtiger.la $(PYSRC)/wiredtiger_wrap.c - $(PYTHON) $(PYSRC)/setup.py build_ext -b . -t . -f -I $(PY_INCLUDE_DIRS) + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir) $(PYDIRS)) install-exec-local: - $(PYTHON) $(PYSRC)/setup.py build_py -d build - $(PYTHON) $(PYSRC)/setup.py build_ext -b build -t . -f -I $(PY_INCLUDE_DIRS) - $(PYTHON) $(PYSRC)/setup.py install_lib -b build --skip-build $(PYTHON_INSTALL_ARG) + (cd $(PYSRC) && \ + $(PYTHON) setup.py build_py -d $(abs_builddir)/build && \ + $(PYTHON) setup.py build_ext -f -b $(abs_builddir)/build $(PYDIRS) && \ + $(PYTHON) setup.py install_lib -b $(abs_builddir)/build --skip-build $(PYTHON_INSTALL_ARG)) +# We build in different places for an install vs running from the tree: +# clean up both. Don't rely on "setup.py clean" -- everything that should +# be removed is created under the build directory. 
clean-local: - $(PYTHON) $(PYSRC)/setup.py clean - rm -rf _wiredtiger.so wiredtiger_wrap.o WT_TEST + rm -rf build _wiredtiger.so wiredtiger_wrap.o WT_TEST TESTS = run-ex_access diff --git a/lang/python/setup.py b/lang/python/setup.py index 1c6ebc71387..1057006ce50 100644 --- a/lang/python/setup.py +++ b/lang/python/setup.py @@ -35,9 +35,7 @@ if not 'ARCHFLAGS' in os.environ: os.environ['ARCHFLAGS'] = '' # Suppress warnings building SWIG generated code -extra_cflags = [ - '-w', -] +extra_cflags = [ '-w' ] dir = os.path.dirname(__file__) @@ -50,12 +48,10 @@ wt_ver = '%d.%d' % (WIREDTIGER_VERSION_MAJOR, WIREDTIGER_VERSION_MINOR) setup(name='wiredtiger', version=wt_ver, ext_modules=[Extension('_wiredtiger', - [os.path.join(dir, 'wiredtiger_wrap.c')], - include_dirs=['../..'], - library_dirs=['../../.libs'], + [os.path.join(dir, 'wiredtiger_wrap.c')], libraries=['wiredtiger'], extra_compile_args=extra_cflags, )], - package_dir={'' : dir}, + package_dir={'' : dir}, packages=['wiredtiger'], ) From 7bbaf0f70683dd7cf91ad3bd728885d263f973d0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 5 Sep 2014 07:38:27 -0400 Subject: [PATCH 027/132] whitespace --- src/include/dhandle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/dhandle.h b/src/include/dhandle.h index 9a05620c74c..5556627c74d 100644 --- a/src/include/dhandle.h +++ b/src/include/dhandle.h @@ -18,7 +18,7 @@ #define WT_SET_BTREE_IN_SESSION(s, b) ((s)->dhandle = b->dhandle) #define WT_CLEAR_BTREE_IN_SESSION(s) ((s)->dhandle = NULL) -#define WT_WITH_DHANDLE(s, d, e) do { \ +#define WT_WITH_DHANDLE(s, d, e) do { \ WT_DATA_HANDLE *__saved_dhandle = (s)->dhandle; \ (s)->dhandle = (d); \ e; \ From 7a009dc0aad7d82f666d5ceadfac479bc65773d7 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 5 Sep 2014 07:39:02 -0400 Subject: [PATCH 028/132] WT_WITH_SCHEMA_LOCK, WT_SESSION_SCHEMA_LOCKED don't need to spin, #1202. 
--- src/include/schema.h | 47 ++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/src/include/schema.h b/src/include/schema.h index 038404f5ea5..c8c80dcf9c1 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -65,43 +65,38 @@ struct __wt_table { */ #define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1) +/* + * WT_WITH_SCHEMA_LOCK -- + * Acquire the schema lock, perform an operation, drop the lock. + */ #define WT_WITH_SCHEMA_LOCK(session, op) do { \ - int __schema_locked = 0; \ - WT_DECL_SPINLOCK_ID(__id); /* Must appear last */ \ WT_ASSERT(session, \ F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || \ !F_ISSET(session, WT_SESSION_NO_SCHEMA_LOCK)); \ - while (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) \ - if (session->skip_schema_lock || __wt_spin_trylock( \ - session, &S2C(session)->schema_lock, &__id) == 0) { \ - F_SET(session, WT_SESSION_SCHEMA_LOCKED); \ - __schema_locked = 1; \ - } else \ - __wt_yield(); \ - (op); \ - if (__schema_locked) { \ + if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || \ + session->skip_schema_lock) { \ + (op); \ + } else { \ + __wt_spin_lock(session, &S2C(session)->schema_lock); \ + F_SET(session, WT_SESSION_SCHEMA_LOCKED); \ + (op); \ + __wt_spin_unlock(session, &S2C(session)->schema_lock); \ F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \ - if (!session->skip_schema_lock) \ - __wt_spin_unlock( \ - session, &S2C(session)->schema_lock); \ } \ } while (0) -/* Drop the schema lock, and re-acquire after operation. */ +/* + * WT_WITHOUT_SCHEMA_LOCK -- + * Drop the schema lock, perform an operation, re-acquire the lock. 
+ */ #define WT_WITHOUT_SCHEMA_LOCK(session, op) do { \ - WT_DECL_SPINLOCK_ID(__id); /* Must appear last */ \ - if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) \ - (op); \ - else { \ + if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \ __wt_spin_unlock(session, &S2C(session)->schema_lock); \ F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \ (op); \ - while (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \ - if (__wt_spin_trylock(session, \ - &S2C(session)->schema_lock, &__id) == 0) \ - F_SET(session, WT_SESSION_SCHEMA_LOCKED);\ - else \ - __wt_yield(); \ - } \ + __wt_spin_lock(session, &S2C(session)->schema_lock); \ + F_SET(session, WT_SESSION_SCHEMA_LOCKED); \ + } else { \ + (op); \ } \ } while (0) From 99ce38eef9b3b6828e5769d56cd0d53b272358e8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 6 Sep 2014 15:59:51 -0400 Subject: [PATCH 029/132] Write a new version of config-collapse that merges nested structures so reconfiguration doesn't lose previous configuration information. Reference #1172. --- src/config/config_collapse.c | 286 +++++++++++++++++++++++++++++++---- src/conn/conn_api.c | 57 ++++--- src/conn/conn_handle.c | 1 + src/include/connection.h | 2 + src/include/extern.h | 5 +- src/meta/meta_ckpt.c | 2 +- src/meta/meta_turtle.c | 2 +- src/schema/schema_create.c | 8 +- src/session/session_api.c | 2 +- 9 files changed, 306 insertions(+), 59 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 8d245680f69..8b56e57bc74 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -8,29 +8,64 @@ #include "wt_internal.h" /* - * __wt_config_collapse -- - * Given a NULL-terminated list of configuration strings, where the first - * one contains all the defaults, collapse them into newly allocated - * memory. + * We need a character that can't appear in a key as a separator. + * + * XXX + * I'm not using '.' 
although that seems like the natural one to use because + * default checkpoints are named "WiredTiger.#" where dot is part of the key. + * I think it's wrong, we should not have used a dot in that name, but that's + * a format change. */ -int -__wt_config_collapse( - WT_SESSION_IMPL *session, const char **cfg, const char **config_ret) +#undef SEP /* separator key, character */ +#define SEP "," +#undef SEPC +#define SEPC ',' + +/* + * Individual configuration entries, including a generation number used to make + * the qsort stable. + */ +typedef struct { + char *k, *v; /* key, value */ + size_t gen; /* generation */ +} WT_COLLAPSE_ENTRY; + +/* + * The array of configuration entries. + */ +typedef struct { + size_t entries_allocated; /* allocated */ + size_t entries_next; /* next slot */ + + int nested_replace; /* replace nested values */ + + WT_COLLAPSE_ENTRY *entries; /* array of entries */ +} WT_COLLAPSE; + +/* + * __collapse_scan -- + * Walk a configuration string, inserting entries into the collapse array. + */ +static int +__collapse_scan(WT_SESSION_IMPL *session, + const char *key, const char *value, WT_COLLAPSE *cp) { WT_CONFIG cparser; WT_CONFIG_ITEM k, v; - WT_DECL_ITEM(tmp); + WT_DECL_ITEM(kb); + WT_DECL_ITEM(vb); WT_DECL_RET; - WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_scr_alloc(session, 0, &kb)); + WT_ERR(__wt_scr_alloc(session, 0, &vb)); - WT_ERR(__wt_config_init(session, &cparser, cfg[0])); + WT_ERR(__wt_config_init(session, &cparser, value)); while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) { if (k.type != WT_CONFIG_ITEM_STRING && k.type != WT_CONFIG_ITEM_ID) WT_ERR_MSG(session, EINVAL, "Invalid configuration key found: '%s'\n", k.str); - WT_ERR(__wt_config_get(session, cfg, &k, &v)); + /* Include the quotes around string keys/values. 
*/ if (k.type == WT_CONFIG_ITEM_STRING) { --k.str; @@ -40,23 +75,222 @@ __wt_config_collapse( --v.str; v.len += 2; } - WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", - (int)k.len, k.str, (int)v.len, v.str)); + + /* Build the key/value strings. */ + WT_ERR(__wt_buf_fmt(session, + kb, "%s%s%.*s", + key == NULL ? "" : key, + key == NULL ? "" : SEP, + (int)k.len, k.str)); + WT_ERR(__wt_buf_fmt(session, + vb, "%.*s", (int)v.len, v.str)); + + /* + * If the value is a structure, recursively parse it. + * + * XXX + * Problem #1: we store "checkpoint_lsn=(1,0)" in the metadata + * file, where the key is type WT_CONFIG_ITEM_ID, the value is + * type WT_CONFIG_ITEM_STRUCT. Other nested structures have + * field names, should this have been "(file=1,offset=0)"? + * + * Problem #2: the configuration collapse functions are used by + * checkpoint to replace the previous entry in its entirety, + * that is, the work we're doing to integrate nested changes + * into previous values breaks it. + * + * We're currently turning off merging nested structures in most + * places (including the checkpoint code). + */ + if (!cp->nested_replace && + v.type == WT_CONFIG_ITEM_STRUCT && + strchr(vb->data, '=') != NULL) { + WT_ERR( + __collapse_scan(session, kb->data, vb->data, cp)); + continue; + } + + /* Insert the value into the array. */ + WT_ERR(__wt_realloc_def(session, + &cp->entries_allocated, + cp->entries_next + 1, &cp->entries)); + WT_ERR(__wt_strndup(session, + kb->data, kb->size, &cp->entries[cp->entries_next].k)); + WT_ERR(__wt_strndup(session, + vb->data, vb->size, &cp->entries[cp->entries_next].v)); + cp->entries[cp->entries_next].gen = cp->entries_next; + ++cp->entries_next; } - if (ret != WT_NOTFOUND) - goto err; - /* - * If the caller passes us no valid configuration strings, we get here - * with no bytes to copy -- that's OK, the underlying string copy can - * handle empty strings. - * - * Strip any trailing comma. 
- */ - if (tmp->size != 0) - --tmp->size; - ret = __wt_strndup(session, tmp->data, tmp->size, config_ret); +err: __wt_scr_free(&kb); + __wt_scr_free(&vb); + return (0); +} -err: __wt_scr_free(&tmp); +/* + * __strip_comma -- + * Strip a trailing comma. + */ +static inline void +__strip_comma(WT_ITEM *buf) +{ + if (buf->size != 0 && ((char *)buf->data)[buf->size - 1] == ',') + --buf->size; +} + +/* + * __collapse_format_next -- + * Walk the array, building entries. + */ +static int +__collapse_format_next(WT_SESSION_IMPL *session, const char *prefix, + size_t plen, size_t *enp, WT_COLLAPSE *cp, WT_ITEM *build) +{ + WT_COLLAPSE_ENTRY *ep; + size_t len1, len2, next; + char *p; + + for (; *enp < cp->entries_next; ++*enp) { + ep = &cp->entries[*enp]; + + /* + * The entries are in sorted order, take the last entry for any + * key. + */ + if (*enp < (cp->entries_next - 1)) { + len1 = strlen(ep->k); + len2 = strlen((ep + 1)->k); + + /* Choose the last of identical keys. */ + if (len1 == len2 && + memcmp(ep->k, (ep + 1)->k, len1) == 0) + continue; + + /* + * The test is complicated by matching empty entries + * "foo=" against nested structures "foo,bar=", where + * the latter is a replacement for the former. + */ + if (len2 > len1 && + (ep + 1)->k[len1] == SEPC && + memcmp(ep->k, (ep + 1)->k, len1) == 0) + continue; + } + + /* + * If we're skipping a prefix and this entry doesn't match it, + * back off one entry and pop up a level. + */ + if (plen != 0 && memcmp(ep->k, prefix, plen) != 0) { + --*enp; + break; + } + + /* + * If the entry introduces a new level, recurse through that + * new level. + */ + if ((p = strchr(ep->k + plen, SEPC)) != NULL) { + next = WT_PTRDIFF(p, ep->k); + WT_RET(__wt_buf_catfmt(session, + build, "%.*s=(", (int)(next - plen), ep->k + plen)); + WT_RET(__collapse_format_next( + session, ep->k, next + 1, enp, cp, build)); + __strip_comma(build); + WT_RET(__wt_buf_catfmt(session, build, "),")); + continue; + } + + /* Append the entry to the buffer. 
*/ + WT_RET(__wt_buf_catfmt( + session, build, "%s=%s,", ep->k + plen, ep->v)); + } + + return (0); +} + +/* + * __collapse_format -- + * Take the sorted array of entries, and format them into allocated memory. + */ +static int +__collapse_format( + WT_SESSION_IMPL *session, WT_COLLAPSE *cp, const char **config_ret) +{ + WT_DECL_ITEM(build); + WT_DECL_RET; + size_t entries; + + WT_RET(__wt_scr_alloc(session, 4 * 1024, &build)); + + entries = 0; + WT_ERR(__collapse_format_next(session, "", 0, &entries, cp, build)); + + __strip_comma(build); + + ret = __wt_strndup(session, build->data, build->size, config_ret); + +err: __wt_scr_free(&build); + return (ret); +} + +/* + * __collapse_cmp -- + * Qsort function: sort the collapse array. + */ +static int +__collapse_cmp(const void *a, const void *b) +{ + WT_COLLAPSE_ENTRY *ae, *be; + int cmp; + + ae = (WT_COLLAPSE_ENTRY *)a; + be = (WT_COLLAPSE_ENTRY *)b; + + if ((cmp = strcmp(ae->k, be->k)) != 0) + return (cmp); + return (ae->gen > be->gen ? 1 : -1); +} + +/* + * __wt_config_collapse -- + * Given a NULL-terminated list of configuration strings, in reverse order + * of preference (the first set of strings are the least preferred), collapse + * them into allocated memory. + */ +int +__wt_config_collapse(WT_SESSION_IMPL *session, + const char **cfg, const char **config_ret, int nested_replace) +{ + WT_COLLAPSE collapse; + WT_DECL_RET; + size_t i; + + /* Start out with a reasonable number of entries. */ + WT_CLEAR(collapse); + collapse.nested_replace = nested_replace; + + WT_RET(__wt_realloc_def( + session, &collapse.entries_allocated, 100, &collapse.entries)); + + /* Scan the configuration strings, entering them into the array. */ + for (; *cfg != NULL; ++cfg) + WT_ERR(__collapse_scan(session, NULL, *cfg, &collapse)); + + /* + * Sort the array by key and, in the case of identical keys, by + * generation. 
+ */ + qsort(collapse.entries, + collapse.entries_next, sizeof(WT_COLLAPSE_ENTRY), __collapse_cmp); + + /* Convert the array of entries into a string. */ + ret = __collapse_format(session, &collapse, config_ret); + +err: for (i = 0; i < collapse.entries_next; ++i) { + __wt_free(session, collapse.entries[i].k); + __wt_free(session, collapse.entries[i].v); + } + __wt_free(session, collapse.entries); return (ret); } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index e7826e9fd56..41bf351f2a5 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -630,30 +630,36 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; - - /* - * Special version of cfg that doesn't include the default config: used - * to limit changes to values that the application sets explicitly. - * Note that any function using this value has to be prepared to handle - * not-found as a valid option return. - */ - const char *raw_cfg[] = { config, NULL }; + const char *p, *config_cfg[] = { NULL, NULL, NULL }; conn = (WT_CONNECTION_IMPL *)wt_conn; CONNECTION_API_CALL(conn, session, reconfigure, config, cfg); + WT_UNUSED(cfg); - WT_ERR(__wt_conn_cache_pool_config(session, cfg)); - WT_ERR(__wt_cache_config(conn, raw_cfg)); + /* + * The configuration argument has been checked for validity, replace the + * previous connection configuration. + * + * DO NOT collapse the configuration before the reconfigure calls. Some + * of the underlying reconfiguration functions do explicit checks for + * the second element of the configuration array, knowing the defaults + * are in slot #1 and the application's modifications are in slot #2. 
+ */ + config_cfg[0] = conn->cfg; + config_cfg[1] = config; - WT_ERR(__wt_async_reconfig(conn, raw_cfg)); - WT_ERR(__conn_statistics_config(session, raw_cfg)); - WT_ERR(__wt_conn_verbose_config(session, raw_cfg)); - WT_ERR(__wt_checkpoint_server_create(conn, cfg)); - WT_ERR(__wt_statlog_create(conn, cfg)); + WT_ERR(__wt_conn_cache_pool_config(session, config_cfg)); + WT_ERR(__wt_cache_config(conn, config_cfg)); + + WT_ERR(__wt_async_reconfig(conn, config_cfg)); + WT_ERR(__conn_statistics_config(session, config_cfg)); + WT_ERR(__wt_conn_verbose_config(session, config_cfg)); + WT_ERR(__wt_checkpoint_server_create(conn, config_cfg)); + WT_ERR(__wt_statlog_create(conn, config_cfg)); WT_ERR(__wt_config_gets( - session, cfg, "lsm_manager.worker_thread_max", &cval)); + session, config_cfg, "lsm_manager.worker_thread_max", &cval)); if (cval.val) conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val; @@ -662,6 +668,10 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_ERR(__wt_cond_signal( session, __wt_process.cache_pool->cache_pool_cond)); + WT_ERR(__wt_config_collapse(session, config_cfg, &p, 0)); + __wt_free(session, conn->cfg); + conn->cfg = p; + err: API_END_RET(session, ret); } @@ -1399,18 +1409,17 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, */ WT_ERR(__wt_connection_workers(session, cfg)); + /* Take a copy of the final configuration for later reconfiguration. */ + WT_ERR(__wt_config_collapse(session, cfg, &conn->cfg, 0)); + STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; - /* - * Destroying the connection on error will destroy our session handle, - * cleanup using the session handle first, then discard the connection. 
- */ -err: __wt_buf_free(session, &cbbuf); - __wt_buf_free(session, &cubuf); - - if (ret != 0 && conn != NULL) +err: if (ret != 0 && conn != NULL) WT_TRET(__wt_connection_close(conn)); + __wt_buf_free(session, &cbbuf); + __wt_buf_free(session, &cubuf); + return (ret); } diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 66517f9c2de..03c22b04e30 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -130,6 +130,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_free(session, conn->page_lock); /* Free allocated memory. */ + __wt_free(session, conn->cfg); __wt_free(session, conn->home); __wt_free(session, conn->error_prefix); __wt_free(session, conn->sessions); diff --git a/src/include/connection.h b/src/include/connection.h index 03feef68e56..1f1f8be88ea 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -70,6 +70,8 @@ struct __wt_connection_impl { WT_SESSION_IMPL *default_session; WT_SESSION_IMPL dummy_session; + const char *cfg; /* Connection configuration */ + WT_SPINLOCK api_lock; /* Connection API spinlock */ WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ WT_SPINLOCK fh_lock; /* File handle queue spinlock */ diff --git a/src/include/extern.h b/src/include/extern.h index a213a0e4bfa..a462815a93d 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -572,9 +572,10 @@ extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len); -extern int __wt_config_collapse( WT_SESSION_IMPL *session, +extern int __wt_config_collapse(WT_SESSION_IMPL *session, const char **cfg, - const char **config_ret); + const char **config_ret, + int nested_replace); extern int __wt_config_concat( WT_SESSION_IMPL *session, const char **cfg, const char **config_ret); diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index 998ae7e0d02..ec42a8032ae 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -117,7 +117,7 @@ 
__ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v) cfg[0] = config; cfg[1] = v == NULL ? "checkpoint=()" : v; cfg[2] = NULL; - WT_ERR(__wt_config_collapse(session, cfg, &newcfg)); + WT_ERR(__wt_config_collapse(session, cfg, &newcfg, 1)); WT_ERR(__wt_metadata_update(session, fname, newcfg)); err: __wt_free(session, config); diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 2e3eca10833..b0914165837 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -29,7 +29,7 @@ __metadata_config(WT_SESSION_IMPL *session, const char **metaconfp) "key_format=S,value_format=S,id=0,version=(major=%d,minor=%d)", WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX)); cfg[1] = buf->data; - WT_ERR(__wt_config_collapse(session, cfg, &metaconf)); + WT_ERR(__wt_config_collapse(session, cfg, &metaconf, 1)); *metaconfp = metaconf; diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 398fea4476f..50005753559 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -102,7 +102,7 @@ __create_file(WT_SESSION_IMPL *session, for (p = filecfg; *p != NULL; ++p) ; *p = val->data; - WT_ERR(__wt_config_collapse(session, filecfg, &fileconf)); + WT_ERR(__wt_config_collapse(session, filecfg, &fileconf, 1)); WT_ERR(__wt_metadata_insert(session, uri, fileconf)); } @@ -248,7 +248,7 @@ __create_colgroup(WT_SESSION_IMPL *session, WT_ERR(__wt_schema_create(session, source, sourceconf)); - WT_ERR(__wt_config_collapse(session, cfg, &cgconf)); + WT_ERR(__wt_config_collapse(session, cfg, &cgconf, 1)); if ((ret = __wt_metadata_insert(session, name, cgconf)) != 0) { /* * If the entry already exists in the metadata, we're done. 
@@ -416,7 +416,7 @@ __create_index(WT_SESSION_IMPL *session, cfg[1] = sourceconf; cfg[2] = confbuf.data; - WT_ERR(__wt_config_collapse(session, cfg, &idxconf)); + WT_ERR(__wt_config_collapse(session, cfg, &idxconf, 1)); if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) { /* * If the entry already exists in the metadata, we're done. @@ -480,7 +480,7 @@ __create_table(WT_SESSION_IMPL *session, ; WT_RET_NOTFOUND_OK(ret); - WT_RET(__wt_config_collapse(session, cfg, &tableconf)); + WT_RET(__wt_config_collapse(session, cfg, &tableconf, 1)); if ((ret = __wt_metadata_insert(session, name, tableconf)) != 0) { /* * If the entry already exists in the metadata, we're done. diff --git a/src/session/session_api.c b/src/session/session_api.c index e63e2c0284a..36f7afc075e 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -300,7 +300,7 @@ __wt_session_create_strip(WT_SESSION *wt_session, const char *cfg[] = { WT_CONFIG_BASE(session, session_create), v1, v2, NULL }; - return (__wt_config_collapse(session, cfg, value_ret)); + return (__wt_config_collapse(session, cfg, value_ret, 1)); } /* From 021f259d74db789d8271d8f6fdcf9bac5b166663 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 7 Sep 2014 20:02:55 -0400 Subject: [PATCH 030/132] Get rid of WT_SESSION.skip_schema_lock, it's no longer used anywhere. 
--- src/include/schema.h | 3 +-- src/include/session.h | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/include/schema.h b/src/include/schema.h index c8c80dcf9c1..e24a19b03ca 100644 --- a/src/include/schema.h +++ b/src/include/schema.h @@ -73,8 +73,7 @@ struct __wt_table { WT_ASSERT(session, \ F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || \ !F_ISSET(session, WT_SESSION_NO_SCHEMA_LOCK)); \ - if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) || \ - session->skip_schema_lock) { \ + if (F_ISSET(session, WT_SESSION_SCHEMA_LOCKED)) { \ (op); \ } else { \ __wt_spin_lock(session, &S2C(session)->schema_lock); \ diff --git a/src/include/session.h b/src/include/session.h index 5d566f8b62d..31d58ff61e5 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -113,8 +113,6 @@ struct __wt_session_impl { int (*reconcile_cleanup)(WT_SESSION_IMPL *); int compaction; /* Compaction did some work */ - int skip_schema_lock; /* Another thread holds the schema lock - * on our behalf */ /* * The split stash memory and hazard information persist past session From c7ddaaeef8cf7483111c7266c99bf490b4e88016 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Mon, 8 Sep 2014 22:20:49 +1000 Subject: [PATCH 031/132] Fixes for the LevelDB JNI build. Make the fields of Slice public, rename the config file and only include the necessary defines. 
--- api/leveldb/Makefile.am | 2 +- api/leveldb/basho/perf_count.h | 2 +- api/leveldb/config.hin | 22 +++++++++++++++++++ api/leveldb/hyperleveldb/replay_iterator.h | 2 +- api/leveldb/leveldb/include/leveldb/cache.h | 2 +- .../leveldb/include/leveldb/comparator.h | 2 +- api/leveldb/leveldb/include/leveldb/db.h | 8 ++++++- api/leveldb/leveldb/include/leveldb/env.h | 2 +- .../leveldb/include/leveldb/filter_policy.h | 2 +- .../leveldb/include/leveldb/iterator.h | 2 +- api/leveldb/leveldb/include/leveldb/options.h | 2 +- api/leveldb/leveldb/include/leveldb/slice.h | 5 +++-- api/leveldb/leveldb/include/leveldb/status.h | 2 +- .../leveldb/include/leveldb/write_batch.h | 2 +- api/leveldb/leveldb_wt.cc | 14 ++---------- api/leveldb/leveldb_wt.h | 3 ++- build_posix/configure.ac.in | 2 +- 17 files changed, 48 insertions(+), 28 deletions(-) create mode 100644 api/leveldb/config.hin diff --git a/api/leveldb/Makefile.am b/api/leveldb/Makefile.am index 44aa69bbd48..2cfd9d945a5 100644 --- a/api/leveldb/Makefile.am +++ b/api/leveldb/Makefile.am @@ -16,7 +16,7 @@ leveldbincludedir = $(includedir)/wiredtiger/leveldb endif endif leveldbinclude_HEADERS = \ - wiredtiger_config.h \ + leveldb_wt_config.h \ leveldb/include/leveldb/cache.h \ leveldb/include/leveldb/comparator.h\ leveldb/include/leveldb/db.h \ diff --git a/api/leveldb/basho/perf_count.h b/api/leveldb/basho/perf_count.h index 0edf1b96549..b0f4abf9b66 100644 --- a/api/leveldb/basho/perf_count.h +++ b/api/leveldb/basho/perf_count.h @@ -23,7 +23,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ #define STORAGE_LEVELDB_INCLUDE_PERF_COUNT_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #include #include diff --git a/api/leveldb/config.hin b/api/leveldb/config.hin new file mode 100644 index 00000000000..131b68969d3 --- /dev/null +++ b/api/leveldb/config.hin @@ -0,0 +1,22 @@ +/* api/leveldb/config.hin. Generated by autoheader, then hand-edited. */ + +/* Build the LevelDB API with Basho LevelDB support. 
*/ +#undef HAVE_BASHOLEVELDB + +/* Snappy support automatically loaded. */ +#undef HAVE_BUILTIN_EXTENSION_SNAPPY + +/* Zlib support automatically loaded. */ +#undef HAVE_BUILTIN_EXTENSION_ZLIB + +/* Define to 1 for diagnostic tests. */ +#undef HAVE_DIAGNOSTIC + +/* Build the LevelDB API with HyperLevelDB support. */ +#undef HAVE_HYPERLEVELDB + +/* Define to 1 if you have the `snappy' library (-lsnappy). */ +#undef HAVE_LIBSNAPPY + +/* Build the LevelDB API with RocksDB support. */ +#undef HAVE_ROCKSDB diff --git a/api/leveldb/hyperleveldb/replay_iterator.h b/api/leveldb/hyperleveldb/replay_iterator.h index 6e2f562c6c4..397acdfd889 100644 --- a/api/leveldb/hyperleveldb/replay_iterator.h +++ b/api/leveldb/hyperleveldb/replay_iterator.h @@ -5,7 +5,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_ #define STORAGE_LEVELDB_INCLUDE_REPLAY_ITERATOR_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #include "slice.h" #include "status.h" diff --git a/api/leveldb/leveldb/include/leveldb/cache.h b/api/leveldb/leveldb/include/leveldb/cache.h index 6ae25122133..94be8e919a8 100644 --- a/api/leveldb/leveldb/include/leveldb/cache.h +++ b/api/leveldb/leveldb/include/leveldb/cache.h @@ -18,7 +18,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_CACHE_H_ #define STORAGE_LEVELDB_INCLUDE_CACHE_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb/include/leveldb/comparator.h b/api/leveldb/leveldb/include/leveldb/comparator.h index 23e0ba84559..78d83a4d08e 100644 --- a/api/leveldb/leveldb/include/leveldb/comparator.h +++ b/api/leveldb/leveldb/include/leveldb/comparator.h @@ -5,7 +5,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ #define STORAGE_LEVELDB_INCLUDE_COMPARATOR_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git 
a/api/leveldb/leveldb/include/leveldb/db.h b/api/leveldb/leveldb/include/leveldb/db.h index c1818d28a7a..df8fcbbe9f8 100644 --- a/api/leveldb/leveldb/include/leveldb/db.h +++ b/api/leveldb/leveldb/include/leveldb/db.h @@ -5,7 +5,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_DB_H_ #define STORAGE_LEVELDB_INCLUDE_DB_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif @@ -292,6 +292,12 @@ class DB { // db->CompactRange(NULL, NULL); virtual void CompactRange(const Slice* begin, const Slice* end) = 0; + // Suspends the background compaction thread. This methods + // returns once suspended. + virtual void SuspendCompactions() = 0; + // Resumes a suspended background compation thread. + virtual void ResumeCompactions() = 0; + #ifdef HAVE_HYPERLEVELDB // Create a live backup of a live LevelDB instance. // The backup is stored in a directory named "backup-" under the top diff --git a/api/leveldb/leveldb/include/leveldb/env.h b/api/leveldb/leveldb/include/leveldb/env.h index 0d043307736..4ad67d36fea 100644 --- a/api/leveldb/leveldb/include/leveldb/env.h +++ b/api/leveldb/leveldb/include/leveldb/env.h @@ -13,7 +13,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_ENV_H_ #define STORAGE_LEVELDB_INCLUDE_ENV_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb/include/leveldb/filter_policy.h b/api/leveldb/leveldb/include/leveldb/filter_policy.h index 2d970e709d6..e434ef4b241 100644 --- a/api/leveldb/leveldb/include/leveldb/filter_policy.h +++ b/api/leveldb/leveldb/include/leveldb/filter_policy.h @@ -16,7 +16,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ #define STORAGE_LEVELDB_INCLUDE_FILTER_POLICY_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git 
a/api/leveldb/leveldb/include/leveldb/iterator.h b/api/leveldb/leveldb/include/leveldb/iterator.h index 3845d553a4e..2d97d180b17 100644 --- a/api/leveldb/leveldb/include/leveldb/iterator.h +++ b/api/leveldb/leveldb/include/leveldb/iterator.h @@ -15,7 +15,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ #define STORAGE_LEVELDB_INCLUDE_ITERATOR_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb/include/leveldb/options.h b/api/leveldb/leveldb/include/leveldb/options.h index a14503fe086..9dcf73fc2a0 100644 --- a/api/leveldb/leveldb/include/leveldb/options.h +++ b/api/leveldb/leveldb/include/leveldb/options.h @@ -5,7 +5,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ #define STORAGE_LEVELDB_INCLUDE_OPTIONS_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb/include/leveldb/slice.h b/api/leveldb/leveldb/include/leveldb/slice.h index d7c20cfcaac..1eb66dd825f 100644 --- a/api/leveldb/leveldb/include/leveldb/slice.h +++ b/api/leveldb/leveldb/include/leveldb/slice.h @@ -15,7 +15,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_SLICE_H_ #define STORAGE_LEVELDB_INCLUDE_SLICE_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif @@ -82,7 +82,8 @@ class Slice { (memcmp(data_, x.data_, x.size_) == 0)); } - private: +// The LevelDB JNI layer peeks in here +// private: const char* data_; size_t size_; diff --git a/api/leveldb/leveldb/include/leveldb/status.h b/api/leveldb/leveldb/include/leveldb/status.h index 8b2cbb9b422..3c21f64462b 100644 --- a/api/leveldb/leveldb/include/leveldb/status.h +++ b/api/leveldb/leveldb/include/leveldb/status.h @@ -13,7 +13,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_STATUS_H_ #define STORAGE_LEVELDB_INCLUDE_STATUS_H_ -#include 
"wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb/include/leveldb/write_batch.h b/api/leveldb/leveldb/include/leveldb/write_batch.h index 9184d42c24c..293b41ad818 100644 --- a/api/leveldb/leveldb/include/leveldb/write_batch.h +++ b/api/leveldb/leveldb/include/leveldb/write_batch.h @@ -21,7 +21,7 @@ #ifndef STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ #define STORAGE_LEVELDB_INCLUDE_WRITE_BATCH_H_ -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #if defined(HAVE_ROCKSDB) && !defined(leveldb) #define leveldb rocksdb #endif diff --git a/api/leveldb/leveldb_wt.cc b/api/leveldb/leveldb_wt.cc index cfeb0549db4..6425a5a8dfd 100644 --- a/api/leveldb/leveldb_wt.cc +++ b/api/leveldb/leveldb_wt.cc @@ -755,14 +755,9 @@ IteratorImpl::Next() int ret; WT_ITEM item; - if (!Status().ok()) + if (!Status().ok() || !valid_) return; - if (!valid_) { - SetError(EINVAL); - return; - } - ret = cursor_->next(cursor_); if (ret != 0) { if (ret != WT_NOTFOUND) @@ -791,14 +786,9 @@ IteratorImpl::Prev() { WT_ITEM item; - if (!Status().ok()) + if (!Status().ok() || !valid_) return; - if (!valid_) { - SetError(EINVAL); - return; - } - int ret = cursor_->prev(cursor_); if (ret != 0) { if (ret != WT_NOTFOUND) diff --git a/api/leveldb/leveldb_wt.h b/api/leveldb/leveldb_wt.h index 301fa250e85..683482ad23c 100644 --- a/api/leveldb/leveldb_wt.h +++ b/api/leveldb/leveldb_wt.h @@ -27,7 +27,7 @@ #ifndef _INCLUDE_LEVELDB_WT_H #define _INCLUDE_LEVELDB_WT_H 1 -#include "wiredtiger_config.h" +#include "leveldb_wt_config.h" #include "leveldb/cache.h" #include "leveldb/comparator.h" @@ -171,6 +171,7 @@ private: class CacheImpl : public Cache { public: CacheImpl(size_t capacity) : Cache(), capacity_(capacity) {} + virtual ~CacheImpl() {} virtual Handle* Insert(const Slice&, void*, size_t, void (*)(const Slice&, void*)) { return 0; } diff --git a/build_posix/configure.ac.in 
b/build_posix/configure.ac.in index dd5bce738ea..268ed586232 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -165,7 +165,7 @@ AC_CONFIG_HEADERS([wiredtiger_config.h:build_posix/config.hin]) # The LevelDB API needs some configuration knowledge AM_COND_IF([LEVELDB], - AC_CONFIG_HEADERS([api/leveldb/wiredtiger_config.h:build_posix/config.hin])) + AC_CONFIG_HEADERS([api/leveldb/leveldb_wt_config.h:api/leveldb/config.hin])) # BEGIN check existence -- maintained by reconf and Make.subdirs # END check existence From be4dce95a559e66578795b41e914fd6660a3a72f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 11:12:22 -0400 Subject: [PATCH 032/132] Create __wt_config_merge for the merge functionality, called only from reconfigure at the moment. Clean up some comments, explain how config-collapse and config-merge differ. --- src/config/config_collapse.c | 216 ++++++++++++++++++++++++----------- src/conn/conn_api.c | 4 +- src/include/extern.h | 8 +- src/meta/meta_ckpt.c | 2 +- src/meta/meta_turtle.c | 2 +- src/schema/schema_create.c | 8 +- src/session/session_api.c | 2 +- 7 files changed, 163 insertions(+), 79 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 8b56e57bc74..2d708521d20 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -8,18 +8,81 @@ #include "wt_internal.h" /* - * We need a character that can't appear in a key as a separator. + * __wt_config_collapse -- + * Collapse a set of configuration strings into newly allocated memory. * - * XXX - * I'm not using '.' although that seems like the natural one to use because - * default checkpoints are named "WiredTiger.#" where dot is part of the key. - * I think it's wrong, we should not have used a dot in that name, but that's - * a format change. 
+ * This function takes a NULL-terminated list of configuration strings (where + * the first one contains all the defaults and the values are in order from + * least to most preferred, that is, the default values are least preferred), + * and collapses them into newly allocated memory. The algorithm is to walk + * the first of the configuration strings, and for each entry, search all of + * the configuration strings for a final value, keeping the last value found. + * + * Notes: + * Any key not appearing in the first configuration string is discarded + * from the final result, because we'll never search for it. + * + * Nested structures aren't parsed. For example, imagine a configuration + * string contains "key=(k2=v2,k3=v3)", and a subsequent string has + * "key=(k4=v4)", the result will be "key=(k4=v4)", as we search for and + * use the final value of "key", regardless of field overlap or missing + * fields in the nested value. + */ +int +__wt_config_collapse( + WT_SESSION_IMPL *session, const char **cfg, const char **config_ret) +{ + WT_CONFIG cparser; + WT_CONFIG_ITEM k, v; + WT_DECL_ITEM(tmp); + WT_DECL_RET; + + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + + WT_ERR(__wt_config_init(session, &cparser, cfg[0])); + while ((ret = __wt_config_next(&cparser, &k, &v)) == 0) { + if (k.type != WT_CONFIG_ITEM_STRING && + k.type != WT_CONFIG_ITEM_ID) + WT_ERR_MSG(session, EINVAL, + "Invalid configuration key found: '%s'\n", k.str); + WT_ERR(__wt_config_get(session, cfg, &k, &v)); + /* Include the quotes around string keys/values. 
*/ + if (k.type == WT_CONFIG_ITEM_STRING) { + --k.str; + k.len += 2; + } + if (v.type == WT_CONFIG_ITEM_STRING) { + --v.str; + v.len += 2; + } + WT_ERR(__wt_buf_catfmt(session, tmp, "%.*s=%.*s,", + (int)k.len, k.str, (int)v.len, v.str)); + } + if (ret != WT_NOTFOUND) + goto err; + + /* + * If the caller passes us no valid configuration strings, we get here + * with no bytes to copy -- that's OK, the underlying string copy can + * handle empty strings. + * + * Strip any trailing comma. + */ + if (tmp->size != 0) + --tmp->size; + ret = __wt_strndup(session, tmp->data, tmp->size, config_ret); + +err: __wt_scr_free(&tmp); + return (ret); +} + +/* + * We need a character that can't appear in a key as a separator. */ #undef SEP /* separator key, character */ -#define SEP "," +#define SEP "." #undef SEPC -#define SEPC ',' +#define SEPC '.' /* * Individual configuration entries, including a generation number used to make @@ -28,7 +91,7 @@ typedef struct { char *k, *v; /* key, value */ size_t gen; /* generation */ -} WT_COLLAPSE_ENTRY; +} WT_CONFIG_MERGE_ENTRY; /* * The array of configuration entries. @@ -37,18 +100,16 @@ typedef struct { size_t entries_allocated; /* allocated */ size_t entries_next; /* next slot */ - int nested_replace; /* replace nested values */ - - WT_COLLAPSE_ENTRY *entries; /* array of entries */ -} WT_COLLAPSE; + WT_CONFIG_MERGE_ENTRY *entries; /* array of entries */ +} WT_CONFIG_MERGE; /* - * __collapse_scan -- - * Walk a configuration string, inserting entries into the collapse array. + * __config_merge_scan -- + * Walk a configuration string, inserting entries into the merged array. 
*/ static int -__collapse_scan(WT_SESSION_IMPL *session, - const char *key, const char *value, WT_COLLAPSE *cp) +__config_merge_scan(WT_SESSION_IMPL *session, + const char *key, const char *value, WT_CONFIG_MERGE *cp) { WT_CONFIG cparser; WT_CONFIG_ITEM k, v; @@ -85,28 +146,40 @@ __collapse_scan(WT_SESSION_IMPL *session, WT_ERR(__wt_buf_fmt(session, vb, "%.*s", (int)v.len, v.str)); + /* + * !!! + * WiredTiger names its internal checkpoints with a trailing + * dot and a number, for example, "WiredTigerCheckpoint.37". + * We're using dot to separate names in nested structures, + * and there's an obvious conflict. This works for now because + * that's the only case of a dot in a key name, and we never + * merge configuration strings that contain checkpoint names, + * for historic reasons. For now, return an error if there's + * ever a problem. (Note, it's probably safe if the dot is in + * a quoted key, that is, a key of type WT_CONFIG_ITEM_STRING, + * but since this isn't ever supposed to happen, I'm leaving + * the test simple.) + */ + if (strchr(kb->data, SEPC) != NULL) + WT_RET_MSG(session, EINVAL, + "key %s contains a separator character (%s)", + kb->data, SEP); + /* * If the value is a structure, recursively parse it. * - * XXX - * Problem #1: we store "checkpoint_lsn=(1,0)" in the metadata - * file, where the key is type WT_CONFIG_ITEM_ID, the value is - * type WT_CONFIG_ITEM_STRUCT. Other nested structures have - * field names, should this have been "(file=1,offset=0)"? - * - * Problem #2: the configuration collapse functions are used by - * checkpoint to replace the previous entry in its entirety, - * that is, the work we're doing to integrate nested changes - * into previous values breaks it. - * - * We're currently turning off merging nested structures in most - * places (including the checkpoint code). + * !!! + * Don't merge unless the structure has field names. 
WiredTiger + * stores checkpoint LSNs in the metadata file using nested + * structures without field names: "checkpoint_lsn=(1,0)", not + * "checkpoint_lsn=(file=1,offset=0)". The value type is still + * WT_CONFIG_ITEM_STRUCT, so we check for a field name in the + * value. */ - if (!cp->nested_replace && - v.type == WT_CONFIG_ITEM_STRUCT && + if (v.type == WT_CONFIG_ITEM_STRUCT && strchr(vb->data, '=') != NULL) { - WT_ERR( - __collapse_scan(session, kb->data, vb->data, cp)); + WT_ERR(__config_merge_scan( + session, kb->data, vb->data, cp)); continue; } @@ -139,14 +212,14 @@ __strip_comma(WT_ITEM *buf) } /* - * __collapse_format_next -- + * __config_merge_format_next -- * Walk the array, building entries. */ static int -__collapse_format_next(WT_SESSION_IMPL *session, const char *prefix, - size_t plen, size_t *enp, WT_COLLAPSE *cp, WT_ITEM *build) +__config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix, + size_t plen, size_t *enp, WT_CONFIG_MERGE *cp, WT_ITEM *build) { - WT_COLLAPSE_ENTRY *ep; + WT_CONFIG_MERGE_ENTRY *ep; size_t len1, len2, next; char *p; @@ -194,7 +267,7 @@ __collapse_format_next(WT_SESSION_IMPL *session, const char *prefix, next = WT_PTRDIFF(p, ep->k); WT_RET(__wt_buf_catfmt(session, build, "%.*s=(", (int)(next - plen), ep->k + plen)); - WT_RET(__collapse_format_next( + WT_RET(__config_merge_format_next( session, ep->k, next + 1, enp, cp, build)); __strip_comma(build); WT_RET(__wt_buf_catfmt(session, build, "),")); @@ -210,12 +283,12 @@ __collapse_format_next(WT_SESSION_IMPL *session, const char *prefix, } /* - * __collapse_format -- + * __config_merge_format -- * Take the sorted array of entries, and format them into allocated memory. 
*/ static int -__collapse_format( - WT_SESSION_IMPL *session, WT_COLLAPSE *cp, const char **config_ret) +__config_merge_format( + WT_SESSION_IMPL *session, WT_CONFIG_MERGE *cp, const char **config_ret) { WT_DECL_ITEM(build); WT_DECL_RET; @@ -224,7 +297,7 @@ __collapse_format( WT_RET(__wt_scr_alloc(session, 4 * 1024, &build)); entries = 0; - WT_ERR(__collapse_format_next(session, "", 0, &entries, cp, build)); + WT_ERR(__config_merge_format_next(session, "", 0, &entries, cp, build)); __strip_comma(build); @@ -235,17 +308,17 @@ err: __wt_scr_free(&build); } /* - * __collapse_cmp -- - * Qsort function: sort the collapse array. + * __config_merge_cmp -- + * Qsort function: sort the config merge array. */ static int -__collapse_cmp(const void *a, const void *b) +__config_merge_cmp(const void *a, const void *b) { - WT_COLLAPSE_ENTRY *ae, *be; + WT_CONFIG_MERGE_ENTRY *ae, *be; int cmp; - ae = (WT_COLLAPSE_ENTRY *)a; - be = (WT_COLLAPSE_ENTRY *)b; + ae = (WT_CONFIG_MERGE_ENTRY *)a; + be = (WT_CONFIG_MERGE_ENTRY *)b; if ((cmp = strcmp(ae->k, be->k)) != 0) return (cmp); @@ -253,44 +326,53 @@ __collapse_cmp(const void *a, const void *b) } /* - * __wt_config_collapse -- - * Given a NULL-terminated list of configuration strings, in reverse order - * of preference (the first set of strings are the least preferred), collapse - * them into allocated memory. + * __wt_config_merge -- + * Merge a set of configuration strings into newly allocated memory. + * + * This function takes a NULL-terminated list of configuration strings (where + * the values are in order from least to most preferred), and merges them into + * newly allocated memory. The algorithm is to walk the configuration strings + * and build a table of each key/value pair. The pairs are sorted based on the + * name and the configuration string in which they were found, and a final + * configuration string is built from the result. + * + * Note: + * Nested structures are parsed and merge. 
For example, if configuration + * strings "key=(k1=v1,k2=v2)" and "key=(k1=v2)" appear, the result will + * be "key=(k1=v2,k2=v2)" because the nested values are merged. */ int -__wt_config_collapse(WT_SESSION_IMPL *session, - const char **cfg, const char **config_ret, int nested_replace) +__wt_config_merge( + WT_SESSION_IMPL *session, const char **cfg, const char **config_ret) { - WT_COLLAPSE collapse; + WT_CONFIG_MERGE merge; WT_DECL_RET; size_t i; /* Start out with a reasonable number of entries. */ - WT_CLEAR(collapse); - collapse.nested_replace = nested_replace; + WT_CLEAR(merge); WT_RET(__wt_realloc_def( - session, &collapse.entries_allocated, 100, &collapse.entries)); + session, &merge.entries_allocated, 100, &merge.entries)); /* Scan the configuration strings, entering them into the array. */ for (; *cfg != NULL; ++cfg) - WT_ERR(__collapse_scan(session, NULL, *cfg, &collapse)); + WT_ERR(__config_merge_scan(session, NULL, *cfg, &merge)); /* * Sort the array by key and, in the case of identical keys, by * generation. */ - qsort(collapse.entries, - collapse.entries_next, sizeof(WT_COLLAPSE_ENTRY), __collapse_cmp); + qsort(merge.entries, merge.entries_next, + sizeof(WT_CONFIG_MERGE_ENTRY), __config_merge_cmp); /* Convert the array of entries into a string. 
*/ - ret = __collapse_format(session, &collapse, config_ret); + ret = __config_merge_format(session, &merge, config_ret); -err: for (i = 0; i < collapse.entries_next; ++i) { - __wt_free(session, collapse.entries[i].k); - __wt_free(session, collapse.entries[i].v); +err: for (i = 0; i < merge.entries_next; ++i) { + __wt_free(session, merge.entries[i].k); + __wt_free(session, merge.entries[i].v); } - __wt_free(session, collapse.entries); + __wt_free(session, merge.entries); return (ret); } diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 41bf351f2a5..955933f81cc 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -668,7 +668,7 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_ERR(__wt_cond_signal( session, __wt_process.cache_pool->cache_pool_cond)); - WT_ERR(__wt_config_collapse(session, config_cfg, &p, 0)); + WT_ERR(__wt_config_merge(session, config_cfg, &p)); __wt_free(session, conn->cfg); conn->cfg = p; @@ -1410,7 +1410,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_connection_workers(session, cfg)); /* Take a copy of the final configuration for later reconfiguration. 
*/ - WT_ERR(__wt_config_collapse(session, cfg, &conn->cfg, 0)); + WT_ERR(__wt_config_collapse(session, cfg, &conn->cfg)); STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; diff --git a/src/include/extern.h b/src/include/extern.h index a462815a93d..43821086bf5 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -572,10 +572,12 @@ extern int __wt_config_check(WT_SESSION_IMPL *session, const WT_CONFIG_ENTRY *entry, const char *config, size_t config_len); -extern int __wt_config_collapse(WT_SESSION_IMPL *session, +extern int __wt_config_collapse( WT_SESSION_IMPL *session, const char **cfg, - const char **config_ret, - int nested_replace); + const char **config_ret); +extern int __wt_config_merge( WT_SESSION_IMPL *session, + const char **cfg, + const char **config_ret); extern int __wt_config_concat( WT_SESSION_IMPL *session, const char **cfg, const char **config_ret); diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index ec42a8032ae..998ae7e0d02 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -117,7 +117,7 @@ __ckpt_set(WT_SESSION_IMPL *session, const char *fname, const char *v) cfg[0] = config; cfg[1] = v == NULL ? 
"checkpoint=()" : v; cfg[2] = NULL; - WT_ERR(__wt_config_collapse(session, cfg, &newcfg, 1)); + WT_ERR(__wt_config_collapse(session, cfg, &newcfg)); WT_ERR(__wt_metadata_update(session, fname, newcfg)); err: __wt_free(session, config); diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index b0914165837..2e3eca10833 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -29,7 +29,7 @@ __metadata_config(WT_SESSION_IMPL *session, const char **metaconfp) "key_format=S,value_format=S,id=0,version=(major=%d,minor=%d)", WT_BTREE_MAJOR_VERSION_MAX, WT_BTREE_MINOR_VERSION_MAX)); cfg[1] = buf->data; - WT_ERR(__wt_config_collapse(session, cfg, &metaconf, 1)); + WT_ERR(__wt_config_collapse(session, cfg, &metaconf)); *metaconfp = metaconf; diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 50005753559..398fea4476f 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -102,7 +102,7 @@ __create_file(WT_SESSION_IMPL *session, for (p = filecfg; *p != NULL; ++p) ; *p = val->data; - WT_ERR(__wt_config_collapse(session, filecfg, &fileconf, 1)); + WT_ERR(__wt_config_collapse(session, filecfg, &fileconf)); WT_ERR(__wt_metadata_insert(session, uri, fileconf)); } @@ -248,7 +248,7 @@ __create_colgroup(WT_SESSION_IMPL *session, WT_ERR(__wt_schema_create(session, source, sourceconf)); - WT_ERR(__wt_config_collapse(session, cfg, &cgconf, 1)); + WT_ERR(__wt_config_collapse(session, cfg, &cgconf)); if ((ret = __wt_metadata_insert(session, name, cgconf)) != 0) { /* * If the entry already exists in the metadata, we're done. @@ -416,7 +416,7 @@ __create_index(WT_SESSION_IMPL *session, cfg[1] = sourceconf; cfg[2] = confbuf.data; - WT_ERR(__wt_config_collapse(session, cfg, &idxconf, 1)); + WT_ERR(__wt_config_collapse(session, cfg, &idxconf)); if ((ret = __wt_metadata_insert(session, name, idxconf)) != 0) { /* * If the entry already exists in the metadata, we're done. 
@@ -480,7 +480,7 @@ __create_table(WT_SESSION_IMPL *session, ; WT_RET_NOTFOUND_OK(ret); - WT_RET(__wt_config_collapse(session, cfg, &tableconf, 1)); + WT_RET(__wt_config_collapse(session, cfg, &tableconf)); if ((ret = __wt_metadata_insert(session, name, tableconf)) != 0) { /* * If the entry already exists in the metadata, we're done. diff --git a/src/session/session_api.c b/src/session/session_api.c index 36f7afc075e..e63e2c0284a 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -300,7 +300,7 @@ __wt_session_create_strip(WT_SESSION *wt_session, const char *cfg[] = { WT_CONFIG_BASE(session, session_create), v1, v2, NULL }; - return (__wt_config_collapse(session, cfg, value_ret, 1)); + return (__wt_config_collapse(session, cfg, value_ret)); } /* From 77769ec0bb3e5d3323ffa1d6e0ad77963eaa731c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 15:33:34 -0400 Subject: [PATCH 033/132] Messed up the test for a separator character (dot) in the key, the check has to appear before we build the key we're using for the merge. --- src/config/config_collapse.c | 29 ++++++++++++++++------------- src/conn/conn_api.c | 4 ++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 2d708521d20..e56c6a62800 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -116,6 +116,8 @@ __config_merge_scan(WT_SESSION_IMPL *session, WT_DECL_ITEM(kb); WT_DECL_ITEM(vb); WT_DECL_RET; + size_t len; + char *str; WT_ERR(__wt_scr_alloc(session, 0, &kb)); WT_ERR(__wt_scr_alloc(session, 0, &vb)); @@ -137,15 +139,6 @@ __config_merge_scan(WT_SESSION_IMPL *session, v.len += 2; } - /* Build the key/value strings. */ - WT_ERR(__wt_buf_fmt(session, - kb, "%s%s%.*s", - key == NULL ? "" : key, - key == NULL ? "" : SEP, - (int)k.len, k.str)); - WT_ERR(__wt_buf_fmt(session, - vb, "%.*s", (int)v.len, v.str)); - /* * !!! 
* WiredTiger names its internal checkpoints with a trailing @@ -160,10 +153,20 @@ __config_merge_scan(WT_SESSION_IMPL *session, * but since this isn't ever supposed to happen, I'm leaving * the test simple.) */ - if (strchr(kb->data, SEPC) != NULL) - WT_RET_MSG(session, EINVAL, - "key %s contains a separator character (%s)", - kb->data, SEP); + for (str = k.str, len = k.len; len > 0; ++str, --len) + if (*str == SEPC) + WT_RET_MSG(session, EINVAL, + "key %s contains a separator character " + "(%s)", kb->data, SEP); + + /* Build the key/value strings. */ + WT_ERR(__wt_buf_fmt(session, + kb, "%s%s%.*s", + key == NULL ? "" : key, + key == NULL ? "" : SEP, + (int)k.len, k.str)); + WT_ERR(__wt_buf_fmt(session, + vb, "%.*s", (int)v.len, v.str)); /* * If the value is a structure, recursively parse it. diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 955933f81cc..6ec87d2293a 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -641,8 +641,8 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) * The configuration argument has been checked for validity, replace the * previous connection configuration. * - * DO NOT collapse the configuration before the reconfigure calls. Some - * of the underlying reconfiguration functions do explicit checks for + * DO NOT merge the configuration before the reconfigure calls. Some + * of the underlying reconfiguration functions do explicit checks with * the second element of the configuration array, knowing the defaults * are in slot #1 and the application's modifications are in slot #2. */ From 929b579f4267c54587955457770b6b584c78a140 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 15:45:41 -0400 Subject: [PATCH 034/132] Statistics logging requires a wait time, reference #1172. 
--- examples/c/ex_all.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 8f9fba093de..8928736570e 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -1055,7 +1055,7 @@ main(void) /*! [Statistics logging with a table] */ ret = wiredtiger_open(home, NULL, - "create," + "create,wait=5" "statistics_log=(sources=(\"table:table1\",\"table:table2\"))", &conn); /*! [Statistics logging with a table] */ @@ -1064,7 +1064,7 @@ main(void) /*! [Statistics logging with all tables] */ ret = wiredtiger_open(home, NULL, - "create,statistics_log=(sources=(\"table:\"))", + "create,wait=5,statistics_log=(sources=(\"table:\"))", &conn); /*! [Statistics logging with all tables] */ if (ret == 0) From d08de00193efb8ba4a150a4c90bd91d3f61a43f4 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Mon, 8 Sep 2014 16:24:27 -0400 Subject: [PATCH 035/132] Add force flag to checkpoint last chunk. #1200 --- src/include/extern.h | 3 ++- src/include/lsm.h | 2 ++ src/lsm/lsm_manager.c | 2 +- src/lsm/lsm_tree.c | 2 +- src/lsm/lsm_work_unit.c | 9 +++++---- src/lsm/lsm_worker.c | 7 +++++-- 6 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index a213a0e4bfa..76b9f350566 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1010,8 +1010,9 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, int *), const char *cfg[], uint32_t open_flags); -extern int __wt_lsm_get_chunk_to_flush( WT_SESSION_IMPL *session, +extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, + int force, WT_LSM_CHUNK **chunkp); extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, diff --git a/src/include/lsm.h b/src/include/lsm.h index e0197e01af6..b4581b2b094 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -88,6 +88,8 @@ struct __wt_lsm_chunk { #define WT_LSM_WORK_FLUSH 0x04 /* Flush a chunk to disk */ #define 
WT_LSM_WORK_MERGE 0x08 /* Look for a tree merge */ #define WT_LSM_WORK_SWITCH 0x10 /* Switch to a new in memory chunk */ +#define WT_LSM_WORK_FORCE 0x10000 /* Force last chunk flush */ +#define WT_LSM_WORK_MASK 0xffff /* Mask for work types */ /* * WT_LSM_WORK_UNIT -- diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 34cd13554d1..6f532111905 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -523,7 +523,7 @@ __wt_lsm_manager_push_entry( (void)WT_ATOMIC_ADD(lsm_tree->queue_ref, 1); WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created); - switch (type) { + switch (type & WT_LSM_WORK_MASK) { case WT_LSM_WORK_SWITCH: __wt_spin_lock(session, &manager->switch_lock); TAILQ_INSERT_TAIL(&manager->switchqh, entry, q); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 30f97821a8c..f8a7083efac 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1058,7 +1058,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* Make sure the in-memory chunk gets flushed but not switched. */ WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_FLUSH, lsm_tree)); + session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); /* Wait for the work unit queues to drain. */ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 5c96c82f84c..e0b4a6a808b 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -66,10 +66,10 @@ err: WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); * Find and pin a chunk in the LSM tree that is likely to need flushing. 
*/ int -__wt_lsm_get_chunk_to_flush( - WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK **chunkp) +__wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, + WT_LSM_TREE *lsm_tree, int force, WT_LSM_CHUNK **chunkp) { - u_int i; + u_int i, end; *chunkp = NULL; @@ -78,7 +78,8 @@ __wt_lsm_get_chunk_to_flush( if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) return (__wt_lsm_tree_unlock(session, lsm_tree)); - for (i = 0; i < lsm_tree->nchunks - 1; i++) { + end = force ? lsm_tree->nchunks : lsm_tree->nchunks - 1; + for (i = 0; i < end; i++) { if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK)) { (void)WT_ATOMIC_ADD(lsm_tree->chunk[i]->refcnt, 1); *chunkp = lsm_tree->chunk[i]; diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 257fda03f3b..4aab508896c 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -32,6 +32,7 @@ __lsm_worker_general_op( WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; + int force; *completed = 0; if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) && @@ -43,9 +44,11 @@ __lsm_worker_general_op( cookie->flags, &entry)) != 0 || entry == NULL) return (ret); - if (entry->flags == WT_LSM_WORK_FLUSH) { + if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) { + force = F_ISSET(entry, WT_LSM_WORK_FORCE); + F_CLR(entry, WT_LSM_WORK_FORCE); WT_ERR(__wt_lsm_get_chunk_to_flush( - session, entry->lsm_tree, &chunk)); + session, entry->lsm_tree, force, &chunk)); if (chunk != NULL) { ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); From bcd1fe29870f3d9200e9426320eb47d5b03bc682 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 17:43:31 -0400 Subject: [PATCH 036/132] config_collapse.c:156:12: error: assignment discards 'const' qualifier from pointer target type [-Werror] config_collapse.c:158:5: error: format '%s' expects argument of type 'char *', but argument 4 has type 'const void *' [-Werror=format=] --- src/config/config_collapse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/src/config/config_collapse.c b/src/config/config_collapse.c index e56c6a62800..6773b06dd09 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -117,7 +117,7 @@ __config_merge_scan(WT_SESSION_IMPL *session, WT_DECL_ITEM(vb); WT_DECL_RET; size_t len; - char *str; + const char *str; WT_ERR(__wt_scr_alloc(session, 0, &kb)); WT_ERR(__wt_scr_alloc(session, 0, &vb)); @@ -157,7 +157,7 @@ __config_merge_scan(WT_SESSION_IMPL *session, if (*str == SEPC) WT_RET_MSG(session, EINVAL, "key %s contains a separator character " - "(%s)", kb->data, SEP); + "(%s)", (char *)kb->data, SEP); /* Build the key/value strings. */ WT_ERR(__wt_buf_fmt(session, From 04204363208e7ff13d0483e44d212990ca26148d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 18:27:04 -0400 Subject: [PATCH 037/132] typo --- examples/c/ex_all.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 8928736570e..6ddd76bca46 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -1055,7 +1055,7 @@ main(void) /*! [Statistics logging with a table] */ ret = wiredtiger_open(home, NULL, - "create,wait=5" + "create,wait=5," "statistics_log=(sources=(\"table:table1\",\"table:table2\"))", &conn); /*! [Statistics logging with a table] */ From 19a4da2b049010deff2d6423f31809c2787a5bd8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 18:33:37 -0400 Subject: [PATCH 038/132] Statistics logging of sources only currently supports lsm: and file: URIs. --- examples/c/ex_all.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c index 6ddd76bca46..ea5d26ce133 100644 --- a/examples/c/ex_all.c +++ b/examples/c/ex_all.c @@ -1055,8 +1055,8 @@ main(void) /*! 
[Statistics logging with a table] */ ret = wiredtiger_open(home, NULL, - "create,wait=5," - "statistics_log=(sources=(\"table:table1\",\"table:table2\"))", + "create, statistics_log=(" + "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)", &conn); /*! [Statistics logging with a table] */ if (ret == 0) @@ -1064,7 +1064,7 @@ main(void) /*! [Statistics logging with all tables] */ ret = wiredtiger_open(home, NULL, - "create,wait=5,statistics_log=(sources=(\"table:\"))", + "create, statistics_log=(sources=(\"lsm:\"), wait=5)", &conn); /*! [Statistics logging with all tables] */ if (ret == 0) From 852ed7b98520cf06bf4866863da4257124ed8046 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 8 Sep 2014 18:35:58 -0400 Subject: [PATCH 039/132] The wiredtiger_open final config array contains multiple entries (the defaults, the config file, the environment variable and the application-supplied config), merge them instead of collapse so we don't lose fields in nested structures. --- src/conn/conn_api.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 6ec87d2293a..0b210627df5 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1409,8 +1409,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, */ WT_ERR(__wt_connection_workers(session, cfg)); - /* Take a copy of the final configuration for later reconfiguration. */ - WT_ERR(__wt_config_collapse(session, cfg, &conn->cfg)); + /* Merge the final configuration for later reconfiguration. */ + WT_ERR(__wt_config_merge(session, cfg, &conn->cfg)); STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; From 9f396e9572ce0356505eee6755c11b62f8006a13 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Tue, 9 Sep 2014 17:24:20 +1000 Subject: [PATCH 040/132] Fix a bug in the shared cache implementation. We could decrement an unsigned number into negative. 
Also add a test case and some better documentation to the source code. --- src/conn/conn_cache_pool.c | 39 ++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index ab7e2cc48ee..9b540c147d4 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -488,26 +488,46 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0) continue; /* - * TODO: Use __wt_cache_bytes_inuse instead of eviction_target - * which doesn't do the right thing at the moment. + * If the entry is currently allocated less than the reserved + * size, increase it's allocation. This should only happen if: + * - It's the first time we've seen this member + * - The reserved size has been adjusted */ if (entry->cache_size < reserved) { grew = 1; adjusted = reserved - entry->cache_size; + /* + * Conditions for reducing the amount of resources for an + * entry: + * - If we are forcing and this entry has more than the + * minimum amount of space in use. + * - If the read pressure in this entry is below the + * threshold, other entries need more cache, the entry has + * more than the minimum space and there is no available + * space in the pool. + */ } else if ((force && entry->cache_size > reserved) || (read_pressure < WT_CACHE_POOL_REDUCE_THRESHOLD && highest > 1 && entry->cache_size > reserved && cp->currently_used >= cp->size)) { - /* - * If a connection isn't actively using it's assigned - * cache and is assigned a reasonable amount - reduce - * it. - */ grew = 0; - if (entry->cache_size - cp->chunk > reserved) + /* + * Shrink by a chunk size if that doesn't drop us + * below the reserved size. 
+ */ + if (entry->cache_size > cp->chunk + reserved) adjusted = cp->chunk; else adjusted = entry->cache_size - reserved; + /* + * Conditions for increasing the amount of resources for an + * entry: + * - There was some activity across the pool + * - This entry is using less than the entire cache pool + * - The connection is using enough cache to require eviction + * - There is space available in the pool + * - Additional cache would benefit the connection + */ } else if (highest > 1 && entry->cache_size < cp->size && cache->bytes_inmem >= @@ -527,6 +547,9 @@ __cache_pool_adjust(WT_SESSION_IMPL *session, } else { cache->cp_skip_count = WT_CACHE_POOL_REDUCE_SKIPS; + WT_ASSERT(session, + entry->cache_size >= adjusted && + cp->currently_used >= adjusted); entry->cache_size -= adjusted; cp->currently_used -= adjusted; } From 127d0f0bb229516648b6ef60795765d2723e80d8 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 9 Sep 2014 17:49:21 +1000 Subject: [PATCH 041/132] The WT_NOTFOUND return should work the same as other non-zero returns and leave cursors pointing to the original key/value pair. refs #1209 --- src/cursor/cur_file.c | 21 +++++++-------------- src/include/misc.h | 6 ++++++ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index e5a1d8a68b6..08129e668f5 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -10,29 +10,22 @@ /* * WT_BTREE_CURSOR_SAVE_AND_RESTORE * Save the cursor's key/value data/size fields, call an underlying btree - * function, and then consistently handle failure and success. + * function, and then consistently handle failure and success. 
*/ #define WT_BTREE_CURSOR_SAVE_AND_RESTORE(cursor, f, ret) do { \ - const void *__key_data = (cursor)->key.data; \ - const void *__value_data = (cursor)->value.data; \ + WT_ITEM __key_copy = (cursor)->key; \ uint64_t __recno = (cursor)->recno; \ - size_t __key_size = (cursor)->key.size; \ - size_t __value_size = (cursor)->value.size; \ + WT_ITEM __value_copy = (cursor)->value; \ if (((ret) = (f)) == 0) { \ F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \ F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ - } else if ((ret) == WT_NOTFOUND) \ - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); \ - else { \ + } else { \ if (F_ISSET(cursor, WT_CURSTD_KEY_EXT)) { \ (cursor)->recno = __recno; \ - (cursor)->key.data = __key_data; \ - (cursor)->key.size = __key_size; \ - } \ - if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) { \ - (cursor)->value.data = __value_data; \ - (cursor)->value.size = __value_size; \ + WT_ITEM_SET((cursor)->key, __key_copy); \ } \ + if (F_ISSET(cursor, WT_CURSTD_VALUE_EXT)) \ + WT_ITEM_SET((cursor)->value, __value_copy); \ F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); \ } \ } while (0) diff --git a/src/include/misc.h b/src/include/misc.h index e50038b2c66..d28de81a6aa 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -186,6 +186,12 @@ ((i)->mem != NULL && (i)->data >= (i)->mem && \ WT_PTRDIFF((i)->data, (i)->mem) < (i)->memsize) +/* Copy the data and size fields of an item. */ +#define WT_ITEM_SET(dst, src) do { \ + (dst).data = (src).data; \ + (dst).size = (src).size; \ +} while (0) + /* * In diagnostic mode we track the locations from which hazard pointers and * scratch buffers were acquired. 
From c3d9cd2a77b886aeaad8bdaeb60599784cba6423 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 9 Sep 2014 17:49:21 +1000 Subject: [PATCH 042/132] api/leveldb/leveldb_wt_config.in: copyright information is incorrect --- dist/s_copyright.list | 1 + 1 file changed, 1 insertion(+) diff --git a/dist/s_copyright.list b/dist/s_copyright.list index ca2ba425ad5..d66be5a1ba7 100644 --- a/dist/s_copyright.list +++ b/dist/s_copyright.list @@ -1,3 +1,4 @@ +skip api/leveldb/leveldb_wt_config.in skip dist/api_config.py skip dist/api_data.py skip dist/api_err.py From 0a3061750aee368a5aa57e4bd6a367536c466122 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 9 Sep 2014 17:57:13 +1000 Subject: [PATCH 043/132] Have test/format always reset cursors at the end of each operation. --- test/format/ops.c | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/test/format/ops.c b/test/format/ops.c index 8c5a75e57a3..3ceaaf5e33f 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -220,21 +220,13 @@ ops(void *arg) /* * We can't checkpoint or swap sessions/cursors while in a * transaction, resolve any running transaction. - * - * Reset the cursor regardless: we may block waiting for a lock - * and there is no reason to keep pages pinned. */ - if (cnt == ckpt_op || cnt == session_op) { - if (intxn) { - if ((ret = session->commit_transaction( - session, NULL)) != 0) - die(ret, "session.commit_transaction"); - ++tinfo->commit; - intxn = 0; - } - if (cursor != NULL && - (ret = cursor->reset(cursor)) != 0) - die(ret, "cursor.reset"); + if (intxn && (cnt == ckpt_op || cnt == session_op)) { + if ((ret = session->commit_transaction( + session, NULL)) != 0) + die(ret, "session.commit_transaction"); + ++tinfo->commit; + intxn = 0; } /* Open up a new session and cursors. */ @@ -372,13 +364,6 @@ ops(void *arg) if (g.append_cnt >= g.append_max) goto skip_insert; - /* - * Reset the standard cursor so it doesn't keep - * pages pinned. 
- */ - if ((ret = cursor->reset(cursor)) != 0) - die(ret, "cursor.reset"); - /* Insert, then reset the insert cursor. */ if (col_insert( cursor_insert, &key, &value, &keyno)) @@ -430,6 +415,10 @@ skip_insert: if (col_update(cursor, &key, &value, keyno)) ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; + + /* Reset the cursor: there is no reason to keep pages pinned. */ + if (cursor != NULL && (ret = cursor->reset(cursor)) != 0) + die(ret, "cursor.reset"); /* * If we're in the transaction, commit 40% of the time and From 5cd6a2685fcdce853ed29f8e7b75e2c9e83bbbbe Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 9 Sep 2014 18:09:34 +1000 Subject: [PATCH 044/132] It turns out that LSM depended on a `WT_NOTFOUND` return clearing `WT_CURSTD_KEY_SET`. Change LSM for now to keep test/format running, but we should review the way that LSM tracks whether chunk cursors are positioned. --- src/lsm/lsm_cursor.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index bbc5de7f13f..df2f7bba271 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -700,7 +700,7 @@ __clsm_get_current( multiple = 0; WT_FORALL_CURSORS(clsm, c, i) { - if (!F_ISSET(c, WT_CURSTD_KEY_SET)) + if (!F_ISSET(c, WT_CURSTD_KEY_INT)) continue; if (current == NULL) { current = c; @@ -823,7 +823,7 @@ retry: /* if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) { check = 0; WT_FORALL_CURSORS(clsm, c, i) { - if (!F_ISSET(c, WT_CURSTD_KEY_SET)) + if (!F_ISSET(c, WT_CURSTD_KEY_INT)) continue; if (check) { WT_ERR(WT_LSM_CURCMP(session, @@ -906,7 +906,7 @@ retry: /* if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) { check = 0; WT_FORALL_CURSORS(clsm, c, i) { - if (!F_ISSET(c, WT_CURSTD_KEY_SET)) + if (!F_ISSET(c, WT_CURSTD_KEY_INT)) continue; if (check) { WT_ERR(WT_LSM_CURCMP(session, @@ -958,7 +958,7 @@ __clsm_reset_cursors(WT_CURSOR_LSM *clsm, WT_CURSOR *skip) WT_FORALL_CURSORS(clsm, c, i) { if (c == skip) continue; - if (F_ISSET(c, 
WT_CURSTD_KEY_SET)) + if (F_ISSET(c, WT_CURSTD_KEY_INT)) WT_TRET(c->reset(c)); } From 10a92203f9708f35ebcfbed67ecfc34544c6c75b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 9 Sep 2014 18:14:06 +1000 Subject: [PATCH 045/132] whitespace --- test/format/ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/format/ops.c b/test/format/ops.c index 3ceaaf5e33f..e38b75f0deb 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -415,7 +415,7 @@ skip_insert: if (col_update(cursor, &key, &value, keyno)) ++tinfo->search; if (read_row(cursor, &key, keyno)) goto deadlock; - + /* Reset the cursor: there is no reason to keep pages pinned. */ if (cursor != NULL && (ret = cursor->reset(cursor)) != 0) die(ret, "cursor.reset"); From 08c75ef4d13f3a6f5c6d3f3c4dd345d301af4f34 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 9 Sep 2014 08:44:10 -0400 Subject: [PATCH 046/132] Fix return after branch to error. --- src/config/config_collapse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 6773b06dd09..98b32a109bc 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -155,7 +155,7 @@ __config_merge_scan(WT_SESSION_IMPL *session, */ for (str = k.str, len = k.len; len > 0; ++str, --len) if (*str == SEPC) - WT_RET_MSG(session, EINVAL, + WT_ERR_MSG(session, EINVAL, "key %s contains a separator character " "(%s)", (char *)kb->data, SEP); From 7ef2e3adfb6da3c7da6c51e36b4775fad72443d5 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 9 Sep 2014 09:06:39 -0400 Subject: [PATCH 047/132] Disallow JSON quoting characters in checkpoint names, it's not a good idea. 
--- src/include/extern.h | 5 ++- src/schema/schema_util.c | 26 +++++++++++++-- src/session/session_api.c | 12 +++---- src/txn/txn_ckpt.c | 59 ++++++++++++++++++--------------- test/suite/test_checkpoint01.py | 10 ++++-- 5 files changed, 73 insertions(+), 39 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 3528ef38283..6be8d20bf2b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1379,7 +1379,10 @@ extern int __wt_schema_range_truncate( WT_SESSION_IMPL *session, WT_CURSOR *stop); extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name); -extern int __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri); +extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str); +extern int __wt_name_check(WT_SESSION_IMPL *session, + const char *str, + size_t len); extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, diff --git a/src/schema/schema_util.c b/src/schema/schema_util.c index 90e5fb42dc1..263f56f1c41 100644 --- a/src/schema/schema_util.c +++ b/src/schema/schema_util.c @@ -23,11 +23,11 @@ __wt_schema_get_source(WT_SESSION_IMPL *session, const char *name) } /* - * __wt_schema_name_check -- + * __wt_str_name_check -- * Disallow any use of the WiredTiger name space. */ int -__wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri) +__wt_str_name_check(WT_SESSION_IMPL *session, const char *str) { const char *name, *sep; int skipped; @@ -37,7 +37,7 @@ __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri) * "bad" if the application truncated the metadata file. Skip any * leading URI prefix, check and then skip over a table name. 
*/ - name = uri; + name = str; for (skipped = 0; skipped < 2; skipped++) { if ((sep = strchr(name, ':')) == NULL) break; @@ -62,3 +62,23 @@ __wt_schema_name_check(WT_SESSION_IMPL *session, const char *uri) return (0); } + +/* + * __wt_name_check -- + * Disallow any use of the WiredTiger name space. + */ +int +__wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) +{ + WT_DECL_RET; + WT_DECL_ITEM(tmp); + + WT_RET(__wt_scr_alloc(session, len, &tmp)); + + WT_ERR(__wt_buf_fmt(session, tmp, "%.*s", (int)len, str)); + + ret = __wt_str_name_check(session, tmp->data); + +err: __wt_scr_free(&tmp); + return (ret); +} diff --git a/src/session/session_api.c b/src/session/session_api.c index e63e2c0284a..922aa3e9c25 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -319,7 +319,7 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config) WT_UNUSED(cfg); /* Disallow objects in the WiredTiger name space. */ - WT_ERR(__wt_schema_name_check(session, uri)); + WT_ERR(__wt_str_name_check(session, uri)); /* * Type configuration only applies to tables, column groups and indexes. @@ -387,8 +387,8 @@ __session_rename(WT_SESSION *wt_session, SESSION_API_CALL(session, rename, config, cfg); /* Disallow objects in the WiredTiger name space. */ - WT_ERR(__wt_schema_name_check(session, uri)); - WT_ERR(__wt_schema_name_check(session, newuri)); + WT_ERR(__wt_str_name_check(session, uri)); + WT_ERR(__wt_str_name_check(session, newuri)); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_rename(session, uri, newuri, cfg)); @@ -408,7 +408,7 @@ __session_compact(WT_SESSION *wt_session, const char *uri, const char *config) session = (WT_SESSION_IMPL *)wt_session; /* Disallow objects in the WiredTiger name space. 
*/ - WT_RET(__wt_schema_name_check(session, uri)); + WT_RET(__wt_str_name_check(session, uri)); if (!WT_PREFIX_MATCH(uri, "colgroup:") && !WT_PREFIX_MATCH(uri, "file:") && @@ -434,7 +434,7 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config) SESSION_API_CALL(session, drop, config, cfg); /* Disallow objects in the WiredTiger name space. */ - WT_ERR(__wt_schema_name_check(session, uri)); + WT_ERR(__wt_str_name_check(session, uri)); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_drop(session, uri, cfg)); @@ -496,7 +496,7 @@ __session_truncate(WT_SESSION *wt_session, if (uri != NULL) { /* Disallow objects in the WiredTiger name space. */ - WT_ERR(__wt_schema_name_check(session, uri)); + WT_ERR(__wt_str_name_check(session, uri)); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_truncate(session, uri, cfg)); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 71f1c8bb2ae..c7ddbcfd604 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -7,6 +7,30 @@ #include "wt_internal.h" +/* + * __checkpoint_name_ok -- + * Complain if the checkpoint name isn't acceptable. + */ +static int +__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) +{ + /* Check for characters we don't want to see in a metadata file. */ + WT_RET(__wt_name_check(session, name, len)); + + /* + * The internal checkpoint name is special, applications aren't allowed + * to use it. Be aggressive and disallow any matching prefix, it makes + * things easier when checking in other places. + */ + if (len < strlen(WT_CHECKPOINT)) + return (0); + if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT)) + return (0); + + WT_RET_MSG(session, EINVAL, + "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT); +} + /* * __checkpoint_name_check -- * Check for an attempt to name a checkpoint that includes anything @@ -75,9 +99,11 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], target_list = 0; - /* Flag if this is a named checkpoint. 
*/ - WT_ERR(__wt_config_gets(session, cfg, "name", &cval)); + /* Flag if this is a named checkpoint, and check if the name is OK. */ + WT_RET(__wt_config_gets(session, cfg, "name", &cval)); named = cval.len != 0; + if (named) + WT_RET(__checkpoint_name_ok(session, cval.str, cval.len)); /* Step through the targets and optionally operate on each one. */ WT_ERR(__wt_config_gets(session, cfg, "target", &cval)); @@ -412,27 +438,6 @@ err: /* return (ret); } -/* - * __ckpt_name_ok -- - * Complain if our reserved checkpoint name is used. - */ -static int -__ckpt_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) -{ - /* - * The internal checkpoint name is special, applications aren't allowed - * to use it. Be aggressive and disallow any matching prefix, it makes - * things easier when checking in other places. - */ - if (len < strlen(WT_CHECKPOINT)) - return (0); - if (!WT_PREFIX_MATCH(name, WT_CHECKPOINT)) - return (0); - - WT_RET_MSG(session, EINVAL, - "the checkpoint name \"%s\" is reserved", WT_CHECKPOINT); -} - /* * __drop -- * Drop all checkpoints with a specific name. @@ -575,7 +580,7 @@ __checkpoint_worker( if (cval.len == 0) name = WT_CHECKPOINT; else { - WT_ERR(__ckpt_name_ok(session, cval.str, cval.len)); + WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc)); name = name_alloc; } @@ -588,12 +593,12 @@ __checkpoint_worker( WT_ERR(__wt_config_subinit(session, &dropconf, &cval)); while ((ret = __wt_config_next(&dropconf, &k, &v)) == 0) { - /* Disallow the reserved checkpoint name. */ + /* Disallow unsafe checkpoint names. 
*/ if (v.len == 0) - WT_ERR(__ckpt_name_ok( + WT_ERR(__checkpoint_name_ok( session, k.str, k.len)); else - WT_ERR(__ckpt_name_ok( + WT_ERR(__checkpoint_name_ok( session, v.str, v.len)); if (v.len == 0) diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py index 153ea015cf5..ab4dbe18bd6 100644 --- a/test/suite/test_checkpoint01.py +++ b/test/suite/test_checkpoint01.py @@ -308,8 +308,8 @@ class test_checkpoint_last(wttest.WiredTigerTestCase): # Check we can't use the reserved name as an application checkpoint name. -class test_checkpoint_last_name(wttest.WiredTigerTestCase): - def test_checkpoint_last_name(self): +class test_checkpoint_illegal_name(wttest.WiredTigerTestCase): + def test_checkpoint_illegal_name(self): simple_populate(self, "file:checkpoint", 'key_format=S', 100) msg = '/the checkpoint name.*is reserved/' for conf in ( @@ -324,6 +324,12 @@ class test_checkpoint_last_name(wttest.WiredTigerTestCase): 'drop=(to=WiredTigerCheckpointX)'): self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: self.session.checkpoint(conf), msg) + msg = '/WiredTiger objects should not include grouping/' + for conf in ( + 'name=check{point', + 'name=check\\point'): + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.checkpoint(conf), msg) # Check we can't name checkpoints that include LSM tables. From 95e84d49bbf4a571e2790836b656ba2a14e995cf Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 9 Sep 2014 09:33:35 -0400 Subject: [PATCH 048/132] Don't check the key for a prefix match if the prefix is longer than the key. 
--- src/config/config_collapse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 98b32a109bc..425ab963e5b 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -228,13 +228,13 @@ __config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix, for (; *enp < cp->entries_next; ++*enp) { ep = &cp->entries[*enp]; + len1 = strlen(ep->k); /* * The entries are in sorted order, take the last entry for any * key. */ if (*enp < (cp->entries_next - 1)) { - len1 = strlen(ep->k); len2 = strlen((ep + 1)->k); /* Choose the last of identical keys. */ @@ -257,7 +257,8 @@ __config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix, * If we're skipping a prefix and this entry doesn't match it, * back off one entry and pop up a level. */ - if (plen != 0 && memcmp(ep->k, prefix, plen) != 0) { + if (plen != 0 && + (plen < len1 || memcmp(ep->k, prefix, plen) != 0)) { --*enp; break; } From bdebc7c4c054b7aef2e09a883b14bcfd3b773066 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 9 Sep 2014 09:50:40 -0400 Subject: [PATCH 049/132] I flipped the comparison, fix it. --- src/config/config_collapse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 425ab963e5b..5621ad492f6 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -258,7 +258,7 @@ __config_merge_format_next(WT_SESSION_IMPL *session, const char *prefix, * back off one entry and pop up a level. 
*/ if (plen != 0 && - (plen < len1 || memcmp(ep->k, prefix, plen) != 0)) { + (plen > len1 || memcmp(ep->k, prefix, plen) != 0)) { --*enp; break; } From 0841c3b7cebc35de0977a87969ebacc742bbfc77 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 9 Sep 2014 12:26:26 -0400 Subject: [PATCH 050/132] Don't check for a checkpoint name match if none was specified (yeah, it works, but it's not a reasonable check). Don't let the application specify an illegal checkpoint name, it's going to fail eventually. Don't roll our own __wt_buf_fmt() call, it's just wasted effort. Don't leak memory if the checkpoint server's name is reconfigured. --- src/conn/conn_ckpt.c | 24 ++++++++++++++++-------- src/include/extern.h | 3 +++ src/txn/txn_ckpt.c | 14 +++++++------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index d1ee647d08a..f948cd64d2e 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -20,6 +20,7 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp) WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(tmp); WT_DECL_RET; + char *p; conn = S2C(session); @@ -40,19 +41,26 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, int *startp) } *startp = 1; + /* + * The application can specify a checkpoint name, which we ignore if + * it's our default. 
+ */ WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval)); + if (cval.len != 0 && + !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) { + WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); - if (!WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) { WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp)); - strcpy((char *)tmp->data, "name="); - strncat((char *)tmp->data, cval.str, cval.len); - ret = __wt_strndup(session, - tmp->data, strlen("name=") + cval.len, &conn->ckpt_config); - __wt_scr_free(&tmp); - WT_RET(ret); + WT_ERR(__wt_buf_fmt( + session, tmp, "name=%.*s", (int)cval.len, cval.str)); + WT_ERR(__wt_strdup(session, tmp->data, &p)); + + __wt_free(session, conn->ckpt_config); + conn->ckpt_config = p; } - return (0); +err: __wt_scr_free(&tmp); + return (ret); } /* diff --git a/src/include/extern.h b/src/include/extern.h index 6be8d20bf2b..ed285db6395 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1611,6 +1611,9 @@ extern int __wt_txn_init(WT_SESSION_IMPL *session); extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern void __wt_txn_global_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, + const char *name, + size_t len); extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index c7ddbcfd604..0bebce927fe 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -8,11 +8,11 @@ #include "wt_internal.h" /* - * __checkpoint_name_ok -- + * __wt_checkpoint_name_ok -- * Complain if the checkpoint name isn't acceptable. 
*/ -static int -__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) +int +__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len) { /* Check for characters we don't want to see in a metadata file. */ WT_RET(__wt_name_check(session, name, len)); @@ -103,7 +103,7 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[], WT_RET(__wt_config_gets(session, cfg, "name", &cval)); named = cval.len != 0; if (named) - WT_RET(__checkpoint_name_ok(session, cval.str, cval.len)); + WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len)); /* Step through the targets and optionally operate on each one. */ WT_ERR(__wt_config_gets(session, cfg, "target", &cval)); @@ -580,7 +580,7 @@ __checkpoint_worker( if (cval.len == 0) name = WT_CHECKPOINT; else { - WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len)); + WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len)); WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc)); name = name_alloc; } @@ -595,10 +595,10 @@ __checkpoint_worker( __wt_config_next(&dropconf, &k, &v)) == 0) { /* Disallow unsafe checkpoint names. */ if (v.len == 0) - WT_ERR(__checkpoint_name_ok( + WT_ERR(__wt_checkpoint_name_ok( session, k.str, k.len)); else - WT_ERR(__checkpoint_name_ok( + WT_ERR(__wt_checkpoint_name_ok( session, v.str, v.len)); if (v.len == 0) From 4d361acba912a98ea7dcfbad4ef1ea5778f3e0c7 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 9 Sep 2014 13:28:00 -0400 Subject: [PATCH 051/132] Add and enhance LSM verbose messages. 
#1200 --- src/lsm/lsm_merge.c | 6 ++++++ src/lsm/lsm_tree.c | 1 + src/lsm/lsm_work_unit.c | 23 +++++++++++++++++++---- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index bf758abd6b1..6b8c4b65dea 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -61,6 +61,7 @@ __wt_lsm_merge( uint32_t aggressive, generation, max_gap, max_gen, max_level, start_id; uint64_t insert_count, record_count, chunk_size; u_int dest_id, end_chunk, i, merge_max, merge_min, nchunks, start_chunk; + u_int verb; int create_bloom, locked, tret; const char *cfg[3]; const char *drop_cfg[] = @@ -253,6 +254,11 @@ __wt_lsm_merge( "Merging chunks %u-%u into %u (%" PRIu64 " records)" ", generation %" PRIu32, start_chunk, end_chunk, dest_id, record_count, generation)); + if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) + for (verb = start_chunk; verb <= end_chunk; verb++) + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "Chunk[%u] id %u", + verb, lsm_tree->chunk[verb]->id)); WT_RET(__wt_calloc_def(session, 1, &chunk)); chunk->id = dest_id; diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index f8a7083efac..51e357fd33d 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1057,6 +1057,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); /* Make sure the in-memory chunk gets flushed but not switched. */ + WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact force flush")); WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index e0b4a6a808b..ac18e133846 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -78,10 +78,22 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) return (__wt_lsm_tree_unlock(session, lsm_tree)); - end = force ? 
lsm_tree->nchunks : lsm_tree->nchunks - 1; + /* + * Normally we don't want to force out the last chunk. But if we're + * doing a forced flush, likely from a compact call, then we do want + * to include the final chunk. + */ + if (force) { + end = lsm_tree->nchunks; + } else + end = lsm_tree->nchunks - 1; for (i = 0; i < end; i++) { if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK)) { (void)WT_ATOMIC_ADD(lsm_tree->chunk[i]->refcnt, 1); + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "Flush%s: return chunk %u of %u: %s", + force ? " w/ force" : "", i, end - 1, + lsm_tree->chunk[i]->uri)); *chunkp = lsm_tree->chunk[i]; break; } @@ -226,7 +238,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, !__wt_txn_visible_all(session, chunk->switch_txn)) return (0); - WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing")); + WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s", + chunk->uri)); /* * Flush the file before checkpointing: this is the expensive part in @@ -249,7 +262,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, } WT_RET(ret); - WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing")); + WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s", + chunk->uri)); WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, chunk->uri, @@ -290,7 +304,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* Make sure we aren't pinning a transaction ID. */ __wt_txn_release_snapshot(session); - WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed")); + WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointed %s", + chunk->uri)); /* * Schedule a bloom filter create for our newly flushed chunk */ if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF)) From e9df44477ce16f80a19a96b30b6ba34bafdbc798 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 9 Sep 2014 13:51:41 -0400 Subject: [PATCH 052/132] If forcing a flush, look for all chunks to process. 
#1200 --- src/lsm/lsm_worker.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 4aab508896c..70ec016db01 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -32,7 +32,7 @@ __lsm_worker_general_op( WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; - int force; + int force, count; *completed = 0; if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) && @@ -47,15 +47,26 @@ __lsm_worker_general_op( if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) { force = F_ISSET(entry, WT_LSM_WORK_FORCE); F_CLR(entry, WT_LSM_WORK_FORCE); - WT_ERR(__wt_lsm_get_chunk_to_flush( - session, entry->lsm_tree, force, &chunk)); - if (chunk != NULL) { - ret = __wt_lsm_checkpoint_chunk( - session, entry->lsm_tree, chunk); - WT_ASSERT(session, chunk->refcnt > 0); - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); - WT_ERR(ret); - } + /* + * If this is a force flush, we want to force out all + * possible chunks, not just the first one we find. 
+ */ + count = 0; + do { + WT_ERR(__wt_lsm_get_chunk_to_flush( + session, entry->lsm_tree, force, &chunk)); + if (chunk != NULL) { + count++; + __wt_errx(session, "Got chunk %d", chunk->id); + ret = __wt_lsm_checkpoint_chunk( + session, entry->lsm_tree, chunk); + WT_ASSERT(session, chunk->refcnt > 0); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + WT_ERR(ret); + } + while (force &&chunk != NULL); + if (count > 1) + __wt_errx(session, "Processed %d chunks", count); } else if (entry->flags == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->flags == WT_LSM_WORK_BLOOM) { From 8c7ed677e61e18572440aaebe9b4151f8e4d738d Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 9 Sep 2014 13:54:09 -0400 Subject: [PATCH 053/132] Fix typo --- src/lsm/lsm_worker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 70ec016db01..3b13cb639e8 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -64,7 +64,7 @@ __lsm_worker_general_op( (void)WT_ATOMIC_SUB(chunk->refcnt, 1); WT_ERR(ret); } - while (force &&chunk != NULL); + } while (force &&chunk != NULL); if (count > 1) __wt_errx(session, "Processed %d chunks", count); } else if (entry->flags == WT_LSM_WORK_DROP) From c4fb36e6262f818b714865508c1050c69de6c342 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 9 Sep 2014 16:27:46 -0400 Subject: [PATCH 054/132] Add table name to many LSM verbose statements. Add arg to indicate if this is the last chunk. 
#1200 --- src/include/extern.h | 1 + src/lsm/lsm_manager.c | 8 ++++---- src/lsm/lsm_merge.c | 5 +++-- src/lsm/lsm_tree.c | 7 ++++--- src/lsm/lsm_work_unit.c | 5 ++++- src/lsm/lsm_worker.c | 19 ++++++++++--------- 6 files changed, 26 insertions(+), 19 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index ed285db6395..a0cb087e5e2 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1016,6 +1016,7 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, int force, + int *last, WT_LSM_CHUNK **chunkp); extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 6f532111905..10e61176c82 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -185,10 +185,10 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) if (lsm_tree->merge_aggressiveness > old_aggressive) WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge got aggressive (%u), " - "%u / %" PRIu64, - lsm_tree->merge_aggressiveness, stallms, - lsm_tree->chunk_fill_ms)); + "LSM merge %s got aggressive (%u), " + "%u / %" PRIu64, + lsm_tree->name, lsm_tree->merge_aggressiveness, stallms, + lsm_tree->chunk_fill_ms)); return (0); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 6b8c4b65dea..2da69b3601c 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -73,7 +73,6 @@ __wt_lsm_merge( dest = src = NULL; locked = 0; start_id = 0; - aggressive = lsm_tree->merge_aggressiveness; /* * If the tree is open read-only be very aggressive. Otherwise, we can @@ -83,6 +82,7 @@ __wt_lsm_merge( if (!lsm_tree->modified) lsm_tree->merge_aggressiveness = 10; + aggressive = lsm_tree->merge_aggressiveness; merge_max = (aggressive > 5) ? 100 : lsm_tree->merge_min; merge_min = (aggressive > 5) ? 
2 : lsm_tree->merge_min; max_gap = (aggressive + 4) / 5; @@ -251,8 +251,9 @@ __wt_lsm_merge( dest_id = WT_ATOMIC_ADD(lsm_tree->last, 1); WT_RET(__wt_verbose(session, WT_VERB_LSM, - "Merging chunks %u-%u into %u (%" PRIu64 " records)" + "Merging %s chunks %u-%u into %u (%" PRIu64 " records)" ", generation %" PRIu32, + lsm_tree->name, start_chunk, end_chunk, dest_id, record_count, generation)); if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) for (verb = start_chunk; verb <= end_chunk; verb++) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 51e357fd33d..ef9f859b59a 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -744,8 +744,8 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) nchunks + 1, &lsm_tree->chunk)); WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Tree switch to: %" PRIu32 ", checkpoint throttle %ld, " - "merge throttle %ld", + "Tree %s switch to: %" PRIu32 ", checkpoint throttle %ld, " + "merge throttle %ld", lsm_tree->name, new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle)); WT_ERR(__wt_calloc_def(session, 1, &chunk)); @@ -1057,7 +1057,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); /* Make sure the in-memory chunk gets flushed but not switched. 
*/ - WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact force flush")); + WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact %s force flush", + name)); WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index ac18e133846..c7c78a70f44 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -67,11 +67,12 @@ err: WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); */ int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, - WT_LSM_TREE *lsm_tree, int force, WT_LSM_CHUNK **chunkp) + WT_LSM_TREE *lsm_tree, int force, int *last, WT_LSM_CHUNK **chunkp) { u_int i, end; *chunkp = NULL; + *last = 0; WT_ASSERT(session, lsm_tree->queue_ref > 0); WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 0)); @@ -95,6 +96,8 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, force ? " w/ force" : "", i, end - 1, lsm_tree->chunk[i]->uri)); *chunkp = lsm_tree->chunk[i]; + if (i == end - 1) + *last = 1; break; } } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 3b13cb639e8..d1eb9ca9ee6 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -32,7 +32,7 @@ __lsm_worker_general_op( WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; - int force, count; + int force, last; *completed = 0; if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) && @@ -51,22 +51,23 @@ __lsm_worker_general_op( * If this is a force flush, we want to force out all * possible chunks, not just the first one we find. */ - count = 0; + last = 0; do { - WT_ERR(__wt_lsm_get_chunk_to_flush( - session, entry->lsm_tree, force, &chunk)); + WT_ERR(__wt_lsm_get_chunk_to_flush(session, + entry->lsm_tree, force, &last, &chunk)); if (chunk != NULL) { - count++; - __wt_errx(session, "Got chunk %d", chunk->id); + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Flush%s%s chunk %d %s", + force ? " w/ force" : "", + last ? 
" last" : "", + chunk->id, chunk->uri)); ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); WT_ASSERT(session, chunk->refcnt > 0); (void)WT_ATOMIC_SUB(chunk->refcnt, 1); WT_ERR(ret); } - } while (force &&chunk != NULL); - if (count > 1) - __wt_errx(session, "Processed %d chunks", count); + } while (force && chunk != NULL && !last); } else if (entry->flags == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->flags == WT_LSM_WORK_BLOOM) { From c4893022e076d47db4d552b3960adf85a5292ed5 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 9 Sep 2014 16:39:38 -0400 Subject: [PATCH 055/132] Fix conditional after removing verbose code. --- src/lsm/lsm_work_unit.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index c7c78a70f44..525fb57b5b3 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -84,10 +84,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, * doing a forced flush, likely from a compact call, then we do want * to include the final chunk. */ - if (force) { - end = lsm_tree->nchunks; - } else - end = lsm_tree->nchunks - 1; + end = force ? lsm_tree->nchunks : lsm_tree->nchunks - 1; for (i = 0; i < end; i++) { if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK)) { (void)WT_ATOMIC_ADD(lsm_tree->chunk[i]->refcnt, 1); From 92a06bb0c94321e1ddab940cb77e3c0a3d5f5704 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 10 Sep 2014 09:08:25 +1000 Subject: [PATCH 056/132] Add wtperf configuration for shared cache testing. 
--- bench/wtperf/runners/shared-cache-stress.wtperf | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 bench/wtperf/runners/shared-cache-stress.wtperf diff --git a/bench/wtperf/runners/shared-cache-stress.wtperf b/bench/wtperf/runners/shared-cache-stress.wtperf new file mode 100644 index 00000000000..eee73b5d8e3 --- /dev/null +++ b/bench/wtperf/runners/shared-cache-stress.wtperf @@ -0,0 +1,11 @@ +# Stress out the shared cache. +conn_config="statistics=(none),shared_cache=(name=wt-cache,size=536870912,reserve=10MB,chunk=20MB,)" +table_config="allocation_size=4KB,key_gap=10,split_pct=75,internal_page_max=4KB,internal_key_truncate=false,prefix_compression=false,leaf_item_max=1433,type=file,internal_item_max=1433,exclusive=true,leaf_page_max=4KB,block_compressor=," +checkpoint_interval=100 +checkpoint_threads=1 +icount=50000 +report_interval=5 +run_time=600 +populate_threads=1 +threads=((count=1,inserts=1),(count=1,reads=1)) +database_count=25 From 0177e63ff2b68caceddefe237daa2e55d7be2454 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 10 Sep 2014 13:55:44 +1000 Subject: [PATCH 057/132] Fix for doxygen-1.8.8: it doesn't know how to treat "wiredtiger.in" without help. --- src/docs/Doxyfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index 792b255f4ab..5492905f7e9 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -268,7 +268,7 @@ OPTIMIZE_OUTPUT_VHDL = NO # that for custom extensions you also need to set FILE_PATTERNS otherwise the # files are not read by doxygen. -EXTENSION_MAPPING = +EXTENSION_MAPPING = in=C # If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all # comments according to the Markdown format, which allows for more readable From 03495cf56795eef5011c2440678025912c42d119 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 10 Sep 2014 13:55:44 +1000 Subject: [PATCH 058/132] Document that bulk load cursors are non-transactional. 
--- src/docs/tune-bulk-load.dox | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/docs/tune-bulk-load.dox b/src/docs/tune-bulk-load.dox index 9e89fb7ceea..8ee1061c76c 100644 --- a/src/docs/tune-bulk-load.dox +++ b/src/docs/tune-bulk-load.dox @@ -11,7 +11,9 @@ be used on newly created objects, and an object being bulk-loaded is not accessible from other cursors. Cursors configured for bulk-load only support the WT_CURSOR::insert and -WT_CURSOR::close methods. +WT_CURSOR::close methods. Bulk load inserts are non-transactional: they +cannot be rolled back and ignore the transactional state of the WT_SESSION +in which they are opened. When bulk-loading row-store objects, keys must be loaded in sorted order. From 72d4e708e6a1d4e84db117ae3f098ad970c098ba Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 10 Sep 2014 14:30:02 +1000 Subject: [PATCH 059/132] Make "-O3 -g" the default build flags (again), including for C++ (api/leveldb). --- build_posix/configure.ac.in | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 268ed586232..6352fa6d0df 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -9,6 +9,10 @@ AC_CONFIG_AUX_DIR([build_posix/gnu-support]) AC_CONFIG_MACRO_DIR([build_posix/aclocal]) AC_CONFIG_SRCDIR([RELEASE]) +# If CFLAGS/CXXFLAGS were not set on entry, default to "-O3 -g" +: ${CFLAGS=-O3 -g} +: ${CXXFLAGS=-O3 -g} + AM_INIT_AUTOMAKE([1.11 foreign parallel-tests subdir-objects]) m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([no])]) @@ -24,9 +28,6 @@ LT_PREREQ(2.2.6) LT_INIT([pic-only]) AC_SUBST([LIBTOOL_DEPS]) -# If CFLAGS was not set on entry, default to "-O3 -g" -: ${CFLAGS="-O3 -g"} - AC_PROG_CC(cc gcc) # AC_PROG_CXX(c++ g++) From c07c5cb34aae56fb20e30fa6586fa660326a6d5d Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 10 Sep 2014 14:30:24 +1000 Subject: [PATCH 060/132] Limit the maximum compression ratio our raw 
zlib implementation will allow. Once we have taken 20x the maximum page size, stop. This prevents pathological behavior in synthetic workloads where a page is forced out of cache, then compresses into a single page on disk, and we repeat for every update. --- ext/compressors/zlib/zlib_compress.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c index 33bb9bf8810..3532ecf16cd 100644 --- a/ext/compressors/zlib/zlib_compress.c +++ b/ext/compressors/zlib/zlib_compress.c @@ -225,8 +225,15 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session, * Strategy: take the available output size and compress that much * input. Continue until there is no input small enough or the * compression fails to fit. + * + * Don't let the compression ratio become insanely good (which can + * happen with synthetic workloads). Once we hit a limit, stop so that + * the in-memory size of pages isn't totally different to the on-disk + * size. Otherwise we can get into trouble where every update to a + * page results in forced eviction based on in-memory size, even though + * the data fits into a single on-disk block. */ - while (zs.avail_out > 0) { + while (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) { /* Find the slot we will try to compress up to. */ if ((curr_slot = zlib_find_slot( zs.total_in + zs.avail_out, offsets, slots)) <= last_slot) From 8599b1a45ae20ba3cd586442d9faf7de0b5d56ae Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Wed, 10 Sep 2014 14:50:04 +1000 Subject: [PATCH 061/132] Limit range in shared cache wtperf configuration. Avoids using too much disk space. 
--- bench/wtperf/runners/shared-cache-stress.wtperf | 1 + 1 file changed, 1 insertion(+) diff --git a/bench/wtperf/runners/shared-cache-stress.wtperf b/bench/wtperf/runners/shared-cache-stress.wtperf index eee73b5d8e3..87d14f4f5c1 100644 --- a/bench/wtperf/runners/shared-cache-stress.wtperf +++ b/bench/wtperf/runners/shared-cache-stress.wtperf @@ -4,6 +4,7 @@ table_config="allocation_size=4KB,key_gap=10,split_pct=75,internal_page_max=4KB, checkpoint_interval=100 checkpoint_threads=1 icount=50000 +random_range=500000 report_interval=5 run_time=600 populate_threads=1 From 4e7ec366cd20667ba873a9f1dd3edf9144031fbf Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 10 Sep 2014 13:26:16 -0400 Subject: [PATCH 062/132] Add flushing stage to compact. Restore COMPACTING check during merges to force aggressiveness. #1200 --- src/include/lsm.h | 11 +++++---- src/lsm/lsm_merge.c | 9 ++++---- src/lsm/lsm_tree.c | 54 +++++++++++++++++++++++++++++--------------- src/lsm/lsm_worker.c | 37 +++++++++++++++++------------- 4 files changed, 68 insertions(+), 43 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index b4581b2b094..48984399acd 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -192,11 +192,12 @@ struct __wt_lsm_tree { int freeing_old_chunks; /* Whether chunks are being freed */ uint32_t merge_aggressiveness; /* Increase amount of work per merge */ -#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ -#define WT_LSM_TREE_COMPACTING 0x02 /* Tree is being compacted */ -#define WT_LSM_TREE_NEED_SWITCH 0x04 /* A new chunk should be created */ -#define WT_LSM_TREE_OPEN 0x08 /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x10 /* Throttle updates */ +#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ +#define WT_LSM_TREE_COMPACT_FLUSH 0x02 /* Flushed for compact */ +#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ +#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */ +#define WT_LSM_TREE_OPEN 0x10 /* The tree 
is open */ +#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */ uint32_t flags; #define WT_LSM_TREE_EXCLUSIVE 0x01 /* Tree is opened exclusively */ diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 2da69b3601c..8de7d1350df 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -75,11 +75,12 @@ __wt_lsm_merge( start_id = 0; /* - * If the tree is open read-only be very aggressive. Otherwise, we can - * spend a long time waiting for merges to start in read-only - * applications. + * If the tree is open read-only or we are compacting, be very + * aggressive. Otherwise, we can spend a long time waiting for merges + * to start in read-only applications. */ - if (!lsm_tree->modified) + if (!lsm_tree->modified || + F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) lsm_tree->merge_aggressiveness = 10; aggressive = lsm_tree->merge_aggressiveness; diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index ef9f859b59a..8a5a8ca1f3d 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1007,7 +1007,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_LSM_CHUNK *chunk; WT_LSM_TREE *lsm_tree; time_t begin, end; - int i, compacting, locked; + int i, compacting, flushing, locked, ref; compacting = locked = 0; /* @@ -1040,30 +1040,42 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) goto err; - compacting = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + compacting = flushing = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING); /* * Set the switch transaction on the current chunk, if it * hasn't been set before. This prevents further writes, so it * can be flushed by the checkpoint worker. 
*/ + ref = 0; if (lsm_tree->nchunks > 0 && - (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL && - chunk->switch_txn == WT_TXN_NONE) - chunk->switch_txn = __wt_txn_current_id(session); + (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { + if (chunk->switch_txn == WT_TXN_NONE) + chunk->switch_txn = __wt_txn_current_id(session); + (void)WT_ATOMIC_ADD(chunk->refcnt, 1); + ref = 1; + } locked = 0; WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - /* Make sure the in-memory chunk gets flushed but not switched. */ - WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact %s force flush", - name)); - WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact force flush %s flags 0x%" PRIx32 " chunk %u flags 0x%" + PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); + /* Make sure the in-memory chunk gets flushed but not switched. */ + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); /* Wait for the work unit queues to drain. */ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact flush complete %s chunk %u", + name, chunk->id)); + flushing = ref = 0; + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + } /* * The compacting flag is cleared when no merges can be done. * Ensure that we push through some aggressive merges before @@ -1074,7 +1086,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) if (lsm_tree->merge_aggressiveness < 10) { F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 10; - } else + } else if (!flushing) break; } __wt_sleep(1, 0); @@ -1088,21 +1100,27 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * done. If we are pushing merges, make sure they are * aggressive, to avoid duplicating effort. 
*/ + if (!flushing) #define COMPACT_PARALLEL_MERGES 5 - for (i = lsm_tree->queue_ref; - i < COMPACT_PARALLEL_MERGES; i++) { - lsm_tree->merge_aggressiveness = 10; - WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, lsm_tree)); - } + for (i = lsm_tree->queue_ref; + i < COMPACT_PARALLEL_MERGES; i++) { + lsm_tree->merge_aggressiveness = 10; + WT_ERR(__wt_lsm_manager_push_entry( + session, WT_LSM_WORK_MERGE, lsm_tree)); + } } err: if (locked) WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); /* Ensure the compacting flag is cleared if we set it. */ + if (flushing) + F_CLR(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); + if (ref) + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); if (compacting) { F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 0; } + WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact complete %s", name)); __wt_lsm_tree_release(session, lsm_tree); return (ret); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index d1eb9ca9ee6..68ebe34fe91 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -52,22 +52,27 @@ __lsm_worker_general_op( * possible chunks, not just the first one we find. */ last = 0; - do { - WT_ERR(__wt_lsm_get_chunk_to_flush(session, - entry->lsm_tree, force, &last, &chunk)); - if (chunk != NULL) { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s%s chunk %d %s", - force ? " w/ force" : "", - last ? " last" : "", - chunk->id, chunk->uri)); - ret = __wt_lsm_checkpoint_chunk( - session, entry->lsm_tree, chunk); - WT_ASSERT(session, chunk->refcnt > 0); - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); - WT_ERR(ret); - } - } while (force && chunk != NULL && !last); + WT_ERR(__wt_lsm_get_chunk_to_flush(session, + entry->lsm_tree, force, &last, &chunk)); + if (chunk != NULL) { + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Flush%s%s chunk %d %s", + force ? " w/ force" : "", + last ? 
" last" : "", + chunk->id, chunk->uri)); + ret = __wt_lsm_checkpoint_chunk( + session, entry->lsm_tree, chunk); + WT_ASSERT(session, chunk->refcnt > 0); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + WT_ERR(ret); + } + /* + * If we flushed the last chunk for a compact, clear the + * flag so compact knows that is complete. + */ + if (last && force && + F_ISSET(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) + F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); } else if (entry->flags == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->flags == WT_LSM_WORK_BLOOM) { From 44581e8f5c7cf73af1a2e30de55395d5eb00c4df Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 10 Sep 2014 13:55:57 -0400 Subject: [PATCH 063/132] Fix warnings. --- src/lsm/lsm_tree.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 8a5a8ca1f3d..681b194ed9d 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1009,7 +1009,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) time_t begin, end; int i, compacting, flushing, locked, ref; - compacting = locked = 0; + compacting = flushing = locked = ref = 0; /* * This function is applied to all matching sources: ignore anything * that is not an LSM tree. @@ -1048,7 +1048,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * hasn't been set before. This prevents further writes, so it * can be flushed by the checkpoint worker. */ - ref = 0; + chunk = NULL; if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { if (chunk->switch_txn == WT_TXN_NONE) @@ -1069,12 +1069,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* Wait for the work unit queues to drain. 
*/ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { - if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush complete %s chunk %u", - name, chunk->id)); + if (flushing && ref && + !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { flushing = ref = 0; - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + if (chunk != NULL) { + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact flush complete %s chunk %u", + name, chunk->id)); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + } } /* * The compacting flag is cleared when no merges can be done. From ccb500510aa03e6679286c278625156ec3a82c9d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 16:26:29 -0400 Subject: [PATCH 064/132] Add support to run zlib-noraw compression. --- test/format/config.c | 21 +++++++++++++++------ test/format/config.h | 2 +- test/format/format.h | 1 + test/format/wts.c | 4 ++++ 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/test/format/config.c b/test/format/config.c index 509dc7684ec..519c5db7890 100644 --- a/test/format/config.c +++ b/test/format/config.c @@ -243,26 +243,32 @@ config_compression(void) cp = config_find("compression", strlen("compression")); if (!(cp->flags & C_PERM)) { cstr = "compression=none"; - switch (MMRAND(1, 10)) { - case 1: case 2: case 3: /* 30% */ + switch (MMRAND(1, 20)) { + case 1: case 2: case 3: /* 30% no compression */ + case 4: case 5: case 6: break; - case 4: case 5: /* 20% */ + case 7: case 8: case 9: case 10: /* 20% bzip */ if (access(BZIP_PATH, R_OK) == 0) cstr = "compression=bzip"; break; - case 6: /* 10% */ + case 11: /* 5% bzip-raw */ if (access(BZIP_PATH, R_OK) == 0) cstr = "compression=bzip-raw"; break; - case 7: case 8: /* 20% */ + case 12: case 13: case 14: case 15: /* 20% snappy */ if (access(SNAPPY_PATH, R_OK) == 0) cstr = "compression=snappy"; break; - case 9: case 10: /* 20% */ + case 16: case 17: case 18: case 19: /* 20% zlib */ if (access(ZLIB_PATH, R_OK) == 0) 
cstr = "compression=zlib"; break; + case 20: /* 5% zlib-no-raw */ + if (access(ZLIB_PATH, R_OK) == 0) + cstr = "compression=zlib-noraw"; + break; } + config_single(cstr, 0); } @@ -281,6 +287,7 @@ config_compression(void) die(0, "snappy library not found or not readable"); break; case COMPRESS_ZLIB: + case COMPRESS_ZLIB_NO_RAW: if (access(ZLIB_PATH, R_OK) != 0) die(0, "zlib library not found or not readable"); break; @@ -549,6 +556,8 @@ config_map_compression(const char *s, u_int *vp) *vp = COMPRESS_SNAPPY; else if (strcmp(s, "zlib") == 0) *vp = COMPRESS_ZLIB; + else if (strcmp(s, "zlib-noraw") == 0) + *vp = COMPRESS_ZLIB_NO_RAW; else die(EINVAL, "illegal compression configuration: %s", s); } diff --git a/test/format/config.h b/test/format/config.h index a32df3de95c..4bc1493a681 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -115,7 +115,7 @@ static CONFIG c[] = { C_BOOL, 10, 0, 0, &g.c_compact, NULL }, { "compression", - "type of compression (none | bzip | bzip-raw | lzo | snappy | zlib)", + "type of compression (none | bzip | bzip-raw | lzo | snappy | zlib | zlib-noraw)", C_IGNORE|C_STRING, 1, 5, 5, NULL, &g.c_compression }, { "data_extend", diff --git a/test/format/format.h b/test/format/format.h index 0e45a28b3ef..1f2b363e9a4 100644 --- a/test/format/format.h +++ b/test/format/format.h @@ -205,6 +205,7 @@ typedef struct { #define COMPRESS_LZO 4 #define COMPRESS_SNAPPY 5 #define COMPRESS_ZLIB 6 +#define COMPRESS_ZLIB_NO_RAW 7 u_int c_compression_flag; /* Compression flag value */ #define ISOLATION_RANDOM 1 diff --git a/test/format/wts.c b/test/format/wts.c index 1a83fa92894..e495956fd2e 100644 --- a/test/format/wts.c +++ b/test/format/wts.c @@ -272,6 +272,10 @@ wts_create(void) p += snprintf(p, (size_t)(end - p), ",block_compressor=\"zlib\""); break; + case COMPRESS_ZLIB_NO_RAW: + p += snprintf(p, (size_t)(end - p), + ",block_compressor=\"zlib-noraw\""); + break; } /* Configure Btree internal key truncation. 
*/ From 2ff6bbfc499db45087a85494d5d390634d2be2ab Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:25:44 -0400 Subject: [PATCH 065/132] Rename __wt_conn_cache_pool_config to be __wt_cache_pool_config, the other cache_XXX API doesn't have _conn_ in its name. --- src/conn/conn_api.c | 3 +-- src/conn/conn_cache_pool.c | 4 ++-- src/conn/conn_open.c | 2 +- src/include/extern.h | 3 +-- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 0b210627df5..d9e83db4e37 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -649,9 +649,8 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) config_cfg[0] = conn->cfg; config_cfg[1] = config; - WT_ERR(__wt_conn_cache_pool_config(session, config_cfg)); + WT_ERR(__wt_cache_pool_config(session, config_cfg)); WT_ERR(__wt_cache_config(conn, config_cfg)); - WT_ERR(__wt_async_reconfig(conn, config_cfg)); WT_ERR(__conn_statistics_config(session, config_cfg)); WT_ERR(__wt_conn_verbose_config(session, config_cfg)); diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 9b540c147d4..57d343d0d46 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -24,11 +24,11 @@ static int __cache_pool_assess(WT_SESSION_IMPL *, uint64_t *); static int __cache_pool_balance(WT_SESSION_IMPL *); /* - * __wt_conn_cache_pool_config -- + * __wt_cache_pool_config -- * Parse and setup the cache pool options. */ int -__wt_conn_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) +__wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) { WT_CACHE_POOL *cp; WT_CONFIG_ITEM cval; diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 3a2f1cb51a4..e20931e7028 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -55,7 +55,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_WRITE_BARRIER(); /* Connect to a cache pool. 
*/ - WT_RET(__wt_conn_cache_pool_config(session, cfg)); + WT_RET(__wt_cache_pool_config(session, cfg)); /* Create the cache. */ WT_RET(__wt_cache_create(conn, cfg)); diff --git a/src/include/extern.h b/src/include/extern.h index ed285db6395..67decaee28b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -609,8 +609,7 @@ extern int __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern int __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); extern int __wt_cache_destroy(WT_CONNECTION_IMPL *conn); -extern int __wt_conn_cache_pool_config(WT_SESSION_IMPL *session, - const char **cfg); +extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session); extern int __wt_conn_cache_pool_destroy(WT_CONNECTION_IMPL *conn); extern void *__wt_cache_pool_server(void *arg); From 65e598ddc1031b4803281644a0ac8023d944e5b2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:27:17 -0400 Subject: [PATCH 066/132] Rename __wt_conn_verbose_config to be __wt_verbose_config, the _conn_ isn't adding anything useful. 
--- src/btree/bt_debug.c | 2 +- src/conn/conn_api.c | 14 +++++++------- src/include/extern.h | 3 +-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 8a069cc4bdf..84c4565eafe 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -62,7 +62,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) snprintf(buf, sizeof(buf), "verbose=[%s]", v); cfg[0] = buf; - return (__wt_conn_verbose_config(session, cfg)); + return (__wt_verbose_config(session, cfg)); } /* diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index d9e83db4e37..b05a2f87f60 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -649,12 +649,12 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) config_cfg[0] = conn->cfg; config_cfg[1] = config; - WT_ERR(__wt_cache_pool_config(session, config_cfg)); - WT_ERR(__wt_cache_config(conn, config_cfg)); - WT_ERR(__wt_async_reconfig(conn, config_cfg)); WT_ERR(__conn_statistics_config(session, config_cfg)); - WT_ERR(__wt_conn_verbose_config(session, config_cfg)); + WT_ERR(__wt_async_reconfig(conn, config_cfg)); + WT_ERR(__wt_cache_config(conn, config_cfg)); + WT_ERR(__wt_cache_pool_config(session, config_cfg)); WT_ERR(__wt_checkpoint_server_create(conn, config_cfg)); + WT_ERR(__wt_verbose_config(session, config_cfg)); WT_ERR(__wt_statlog_create(conn, config_cfg)); WT_ERR(__wt_config_gets( @@ -1062,11 +1062,11 @@ __conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * __wt_conn_verbose_config -- + * __wt_verbose_config -- * Set verbose configuration. 
*/ int -__wt_conn_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) +__wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CONFIG_ITEM cval, sval; WT_CONNECTION_IMPL *conn; @@ -1328,7 +1328,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, if (cval.val) F_SET(conn, WT_CONN_CKPT_SYNC); - WT_ERR(__wt_conn_verbose_config(session, cfg)); + WT_ERR(__wt_verbose_config(session, cfg)); WT_ERR(__wt_config_gets(session, cfg, "buffer_alignment", &cval)); if (cval.val == -1) diff --git a/src/include/extern.h b/src/include/extern.h index 67decaee28b..b7479f586b9 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -603,8 +603,7 @@ extern int __wt_conn_remove_compressor( WT_CONNECTION_IMPL *conn, WT_NAMED_COMPRESSOR *ncomp); extern int __wt_conn_remove_data_source( WT_CONNECTION_IMPL *conn, WT_NAMED_DATA_SOURCE *ndsrc); -extern int __wt_conn_verbose_config(WT_SESSION_IMPL *session, - const char *cfg[]); +extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern int __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); From 91d63c3b70a91933547f8d7ce510994afd2e8b50 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:40:37 -0400 Subject: [PATCH 067/132] Fix some places we were passing the WT_CONNECTION_IMPL to underlying routines, that means they have to directly grab WT_CONNECTION_IMPL.default_session which isn't good. Pass the WT_SESSION_IMPL instead, then functions do S2C() like the rest of the code. Not "right", but "better". 
--- src/async/async_api.c | 15 +++++++-------- src/conn/conn_api.c | 8 ++++---- src/conn/conn_cache.c | 14 +++++++------- src/conn/conn_ckpt.c | 6 ++++-- src/conn/conn_log.c | 6 +++--- src/conn/conn_open.c | 10 +++++----- src/conn/conn_stat.c | 6 +++--- src/include/extern.h | 14 +++++++------- 8 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index ae567466e8e..294662defab 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -228,14 +228,14 @@ __wt_async_stats_update(WT_SESSION_IMPL *session) * Start the async subsystem and worker threads. */ int -__wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) { WT_ASYNC *async; - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; int run; uint32_t i; - session = conn->default_session; + conn = S2C(session); /* Handle configuration. */ run = 0; @@ -288,17 +288,16 @@ __wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) * Start the async subsystem and worker threads. 
*/ int -__wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) { WT_ASYNC *async; - WT_CONNECTION_IMPL tmp_conn; + WT_CONNECTION_IMPL *conn, tmp_conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; int run; uint32_t i; - session = conn->default_session; + conn = S2C(session); async = conn->async; memset(&tmp_conn, 0, sizeof(tmp_conn)); tmp_conn.async_cfg = conn->async_cfg; @@ -338,7 +337,7 @@ __wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[]) return (ret); } else if (conn->async_cfg == 0 && run) /* Case 2 */ - return (__wt_async_create(conn, cfg)); + return (__wt_async_create(session, cfg)); else if (conn->async_cfg == 0) /* Case 3 */ return (0); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index b05a2f87f60..22d3b9d4411 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -650,12 +650,12 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) config_cfg[1] = config; WT_ERR(__conn_statistics_config(session, config_cfg)); - WT_ERR(__wt_async_reconfig(conn, config_cfg)); - WT_ERR(__wt_cache_config(conn, config_cfg)); + WT_ERR(__wt_async_reconfig(session, config_cfg)); + WT_ERR(__wt_cache_config(session, config_cfg)); WT_ERR(__wt_cache_pool_config(session, config_cfg)); - WT_ERR(__wt_checkpoint_server_create(conn, config_cfg)); + WT_ERR(__wt_checkpoint_server_create(session, config_cfg)); WT_ERR(__wt_verbose_config(session, config_cfg)); - WT_ERR(__wt_statlog_create(conn, config_cfg)); + WT_ERR(__wt_statlog_create(session, config_cfg)); WT_ERR(__wt_config_gets( session, config_cfg, "lsm_manager.worker_thread_max", &cval)); diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index 42e45a9c58b..b5f94f137ab 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -12,14 +12,14 @@ * Configure the underlying cache. 
*/ int -__wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CACHE *cache; WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); cache = conn->cache; /* @@ -85,13 +85,13 @@ __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]) * Create the underlying cache. */ int -__wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) { WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); WT_ASSERT(session, conn->cache == NULL || (F_ISSET(conn, WT_CONN_CACHE_POOL) && conn->cache != NULL)); @@ -101,7 +101,7 @@ __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) cache = conn->cache; /* Use a common routine for run-time configuration options. */ - WT_RET(__wt_cache_config(conn, cfg)); + WT_RET(__wt_cache_config(session, cfg)); /* Add the configured cache to the cache pool. */ if (F_ISSET(conn, WT_CONN_CACHE_POOL)) diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index f948cd64d2e..101877a3ddb 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -147,17 +147,19 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn) * Configure and start the checkpoint server. */ int -__wt_checkpoint_server_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_CONNECTION_IMPL *conn; int start; + conn = S2C(session); start = 0; /* If there is already a server running, shut it down. 
*/ if (conn->ckpt_session != NULL) WT_RET(__wt_checkpoint_server_destroy(conn)); - WT_RET(__ckpt_server_config(conn->default_session, cfg, &start)); + WT_RET(__ckpt_server_config(session, cfg, &start)); if (start) WT_RET(__ckpt_server_start(conn)); diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 0ecf48c6628..114e44ea193 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -166,13 +166,13 @@ err: __wt_err(session, ret, "log archive server error"); * Start the log subsystem and archive server thread. */ int -__wt_logmgr_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_LOG *log; int run; - session = conn->default_session; + conn = S2C(session); /* Handle configuration. */ WT_RET(__logmgr_config(session, cfg, &run)); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index e20931e7028..78351ce4bd7 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -58,7 +58,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_RET(__wt_cache_pool_config(session, cfg)); /* Create the cache. */ - WT_RET(__wt_cache_create(conn, cfg)); + WT_RET(__wt_cache_create(session, cfg)); /* Initialize transaction support. */ WT_RET(__wt_txn_global_init(conn, cfg)); @@ -225,20 +225,20 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) * Start the optional statistics thread. Start statistics first so that * other optional threads can know if statistics are enabled or not. */ - WT_RET(__wt_statlog_create(conn, cfg)); + WT_RET(__wt_statlog_create(session, cfg)); /* Start the optional async threads. */ - WT_RET(__wt_async_create(conn, cfg)); + WT_RET(__wt_async_create(session, cfg)); /* * Start the optional logging/archive thread. * NOTE: The log manager must be started before checkpoints so that the * checkpoint server knows if logging is enabled. 
*/ - WT_RET(__wt_logmgr_create(conn, cfg)); + WT_RET(__wt_logmgr_create(session, cfg)); /* Start the optional checkpoint thread. */ - WT_RET(__wt_checkpoint_server_create(conn, cfg)); + WT_RET(__wt_checkpoint_server_create(session, cfg)); return (0); } diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index eaee410ad0a..12b9567b6e8 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -441,12 +441,12 @@ __statlog_start(WT_CONNECTION_IMPL *conn) * Start the statistics server thread. */ int -__wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; int start; - session = conn->default_session; + conn = S2C(session); start = 0; /* diff --git a/src/include/extern.h b/src/include/extern.h index b7479f586b9..8208c021d47 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1,8 +1,8 @@ /* DO NOT EDIT: automatically built by dist/s_prototypes. 
*/ extern void __wt_async_stats_update(WT_SESSION_IMPL *session); -extern int __wt_async_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); -extern int __wt_async_reconfig(WT_CONNECTION_IMPL *conn, const char *cfg[]); +extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]); +extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_async_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_async_flush(WT_CONNECTION_IMPL *conn); extern int __wt_async_new_op(WT_CONNECTION_IMPL *conn, @@ -604,15 +604,15 @@ extern int __wt_conn_remove_compressor( WT_CONNECTION_IMPL *conn, extern int __wt_conn_remove_data_source( WT_CONNECTION_IMPL *conn, WT_NAMED_DATA_SOURCE *ndsrc); extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_cache_config(WT_CONNECTION_IMPL *conn, const char *cfg[]); -extern int __wt_cache_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); +extern int __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]); +extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); extern int __wt_cache_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session); extern int __wt_conn_cache_pool_destroy(WT_CONNECTION_IMPL *conn); extern void *__wt_cache_pool_server(void *arg); -extern int __wt_checkpoint_server_create(WT_CONNECTION_IMPL *conn, +extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_checkpoint_server_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, off_t logsize); @@ -642,14 +642,14 @@ extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, extern int __wt_conn_dhandle_discard(WT_CONNECTION_IMPL *conn); extern int __wt_connection_init(WT_CONNECTION_IMPL 
*conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); -extern int __wt_logmgr_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); +extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_logmgr_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern int __wt_connection_close(WT_CONNECTION_IMPL *conn); extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_conn_stat_init(WT_SESSION_IMPL *session); extern int __wt_statlog_log_one(WT_SESSION_IMPL *session); -extern int __wt_statlog_create(WT_CONNECTION_IMPL *conn, const char *cfg[]); +extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close); extern int __wt_sweep_create(WT_CONNECTION_IMPL *conn); extern int __wt_sweep_destroy(WT_CONNECTION_IMPL *conn); From 90d6d4173aa1957e3b4d633c5691f7853c2701a8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:43:30 -0400 Subject: [PATCH 068/132] Merging the wiredtiger_open configuration means we should never see not-found returned when retrieving the "statistics" configuration. Don't reset the connection's statistics flags until the entire config function is successful, it would be strange to mix-and-match old and new flags. --- src/conn/conn_api.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 22d3b9d4411..3793d30b335 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1018,46 +1018,48 @@ __conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_CONFIG_ITEM cval, sval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t flags; int set; conn = S2C(session); - if ((ret = __wt_config_gets(session, cfg, "statistics", &cval)) != 0) - return (ret == WT_NOTFOUND ? 
0 : ret); - - /* Configuring statistics clears any existing values. */ - conn->stat_flags = 0; + WT_RET(__wt_config_gets(session, cfg, "statistics", &cval)); + flags = 0; set = 0; if ((ret = __wt_config_subgets( session, &cval, "none", &sval)) == 0 && sval.val != 0) { - FLD_SET(conn->stat_flags, WT_CONN_STAT_NONE); + LF_SET(WT_CONN_STAT_NONE); ++set; } WT_RET_NOTFOUND_OK(ret); if ((ret = __wt_config_subgets( session, &cval, "fast", &sval)) == 0 && sval.val != 0) { - FLD_SET(conn->stat_flags, WT_CONN_STAT_FAST); + LF_SET(WT_CONN_STAT_FAST); ++set; } WT_RET_NOTFOUND_OK(ret); if ((ret = __wt_config_subgets( session, &cval, "all", &sval)) == 0 && sval.val != 0) { - FLD_SET(conn->stat_flags, WT_CONN_STAT_ALL | WT_CONN_STAT_FAST); + LF_SET(WT_CONN_STAT_ALL | WT_CONN_STAT_FAST); ++set; } WT_RET_NOTFOUND_OK(ret); if ((ret = __wt_config_subgets( session, &cval, "clear", &sval)) == 0 && sval.val != 0) - FLD_SET(conn->stat_flags, WT_CONN_STAT_CLEAR); + LF_SET(WT_CONN_STAT_CLEAR); WT_RET_NOTFOUND_OK(ret); if (set > 1) WT_RET_MSG(session, EINVAL, "only one statistics configuration value may be specified"); + + /* Configuring statistics clears any existing values. */ + conn->stat_flags = flags; + return (0); } From 8582cd5db8ae729deef7aa8e67cdfdb29f08c848 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:46:19 -0400 Subject: [PATCH 069/132] Merging the wiredtiger_open configuration means we should never see not-found returned when retrieving the async.enabled, async.ops_max and async.threads configurations. 
--- src/async/async_api.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index 294662defab..e7c2c101bb1 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -172,32 +172,24 @@ __async_config(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn, const char **cfg, int *runp) { WT_CONFIG_ITEM cval; - WT_DECL_RET; /* * The async configuration is off by default. */ - if ((ret = __wt_config_gets( - session, cfg, "async.enabled", &cval)) == 0) - *runp = cval.val != 0; - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "async.enabled", &cval)); + *runp = cval.val != 0; /* * Even if async is turned off, we want to parse and store the * default values so that reconfigure can just enable them. */ - if ((ret = __wt_config_gets( - session, cfg, "async.ops_max", &cval)) == 0) - conn->async_size = (uint32_t)cval.val; - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "async.ops_max", &cval)); + conn->async_size = (uint32_t)cval.val; - if ((ret = __wt_config_gets( - session, cfg, "async.threads", &cval)) == 0) { - conn->async_workers = (uint32_t)cval.val; - /* Sanity check that api_data.py is in sync with async.h */ - WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS); - } - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "async.threads", &cval)); + conn->async_workers = (uint32_t)cval.val; + /* Sanity check that api_data.py is in sync with async.h */ + WT_ASSERT(session, conn->async_workers <= WT_ASYNC_MAX_WORKERS); return (0); } From 45746528302a833ff16435477f789411fd4ac907 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:49:09 -0400 Subject: [PATCH 070/132] Merging the wiredtiger_open configuration means we should never see not-found returned when retrieving the eviction_target, eviction_trigger, eviction_dirty_target, eviction.threads_max and eviction.threads_min configuration strings. 
--- src/conn/conn_cache.c | 37 +++++++++++++------------------------ 1 file changed, 13 insertions(+), 24 deletions(-) diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index b5f94f137ab..fabff374319 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -39,38 +39,27 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]) cache->cp_reserved = (uint64_t)cval.val; WT_RET_NOTFOUND_OK(ret); - if ((ret = - __wt_config_gets(session, cfg, "eviction_target", &cval)) == 0) - cache->eviction_target = (u_int)cval.val; - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval)); + cache->eviction_target = (u_int)cval.val; - if ((ret = - __wt_config_gets(session, cfg, "eviction_trigger", &cval)) == 0) - cache->eviction_trigger = (u_int)cval.val; - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval)); + cache->eviction_trigger = (u_int)cval.val; - if ((ret = __wt_config_gets( - session, cfg, "eviction_dirty_target", &cval)) == 0) - cache->eviction_dirty_target = (u_int)cval.val; - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval)); + cache->eviction_dirty_target = (u_int)cval.val; /* * The eviction thread configuration options include the main eviction * thread and workers. Our implementation splits them out. Adjust for * the difference when parsing the configuration. 
*/ - if ((ret = __wt_config_gets( - session, cfg, "eviction.threads_max", &cval)) == 0) { - WT_ASSERT(session, cval.val > 0); - conn->evict_workers_max = (u_int)cval.val - 1; - } - WT_RET_NOTFOUND_OK(ret); - if ((ret = __wt_config_gets( - session, cfg, "eviction.threads_min", &cval)) == 0) { - WT_ASSERT(session, cval.val > 0); - conn->evict_workers_min = (u_int)cval.val - 1; - } - WT_RET_NOTFOUND_OK(ret); + WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval)); + WT_ASSERT(session, cval.val > 0); + conn->evict_workers_max = (u_int)cval.val - 1; + + WT_RET(__wt_config_gets(session, cfg, "eviction.threads_min", &cval)); + WT_ASSERT(session, cval.val > 0); + conn->evict_workers_min = (u_int)cval.val - 1; if (conn->evict_workers_min > conn->evict_workers_max) WT_RET_MSG(session, EINVAL, From 9b35bee31185f06a4ce883648fd478bd46893e67 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 17:52:47 -0400 Subject: [PATCH 071/132] Merging the wiredtiger_open configuration means we should never see not-found returned when retrieving the verbose configuration. Don't reset the connection's verbose flags until the entire config function is successful. It's not a bug yet, but makes sure an error case introduced later doesn't also introduce the possibility of mixing-and-matching old/new flags. --- src/conn/conn_api.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 3793d30b335..b406a69b9c6 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1073,6 +1073,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_CONFIG_ITEM cval, sval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; + uint32_t flags; static const struct { const char *name; uint32_t flag; @@ -1103,14 +1104,14 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) conn = S2C(session); - if ((ret = __wt_config_gets(session, cfg, "verbose", &cval)) != 0) - return (ret == WT_NOTFOUND ? 
0 : ret); + WT_RET(__wt_config_gets(session, cfg, "verbose", &cval)); + flags = 0; for (ft = verbtypes; ft->name != NULL; ft++) { if ((ret = __wt_config_subgets( session, &cval, ft->name, &sval)) == 0 && sval.val != 0) { #ifdef HAVE_VERBOSE - FLD_SET(conn->verbose, ft->flag); + LF_SET(ft->flag); #else WT_RET_MSG(session, EINVAL, "Verbose option specified when WiredTiger built " @@ -1118,11 +1119,11 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) "configure command and rebuild to include support " "for verbose messages"); #endif - } else - FLD_CLR(conn->verbose, ft->flag); - + } WT_RET_NOTFOUND_OK(ret); } + + conn->verbose = flags; return (0); } From 93de0739cf20574897f94e1e198e68efcf9fd69c Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 18:02:20 -0400 Subject: [PATCH 072/132] Merging the wiredtiger_open configuration means we should never see not-found returned when retrieving the statistics-logs configuration. Don't leak memory if we're unable to build the list of sources for the statistics-logging. Cleanup a couple more places where we were passing a connection handle instead of a session handle. --- src/conn/conn_open.c | 2 +- src/conn/conn_stat.c | 59 ++++++++++++++++++++++++++++---------------- src/include/extern.h | 2 +- 3 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 78351ce4bd7..956b944ec19 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -102,7 +102,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_TRET(__wt_async_destroy(conn)); WT_TRET(__wt_lsm_manager_destroy(conn)); WT_TRET(__wt_checkpoint_server_destroy(conn)); - WT_TRET(__wt_statlog_destroy(conn, 1)); + WT_TRET(__wt_statlog_destroy(session, 1)); WT_TRET(__wt_sweep_destroy(conn)); /* Close open data handles. 
*/ diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c index 12b9567b6e8..fbd9b3835b4 100644 --- a/src/conn/conn_stat.c +++ b/src/conn/conn_stat.c @@ -18,6 +18,22 @@ #endif #endif +/* + * __stat_sources_free -- + * Free the array of statistics sources. + */ +static void +__stat_sources_free(WT_SESSION_IMPL *session, char ***sources) +{ + char **p; + + if ((p = (*sources)) != NULL) { + for (; *p != NULL; ++p) + __wt_free(session, *p); + __wt_free(session, *sources); + } +} + /* * __wt_conn_stat_init -- * Initialize the per-connection statistics. @@ -41,8 +57,10 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp) WT_CONNECTION_IMPL *conn; WT_DECL_RET; int cnt; + char **sources; conn = S2C(session); + sources = NULL; WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval)); /* Only start the server if wait time is non-zero */ @@ -67,7 +85,7 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp) ; WT_RET_NOTFOUND_OK(ret); if (cnt != 0) { - WT_RET(__wt_calloc_def(session, cnt + 1, &conn->stat_sources)); + WT_RET(__wt_calloc_def(session, cnt + 1, &sources)); WT_RET(__wt_config_subinit(session, &objectconf, &cval)); for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) { @@ -80,24 +98,28 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, int *runp) */ if (!WT_PREFIX_MATCH(k.str, "file:") && !WT_PREFIX_MATCH(k.str, "lsm:")) - WT_RET_MSG(session, EINVAL, + WT_ERR_MSG(session, EINVAL, "statistics_log sources configuration only " "supports objects of type \"file\" or " "\"lsm\""); - WT_RET(__wt_strndup(session, - k.str, k.len, &conn->stat_sources[cnt])); + WT_ERR( + __wt_strndup(session, k.str, k.len, &sources[cnt])); } - WT_RET_NOTFOUND_OK(ret); + WT_ERR_NOTFOUND_OK(ret); + + conn->stat_sources = sources; + sources = NULL; } - WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); - WT_RET(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path)); + 
WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval)); + WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path)); - WT_RET(__wt_config_gets( + WT_ERR(__wt_config_gets( session, cfg, "statistics_log.timestamp", &cval)); - WT_RET(__wt_strndup(session, cval.str, cval.len, &conn->stat_format)); + WT_ERR(__wt_strndup(session, cval.str, cval.len, &conn->stat_format)); - return (0); +err: __stat_sources_free(session, &sources); + return (ret); } /* @@ -455,9 +477,9 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) * configuration changes - but that makes our lives easier. */ if (conn->stat_session != NULL) - WT_RET(__wt_statlog_destroy(conn, 0)); + WT_RET(__wt_statlog_destroy(session, 0)); - WT_RET_NOTFOUND_OK(__statlog_config(session, cfg, &start)); + WT_RET(__statlog_config(session, cfg, &start)); if (start) WT_RET(__statlog_start(conn)); @@ -469,14 +491,13 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) * Destroy the statistics server thread. 
*/ int -__wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close) +__wt_statlog_destroy(WT_SESSION_IMPL *session, int is_close) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; - char **p; - session = conn->default_session; + conn = S2C(session); F_CLR(conn, WT_CONN_SERVER_STATISTICS); if (conn->stat_tid_set) { @@ -491,11 +512,7 @@ __wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close) WT_TRET(__wt_cond_destroy(session, &conn->stat_cond)); - if ((p = conn->stat_sources) != NULL) { - for (; *p != NULL; ++p) - __wt_free(session, *p); - __wt_free(session, conn->stat_sources); - } + __stat_sources_free(session, &conn->stat_sources); __wt_free(session, conn->stat_path); __wt_free(session, conn->stat_format); diff --git a/src/include/extern.h b/src/include/extern.h index 8208c021d47..1d52262446b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -650,7 +650,7 @@ extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_conn_stat_init(WT_SESSION_IMPL *session); extern int __wt_statlog_log_one(WT_SESSION_IMPL *session); extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_statlog_destroy(WT_CONNECTION_IMPL *conn, int is_close); +extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, int is_close); extern int __wt_sweep_create(WT_CONNECTION_IMPL *conn); extern int __wt_sweep_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_curbackup_open(WT_SESSION_IMPL *session, From 1613c2661598f7bd72f1a5e8e8925339ec312b9a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 18:07:10 -0400 Subject: [PATCH 073/132] Move the LSM manager re-configuration code into the LSM manager file. 
--- src/conn/conn_api.c | 9 ++------- src/include/extern.h | 1 + src/lsm/lsm_manager.c | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index b406a69b9c6..b681876a4b2 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -626,7 +626,6 @@ err: /* static int __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) { - WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION_IMPL *session; @@ -654,13 +653,9 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_ERR(__wt_cache_config(session, config_cfg)); WT_ERR(__wt_cache_pool_config(session, config_cfg)); WT_ERR(__wt_checkpoint_server_create(session, config_cfg)); - WT_ERR(__wt_verbose_config(session, config_cfg)); + WT_ERR(__wt_lsm_manager_config(session, config_cfg)); WT_ERR(__wt_statlog_create(session, config_cfg)); - - WT_ERR(__wt_config_gets( - session, config_cfg, "lsm_manager.worker_thread_max", &cval)); - if (cval.val) - conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val; + WT_ERR(__wt_verbose_config(session, config_cfg)); /* Wake up the cache pool server so any changes are noticed. 
*/ if (F_ISSET(conn, WT_CONN_CACHE_POOL)) diff --git a/src/include/extern.h b/src/include/extern.h index 1d52262446b..c5e6a49fdf8 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -930,6 +930,7 @@ extern int __wt_clsm_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); +extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session); extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 6f532111905..2ac20b9b92d 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -13,6 +13,25 @@ static int __lsm_manager_worker_setup(WT_SESSION_IMPL *); static void * __lsm_worker_manager(void *); +/* + * __wt_lsm_manager_config -- + * Re-configure the LSM manager. + */ +int +__wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg) +{ + WT_CONNECTION_IMPL *conn; + WT_CONFIG_ITEM cval; + + conn = S2C(session); + + WT_RET(__wt_config_gets( + session, cfg, "lsm_manager.worker_thread_max", &cval)); + if (cval.val) + conn->lsm_manager.lsm_workers_max = (uint32_t)cval.val; + return (0); +} + /* * __wt_lsm_manager_start -- * Start the LSM management infrastructure. 
Our queues and locks were From 87a90096360c83584e2a3cb8785dc6dbed031b46 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 18:16:51 -0400 Subject: [PATCH 074/132] KNF --- src/async/async_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index e7c2c101bb1..2e788b772fd 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -201,8 +201,8 @@ __async_config(WT_SESSION_IMPL *session, void __wt_async_stats_update(WT_SESSION_IMPL *session) { - WT_CONNECTION_IMPL *conn; WT_ASYNC *async; + WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS *stats; conn = S2C(session); From 59f5c4e4376e5b6450e9fb87b11fd270f18569fc Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 18:29:21 -0400 Subject: [PATCH 075/132] long line complaint --- test/format/config.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/format/config.h b/test/format/config.h index 4bc1493a681..9852fafabf7 100644 --- a/test/format/config.h +++ b/test/format/config.h @@ -115,7 +115,8 @@ static CONFIG c[] = { C_BOOL, 10, 0, 0, &g.c_compact, NULL }, { "compression", - "type of compression (none | bzip | bzip-raw | lzo | snappy | zlib | zlib-noraw)", + "type of compression " + "(none | bzip | bzip-raw | lzo | snappy | zlib | zlib-noraw)", C_IGNORE|C_STRING, 1, 5, 5, NULL, &g.c_compression }, { "data_extend", From cfab614cbb4d2c9f3443a0289eb47476344fb5a3 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 10 Sep 2014 19:40:44 -0400 Subject: [PATCH 076/132] Wait for ondisk flag to get set on compact. 
#1200 --- src/lsm/lsm_tree.c | 19 +++++++++++++++---- src/lsm/lsm_work_unit.c | 11 ++++++++++- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 681b194ed9d..5db017514ae 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1071,12 +1071,23 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { if (flushing && ref && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { - flushing = ref = 0; - if (chunk != NULL) { + if (chunk != NULL && + !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush complete %s chunk %u", + "Compact flush retry %s chunk %u", name, chunk->id)); - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, + lsm_tree)); + } else { + flushing = ref = 0; + if (chunk != NULL) { + WT_ERR(__wt_verbose(session, + WT_VERB_LSM, + "Compact flush done %s chunk %u", + name, chunk->id)); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + } } } /* diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 525fb57b5b3..f3ae9f04219 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -229,11 +229,20 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, else WT_RET_MSG(session, ret, "discard handle"); } - if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) + if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "LSM worker %s already on disk", + chunk->uri)); return (0); + } /* Stop if a running transaction needs the chunk. 
*/ __wt_txn_update_oldest(session); + if (chunk->switch_txn == WT_TXN_NONE) + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "LSM ckp txn needs chunk %s: switch %" PRIu64 + " oldest %" PRIu64, chunk->uri, + chunk->switch_txn, S2C(session)->txn_global.oldest_id)); if (chunk->switch_txn == WT_TXN_NONE || !__wt_txn_visible_all(session, chunk->switch_txn)) return (0); From e535a5202a60050f84cc2a0f8f5cc8c7392b9b20 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 11 Sep 2014 10:30:52 +1000 Subject: [PATCH 077/132] Change transaction ID allocation so that if transactions stop, the last ID becomes globally visible. We want post-increment semantics but our atomic primitive is pre-increment. refs #1200 --- src/btree/rec_track.c | 2 +- src/include/txn.i | 21 +++++++++------------ src/lsm/lsm_tree.c | 4 ++-- 3 files changed, 12 insertions(+), 15 deletions(-) diff --git a/src/btree/rec_track.c b/src/btree/rec_track.c index a4ef0aaa100..165df9d61e5 100644 --- a/src/btree/rec_track.c +++ b/src/btree/rec_track.c @@ -807,7 +807,7 @@ __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, txnc->value_offset = WT_PTRDIFF32(p, txnc); txnc->value_size = WT_STORE_SIZE(value_size); memcpy(p, value, value_size); - txnc->current = __wt_txn_current_id(session); + txnc->current = __wt_txn_new_id(session); __wt_cache_page_inmem_incr(session, page, WT_OVFL_SIZE(WT_OVFL_TXNC) + addr_size + value_size); diff --git a/src/include/txn.i b/src/include/txn.i index 3854429f8e4..81559bfe490 100644 --- a/src/include/txn.i +++ b/src/include/txn.i @@ -179,7 +179,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd) /* * __wt_txn_autocommit_check -- - * If an auto-commit transaction is required, start one. + * If an auto-commit transaction is required, start one. */ static inline int __wt_txn_autocommit_check(WT_SESSION_IMPL *session) @@ -194,16 +194,6 @@ __wt_txn_autocommit_check(WT_SESSION_IMPL *session) return (0); } -/* - * __wt_txn_current_id -- - * Get the current transaction ID. 
- */ -static inline uint64_t -__wt_txn_current_id(WT_SESSION_IMPL *session) -{ - return (S2C(session)->txn_global.current); -} - /* * __wt_txn_new_id -- * Allocate a new transaction ID. @@ -211,7 +201,14 @@ __wt_txn_current_id(WT_SESSION_IMPL *session) static inline uint64_t __wt_txn_new_id(WT_SESSION_IMPL *session) { - return WT_ATOMIC_ADD(S2C(session)->txn_global.current, 1); + /* + * We want the global value to lead the allocated values, so that any + * allocated transaction ID eventually becomes globally visible. When + * there are no transactions running, the oldest_id will reach the + * global current ID, so we want post-increment semantics. Our atomic + * add primitive does pre-increment, so adjust the result here. + */ + return WT_ATOMIC_ADD(S2C(session)->txn_global.current, 1) - 1; } /* diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 5db017514ae..c3082ca4a92 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -733,7 +733,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) /* Set the switch transaction in the previous chunk, if necessary. */ if (chunk != NULL && chunk->switch_txn == WT_TXN_NONE) - chunk->switch_txn = __wt_txn_current_id(session); + chunk->switch_txn = __wt_txn_new_id(session); /* Update the throttle time. */ __wt_lsm_tree_throttle(session, lsm_tree, 0); @@ -1052,7 +1052,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { if (chunk->switch_txn == WT_TXN_NONE) - chunk->switch_txn = __wt_txn_current_id(session); + chunk->switch_txn = __wt_txn_new_id(session); (void)WT_ATOMIC_ADD(chunk->refcnt, 1); ref = 1; } From 4a901b42ab03a708de27c31a0bc6c9ad582191aa Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 11 Sep 2014 10:31:22 +1000 Subject: [PATCH 078/132] On error in wiredtiger_open, don't try to free buffers after closing the connection handle. 
--- src/conn/conn_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 0b210627df5..1865fb5c047 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1415,11 +1415,11 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, STATIC_ASSERT(offsetof(WT_CONNECTION_IMPL, iface) == 0); *wt_connp = &conn->iface; -err: if (ret != 0 && conn != NULL) - WT_TRET(__wt_connection_close(conn)); - - __wt_buf_free(session, &cbbuf); +err: __wt_buf_free(session, &cbbuf); __wt_buf_free(session, &cubuf); + if (ret != 0 && conn != NULL) + WT_TRET(__wt_connection_close(conn)); + return (ret); } From c63a2ebf84982758b9183e50b6f778ac0cff70d0 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 20:38:44 -0400 Subject: [PATCH 079/132] Clean up the first test in __wt_cache_config (not-found is no longer an option). --- src/conn/conn_cache.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index fabff374319..2c21e5f1fbc 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -17,7 +17,6 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_CACHE *cache; WT_CONFIG_ITEM cval; WT_CONNECTION_IMPL *conn; - WT_DECL_RET; conn = S2C(session); cache = conn->cache; @@ -26,18 +25,17 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]) * If not using a shared cache configure the cache size, otherwise * check for a reserved size. 
*/ - if (!F_ISSET(conn, WT_CONN_CACHE_POOL) && - (ret = __wt_config_gets(session, cfg, "cache_size", &cval)) == 0) + if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) { + WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval)); conn->cache_size = (uint64_t)cval.val; - - if (F_ISSET(conn, WT_CONN_CACHE_POOL) && - (ret = __wt_config_gets(session, cfg, - "shared_cache.reserve", &cval)) == 0 && cval.val != 0) + } else { + WT_RET(__wt_config_gets( + session, cfg, "shared_cache.reserve", &cval)); + if (cval.val == 0) + WT_RET(__wt_config_gets( + session, cfg, "shared_cache.chunk", &cval)); cache->cp_reserved = (uint64_t)cval.val; - else if ((ret = __wt_config_gets(session, cfg, - "shared_cache.chunk", &cval)) == 0) - cache->cp_reserved = (uint64_t)cval.val; - WT_RET_NOTFOUND_OK(ret); + } WT_RET(__wt_config_gets(session, cfg, "eviction_target", &cval)); cache->eviction_target = (u_int)cval.val; From cf8454b55bfdb177a53660297ca2be511a29d7c1 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 10 Sep 2014 20:51:32 -0400 Subject: [PATCH 080/132] Serialize re-configuration, everything is going to go pear-shaped if we reconfigure in multiple threads at the same time. --- src/conn/conn_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index b681876a4b2..28509a71be6 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -636,6 +636,9 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) CONNECTION_API_CALL(conn, session, reconfigure, config, cfg); WT_UNUSED(cfg); + /* Serialize reconfiguration. */ + __wt_spin_lock(session, &conn->api_lock); + /* * The configuration argument has been checked for validity, replace the * previous connection configuration. 
@@ -666,7 +669,9 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) __wt_free(session, conn->cfg); conn->cfg = p; -err: API_END_RET(session, ret); +err: __wt_spin_unlock(session, &conn->api_lock); + + API_END_RET(session, ret); } /* From ba56917bcedaba6fdc3d697ce740fc5654480f26 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 11 Sep 2014 11:47:20 +1000 Subject: [PATCH 081/132] Add a new lock to serialize reconfigure. Don't overload the API lock, since reconfigure can acquire the api lock while doing a reconfigure. --- src/conn/conn_api.c | 4 ++-- src/conn/conn_handle.c | 1 + src/include/connection.h | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 28509a71be6..019463f44b7 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -637,7 +637,7 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_UNUSED(cfg); /* Serialize reconfiguration. */ - __wt_spin_lock(session, &conn->api_lock); + __wt_spin_lock(session, &conn->reconfig_lock); /* * The configuration argument has been checked for validity, replace the @@ -669,7 +669,7 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) __wt_free(session, conn->cfg); conn->cfg = p; -err: __wt_spin_unlock(session, &conn->api_lock); +err: __wt_spin_unlock(session, &conn->reconfig_lock); API_END_RET(session, ret); } diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 03c22b04e30..8deae14ce0c 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -44,6 +44,7 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->checkpoint_lock, "checkpoint")); WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle")); WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list")); + WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_RET(__wt_spin_init(session, &conn->hot_backup_lock, "hot backup")); WT_RET(__wt_spin_init(session, 
&conn->schema_lock, "schema")); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS(conn), &conn->page_lock)); diff --git a/src/include/connection.h b/src/include/connection.h index 1f1f8be88ea..97314759342 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -76,6 +76,7 @@ struct __wt_connection_impl { WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ WT_SPINLOCK fh_lock; /* File handle queue spinlock */ WT_SPINLOCK schema_lock; /* Schema operation spinlock */ + WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ /* * We distribute the btree page locks across a set of spin locks; it From 06d9d5ca346f9cb075773224dec93fe296ccdcae Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Thu, 11 Sep 2014 12:40:50 +1000 Subject: [PATCH 082/132] Fixup cache pool reconfigure now that options are being folded together. Try to make the code more obvious as well. --- src/conn/conn_api.c | 5 --- src/conn/conn_cache_pool.c | 65 +++++++++++++++++++++++++------------- 2 files changed, 43 insertions(+), 27 deletions(-) diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 019463f44b7..81d06881eb5 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -660,11 +660,6 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config) WT_ERR(__wt_statlog_create(session, config_cfg)); WT_ERR(__wt_verbose_config(session, config_cfg)); - /* Wake up the cache pool server so any changes are noticed. 
*/ - if (F_ISSET(conn, WT_CONN_CACHE_POOL)) - WT_ERR(__wt_cond_signal( - session, __wt_process.cache_pool->cache_pool_cond)); - WT_ERR(__wt_config_merge(session, config_cfg, &p)); __wt_free(session, conn->cfg); conn->cfg = p; diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 57d343d0d46..5148229db51 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -102,6 +102,7 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) pool_name); cp = __wt_process.cache_pool; + /* * The cache pool requires a reference count to avoid a race between * configuration/open and destroy. @@ -110,39 +111,54 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) ++cp->refs; /* - * Retrieve the pool configuration options. The values are optional if - * we are re-configuring. + * Cache pool configurations are optional when not creating. If + * values aren't being changed, retrieve the current value so that + * validation of settings works. */ - ret = __wt_config_gets(session, cfg, "shared_cache.size", &cval); - if (reconfiguring && ret == WT_NOTFOUND) - /* Not being changed; use the old value. */ - size = cp->size; - else { - WT_ERR(ret); + if (!created) { + if (__wt_config_gets(session, &cfg[1], + "shared_cache.size", &cval) == 0 && cval.val != 0) + size = (uint64_t)cval.val; + else + size = cp->size; + if (__wt_config_gets(session, &cfg[1], + "shared_cache.chunk", &cval) == 0 && cval.val != 0) + chunk = (uint64_t)cval.val; + else + chunk = cp->chunk; + } else { + /* + * The only time shared cache configuration uses default + * values is when we are creating the pool. + */ + WT_ERR(__wt_config_gets( + session, cfg, "shared_cache.size", &cval)); + WT_ASSERT(session, cval.val != 0); size = (uint64_t)cval.val; - } - ret = __wt_config_gets(session, cfg, "shared_cache.chunk", &cval); - if (reconfiguring && ret == WT_NOTFOUND) - /* Not being changed; use the old value. 
*/ - chunk = cp->chunk; - else { - WT_ERR(ret); + WT_ERR(__wt_config_gets( + session, cfg, "shared_cache.chunk", &cval)); + WT_ASSERT(session, cval.val != 0); chunk = (uint64_t)cval.val; } + /* * Retrieve the reserve size here for validation of configuration. * Don't save it yet since the connections cache is not created if * we are opening. Cache configuration is responsible for saving the * setting. + * The different conditions when reserved size are set are: + * - It's part of the users configuration - use that value. + * - We are reconfiguring - keep the previous value. + * - We are joining a cache pool for the first time (including + * creating the pool) - use the chunk size; that's the default. */ - ret = __wt_config_gets(session, cfg, "shared_cache.reserve", &cval); - if (reconfiguring && ret == WT_NOTFOUND) - /* It is safe to access the cache during reconfigure. */ - reserve = conn->cache->cp_reserved; - else { - WT_ERR(ret); + if (__wt_config_gets(session, &cfg[1], + "shared_cache.reserve", &cval) == 0 && cval.val != 0) reserve = (uint64_t)cval.val; - } + else if (reconfiguring) + reserve = conn->cache->cp_reserved; + else + reserve = chunk; /* * Validate that size and reserve values don't cause the cache @@ -163,6 +179,11 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg) cp->size = size; cp->chunk = chunk; + /* Wake up the cache pool server so any changes are noticed. */ + if (reconfiguring) + WT_ERR(__wt_cond_signal( + session, __wt_process.cache_pool->cache_pool_cond)); + WT_ERR(__wt_verbose(session, WT_VERB_SHARED_CACHE, "Configured cache pool %s. Size: %" PRIu64 ", chunk size: %" PRIu64, cp->name, cp->size, cp->chunk)); From c97794bc9e0a9a42f2b25b402424ce9a6a63feea Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 11 Sep 2014 09:41:13 -0400 Subject: [PATCH 083/132] Add a spin_destroy call for the new reconfig_lock, sort the lists of locks. 
--- src/conn/conn_handle.c | 3 ++- src/include/connection.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c index 8deae14ce0c..e4f0a6ddd73 100644 --- a/src/conn/conn_handle.c +++ b/src/conn/conn_handle.c @@ -44,8 +44,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn) WT_RET(__wt_spin_init(session, &conn->checkpoint_lock, "checkpoint")); WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle")); WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list")); - WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_RET(__wt_spin_init(session, &conn->hot_backup_lock, "hot backup")); + WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure")); WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema")); WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS(conn), &conn->page_lock)); for (i = 0; i < WT_PAGE_LOCKS(conn); ++i) @@ -125,6 +125,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn) __wt_spin_destroy(session, &conn->dhandle_lock); __wt_spin_destroy(session, &conn->fh_lock); __wt_spin_destroy(session, &conn->hot_backup_lock); + __wt_spin_destroy(session, &conn->reconfig_lock); __wt_spin_destroy(session, &conn->schema_lock); for (i = 0; i < WT_PAGE_LOCKS(conn); ++i) __wt_spin_destroy(session, &conn->page_lock[i]); diff --git a/src/include/connection.h b/src/include/connection.h index 97314759342..9af23f95cbf 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -75,8 +75,8 @@ struct __wt_connection_impl { WT_SPINLOCK api_lock; /* Connection API spinlock */ WT_SPINLOCK checkpoint_lock; /* Checkpoint spinlock */ WT_SPINLOCK fh_lock; /* File handle queue spinlock */ - WT_SPINLOCK schema_lock; /* Schema operation spinlock */ WT_SPINLOCK reconfig_lock; /* Single thread reconfigure */ + WT_SPINLOCK schema_lock; /* Schema operation spinlock */ /* * We distribute the btree page locks across a set of spin locks; it From 
60751aaba624cf364acc1a45f4007f2b031135d3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 11 Sep 2014 10:05:45 -0400 Subject: [PATCH 084/132] LSM tombstones are no longer empty values, remove that caveat from the LSM page. --- src/docs/lsm.dox | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/docs/lsm.dox b/src/docs/lsm.dox index 0313862afdf..d5439d6209e 100644 --- a/src/docs/lsm.dox +++ b/src/docs/lsm.dox @@ -107,12 +107,6 @@ there are chunks in the tree for each cursor that is open on the LSM tree. The number of hazard pointers is configured with the \c "hazard_max" configuration key to ::wiredtiger_open. -@subsection lsm_tombstones Empty values - -Internally, WiredTiger's LSM trees use an empty value to represent a -record that has been removed (also known as a "tombstone"). For this -reason, applications cannot store records in LSM trees with empty values. - @subsection lsm_checkpoints Named checkpoints Named checkpoints are not supported on LSM trees, and cursors cannot be opened From 539f01b310867a5dfb7d4056a305fe11ad57f423 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 11 Sep 2014 10:08:43 -0400 Subject: [PATCH 085/132] I don't expect to fix named checkpoints in LSM, tombstones were the caveat we planned to fix. --- src/docs/lsm.dox | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/docs/lsm.dox b/src/docs/lsm.dox index d5439d6209e..b71fccd7151 100644 --- a/src/docs/lsm.dox +++ b/src/docs/lsm.dox @@ -109,9 +109,8 @@ configuration key to ::wiredtiger_open. @subsection lsm_checkpoints Named checkpoints -Named checkpoints are not supported on LSM trees, and cursors cannot be opened -with a non-empty \c "checkpoint" configuration. - -We intend to address these limitations in future releases. +Named checkpoints are not supported on LSM trees, and cursors cannot be +opened with a non-empty \c "checkpoint" configuration (that is, only the +most recent standard checkpoint can be read). 
*/ From 0fa396cb4ad1d2d8fdf8f4d2fd9eb9907a1c3a46 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 11 Sep 2014 10:40:10 -0400 Subject: [PATCH 086/132] Add a section on choosing a storage option. --- src/docs/file-formats.dox | 31 ++++++++++++++++++++++++++++--- src/docs/programming.dox | 2 +- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/src/docs/file-formats.dox b/src/docs/file-formats.dox index 46865da4811..bc747433172 100644 --- a/src/docs/file-formats.dox +++ b/src/docs/file-formats.dox @@ -3,7 +3,8 @@ @section file_formats_formats File formats WiredTiger supports two underlying file formats: row-store and -column-store, both are key/value stores. +column-store, where both are B+tree implementations of key/value stores. +WiredTiger also supports @ref lsm, implemented as a tree of B+trees. In a row-store, both keys and data are variable-length byte strings. In a column-store, keys are 64-bit record numbers (key_format type 'r'), @@ -28,14 +29,38 @@ deleting a value is the same as storing a value of 0. For the same reason, storing a value of 0 will cause cursor scans to skip the record. WiredTiger does not support duplicate data items: there can be only a -single value for any given key, and applications are responsible for -creating unique key/value pairs. +single value associated with any given key, and applications are +responsible for creating unique key/value pairs. WiredTiger allocates space from the underlying files in block units. The minimum file allocation unit WiredTiger supports is 512B and the maximum file allocation unit is 512MB. File block offsets are 64-bit (meaning the maximum file size is very, very large). +@section file_formats_choice Choosing a file format + +The row-store format is the default choice for most applications. When +the primary key is a record number, there are advantages to storing +columns in separate files, or the underlying data is a set of bits, +column-store format may be a better choice. 
+ +Both row- and column-store formats can maintain high volumes of writes, +but for data sets requiring sustained, extreme write throughput, @ref +lsm are usually a better choice. For applications that do not require +extreme write throughput, row- or column-store is likely to be a better +choice because the read throughput is better than with LSM trees (an +effect that becomes more pronounced as additional read threads are added). + +Applications with complex schemas may also benefit from using multiple +storage formats, that is, using a combination of different formats in +the database, and even in individual tables (for example, a sparse, wide +table configured with a column-store primary, where indexes are stored +in an LSM tree). + +Finally, as WiredTiger makes it easy to switch back-and-forth between +storage configurations, it's usually worthwhile benchmarking possible +configurations when there is any question. + @section file_formats_compression File formats and compression Row-stores support four types of compression: key prefix compression, diff --git a/src/docs/programming.dox b/src/docs/programming.dox index 59eeab7705d..4add19c833b 100644 --- a/src/docs/programming.dox +++ b/src/docs/programming.dox @@ -18,8 +18,8 @@ each of which is ordered by one or more columns.

Storage options

- @subpage schema -- @subpage lsm - @subpage file_formats +- @subpage lsm - @subpage compression

Programming notes

From d3984707e53bf1037b455d9243d2177645e3ba76 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 11 Sep 2014 11:08:13 -0400 Subject: [PATCH 087/132] Refactor to only parse async config once. #1172 --- src/async/async_api.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index 2e788b772fd..23a176653c7 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -216,27 +216,18 @@ __wt_async_stats_update(WT_SESSION_IMPL *session) } /* - * __wt_async_create -- - * Start the async subsystem and worker threads. + * __async_start -- + * Start the async subsystem. All configuration processing has + * already been done by the caller. */ -int -__wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) +static int +__async_start(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_CONNECTION_IMPL *conn; - int run; uint32_t i; conn = S2C(session); - - /* Handle configuration. */ - run = 0; - WT_RET(__async_config(session, conn, cfg, &run)); - - /* If async is not configured, we're done. */ - if (!run) - return (0); - conn->async_cfg = 1; /* * Async is on, allocate the WT_ASYNC structure and initialize the ops. @@ -275,6 +266,28 @@ __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) return (0); } +/* + * __wt_async_create -- + * Start the async subsystem and worker threads. + */ +int +__wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_CONNECTION_IMPL *conn; + int run; + + conn = S2C(session); + + /* Handle configuration. */ + run = 0; + WT_RET(__async_config(session, conn, cfg, &run)); + + /* If async is not configured, we're done. */ + if (!run) + return (0); + return (__async_start(session)); +} + /* * __wt_async_reconfig -- * Start the async subsystem and worker threads. 
@@ -329,7 +342,7 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } else if (conn->async_cfg == 0 && run) /* Case 2 */ - return (__wt_async_create(session, cfg)); + return (__async_start(session)); else if (conn->async_cfg == 0) /* Case 3 */ return (0); From 13549c7ab002533a3c79ea546cc1f3571e2f037f Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 11 Sep 2014 11:46:44 -0400 Subject: [PATCH 088/132] Split flushing versus compacting phases of compact. Cleanup. #1200 --- src/lsm/lsm_merge.c | 22 ++++++++++++++-------- src/lsm/lsm_tree.c | 15 +++++++++------ src/lsm/lsm_work_unit.c | 11 +++++------ 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 8de7d1350df..04e0bf19254 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -251,16 +251,22 @@ __wt_lsm_merge( /* Allocate an ID for the merge. */ dest_id = WT_ATOMIC_ADD(lsm_tree->last, 1); - WT_RET(__wt_verbose(session, WT_VERB_LSM, - "Merging %s chunks %u-%u into %u (%" PRIu64 " records)" - ", generation %" PRIu32, - lsm_tree->name, - start_chunk, end_chunk, dest_id, record_count, generation)); - if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) + /* + * We only want to do the chunk loop if we're running + * with verbose, we wrap these statements in the conditional. + * Avoid that in the normal path. 
+ */ + if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) { + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "Merging %s chunks %u-%u into %u (%" PRIu64 " records)" + ", generation %" PRIu32, + lsm_tree->name, + start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_RET(__wt_verbose(session, WT_VERB_LSM, - "Chunk[%u] id %u", - verb, lsm_tree->chunk[verb]->id)); + "%s: Chunk[%u] id %u", + lsm_tree->name, verb, lsm_tree->chunk[verb]->id)); + } WT_RET(__wt_calloc_def(session, 1, &chunk)); chunk->id = dest_id; diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index c3082ca4a92..a54c4d227a7 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1040,8 +1040,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) goto err; - compacting = flushing = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING); + flushing = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); /* * Set the switch transaction on the current chunk, if it @@ -1069,25 +1069,28 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* Wait for the work unit queues to drain. 
*/ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { - if (flushing && ref && - !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { + if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact flush retry %s chunk %u", name, chunk->id)); + F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); } else { - flushing = ref = 0; - if (chunk != NULL) { + if (ref) { + WT_ASSERT(session, chunk != NULL); WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact flush done %s chunk %u", name, chunk->id)); (void)WT_ATOMIC_SUB(chunk->refcnt, 1); } + flushing = ref = 0; + compacting = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); } } /* diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index f3ae9f04219..a34bf41969c 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -238,14 +238,13 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* Stop if a running transaction needs the chunk. */ __wt_txn_update_oldest(session); - if (chunk->switch_txn == WT_TXN_NONE) - WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM ckp txn needs chunk %s: switch %" PRIu64 - " oldest %" PRIu64, chunk->uri, - chunk->switch_txn, S2C(session)->txn_global.oldest_id)); if (chunk->switch_txn == WT_TXN_NONE || - !__wt_txn_visible_all(session, chunk->switch_txn)) + !__wt_txn_visible_all(session, chunk->switch_txn)) { + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "LSM worker %s: running transaction, return", + chunk->uri)); return (0); + } WT_RET(__wt_verbose(session, WT_VERB_LSM, "LSM worker flushing %s", chunk->uri)); From 7179a227e579b49d91608e53c2df4a0d2a0a3030 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 11 Sep 2014 12:35:29 -0400 Subject: [PATCH 089/132] Comments and cleanup. 
#1200 --- src/lsm/lsm_merge.c | 6 ++--- src/lsm/lsm_tree.c | 52 +++++++++++++++++++++++++++++++---------- src/lsm/lsm_work_unit.c | 6 ++++- src/lsm/lsm_worker.c | 7 +++--- 4 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 04e0bf19254..363fe77b93e 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -252,9 +252,9 @@ __wt_lsm_merge( dest_id = WT_ATOMIC_ADD(lsm_tree->last, 1); /* - * We only want to do the chunk loop if we're running - * with verbose, we wrap these statements in the conditional. - * Avoid that in the normal path. + * We only want to do the chunk loop if we're running with verbose, + * so we wrap these statements in the conditional. Avoid the loop + * in the normal path. */ if (WT_VERBOSE_ISSET(session, WT_VERB_LSM)) { WT_RET(__wt_verbose(session, WT_VERB_LSM, diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index a54c4d227a7..4eec6a9b559 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1028,6 +1028,19 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_ERR(__wt_seconds(session, &begin)); + /* + * Compacting has two distinct phases. + * 1. All in-memory chunks up to and including the + * current chunk must be flushed. Normally, the flush code + * does not flush the last, in-use chunk, so we set a force + * flag to include that last chunk. We monitor the state of the + * last chunk and periodically push another forced flush work + * unit until it is complete. + * 2. After all flushing is done, we move onto the merging + * phase for compaction. Again, we monitor the state and + * continue to push merge work units until all merging is done. + */ + /* Lock the tree: single-thread compaction.
*/ WT_ERR(__wt_lsm_tree_lock(session, lsm_tree, 1)); locked = 1; @@ -1036,8 +1049,9 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) lsm_tree->merge_throttle = 0; lsm_tree->merge_aggressiveness = 0; - /* If another thread started compacting this tree, we're done. */ - if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) + /* If another thread started a compact on this tree, we're done. */ + if (F_ISSET(lsm_tree, + WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING)) goto err; flushing = 1; @@ -1053,6 +1067,10 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { if (chunk->switch_txn == WT_TXN_NONE) chunk->switch_txn = __wt_txn_new_id(session); + /* + * If we have a chunk, we want to look for it to be on-disk. + * So we need to add a reference to keep it available. + */ (void)WT_ATOMIC_ADD(chunk->refcnt, 1); ref = 1; } @@ -1060,15 +1078,23 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) locked = 0; WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact force flush %s flags 0x%" PRIx32 " chunk %u flags 0x%" - PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); + if (chunk != NULL) + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact force flush %s flags 0x%" PRIx32 + " chunk %u flags 0x%" + PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); /* Make sure the in-memory chunk gets flushed but not switched. */ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); /* Wait for the work unit queues to drain. */ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + /* + * The flush flag is cleared when the chunk has been + * flushed. Continue to push forced flushes until the + * chunk is on disk. Once it is on disk move to the compacting + * phase.
+ */ if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { if (chunk != NULL && !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { @@ -1099,11 +1125,11 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * stopping otherwise we might not do merges that would * span chunks with different generations. */ - if (!F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) { + if (compacting && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) { if (lsm_tree->merge_aggressiveness < 10) { F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 10; - } else if (!flushing) + } else break; } __wt_sleep(1, 0); @@ -1117,7 +1143,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * done. If we are pushing merges, make sure they are * aggressive, to avoid duplicating effort. */ - if (!flushing) + if (compacting) #define COMPACT_PARALLEL_MERGES 5 for (i = lsm_tree->queue_ref; i < COMPACT_PARALLEL_MERGES; i++) { @@ -1126,9 +1152,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) session, WT_LSM_WORK_MERGE, lsm_tree)); } } -err: if (locked) - WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - /* Ensure the compacting flag is cleared if we set it. */ +err: + /* Ensure anything we set is cleared. 
*/ if (flushing) F_CLR(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); if (ref) @@ -1136,8 +1161,11 @@ err: if (locked) if (compacting) { F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 0; + if (locked) + WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); } - WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact complete %s", name)); + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact %s complete, return %d", name, ret)); __wt_lsm_tree_release(session, lsm_tree); return (ret); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index a34bf41969c..eb791f98f5f 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -81,7 +81,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, /* * Normally we don't want to force out the last chunk. But if we're - * doing a forced flush, likely from a compact call, then we do want + * doing a forced flush, likely from a compact call, then we want * to include the final chunk. */ end = force ? lsm_tree->nchunks : lsm_tree->nchunks - 1; @@ -93,6 +93,10 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, force ? " w/ force" : "", i, end - 1, lsm_tree->chunk[i]->uri)); *chunkp = lsm_tree->chunk[i]; + /* + * Let the caller know if this is the last chunk we + * could have selected or an earlier one. + */ if (i == end - 1) *last = 1; break; diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 68ebe34fe91..1f2b76ba720 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -47,13 +47,12 @@ __lsm_worker_general_op( if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) { force = F_ISSET(entry, WT_LSM_WORK_FORCE); F_CLR(entry, WT_LSM_WORK_FORCE); - /* - * If this is a force flush, we want to force out all - * possible chunks, not just the first one we find. - */ last = 0; WT_ERR(__wt_lsm_get_chunk_to_flush(session, entry->lsm_tree, force, &last, &chunk)); + /* + * If we got a chunk to flush, checkpoint it. 
+ */ if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Flush%s%s chunk %d %s", From 5e2285ec4ad0ab2f2e462263c1cd14322891ca19 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 11 Sep 2014 15:41:39 -0400 Subject: [PATCH 090/132] Restore load time to floating point for better granularity. Restore runner script to expect floating point for load time. --- bench/wtperf/runners/wtperf_run.sh | 26 ++++++++++++++++++++------ bench/wtperf/wtperf.c | 15 +++++++-------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/bench/wtperf/runners/wtperf_run.sh b/bench/wtperf/runners/wtperf_run.sh index 16470f35dfa..3296a4072b5 100755 --- a/bench/wtperf/runners/wtperf_run.sh +++ b/bench/wtperf/runners/wtperf_run.sh @@ -79,7 +79,7 @@ while test "$run" -le "$runmax"; do if test "$?" -ne "0"; then exit 1 fi - # Load uses different text. Handle separately. + # Load is always using floating point, so handle separately l=`grep "^Load time:" ./WT_TEST/test.stat` if test "$?" -eq "0"; then load=`echo $l | cut -d ' ' -f 3` @@ -87,7 +87,7 @@ while test "$run" -le "$runmax"; do load=0 fi cur[$loadindex]=$load - sum[$loadindex]=`expr $load + ${sum[$loadindex]}` + sum[$loadindex]=`echo "${sum[$loadindex]} + $load" | bc` echo "cur ${cur[$loadindex]} sum ${sum[$loadindex]}" >> $outfile for i in ${!ops[*]}; do l=`grep "Executed.*${ops[$i]} operations" ./WT_TEST/test.stat` @@ -109,8 +109,17 @@ while test "$run" -le "$runmax"; do done else for i in ${!cur[*]}; do - min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]}) - max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]}) + if test "$i" -eq "$loadindex"; then + if (($(bc <<< "${cur[$i]} < ${min[$i]}") )); then + min[$i]=${cur[$i]} + fi + if (($(bc <<< "${cur[$i]} > ${max[$i]}") )); then + max[$i]=${cur[$i]} + fi + else + min[$i]=$(getval $getmin ${cur[$i]} ${min[$i]}) + max[$i]=$(getval $getmax ${cur[$i]} ${max[$i]}) + fi done fi # @@ -145,8 +154,13 @@ fi # Average the remaining and write it out to the file. 
# for i in ${!min[*]}; do - s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}` - avg[$i]=`expr $s / $numruns` + if test "$i" -eq "$loadindex"; then + s=`echo "scale=3; ${sum[$i]} - ${min[$i]} - ${max[$i]}" | bc` + avg[$i]=`echo "scale=3; $s / $numruns" | bc` + else + s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}` + avg[$i]=`expr $s / $numruns` + fi done for i in ${!outp[*]}; do echo "${outp[$i]} ${avg[$i]}" >> $outfile diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 38352fd3ef6..3469f0f1599 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1200,7 +1200,7 @@ execute_populate(CONFIG *cfg) CONFIG_THREAD *popth; WT_ASYNC_OP *asyncop; size_t i; - uint64_t last_ops, secs; + uint64_t last_ops, msecs; uint32_t interval, tables; int elapsed, ret; void *(*pfunc)(void *); @@ -1278,12 +1278,11 @@ execute_populate(CONFIG *cfg) } lprintf(cfg, 0, 1, "Finished load of %" PRIu32 " items", cfg->icount); - secs = WT_TIMEDIFF(stop, start) / BILLION; - if (secs == 0) - ++secs; + msecs = ns_to_ms(WT_TIMEDIFF(stop, start)); lprintf(cfg, 0, 1, - "Load time: %" PRIu64 "\n" "load ops/sec: %" PRIu64, - secs, cfg->icount / secs); + "Load time: %.2f\n" "load ops/sec: %" PRIu64, + (double)msecs / (double)THOUSAND, + (cfg->icount / msecs) / THOUSAND); /* * If configured, compact to allow LSM merging to complete. We @@ -1323,9 +1322,9 @@ execute_populate(CONFIG *cfg) lprintf(cfg, ret, 0, "Get time failed in populate."); return (ret); } - secs = WT_TIMEDIFF(stop, start) / BILLION; lprintf(cfg, 0, 1, - "Compact completed in %" PRIu64 " seconds", secs); + "Compact completed in %" PRIu64 " seconds", + ns_to_sec(WT_TIMEDIFF(stop, start))); assert(tables == 0); } return (0); From 34222b005ee0f5eab62e4e05dc108d4351e0dc78 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 11 Sep 2014 15:50:39 -0400 Subject: [PATCH 091/132] Fix warning. 
--- bench/wtperf/wtperf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 3469f0f1599..b6d65761b5a 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1282,7 +1282,7 @@ execute_populate(CONFIG *cfg) lprintf(cfg, 0, 1, "Load time: %.2f\n" "load ops/sec: %" PRIu64, (double)msecs / (double)THOUSAND, - (cfg->icount / msecs) / THOUSAND); + (uint64_t)((cfg->icount / msecs) / THOUSAND)); /* * If configured, compact to allow LSM merging to complete. We @@ -1324,7 +1324,7 @@ execute_populate(CONFIG *cfg) } lprintf(cfg, 0, 1, "Compact completed in %" PRIu64 " seconds", - ns_to_sec(WT_TIMEDIFF(stop, start))); + (uint64_t)(ns_to_sec(WT_TIMEDIFF(stop, start)))); assert(tables == 0); } return (0); From 2a594958c1f65d32f85e3b7d19c26d2a4919deff Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 12 Sep 2014 09:58:24 +1000 Subject: [PATCH 092/132] Fix build warning: ../src/lsm/lsm_tree.c:1007: warning: 'chunk' may be used uninitialized in this function --- src/lsm/lsm_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 4eec6a9b559..59baf038fd1 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1010,6 +1010,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) int i, compacting, flushing, locked, ref; compacting = flushing = locked = ref = 0; + chunk = NULL; /* * This function is applied to all matching sources: ignore anything * that is not an LSM tree. @@ -1062,7 +1063,6 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * hasn't been set before. This prevents further writes, so it * can be flushed by the checkpoint worker. 
*/ - chunk = NULL; if (lsm_tree->nchunks > 0 && (chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) { if (chunk->switch_txn == WT_TXN_NONE) From 773e8091446214c719a5ae0bc487903a3c3ca6c3 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 12 Sep 2014 08:41:08 +0000 Subject: [PATCH 093/132] Move cascading string comparisons into a switch statement. Cursor open should be as light weight as possible. --- src/session/session_api.c | 108 ++++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 33 deletions(-) diff --git a/src/session/session_api.c b/src/session/session_api.c index e63e2c0284a..0e1bfe11bb8 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -189,41 +189,83 @@ __wt_open_cursor(WT_SESSION_IMPL *session, WT_COLGROUP *colgroup; WT_DATA_SOURCE *dsrc; WT_DECL_RET; + int handled = 0; - if (WT_PREFIX_MATCH(uri, "backup:")) - ret = __wt_curbackup_open(session, uri, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "colgroup:")) { - /* - * Column groups are a special case: open a cursor on the - * underlying data source. 
- */ - WT_RET(__wt_schema_get_colgroup(session, uri, NULL, &colgroup)); - ret = __wt_open_cursor( - session, colgroup->source, owner, cfg, cursorp); - } else if (WT_PREFIX_MATCH(uri, "config:")) - ret = __wt_curconfig_open(session, uri, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "file:")) - ret = __wt_curfile_open(session, uri, owner, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "lsm:")) - ret = __wt_clsm_open(session, uri, owner, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) - ret = __wt_curmetadata_open(session, uri, owner, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "index:")) - ret = __wt_curindex_open(session, uri, owner, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "log:")) - ret = __wt_curlog_open(session, uri, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "statistics:")) - ret = __wt_curstat_open(session, uri, cfg, cursorp); - else if (WT_PREFIX_MATCH(uri, "table:")) - ret = __wt_curtable_open(session, uri, cfg, cursorp); - else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) - ret = dsrc->open_cursor == NULL ? - __wt_object_unsupported(session, uri) : - __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp); - else - ret = __wt_bad_object_type(session, uri); + /* + * Open specific cursor types we know about, or call the generic data source open + * function. Unwind a set of string comparisons into a switch statement for performance. + */ + switch (uri[0]) { + case 'b': + if (WT_PREFIX_MATCH(uri, "backup:")) { + WT_RET(__wt_curbackup_open(session, uri, cfg, cursorp)); + handled = 1; + } + break; + case 'c': + if (WT_PREFIX_MATCH(uri, "colgroup:")) { + /* + * Column groups are a special case: open a cursor on the + * underlying data source. 
+ */ + WT_RET(__wt_schema_get_colgroup(session, uri, NULL, &colgroup)); + WT_RET(__wt_open_cursor( + session, colgroup->source, owner, cfg, cursorp)); + handled = 1; + } else if (WT_PREFIX_MATCH(uri, "config:")) { + WT_RET(__wt_curconfig_open(session, uri, cfg, cursorp)); + handled = 1; + } + break; + case 'f': + if (WT_PREFIX_MATCH(uri, "file:")) { + WT_RET(__wt_curfile_open(session, uri, owner, cfg, cursorp)); + handled = 1; + } + break; + case 'i': + if (WT_PREFIX_MATCH(uri, "index:")) { + WT_RET(__wt_curindex_open(session, uri, owner, cfg, cursorp)); + handled = 1; + } + break; + case 'l': + if (WT_PREFIX_MATCH(uri, "log:")) { + WT_RET(__wt_curlog_open(session, uri, cfg, cursorp)); + handled = 1; + } else if (WT_PREFIX_MATCH(uri, "lsm:")) { + WT_RET(__wt_clsm_open(session, uri, owner, cfg, cursorp)); + handled = 1; + } + break; + case 's': + if (WT_PREFIX_MATCH(uri, "statistics:")) { + WT_RET(__wt_curstat_open(session, uri, cfg, cursorp)); + handled = 1; + } + break; + case 't': + if (WT_PREFIX_MATCH(uri, "table:")) { + WT_RET(__wt_curtable_open(session, uri, cfg, cursorp)); + handled = 1; + } + break; + default: + /* Explicit default to make lint happy */ + break; - return (ret); + } + if (!handled) { + if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) + WT_RET(__wt_curmetadata_open(session, uri, owner, cfg, cursorp)); + else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) + WT_RET(dsrc->open_cursor == NULL ? + __wt_object_unsupported(session, uri) : + __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp)); + else + WT_RET(__wt_bad_object_type(session, uri)); + } + return (0); } /* From 9d7a071385083340a20da2b9bd1b2ce7c0953c27 Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Fri, 12 Sep 2014 08:44:56 +0000 Subject: [PATCH 094/132] lint. 
--- src/session/session_api.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/session/session_api.c b/src/session/session_api.c index 0e1bfe11bb8..eff89231d48 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -188,8 +188,9 @@ __wt_open_cursor(WT_SESSION_IMPL *session, { WT_COLGROUP *colgroup; WT_DATA_SOURCE *dsrc; - WT_DECL_RET; - int handled = 0; + int handled; + + handled = 0; /* * Open specific cursor types we know about, or call the generic data source open From 951a4b7459e88d4b6c661afb2d24e756af94392d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 07:25:19 -0400 Subject: [PATCH 095/132] Review where we're passing WT_CONNECTION_IMPL instead of WT_SESSION_IMPL, and for that reason directly accessing WT_CONNECTION_IMPL.default_session, prefer passing the session handle instead of the connection handle where that makes sense. --- src/async/async_api.c | 40 +++++++++++++++++++----------------- src/async/async_op.c | 26 +++++++++++++---------- src/btree/bt_evict.c | 12 ++++++----- src/conn/conn_api.c | 30 +++++++++++++-------------- src/conn/conn_cache.c | 8 ++++---- src/conn/conn_cache_pool.c | 7 +++---- src/conn/conn_ckpt.c | 8 ++++---- src/conn/conn_dhandle.c | 6 +++--- src/conn/conn_log.c | 6 +++--- src/conn/conn_open.c | 38 +++++++++++++++------------------- src/conn/conn_sweep.c | 12 ++++++----- src/include/extern.h | 42 +++++++++++++++++++------------------- src/log/log.c | 2 +- src/lsm/lsm_manager.c | 8 ++++---- src/txn/txn.c | 13 ++++++------ src/txn/txn_recover.c | 4 +--- 16 files changed, 133 insertions(+), 129 deletions(-) diff --git a/src/async/async_api.c b/src/async/async_api.c index 23a176653c7..4ae074c429a 100644 --- a/src/async/async_api.c +++ b/src/async/async_api.c @@ -102,17 +102,17 @@ err: * Find and allocate the next available async op handle. 
*/ static int -__async_new_op_alloc(WT_CONNECTION_IMPL *conn, const char *uri, +__async_new_op_alloc(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_ASYNC_OP_IMPL **opp) { WT_ASYNC *async; WT_ASYNC_OP_IMPL *op; - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; uint32_t i, save_i, view; + conn = S2C(session); async = conn->async; - session = conn->default_session; - WT_STAT_FAST_CONN_INCR(conn->default_session, async_op_alloc); + WT_STAT_FAST_CONN_INCR(session, async_op_alloc); *opp = NULL; retry: @@ -154,7 +154,7 @@ retry: WT_STAT_FAST_CONN_INCR(session, async_alloc_race); goto retry; } - WT_STAT_FAST_CONN_INCRV(conn->default_session, async_alloc_view, view); + WT_STAT_FAST_CONN_INCRV(session, async_alloc_view, view); WT_RET(__async_get_format(conn, uri, config, op)); op->unique_id = WT_ATOMIC_ADD(async->op_id, 1); op->optype = WT_AOP_NONE; @@ -237,7 +237,7 @@ __async_start(WT_SESSION_IMPL *session) STAILQ_INIT(&async->formatqh); WT_RET(__wt_spin_init(session, &async->ops_lock, "ops")); WT_RET(__wt_cond_alloc(session, "async flush", 0, &async->flush_cond)); - WT_RET(__wt_async_op_init(conn)); + WT_RET(__wt_async_op_init(session)); /* * Start up the worker threads. @@ -336,8 +336,8 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) */ if (conn->async_cfg > 0 && !run) { /* Case 1 */ - WT_TRET(__wt_async_flush(conn)); - ret = __wt_async_destroy(conn); + WT_TRET(__wt_async_flush(session)); + ret = __wt_async_destroy(session); conn->async_cfg = 0; return (ret); } else if (conn->async_cfg == 0 && run) @@ -416,17 +416,17 @@ __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]) * Destroy the async worker threads and async subsystem. 
*/ int -__wt_async_destroy(WT_CONNECTION_IMPL *conn) +__wt_async_destroy(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_ASYNC_FORMAT *af, *afnext; WT_ASYNC_OP *op; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; uint32_t i; - session = conn->default_session; + conn = S2C(session); async = conn->async; if (!conn->async_cfg) @@ -481,17 +481,17 @@ __wt_async_destroy(WT_CONNECTION_IMPL *conn) * Implementation of the WT_CONN->async_flush method. */ int -__wt_async_flush(WT_CONNECTION_IMPL *conn) +__wt_async_flush(WT_SESSION_IMPL *session) { WT_ASYNC *async; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SESSION_IMPL *session; + conn = S2C(session); if (!conn->async_cfg) return (0); async = conn->async; - session = conn->default_session; WT_STAT_FAST_CONN_INCR(session, async_flush); /* * We have to do several things. First we have to prevent @@ -522,10 +522,9 @@ retry: */ async->flush_count = 0; (void)WT_ATOMIC_ADD(async->flush_gen, 1); - WT_ASSERT(conn->default_session, - async->flush_op.state == WT_ASYNCOP_FREE); + WT_ASSERT(session, async->flush_op.state == WT_ASYNCOP_FREE); async->flush_op.state = WT_ASYNCOP_READY; - WT_ERR(__wt_async_op_enqueue(conn, &async->flush_op)); + WT_ERR(__wt_async_op_enqueue(session, &async->flush_op)); while (async->flush_state != WT_ASYNC_FLUSH_COMPLETE) WT_ERR(__wt_cond_wait(NULL, async->flush_cond, 100000)); /* @@ -574,19 +573,22 @@ __async_runtime_config(WT_ASYNC_OP_IMPL *op, const char *cfg[]) * Implementation of the WT_CONN->async_new_op method. 
*/ int -__wt_async_new_op(WT_CONNECTION_IMPL *conn, const char *uri, +__wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp) { WT_ASYNC_OP_IMPL *op; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; *opp = NULL; + + conn = S2C(session); if (!conn->async_cfg) return (ENOTSUP); op = NULL; - WT_ERR(__async_new_op_alloc(conn, uri, config, &op)); + WT_ERR(__async_new_op_alloc(session, uri, config, &op)); WT_ERR(__async_runtime_config(op, cfg)); op->cb = cb; *opp = op; diff --git a/src/async/async_op.c b/src/async/async_op.c index 1e9151e0f86..5cd05881fd9 100644 --- a/src/async/async_op.c +++ b/src/async/async_op.c @@ -91,7 +91,7 @@ static int __async_op_wrap(WT_ASYNC_OP_IMPL *op, WT_ASYNC_OPTYPE type) { op->optype = type; - return (__wt_async_op_enqueue(O2C(op), op)); + return (__wt_async_op_enqueue(O2S(op), op)); } /* @@ -254,20 +254,22 @@ __async_op_init(WT_CONNECTION_IMPL *conn, WT_ASYNC_OP_IMPL *op, uint32_t id) * Enqueue an operation onto the work queue. */ int -__wt_async_op_enqueue(WT_CONNECTION_IMPL *conn, WT_ASYNC_OP_IMPL *op) +__wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op) { WT_ASYNC *async; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint64_t cur_head, cur_tail, my_alloc, my_slot; #ifdef HAVE_DIAGNOSTIC WT_ASYNC_OP_IMPL *my_op; #endif + conn = S2C(session); async = conn->async; /* * Enqueue op at the tail of the work queue. */ - WT_ASSERT(conn->default_session, op->state == WT_ASYNCOP_READY); + WT_ASSERT(session, op->state == WT_ASYNCOP_READY); /* * We get our slot in the ring buffer to use. 
*/ @@ -287,7 +289,7 @@ __wt_async_op_enqueue(WT_CONNECTION_IMPL *conn, WT_ASYNC_OP_IMPL *op) #ifdef HAVE_DIAGNOSTIC WT_ORDERED_READ(my_op, async->async_queue[my_slot]); if (my_op != NULL) - return (__wt_panic(conn->default_session)); + return (__wt_panic(session)); #endif WT_PUBLISH(async->async_queue[my_slot], op); op->state = WT_ASYNCOP_ENQUEUED; @@ -311,14 +313,17 @@ __wt_async_op_enqueue(WT_CONNECTION_IMPL *conn, WT_ASYNC_OP_IMPL *op) * Initialize all the op handles. */ int -__wt_async_op_init(WT_CONNECTION_IMPL *conn) +__wt_async_op_init(WT_SESSION_IMPL *session) { WT_ASYNC *async; WT_ASYNC_OP_IMPL *op; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; uint32_t i; + conn = S2C(session); async = conn->async; + /* * Initialize the flush op structure. */ @@ -330,13 +335,12 @@ __wt_async_op_init(WT_CONNECTION_IMPL *conn) * can never overlap the tail. Include extra for the flush op. */ async->async_qsize = conn->async_size + 2; - WT_RET(__wt_calloc_def(conn->default_session, - async->async_qsize, &async->async_queue)); + WT_RET(__wt_calloc_def( + session, async->async_qsize, &async->async_queue)); /* * Allocate and initialize all the user ops. 
*/ - WT_ERR(__wt_calloc_def(conn->default_session, - conn->async_size, &async->async_ops)); + WT_ERR(__wt_calloc_def(session, conn->async_size, &async->async_ops)); for (i = 0; i < conn->async_size; i++) { op = &async->async_ops[i]; WT_ERR(__async_op_init(conn, op, i)); @@ -344,11 +348,11 @@ __wt_async_op_init(WT_CONNECTION_IMPL *conn) return (0); err: if (async->async_ops != NULL) { - __wt_free(conn->default_session, async->async_ops); + __wt_free(session, async->async_ops); async->async_ops = NULL; } if (async->async_queue != NULL) { - __wt_free(conn->default_session, async->async_queue); + __wt_free(session, async->async_queue); async->async_queue = NULL; } return (ret); diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index deab0f54312..84f7571993b 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -227,12 +227,14 @@ err: * Start the eviction server thread. */ int -__wt_evict_create(WT_CONNECTION_IMPL *conn) +__wt_evict_create(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_EVICT_WORKER *workers; u_int i; + conn = S2C(session); + /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_EVICTION_RUN); @@ -287,17 +289,17 @@ __wt_evict_create(WT_CONNECTION_IMPL *conn) * Destroy the eviction server thread. 
*/ int -__wt_evict_destroy(WT_CONNECTION_IMPL *conn) +__wt_evict_destroy(WT_SESSION_IMPL *session) { WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_EVICT_WORKER *workers; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; u_int i; + conn = S2C(session); cache = conn->cache; - session = conn->default_session; workers = conn->evict_workctx; F_CLR(conn, WT_CONN_EVICTION_RUN); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 6446da4781f..6223f38e3fc 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -306,16 +306,16 @@ err: if (ncoll != NULL) { /* * __wt_conn_remove_collator -- - * remove collator added by WT_CONNECTION->add_collator, - * only used internally. + * Remove collator added by WT_CONNECTION->add_collator, only used + * internally. */ int -__wt_conn_remove_collator(WT_CONNECTION_IMPL *conn, WT_NAMED_COLLATOR *ncoll) +__wt_conn_remove_collator(WT_SESSION_IMPL *session, WT_NAMED_COLLATOR *ncoll) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - session = conn->default_session; + conn = S2C(session); /* Call any termination method. */ if (ncoll->collator->terminate != NULL) @@ -370,17 +370,17 @@ err: if (ncomp != NULL) { /* * __wt_conn_remove_compressor -- - * remove compressor added by WT_CONNECTION->add_compressor, - * only used internally. + * remove compressor added by WT_CONNECTION->add_compressor, only used + * internally. */ int __wt_conn_remove_compressor( - WT_CONNECTION_IMPL *conn, WT_NAMED_COMPRESSOR *ncomp) + WT_SESSION_IMPL *session, WT_NAMED_COMPRESSOR *ncomp) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - session = conn->default_session; + conn = S2C(session); /* Call any termination method. 
*/ if (ncomp->compressor->terminate != NULL) @@ -438,12 +438,12 @@ err: if (ndsrc != NULL) { */ int __wt_conn_remove_data_source( - WT_CONNECTION_IMPL *conn, WT_NAMED_DATA_SOURCE *ndsrc) + WT_SESSION_IMPL *session, WT_NAMED_DATA_SOURCE *ndsrc) { WT_DECL_RET; - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; - session = conn->default_session; + conn = S2C(session); /* Call any termination method. */ if (ndsrc->dsrc->terminate != NULL) @@ -494,7 +494,7 @@ __conn_async_flush(WT_CONNECTION *wt_conn) conn = (WT_CONNECTION_IMPL *)wt_conn; CONNECTION_API_CALL_NOCONF(conn, session, async_flush); - WT_ERR(__wt_async_flush(conn)); + WT_ERR(__wt_async_flush(session)); err: API_END_RET_NOTFOUND_MAP(session, ret); } @@ -514,7 +514,7 @@ __conn_async_new_op(WT_CONNECTION *wt_conn, const char *uri, const char *config, conn = (WT_CONNECTION_IMPL *)wt_conn; CONNECTION_API_CALL(conn, session, async_new_op, config, cfg); - WT_ERR(__wt_async_new_op(conn, uri, config, cfg, callback, &op)); + WT_ERR(__wt_async_new_op(session, uri, config, cfg, callback, &op)); *asyncopp = &op->iface; diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c index 2c21e5f1fbc..079bd05ff1e 100644 --- a/src/conn/conn_cache.c +++ b/src/conn/conn_cache.c @@ -120,7 +120,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) __wt_cache_stats_update(session); return (0); -err: WT_RET(__wt_cache_destroy(conn)); +err: WT_RET(__wt_cache_destroy(session)); return (ret); } @@ -151,13 +151,13 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) * Discard the underlying cache. 
*/ int -__wt_cache_destroy(WT_CONNECTION_IMPL *conn) +__wt_cache_destroy(WT_SESSION_IMPL *session) { WT_CACHE *cache; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); cache = conn->cache; if (cache == NULL) diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 5148229db51..ba80ac15267 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -260,17 +260,16 @@ __wt_conn_cache_pool_open(WT_SESSION_IMPL *session) * if we were the last connection. */ int -__wt_conn_cache_pool_destroy(WT_CONNECTION_IMPL *conn) +__wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_CACHE_POOL *cp; - WT_CONNECTION_IMPL *entry; + WT_CONNECTION_IMPL *conn, *entry; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; int cp_locked, found; - session = conn->default_session; + conn = S2C(session); cache = conn->cache; cp_locked = found = 0; cp = __wt_process.cache_pool; diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c index 101877a3ddb..a7b8be0d083 100644 --- a/src/conn/conn_ckpt.c +++ b/src/conn/conn_ckpt.c @@ -157,7 +157,7 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) /* If there is already a server running, shut it down. */ if (conn->ckpt_session != NULL) - WT_RET(__wt_checkpoint_server_destroy(conn)); + WT_RET(__wt_checkpoint_server_destroy(session)); WT_RET(__ckpt_server_config(session, cfg, &start)); if (start) @@ -171,13 +171,13 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]) * Destroy the checkpoint server thread. 
*/ int -__wt_checkpoint_server_destroy(WT_CONNECTION_IMPL *conn) +__wt_checkpoint_server_destroy(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); F_CLR(conn, WT_CONN_SERVER_CHECKPOINT); if (conn->ckpt_tid_set) { diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 51b9d0846fd..bb76c031925 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -640,13 +640,13 @@ err: session->dhandle = save_dhandle; * Close/discard all data handles. */ int -__wt_conn_dhandle_discard(WT_CONNECTION_IMPL *conn) +__wt_conn_dhandle_discard(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); /* * Close open data handles: first, everything but the metadata file diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 114e44ea193..48218507d09 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -244,13 +244,13 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) * Destroy the log archiving server thread and logging subsystem. */ int -__wt_logmgr_destroy(WT_CONNECTION_IMPL *conn) +__wt_logmgr_destroy(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); if (!conn->logging) return (0); diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 956b944ec19..87118be6a51 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -61,7 +61,7 @@ __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_RET(__wt_cache_create(session, cfg)); /* Initialize transaction support. 
*/ - WT_RET(__wt_txn_global_init(conn, cfg)); + WT_RET(__wt_txn_global_init(session, cfg)); return (0); } @@ -90,7 +90,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) __wt_txn_update_oldest(session); /* Clear any pending async ops. */ - WT_TRET(__wt_async_flush(conn)); + WT_TRET(__wt_async_flush(session)); /* * Shut down server threads other than the eviction server, which is @@ -99,14 +99,14 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * exit before files are closed. */ F_CLR(conn, WT_CONN_SERVER_RUN); - WT_TRET(__wt_async_destroy(conn)); - WT_TRET(__wt_lsm_manager_destroy(conn)); - WT_TRET(__wt_checkpoint_server_destroy(conn)); + WT_TRET(__wt_async_destroy(session)); + WT_TRET(__wt_lsm_manager_destroy(session)); + WT_TRET(__wt_checkpoint_server_destroy(session)); WT_TRET(__wt_statlog_destroy(session, 1)); - WT_TRET(__wt_sweep_destroy(conn)); + WT_TRET(__wt_sweep_destroy(session)); /* Close open data handles. */ - WT_TRET(__wt_conn_dhandle_discard(conn)); + WT_TRET(__wt_conn_dhandle_discard(session)); /* * Now that all data handles are closed, tell logging that a checkpoint @@ -116,20 +116,20 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) if (conn->logging) { WT_TRET(__wt_txn_checkpoint_log( session, 1, WT_TXN_LOG_CKPT_STOP, NULL)); - WT_TRET(__wt_logmgr_destroy(conn)); + WT_TRET(__wt_logmgr_destroy(session)); } /* Free memory for collators */ while ((ncoll = TAILQ_FIRST(&conn->collqh)) != NULL) - WT_TRET(__wt_conn_remove_collator(conn, ncoll)); + WT_TRET(__wt_conn_remove_collator(session, ncoll)); /* Free memory for compressors */ while ((ncomp = TAILQ_FIRST(&conn->compqh)) != NULL) - WT_TRET(__wt_conn_remove_compressor(conn, ncomp)); + WT_TRET(__wt_conn_remove_compressor(session, ncomp)); /* Free memory for data sources */ while ((ndsrc = TAILQ_FIRST(&conn->dsrcqh)) != NULL) - WT_TRET(__wt_conn_remove_data_source(conn, ndsrc)); + WT_TRET(__wt_conn_remove_data_source(session, ndsrc)); /* * Complain if files weren't closed, ignoring the lock file, 
we'll @@ -146,16 +146,16 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) } /* Shut down the eviction server thread. */ - WT_TRET(__wt_evict_destroy(conn)); + WT_TRET(__wt_evict_destroy(session)); /* Disconnect from shared cache - must be before cache destroy. */ - WT_TRET(__wt_conn_cache_pool_destroy(conn)); + WT_TRET(__wt_conn_cache_pool_destroy(session)); /* Discard the cache. */ - WT_TRET(__wt_cache_destroy(conn)); + WT_TRET(__wt_cache_destroy(session)); /* Discard transaction state. */ - __wt_txn_global_destroy(conn); + __wt_txn_global_destroy(session); /* Close extensions, first calling any unload entry point. */ while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) { @@ -207,19 +207,15 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_CONNECTION_IMPL *conn; - - conn = S2C(session); - /* * Start the eviction thread. */ - WT_RET(__wt_evict_create(conn)); + WT_RET(__wt_evict_create(session)); /* * Start the handle sweep thread. */ - WT_RET(__wt_sweep_create(conn)); + WT_RET(__wt_sweep_create(session)); /* * Start the optional statistics thread. Start statistics first so that diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c index 3f0641923ec..37039504952 100644 --- a/src/conn/conn_sweep.c +++ b/src/conn/conn_sweep.c @@ -127,9 +127,11 @@ err: __wt_err(session, ret, "handle sweep server error"); * Start the handle sweep thread. */ int -__wt_sweep_create(WT_CONNECTION_IMPL *conn) +__wt_sweep_create(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); /* Set first, the thread might run before we finish up. */ F_SET(conn, WT_CONN_SERVER_SWEEP); @@ -159,13 +161,13 @@ __wt_sweep_create(WT_CONNECTION_IMPL *conn) * Destroy the handle-sweep thread. 
*/ int -__wt_sweep_destroy(WT_CONNECTION_IMPL *conn) +__wt_sweep_destroy(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; - session = conn->default_session; + conn = S2C(session); F_CLR(conn, WT_CONN_SERVER_SWEEP); if (conn->sweep_tid_set) { diff --git a/src/include/extern.h b/src/include/extern.h index c5e6a49fdf8..84ba377e075 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -3,17 +3,17 @@ extern void __wt_async_stats_update(WT_SESSION_IMPL *session); extern int __wt_async_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_async_reconfig(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_async_destroy(WT_CONNECTION_IMPL *conn); -extern int __wt_async_flush(WT_CONNECTION_IMPL *conn); -extern int __wt_async_new_op(WT_CONNECTION_IMPL *conn, +extern int __wt_async_destroy(WT_SESSION_IMPL *session); +extern int __wt_async_flush(WT_SESSION_IMPL *session); +extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp); -extern int __wt_async_op_enqueue(WT_CONNECTION_IMPL *conn, +extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op); -extern int __wt_async_op_init(WT_CONNECTION_IMPL *conn); +extern int __wt_async_op_init(WT_SESSION_IMPL *session); extern void *__wt_async_worker(void *arg); extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, @@ -320,8 +320,8 @@ extern void __wt_free_ref_index(WT_SESSION_IMPL *session, int free_pages); extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict_server_wake(WT_SESSION_IMPL *session); -extern int __wt_evict_create(WT_CONNECTION_IMPL *conn); -extern int __wt_evict_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_evict_create(WT_SESSION_IMPL *session); +extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int 
__wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); @@ -597,24 +597,24 @@ extern int __wt_collator_config(WT_SESSION_IMPL *session, const char **cfg, WT_COLLATOR **collatorp, int *ownp); -extern int __wt_conn_remove_collator(WT_CONNECTION_IMPL *conn, +extern int __wt_conn_remove_collator(WT_SESSION_IMPL *session, WT_NAMED_COLLATOR *ncoll); -extern int __wt_conn_remove_compressor( WT_CONNECTION_IMPL *conn, +extern int __wt_conn_remove_compressor( WT_SESSION_IMPL *session, WT_NAMED_COMPRESSOR *ncomp); -extern int __wt_conn_remove_data_source( WT_CONNECTION_IMPL *conn, +extern int __wt_conn_remove_data_source( WT_SESSION_IMPL *session, WT_NAMED_DATA_SOURCE *ndsrc); extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_cache_stats_update(WT_SESSION_IMPL *session); -extern int __wt_cache_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_cache_destroy(WT_SESSION_IMPL *session); extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session); -extern int __wt_conn_cache_pool_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session); extern void *__wt_cache_pool_server(void *arg); extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_checkpoint_server_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session); extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, off_t logsize); extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session); extern int __wt_conn_btree_get(WT_SESSION_IMPL *session, @@ -639,11 +639,11 @@ 
extern int __wt_conn_dhandle_close_all(WT_SESSION_IMPL *session, extern int __wt_conn_dhandle_discard_single( WT_SESSION_IMPL *session, WT_DATA_HANDLE *dhandle, int final); -extern int __wt_conn_dhandle_discard(WT_CONNECTION_IMPL *conn); +extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session); extern int __wt_connection_init(WT_CONNECTION_IMPL *conn); extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn); extern int __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]); -extern int __wt_logmgr_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session); extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]); extern int __wt_connection_close(WT_CONNECTION_IMPL *conn); extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]); @@ -651,8 +651,8 @@ extern void __wt_conn_stat_init(WT_SESSION_IMPL *session); extern int __wt_statlog_log_one(WT_SESSION_IMPL *session); extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, int is_close); -extern int __wt_sweep_create(WT_CONNECTION_IMPL *conn); -extern int __wt_sweep_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_sweep_create(WT_SESSION_IMPL *session); +extern int __wt_sweep_destroy(WT_SESSION_IMPL *session); extern int __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], @@ -934,7 +934,7 @@ extern int __wt_lsm_manager_config(WT_SESSION_IMPL *session, const char **cfg); extern int __wt_lsm_manager_start(WT_SESSION_IMPL *session); extern void __wt_lsm_manager_free_work_unit( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT *entry); -extern int __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_lsm_manager_destroy(WT_SESSION_IMPL *session); extern int __wt_lsm_manager_clear_tree( WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree); extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, @@ -1608,8 
+1608,8 @@ extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_init(WT_SESSION_IMPL *session); extern void __wt_txn_destroy(WT_SESSION_IMPL *session); -extern int __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]); -extern void __wt_txn_global_destroy(WT_CONNECTION_IMPL *conn); +extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]); +extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session); extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len); @@ -1645,4 +1645,4 @@ extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out); -extern int __wt_txn_recover(WT_SESSION_IMPL *default_session); +extern int __wt_txn_recover(WT_CONNECTION_IMPL *conn); diff --git a/src/log/log.c b/src/log/log.c index 12a12f02375..c7d362973aa 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -246,7 +246,7 @@ __wt_log_open(WT_SESSION_IMPL *session) */ if (logcount > 0) { log->trunc_lsn = log->alloc_lsn; - WT_ERR(__wt_txn_recover(session)); + WT_ERR(__wt_txn_recover(conn)); } err: __wt_log_files_free(session, logfiles, logcount); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 2ac20b9b92d..e413763bb50 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -108,17 +108,17 @@ __wt_lsm_manager_free_work_unit( * Destroy the LSM manager threads and subsystem. 
*/ int -__wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn) +__wt_lsm_manager_destroy(WT_SESSION_IMPL *session) { + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_LSM_MANAGER *manager; WT_LSM_WORK_UNIT *current, *next; WT_SESSION *wt_session; - WT_SESSION_IMPL *session; uint32_t i; uint64_t removed; - session = conn->default_session; + conn = S2C(session); manager = &conn->lsm_manager; removed = 0; @@ -128,7 +128,7 @@ __wt_lsm_manager_destroy(WT_CONNECTION_IMPL *conn) __wt_yield(); /* Clean up open LSM handles. */ - ret = __wt_lsm_tree_close_all(conn->default_session); + ret = __wt_lsm_tree_close_all(session); WT_TRET(__wt_thread_join( session, manager->lsm_worker_cookies[0].tid)); diff --git a/src/txn/txn.c b/src/txn/txn.c index 3e18eecc962..bbcb39e2dc2 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -474,15 +474,16 @@ __wt_txn_destroy(WT_SESSION_IMPL *session) * Initialize the global transaction state. */ int -__wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]) +__wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_TXN_GLOBAL *txn_global; WT_TXN_STATE *s; u_int i; WT_UNUSED(cfg); - session = conn->default_session; + conn = S2C(session); + txn_global = &conn->txn_global; txn_global->current = 1; txn_global->oldest_id = 1; @@ -501,12 +502,12 @@ __wt_txn_global_init(WT_CONNECTION_IMPL *conn, const char *cfg[]) * Destroy the global transaction state. */ void -__wt_txn_global_destroy(WT_CONNECTION_IMPL *conn) +__wt_txn_global_destroy(WT_SESSION_IMPL *session) { - WT_SESSION_IMPL *session; + WT_CONNECTION_IMPL *conn; WT_TXN_GLOBAL *txn_global; - session = conn->default_session; + conn = S2C(session); txn_global = &conn->txn_global; if (txn_global != NULL) diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c index d52a471449a..2fce335dee9 100644 --- a/src/txn/txn_recover.c +++ b/src/txn/txn_recover.c @@ -399,16 +399,14 @@ err: if (r->nfiles > r->max_fileid) * Run recovery. 
*/ int -__wt_txn_recover(WT_SESSION_IMPL *default_session) +__wt_txn_recover(WT_CONNECTION_IMPL *conn) { - WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_RECOVERY r; WT_SESSION_IMPL *session; const char *config; int was_backup; - conn = S2C(default_session); WT_CLEAR(r); INIT_LSN(&r.ckpt_lsn); was_backup = F_ISSET(conn, WT_CONN_WAS_BACKUP) ? 1 : 0; From b82fa31a5c3f27e7d975bc1eb16a76e3181e85cb Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 07:32:32 -0400 Subject: [PATCH 096/132] typo. --- src/lsm/lsm_tree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 59baf038fd1..fac47ff0465 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1090,10 +1090,9 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* Wait for the work unit queues to drain. */ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { /* - * The flush flag is cleared when the the chunk has been - * flushed. Continue to push forced flushes until the - * chunk is on disk. Once it is on disk move to the compacting - * phase. + * The flush flag is cleared when the chunk has been flushed. + * Continue to push forced flushes until the chunk is on disk. + * Once it is on disk move to the compacting phase. 
*/ if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { if (chunk != NULL && From 5e45a19052423bbef7760e48a34401cea54995d9 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 07:41:03 -0400 Subject: [PATCH 097/132] wrap lines at 80 columns --- src/session/session_api.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/src/session/session_api.c b/src/session/session_api.c index d4dc26f6c49..2a3aa0f51f4 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -193,8 +193,9 @@ __wt_open_cursor(WT_SESSION_IMPL *session, handled = 0; /* - * Open specific cursor types we know about, or call the generic data source open - * function. Unwind a set of string comparisons into a switch statement for performance. + * Open specific cursor types we know about, or call the generic data + * source open function. Unwind a set of string comparisons into a + * switch statement for performance. */ switch (uri[0]) { case 'b': @@ -206,10 +207,11 @@ __wt_open_cursor(WT_SESSION_IMPL *session, case 'c': if (WT_PREFIX_MATCH(uri, "colgroup:")) { /* - * Column groups are a special case: open a cursor on the - * underlying data source. + * Column groups are a special case: open a cursor on + * the underlying data source. 
*/ - WT_RET(__wt_schema_get_colgroup(session, uri, NULL, &colgroup)); + WT_RET(__wt_schema_get_colgroup( + session, uri, NULL, &colgroup)); WT_RET(__wt_open_cursor( session, colgroup->source, owner, cfg, cursorp)); handled = 1; @@ -220,13 +222,15 @@ __wt_open_cursor(WT_SESSION_IMPL *session, break; case 'f': if (WT_PREFIX_MATCH(uri, "file:")) { - WT_RET(__wt_curfile_open(session, uri, owner, cfg, cursorp)); + WT_RET(__wt_curfile_open( + session, uri, owner, cfg, cursorp)); handled = 1; } break; case 'i': if (WT_PREFIX_MATCH(uri, "index:")) { - WT_RET(__wt_curindex_open(session, uri, owner, cfg, cursorp)); + WT_RET(__wt_curindex_open( + session, uri, owner, cfg, cursorp)); handled = 1; } break; @@ -235,7 +239,8 @@ __wt_open_cursor(WT_SESSION_IMPL *session, WT_RET(__wt_curlog_open(session, uri, cfg, cursorp)); handled = 1; } else if (WT_PREFIX_MATCH(uri, "lsm:")) { - WT_RET(__wt_clsm_open(session, uri, owner, cfg, cursorp)); + WT_RET(__wt_clsm_open( + session, uri, owner, cfg, cursorp)); handled = 1; } break; @@ -258,11 +263,13 @@ __wt_open_cursor(WT_SESSION_IMPL *session, } if (!handled) { if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) - WT_RET(__wt_curmetadata_open(session, uri, owner, cfg, cursorp)); + WT_RET(__wt_curmetadata_open( + session, uri, owner, cfg, cursorp)); else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) WT_RET(dsrc->open_cursor == NULL ? 
__wt_object_unsupported(session, uri) : - __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp)); + __wt_curds_open( + session, uri, owner, cfg, dsrc, cursorp)); else WT_RET(__wt_bad_object_type(session, uri)); } From 13106151577222b46e502111f3c8ba08376d95a2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 07:59:18 -0400 Subject: [PATCH 098/132] Expand on Alex's ideas: we can return out of the switch statement instead of breaking out of the switch statement (which means we don't need a flag if we've been successful, and we should list the common cursor cases (like "table") first in the switch statement, that way if the compiler can't create a jump table, we test the right cases first. --- src/session/session_api.c | 113 ++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 60 deletions(-) diff --git a/src/session/session_api.c b/src/session/session_api.c index 2a3aa0f51f4..e66f3ec5fb2 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -188,21 +188,22 @@ __wt_open_cursor(WT_SESSION_IMPL *session, { WT_COLGROUP *colgroup; WT_DATA_SOURCE *dsrc; - int handled; - - handled = 0; /* * Open specific cursor types we know about, or call the generic data - * source open function. Unwind a set of string comparisons into a - * switch statement for performance. + * source open function. + * + * Unwind a set of string comparisons into a switch statement hoping + * the compiler can make it fast, but list the common choices first + * instead of sorting so if/else patterns are still fast. */ switch (uri[0]) { - case 'b': - if (WT_PREFIX_MATCH(uri, "backup:")) { - WT_RET(__wt_curbackup_open(session, uri, cfg, cursorp)); - handled = 1; - } + /* + * Common cursor types. 
+ */ + case 't': + if (WT_PREFIX_MATCH(uri, "table:")) + return (__wt_curtable_open(session, uri, cfg, cursorp)); break; case 'c': if (WT_PREFIX_MATCH(uri, "colgroup:")) { @@ -212,68 +213,60 @@ __wt_open_cursor(WT_SESSION_IMPL *session, */ WT_RET(__wt_schema_get_colgroup( session, uri, NULL, &colgroup)); - WT_RET(__wt_open_cursor( + return (__wt_open_cursor( session, colgroup->source, owner, cfg, cursorp)); - handled = 1; - } else if (WT_PREFIX_MATCH(uri, "config:")) { - WT_RET(__wt_curconfig_open(session, uri, cfg, cursorp)); - handled = 1; - } - break; - case 'f': - if (WT_PREFIX_MATCH(uri, "file:")) { - WT_RET(__wt_curfile_open( - session, uri, owner, cfg, cursorp)); - handled = 1; } + + if (WT_PREFIX_MATCH(uri, "config:")) + return ( + __wt_curconfig_open(session, uri, cfg, cursorp)); break; case 'i': - if (WT_PREFIX_MATCH(uri, "index:")) { - WT_RET(__wt_curindex_open( + if (WT_PREFIX_MATCH(uri, "index:")) + return (__wt_curindex_open( session, uri, owner, cfg, cursorp)); - handled = 1; - } break; case 'l': - if (WT_PREFIX_MATCH(uri, "log:")) { - WT_RET(__wt_curlog_open(session, uri, cfg, cursorp)); - handled = 1; - } else if (WT_PREFIX_MATCH(uri, "lsm:")) { - WT_RET(__wt_clsm_open( + if (WT_PREFIX_MATCH(uri, "lsm:")) + return (__wt_clsm_open( session, uri, owner, cfg, cursorp)); - handled = 1; - } - break; - case 's': - if (WT_PREFIX_MATCH(uri, "statistics:")) { - WT_RET(__wt_curstat_open(session, uri, cfg, cursorp)); - handled = 1; - } - break; - case 't': - if (WT_PREFIX_MATCH(uri, "table:")) { - WT_RET(__wt_curtable_open(session, uri, cfg, cursorp)); - handled = 1; - } - break; - default: - /* Explicit default to make lint happy */ + + if (WT_PREFIX_MATCH(uri, "log:")) + return (__wt_curlog_open(session, uri, cfg, cursorp)); break; - } - if (!handled) { - if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) - WT_RET(__wt_curmetadata_open( + /* + * Less common cursor types. 
+ */ + case 'f': + if (WT_PREFIX_MATCH(uri, "file:")) + return (__wt_curfile_open( session, uri, owner, cfg, cursorp)); - else if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) - WT_RET(dsrc->open_cursor == NULL ? - __wt_object_unsupported(session, uri) : - __wt_curds_open( - session, uri, owner, cfg, dsrc, cursorp)); - else - WT_RET(__wt_bad_object_type(session, uri)); + break; + case 'm': + if (WT_PREFIX_MATCH(uri, WT_METADATA_URI)) + return (__wt_curmetadata_open( + session, uri, owner, cfg, cursorp)); + break; + case 'b': + if (WT_PREFIX_MATCH(uri, "backup:")) + return ( + __wt_curbackup_open(session, uri, cfg, cursorp)); + break; + case 's': + if (WT_PREFIX_MATCH(uri, "statistics:")) + return (__wt_curstat_open(session, uri, cfg, cursorp)); + break; + default: + break; } - return (0); + + if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) + return (dsrc->open_cursor == NULL ? + __wt_object_unsupported(session, uri) : + __wt_curds_open(session, uri, owner, cfg, dsrc, cursorp)); + + return (__wt_bad_object_type(session, uri)); } /* From 967277a708e83b46ae98d856e6ac7e5912806af8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 08:56:11 -0400 Subject: [PATCH 099/132] WiredTiger has a lot of cascading if/else statements checking for string matches, and clang (version 3.3) doesn't inline strncmp. Add an initial check to see if the first character matches before calling strncmp. --- src/include/misc.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/include/misc.h b/src/include/misc.h index d28de81a6aa..aae3cdb53df 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -148,11 +148,17 @@ /* Check if a string matches a prefix. */ #define WT_PREFIX_MATCH(str, pfx) \ - (strncmp((str), (pfx), strlen(pfx)) == 0) + (((const char *)str)[0] == ((const char *)pfx)[0] && \ + strncmp((str), (pfx), strlen(pfx)) == 0) +/* Check if a non-nul-terminated string matches a prefix. 
*/ #define WT_PREFIX_MATCH_LEN(str, len, pfx) \ ((len) >= strlen(pfx) && WT_PREFIX_MATCH(str, pfx)) +/* Check if a string matches a prefix, and move past it. */ +#define WT_PREFIX_SKIP(str, pfx) \ + (WT_PREFIX_MATCH(str, pfx) ? ((str) += strlen(pfx), 1) : 0) + /* * Check if a variable string equals a constant string. Inline the common * case for WiredTiger of a single byte string. This is required because not @@ -162,14 +168,10 @@ (sizeof(cs) == 2 ? (s)[0] == (cs)[0] && (s)[1] == '\0' : \ strcmp(s, cs) == 0) -/* Check if a string matches a prefix, and move past it. */ -#define WT_PREFIX_SKIP(str, pfx) \ - ((strncmp((str), (pfx), strlen(pfx)) == 0) ? \ - ((str) += strlen(pfx), 1) : 0) - /* Check if a string matches a byte string of len bytes. */ #define WT_STRING_MATCH(str, bytes, len) \ - (strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') + (((const char *)str)[0] == ((const char *)bytes)[0] && \ + strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0') /* * Macro that produces a string literal that isn't wrapped in quotes, to avoid From 6985d990a5f499972b779a21c3e1547b0a66507d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 09:12:19 -0400 Subject: [PATCH 100/132] Coverity #1199717, fix a bug in 7179a227 where the test for a lock was accidentally included inside a test for compacting. 
--- src/lsm/lsm_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index fac47ff0465..c464d7a9991 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1160,9 +1160,9 @@ err: if (compacting) { F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); lsm_tree->merge_aggressiveness = 0; + } if (locked) WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - } WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact %s complete, return %d", name, ret)); __wt_lsm_tree_release(session, lsm_tree); From 31e150dfd7e33aa8254566510481250830f43e8a Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 09:27:40 -0400 Subject: [PATCH 101/132] Update dist/s_style to check for cases where we jump to the error label after passing it, that's an infinite loop. --- dist/s_style | 4 ++-- src/block/block_ckpt.c | 2 +- src/lsm/lsm_tree.c | 6 ++++-- src/os_posix/os_open.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/dist/s_style b/dist/s_style index 56e4ddfc529..9325e3e298b 100644 --- a/dist/s_style +++ b/dist/s_style @@ -16,10 +16,10 @@ for f in `find examples ext src test -name '*.[ci]'`; do -e 's/\n/ /g' \ -e p \ -e '{s/.*//;x;}' | - egrep '(WT_ERR|WT_ERR_MSG|WT_ERR_NOTFOUND_OK|WT_ERR_TEST|WT_ILLEGAL_VALUE_ERR|WT_VERBOSE_ERR)\(.*(WT_ASSERT_RET|WT_ILLEGAL_VALUE|WT_RET|WT_RET_MSG|WT_RET_NOTFOUND_OK|WT_RET_TEST|WT_VERBOSE_RET|WT_VERBOSE_RETVAL)\(.*err:' | + egrep '(WT_ERR|WT_ERR_MSG|WT_ERR_NOTFOUND_OK|WT_ERR_TEST|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ASSERT_RET|WT_ILLEGAL_VALUE|WT_RET|WT_RET_MSG|WT_RET_NOTFOUND_OK|WT_RET_TEST|WT_VERBOSE_RET|WT_VERBOSE_RETVAL)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ERR_MSG|WT_ERR_NOTFOUND_OK|WT_ERR_TEST|WT_ILLEGAL_VALUE_ERR)\(' | sed 's/:.*//' > $t test -s $t && { - echo "$f: function with return after a jump to an error label" + echo "$f: return after a jump to the error label or a jump to the error label after the error label" sed 's/^/function @ line:/' < $t } done diff --git 
a/src/block/block_ckpt.c b/src/block/block_ckpt.c index 4227d2d7c03..b7ec8afff29 100644 --- a/src/block/block_ckpt.c +++ b/src/block/block_ckpt.c @@ -142,7 +142,7 @@ err: /* * allocated memory was in the service of verify, clean that up. */ if (block->verify) - WT_ERR(__wt_verify_ckpt_unload(session, block)); + WT_TRET(__wt_verify_ckpt_unload(session, block)); } /* Checkpoints don't need the original information, discard it. */ diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index c464d7a9991..4667ab7a039 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1162,9 +1162,11 @@ err: lsm_tree->merge_aggressiveness = 0; } if (locked) - WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); + + WT_TRET(__wt_verbose(session, WT_VERB_LSM, "Compact %s complete, return %d", name, ret)); + __wt_lsm_tree_release(session, lsm_tree); return (ret); diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 9a0bff8df16..680be9677ba 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -46,7 +46,7 @@ __open_directory_sync(WT_SESSION_IMPL *session, char *path) err: WT_SYSCALL_RETRY(close(fd), ret); if (ret != 0) - WT_ERR_MSG(session, ret, "%s: close", path); + __wt_err(session, ret, "%s: close", path); #else WT_UNUSED(session); WT_UNUSED(path); From 2c4f114c3da01b93ee47f88cb83e12fcd88f01dc Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 11:27:24 -0400 Subject: [PATCH 102/132] clang gets pthread_t initialization wrong: wtperf.c:1865:28: warning: variable 'monitor_thread' may be uninitialized when used here [-Wconditional-uninitialized] --- bench/wtperf/wtperf.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index b6d65761b5a..002c1327e01 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -1745,6 +1745,8 @@ start_run(CONFIG *cfg) char helium_buf[256]; monitor_created = ret = 0; 
+ /* [-Wconditional-uninitialized] */ + memset(&monitor_thread, 0, sizeof(monitor_thread)); if ((ret = setup_log_file(cfg)) != 0) goto err; From 7f14a7870cea9b3a02d05d465febbd0d19487d98 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Fri, 12 Sep 2014 16:45:17 -0400 Subject: [PATCH 103/132] If we're doing raw compression in service of eviction, and haven't found any rows to write, the page may not be evictable if we skipped an update. Reference #1221. --- src/btree/rec_write.c | 59 +++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index bae29929aa5..0c5ee9a8cc9 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -1979,6 +1979,30 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r) return (0); } +/* + * __rec_skipped_update_chk -- + * Return if a skipped update makes this a waste of time. + */ +static inline int +__rec_skipped_update_chk(WT_SESSION_IMPL *session, WT_RECONCILE *r) +{ + /* + * If we're doing an eviction, and we skipped an update, it only pays + * off to continue if we're writing multiple blocks, that is, we'll be + * able to evict something. This should be unlikely (why did eviction + * choose a recently written, small block), but it's possible. Our + * caller is responsible for calling us at the right moment, when all + * of the rows have been reviewed and we're about to finalize a write. + */ + if (F_ISSET(r, WT_SKIP_UPDATE_RESTORE) && + r->bnd_next == 0 && r->leave_dirty) { + WT_STAT_FAST_CONN_INCR(session, rec_skipped_update); + WT_STAT_FAST_DATA_INCR(session, rec_skipped_update); + return (EBUSY); + } + return (0); +} + /* * __rec_split_raw_worker -- * Handle the raw compression page reconciliation bookkeeping. @@ -2338,22 +2362,13 @@ no_slots: return (0); } + /* Check if a skipped update makes this a waste of time. 
*/ + if (last_block) + WT_RET (__rec_skipped_update_chk(session, r)); + /* We have a block, update the boundary counter. */ ++r->bnd_next; - /* - * If we're doing an eviction, and we skipped an update, it only pays - * off to continue if we're writing multiple blocks, that is, we'll be - * able to evict something. This should be unlikely (why did eviction - * choose a recently written, small block), but it's possible. - */ - if (r->bnd_next == 1 && last_block && - F_ISSET(r, WT_SKIP_UPDATE_RESTORE) && r->leave_dirty) { - WT_STAT_FAST_CONN_INCR(session, rec_skipped_update); - WT_STAT_FAST_DATA_INCR(session, rec_skipped_update); - return (EBUSY); - } - /* * If we are writing the whole page in our first/only attempt, it might * be a checkpoint (checkpoints are only a single page, by definition). @@ -2458,18 +2473,8 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_ILLEGAL_VALUE(session); } - /* - * If we're doing an eviction, and we skipped an update, it only pays - * off to continue if we're writing multiple blocks, that is, we'll be - * able to evict something. This should be unlikely (why did eviction - * choose a recently written, small block), but it's possible. - */ - if (F_ISSET(r, WT_SKIP_UPDATE_RESTORE) && - r->bnd_next == 0 && r->leave_dirty) { - WT_STAT_FAST_CONN_INCR(session, rec_skipped_update); - WT_STAT_FAST_DATA_INCR(session, rec_skipped_update); - return (EBUSY); - } + /* Check if a skipped update makes this a waste of time. */ + WT_RET (__rec_skipped_update_chk(session, r)); /* * We only arrive here with no entries to write if the page was entirely @@ -2504,6 +2509,10 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r) static inline int __rec_split_finish_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r) { + /* Check if a skipped update makes this a waste of time. 
*/ + if (r->entries == 0) + WT_RET (__rec_skipped_update_chk(session, r)); + while (r->entries != 0) WT_RET(__rec_split_raw_worker(session, r, 1)); return (0); From ef58f295c74f94f9612851628da26336088469c7 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 13 Sep 2014 07:29:37 -0400 Subject: [PATCH 104/132] whitespace --- bench/wtperf/wtperf.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index d2baa558af0..fa5994b7bcf 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -104,13 +104,13 @@ struct __config { /* Configuration struction */ CONFIG_THREAD *ckptthreads, *popthreads; #define WORKLOAD_MAX 50 - CONFIG_THREAD *workers; /* Worker threads */ + CONFIG_THREAD *workers; /* Worker threads */ u_int workers_cnt; - WORKLOAD *workload; /* Workloads */ + WORKLOAD *workload; /* Workloads */ u_int workload_cnt; - uint32_t use_asyncops; /* Use async operations */ + uint32_t use_asyncops; /* Use async operations */ /* State tracking variables. */ uint64_t ckpt_ops; /* checkpoint operations */ From 031e80a87a8bbf986e7f4665a91196785ba4c52d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 13 Sep 2014 09:23:52 -0400 Subject: [PATCH 105/132] Make random number generation be per session in the WiredTiger library, and per thread in wtperf and test/checkpoint, reference #1223. 
--- bench/wtperf/wtperf.c | 49 +++++++++++++++++++++------------------ bench/wtperf/wtperf.h | 2 ++ src/block/block_session.c | 2 +- src/btree/col_modify.c | 2 +- src/btree/rec_track.c | 4 ++-- src/btree/rec_write.c | 2 +- src/btree/row_modify.c | 2 +- src/btree/row_srch.c | 5 ++-- src/conn/conn_api.c | 1 + src/include/btree.i | 4 ++-- src/include/extern.h | 3 ++- src/include/session.h | 2 ++ src/log/log_slot.c | 2 +- src/session/session_api.c | 2 ++ src/support/rand.c | 26 ++++++++++++++++----- test/checkpoint/workers.c | 5 +++- 16 files changed, 72 insertions(+), 41 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 002c1327e01..785f81ce668 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -83,7 +83,7 @@ static int execute_workload(CONFIG *); static int find_table_count(CONFIG *); static void *monitor(void *); static void *populate_thread(void *); -static void randomize_value(CONFIG *, char *); +static void randomize_value(CONFIG_THREAD *, char *); static int start_all_runs(CONFIG *); static int start_run(CONFIG *); static int start_threads(CONFIG *, @@ -91,7 +91,7 @@ static int start_threads(CONFIG *, static int stop_threads(CONFIG *, u_int, CONFIG_THREAD *); static void *thread_run_wtperf(void *); static void *worker(void *); -static uint64_t wtperf_rand(CONFIG *); +static uint64_t wtperf_rand(CONFIG_THREAD *); static uint64_t wtperf_value_range(CONFIG *); #define HELIUM_NAME "dev1" @@ -100,11 +100,12 @@ static uint64_t wtperf_value_range(CONFIG *); #define HELIUM_CONFIG ",type=helium" /* - * wtperf uses a couple of internal WiredTiger library routines for timing - * and generating random numbers. + * wtperf uses internal WiredTiger library routines for timing and generating + * random numbers. */ extern int __wt_epoch(void *, struct timespec *); -extern uint32_t __wt_random(void); +extern uint32_t __wt_random(uint32_t *); +extern void __wt_random_init(uint32_t *); /* Retrieve an ID for the next insert operation. 
*/ static inline uint64_t @@ -130,7 +131,7 @@ generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno) } static void -randomize_value(CONFIG *cfg, char *value_buf) +randomize_value(CONFIG_THREAD *thread, char *value_buf) { uint8_t *vb; uint32_t i; @@ -140,13 +141,13 @@ randomize_value(CONFIG *cfg, char *value_buf) * randomly chosen byte (other than the trailing NUL). * Make sure we don't write a NUL: keep the value the same length. */ - i = __wt_random() % (cfg->value_sz - 1); + i = __wt_random(thread->rnd) % (thread->cfg->value_sz - 1); while (value_buf[i] == '\0' && i > 0) --i; if (i > 0) { vb = (uint8_t *)value_buf; - vb[0] = (__wt_random() % 255) + 1; - vb[i] = (__wt_random() % 255) + 1; + vb[0] = (__wt_random(thread->rnd) % 255) + 1; + vb[i] = (__wt_random(thread->rnd) % 255) + 1; } } @@ -317,13 +318,13 @@ worker_async(void *arg) case WORKER_INSERT: case WORKER_INSERT_RMW: if (cfg->random_range) - next_val = wtperf_rand(cfg); + next_val = wtperf_rand(thread); else next_val = cfg->icount + get_next_incr(cfg); break; case WORKER_READ: case WORKER_UPDATE: - next_val = wtperf_rand(cfg); + next_val = wtperf_rand(thread); /* * If the workload is started without a populate phase @@ -361,14 +362,14 @@ worker_async(void *arg) goto op_err; case WORKER_INSERT: if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); asyncop->set_value(asyncop, value_buf); if ((ret = asyncop->insert(asyncop)) == 0) break; goto op_err; case WORKER_UPDATE: if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); asyncop->set_value(asyncop, value_buf); if ((ret = asyncop->update(asyncop)) == 0) break; @@ -455,7 +456,7 @@ worker(void *arg) case WORKER_INSERT_RMW: trk = &thread->insert; if (cfg->random_range) - next_val = wtperf_rand(cfg); + next_val = wtperf_rand(thread); else next_val = cfg->icount + get_next_incr(cfg); break; @@ -465,7 +466,7 @@ worker(void *arg) case WORKER_UPDATE: if (*op == WORKER_UPDATE) trk = 
&thread->update; - next_val = wtperf_rand(cfg); + next_val = wtperf_rand(thread); /* * If the workload is started without a populate phase @@ -532,7 +533,7 @@ worker(void *arg) /* FALLTHROUGH */ case WORKER_INSERT: if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); cursor->set_value(cursor, value_buf); if ((ret = cursor->insert(cursor)) == 0) break; @@ -556,7 +557,7 @@ worker(void *arg) else value_buf[0] = 'a'; if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); cursor->set_value(cursor, value_buf); if ((ret = cursor->update(cursor)) == 0) break; @@ -812,7 +813,7 @@ populate_thread(void *arg) } cursor->set_key(cursor, key_buf); if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); cursor->set_value(cursor, value_buf); if ((ret = cursor->insert(cursor)) != 0) { lprintf(cfg, ret, 0, "Failed inserting"); @@ -941,7 +942,7 @@ populate_async(void *arg) generate_key(cfg, key_buf, op); asyncop->set_key(asyncop, key_buf); if (cfg->random_value) - randomize_value(cfg, value_buf); + randomize_value(thread, value_buf); asyncop->set_value(asyncop, value_buf); if ((ret = asyncop->insert(asyncop)) != 0) { lprintf(cfg, ret, 0, "Failed inserting"); @@ -2112,6 +2113,7 @@ start_threads(CONFIG *cfg, for (i = 0; i < num; ++i, ++thread) { thread->cfg = cfg; + __wt_random_init(thread->rnd); thread->workload = workp; /* @@ -2129,7 +2131,7 @@ start_threads(CONFIG *cfg, */ memset(thread->value_buf, 'a', cfg->value_sz - 1); if (cfg->random_value) - randomize_value(cfg, thread->value_buf); + randomize_value(thread, thread->value_buf); /* * Every thread gets tracking information and is initialized @@ -2190,16 +2192,19 @@ wtperf_value_range(CONFIG *cfg) } static uint64_t -wtperf_rand(CONFIG *cfg) +wtperf_rand(CONFIG_THREAD *thread) { + CONFIG *cfg; double S1, S2, U; uint64_t rval; + cfg = thread->cfg; + /* * Use WiredTiger's random number routine: it's lock-free and 
fairly * good. */ - rval = (uint64_t)__wt_random(); + rval = (uint64_t)__wt_random(thread->rnd); /* Use Pareto distribution to give 80/20 hot/cold values. */ if (cfg->pareto) { diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index fa5994b7bcf..2845762f50b 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -191,6 +191,8 @@ typedef struct { struct __config_thread { /* Per-thread structure */ CONFIG *cfg; /* Enclosing configuration */ + uint32_t rnd[2]; /* Random number generation state */ + pthread_t handle; /* Handle */ char *key_buf, *value_buf; /* Key/value memory */ diff --git a/src/block/block_session.c b/src/block/block_session.c index 17767fc815f..fa56b72f49b 100644 --- a/src/block/block_session.c +++ b/src/block/block_session.c @@ -29,7 +29,7 @@ __block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) u_int skipdepth; - skipdepth = __wt_skip_choose_depth(); + skipdepth = __wt_skip_choose_depth(session); WT_RET(__wt_calloc(session, 1, sizeof(WT_EXT) + skipdepth * 2 * sizeof(WT_EXT *), &ext)); ext->depth = (uint8_t)skipdepth; diff --git a/src/btree/col_modify.c b/src/btree/col_modify.c index e2e3adbd714..3a4a2a2987d 100644 --- a/src/btree/col_modify.c +++ b/src/btree/col_modify.c @@ -121,7 +121,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, ins_head = *ins_headp; /* Choose a skiplist depth for this insert. */ - skipdepth = __wt_skip_choose_depth(); + skipdepth = __wt_skip_choose_depth(session); /* * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and diff --git a/src/btree/rec_track.c b/src/btree/rec_track.c index 165df9d61e5..a02e6c32ec7 100644 --- a/src/btree/rec_track.c +++ b/src/btree/rec_track.c @@ -495,7 +495,7 @@ __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, head = page->modify->ovfl_track->ovfl_reuse; /* Choose a skiplist depth for this insert. 
*/ - skipdepth = __wt_skip_choose_depth(); + skipdepth = __wt_skip_choose_depth(session); /* * Allocate the WT_OVFL_REUSE structure, next pointers for the skip @@ -783,7 +783,7 @@ __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, head = page->modify->ovfl_track->ovfl_txnc; /* Choose a skiplist depth for this insert. */ - skipdepth = __wt_skip_choose_depth(); + skipdepth = __wt_skip_choose_depth(session); /* * Allocate the WT_OVFL_TXNC structure, next pointers for the skip diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 0c5ee9a8cc9..ca14b4d4db9 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -5423,7 +5423,7 @@ __rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots) WT_RET(__wt_calloc(session, r->dictionary_slots, sizeof(WT_DICTIONARY *), &r->dictionary)); for (i = 0; i < r->dictionary_slots; ++i) { - depth = __wt_skip_choose_depth(); + depth = __wt_skip_choose_depth(session); WT_RET(__wt_calloc(session, 1, sizeof(WT_DICTIONARY) + depth * sizeof(WT_DICTIONARY *), &r->dictionary[i])); diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c index 03772e317b4..a87a93b05a1 100644 --- a/src/btree/row_modify.c +++ b/src/btree/row_modify.c @@ -118,7 +118,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, ins_head = *ins_headp; /* Choose a skiplist depth for this insert. 
*/ - skipdepth = __wt_skip_choose_depth(); + skipdepth = __wt_skip_choose_depth(session); /* * Allocate a WT_INSERT/WT_UPDATE pair and transaction ID, and diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 7e7bc788b4d..268e1e0f0a7 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -487,7 +487,8 @@ restart: break; pindex = WT_INTL_INDEX_COPY(page); - child = pindex->index[__wt_random() % pindex->entries]; + child = pindex->index[ + __wt_random(session->rnd) % pindex->entries]; /* * Swap the parent page for the child page; return on error, @@ -521,7 +522,7 @@ restart: cbt->compare = 0; pindex = WT_INTL_INDEX_COPY(btree->root.page); cbt->slot = pindex->entries < 2 ? - __wt_random() % page->pg_row_entries : 0; + __wt_random(session->rnd) % page->pg_row_entries : 0; return (__wt_row_leaf_key(session, page, page->pg_row_d + cbt->slot, &cbt->search_key, 0)); diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 6446da4781f..9d6e5ea816b 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -1241,6 +1241,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, session = conn->default_session = &conn->dummy_session; session->iface.connection = &conn->iface; session->name = "wiredtiger_open"; + __wt_random_init(session->rnd); __wt_event_handler_set(session, event_handler); /* Remaining basic initialization of the connection structure. */ diff --git a/src/include/btree.i b/src/include/btree.i index 8cd48d222f0..5a333da192d 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1071,12 +1071,12 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) * Randomly choose a depth for a skiplist insert. 
*/ static inline u_int -__wt_skip_choose_depth(void) +__wt_skip_choose_depth(WT_SESSION_IMPL *session) { u_int d; for (d = 1; d < WT_SKIP_MAXDEPTH && - __wt_random() < WT_SKIP_PROBABILITY; d++) + __wt_random(session->rnd) < WT_SKIP_PROBABILITY; d++) ; return (d); } diff --git a/src/include/extern.h b/src/include/extern.h index 9783de0a7a6..a106f7d9859 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1562,7 +1562,8 @@ extern uint32_t __wt_nlpo2(uint32_t v); extern uint32_t __wt_log2_int(uint32_t n); extern int __wt_ispo2(uint32_t v); extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); -extern uint32_t __wt_random(void); +extern void __wt_random_init(uint32_t *rnd); +extern uint32_t __wt_random(uint32_t *rnd); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); diff --git a/src/include/session.h b/src/include/session.h index 31d58ff61e5..eace12844e9 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -52,6 +52,8 @@ struct __wt_session_impl { WT_CONDVAR *cond; /* Condition variable */ + uint32_t rnd[2]; /* Random number generation state */ + WT_EVENT_HANDLER *event_handler;/* Application's event handlers */ WT_DATA_HANDLE *dhandle; /* Current data handle */ diff --git a/src/log/log_slot.c b/src/log/log_slot.c index cc5dee721fa..8f72763b4e8 100644 --- a/src/log/log_slot.c +++ b/src/log/log_slot.c @@ -106,7 +106,7 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize, log = conn->log; slot_grow_attempts = 0; find_slot: - allocated_slot = __wt_random() % SLOT_ACTIVE; + allocated_slot = __wt_random(session->rnd) % SLOT_ACTIVE; slot = log->slot_array[allocated_slot]; old_state = slot->slot_state; join_slot: diff --git a/src/session/session_api.c b/src/session/session_api.c index e66f3ec5fb2..d4d5fe5e2c9 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -919,6 +919,8 @@ __wt_open_session(WT_CONNECTION_IMPL *conn, WT_ERR(__wt_cond_alloc(session, "session", 0, 
&session_ret->cond)); + __wt_random_init(session_ret->rnd); + __wt_event_handler_set(session_ret, event_handler == NULL ? session->event_handler : event_handler); diff --git a/src/support/rand.c b/src/support/rand.c index 248f9c59ff1..b716eb8c58b 100644 --- a/src/support/rand.c +++ b/src/support/rand.c @@ -27,6 +27,22 @@ #include "wt_internal.h" +#undef M_W +#define M_W (rnd)[0] +#undef M_Z +#define M_Z (rnd)[1] + +/* + * __wt_random_init -- + * Initialize return of a 32-bit pseudo-random number. + */ +void +__wt_random_init(uint32_t *rnd) +{ + M_W = 521288629; + M_Z = 362436069; +} + /* * __wt_random -- * Return a 32-bit pseudo-random number. @@ -43,13 +59,11 @@ * forever. Take local copies of the shared values to avoid this. */ uint32_t -__wt_random(void) +__wt_random(uint32_t *rnd) { - static uint32_t m_w = 521288629; - static uint32_t m_z = 362436069; - uint32_t w = m_w, z = m_z; + uint32_t w = M_W, z = M_Z; - m_z = z = 36969 * (z & 65535) + (z >> 16); - m_w = w = 18000 * (w & 65535) + (w >> 16); + M_Z = z = 36969 * (z & 65535) + (z >> 16); + M_W = w = 18000 * (w & 65535) + (w >> 16); return (z << 16) + (w & 65535); } diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c index 1a01dda649c..b7858cb8292 100644 --- a/test/checkpoint/workers.c +++ b/test/checkpoint/workers.c @@ -169,11 +169,14 @@ real_worker(void) { WT_CURSOR **cursors; WT_SESSION *session; + uint32_t rnd[2]; u_int i, keyno; int j, ret, t_ret; ret = t_ret = 0; + __wt_random_init(rnd); + if ((cursors = calloc( (size_t)(g.ntables), sizeof(WT_CURSOR *))) == NULL) return (log_print_err("malloc", ENOMEM, 1)); @@ -197,7 +200,7 @@ real_worker(void) "real_worker:begin_transaction", ret, 1); goto err; } - keyno = __wt_random() % g.nkeys + 1; + keyno = __wt_random(rnd) % g.nkeys + 1; for (j = 0; j < g.ntables; j++) { if ((ret = worker_op(cursors[j], keyno, i)) != 0) break; From f85108d092ff7e12f30a8fd8153642977a878774 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sat, 13 Sep 2014 12:03:28 
-0400 Subject: [PATCH 106/132] The error routine we're hitting isn't row-store specific, fix the error message. --- src/btree/bt_slvg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 1816167d5c3..7f66e0e623b 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -2318,8 +2318,8 @@ __slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk) { if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) WT_PANIC_RET(session, EINVAL, - "overflow record at row-store page merge referenced " - "multiple times"); + "overflow record referenced multiple times during leaf " + "page merge"); F_SET(trk, WT_TRACK_OVFL_REFD); return (0); From ab934a4f4244b011d3266cd1c9d0d19978c31443 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Sun, 14 Sep 2014 09:42:06 -0400 Subject: [PATCH 107/132] Add a debug routine to print out an address, it helps with salvage debugging. --- dist/s_funcs.list | 1 + src/btree/bt_debug.c | 18 ++++++++++++++++++ src/include/extern.h | 3 +++ 3 files changed, 22 insertions(+) diff --git a/dist/s_funcs.list b/dist/s_funcs.list index bf1d5156820..2bc87233084 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -14,6 +14,7 @@ __wt_cache_dump __wt_config_getone __wt_cursor_get_raw_value __wt_debug_addr +__wt_debug_addr_print __wt_debug_offset __wt_debug_set_verbose __wt_debug_tree diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 84c4565eafe..5412286621e 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -194,6 +194,24 @@ __dmsg(WT_DBG *ds, const char *fmt, ...) } } +/* + * __wt_debug_addr_print -- + * Print out an address. 
+ */ +int +__wt_debug_addr_print( + WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +{ + WT_DECL_ITEM(buf); + + WT_RET(__wt_scr_alloc(session, 128, &buf)); + fprintf(stderr, "%s\n", + __wt_addr_string(session, addr, addr_size, buf)); + __wt_scr_free(&buf); + + return (0); +} + /* * __wt_debug_addr -- * Read and dump a disk page in debugging mode, using an addr/size pair. diff --git a/src/include/extern.h b/src/include/extern.h index 9783de0a7a6..26321bcc909 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -277,6 +277,9 @@ extern int __wt_btcur_range_truncate(WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_btcur_close(WT_CURSOR_BTREE *cbt); extern int __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v); +extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, + const uint8_t *addr, + size_t addr_size); extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, From 9f272ffd01c6f5b751cff017e1ed9d09d31d0b2e Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 15 Sep 2014 12:39:18 +1000 Subject: [PATCH 108/132] Fix a hang in LSM compact. Refs #1225 --- src/lsm/lsm_tree.c | 63 ++++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 4667ab7a039..ea184858d05 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1055,9 +1055,6 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING)) goto err; - flushing = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); - /* * Set the switch transaction on the current chunk, if it * hasn't been set before. 
This prevents further writes, so it @@ -1078,14 +1075,21 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) locked = 0; WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); - if (chunk != NULL) + if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, "Compact force flush %s flags 0x%" PRIx32 " chunk %u flags 0x%" PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); - /* Make sure the in-memory chunk gets flushed but not switched. */ - WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); + flushing = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); + /* + * Make sure the in-memory chunk gets flushed do not push a + * switch, because we don't want to create a new in-memory + * chunk if the tree is being used read-only now. + */ + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); + } /* Wait for the work unit queues to drain. */ while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { @@ -1094,30 +1098,29 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * Continue to push forced flushes until the chunk is on disk. * Once it is on disk move to the compacting phase. 
*/ - if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { - if (chunk != NULL && - !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush retry %s chunk %u", - name, chunk->id)); - F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); - WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, - lsm_tree)); - } else { - if (ref) { - WT_ASSERT(session, chunk != NULL); - WT_ERR(__wt_verbose(session, - WT_VERB_LSM, - "Compact flush done %s chunk %u", - name, chunk->id)); - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); - } - flushing = ref = 0; - compacting = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); - } + if (flushing && + F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH) && + !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact flush retry %s chunk %u", + name, chunk->id)); + F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); } + /* Start merges when the tree has cleared the flush flag */ + if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { + WT_ASSERT(session, chunk != NULL); + WT_ERR(__wt_verbose(session, + WT_VERB_LSM, + "Compact flush done %s chunk %u", + name, chunk->id)); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + flushing = ref = 0; + compacting = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + } + /* * The compacting flag is cleared when no merges can be done. * Ensure that we push through some aggressive merges before From 1e7a51226df9c425d1ab505057b757bde9a36a9e Mon Sep 17 00:00:00 2001 From: Alex Gorrod Date: Mon, 15 Sep 2014 15:32:15 +1000 Subject: [PATCH 109/132] Update LSM compact to avoid a hang. Only wait for the newest chunk that is present when a compact is started to be flushed before moving onto the merge phase. Otherwise workloads that compact while updating in parallel can wait forever for compact to finish. 
Refs #1225 --- src/include/extern.h | 1 - src/include/lsm.h | 9 ++++----- src/lsm/lsm_tree.c | 44 ++++++++++++++++++----------------------- src/lsm/lsm_work_unit.c | 9 +-------- src/lsm/lsm_worker.c | 15 +++----------- 5 files changed, 27 insertions(+), 51 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index 02850e132bd..211f90399a2 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -1018,7 +1018,6 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, int force, - int *last, WT_LSM_CHUNK **chunkp); extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, diff --git a/src/include/lsm.h b/src/include/lsm.h index 48984399acd..2b8624a37c5 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -193,11 +193,10 @@ struct __wt_lsm_tree { uint32_t merge_aggressiveness; /* Increase amount of work per merge */ #define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ -#define WT_LSM_TREE_COMPACT_FLUSH 0x02 /* Flushed for compact */ -#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ -#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */ -#define WT_LSM_TREE_OPEN 0x10 /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */ +#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */ +#define WT_LSM_TREE_NEED_SWITCH 0x04 /* New chunk needs creating */ +#define WT_LSM_TREE_OPEN 0x08 /* The tree is open */ +#define WT_LSM_TREE_THROTTLE 0x10 /* Throttle updates */ uint32_t flags; #define WT_LSM_TREE_EXCLUSIVE 0x01 /* Tree is opened exclusively */ diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index ea184858d05..f0494e9dd77 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1051,8 +1051,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) lsm_tree->merge_aggressiveness = 0; /* If another thread started a compact on this tree, 
we're done. */ - if (F_ISSET(lsm_tree, - WT_LSM_TREE_COMPACT_FLUSH | WT_LSM_TREE_COMPACTING)) + if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) goto err; /* @@ -1081,7 +1080,6 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) " chunk %u flags 0x%" PRIx32, name, lsm_tree->flags, chunk->id, chunk->flags)); flushing = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); /* * Make sure the in-memory chunk gets flushed do not push a * switch, because we don't want to create a new in-memory @@ -1098,27 +1096,25 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * Continue to push forced flushes until the chunk is on disk. * Once it is on disk move to the compacting phase. */ - if (flushing && - F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH) && - !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { - WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush retry %s chunk %u", - name, chunk->id)); - F_SET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); - WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); - } - /* Start merges when the tree has cleared the flush flag */ - if (flushing && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) { + if (flushing) { WT_ASSERT(session, chunk != NULL); - WT_ERR(__wt_verbose(session, - WT_VERB_LSM, - "Compact flush done %s chunk %u", - name, chunk->id)); - (void)WT_ATOMIC_SUB(chunk->refcnt, 1); - flushing = ref = 0; - compacting = 1; - F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { + WT_ERR(__wt_verbose(session, + WT_VERB_LSM, + "Compact flush done %s chunk %u", + name, chunk->id)); + (void)WT_ATOMIC_SUB(chunk->refcnt, 1); + flushing = ref = 0; + compacting = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + } else { + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "Compact flush retry %s chunk %u", + name, chunk->id)); + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, + lsm_tree)); + } } /* @@ -1156,8 
+1152,6 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) } err: /* Ensure anything we set is cleared. */ - if (flushing) - F_CLR(lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); if (ref) (void)WT_ATOMIC_SUB(chunk->refcnt, 1); if (compacting) { diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index eb791f98f5f..c0d57283400 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -67,12 +67,11 @@ err: WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); */ int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, - WT_LSM_TREE *lsm_tree, int force, int *last, WT_LSM_CHUNK **chunkp) + WT_LSM_TREE *lsm_tree, int force, WT_LSM_CHUNK **chunkp) { u_int i, end; *chunkp = NULL; - *last = 0; WT_ASSERT(session, lsm_tree->queue_ref > 0); WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 0)); @@ -93,12 +92,6 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, force ? " w/ force" : "", i, end - 1, lsm_tree->chunk[i]->uri)); *chunkp = lsm_tree->chunk[i]; - /* - * Let the caller know if this is the last chunk we - * could have selected or an earlier one. - */ - if (i == end - 1) - *last = 1; break; } } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 1f2b76ba720..d3f46532f64 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -32,7 +32,7 @@ __lsm_worker_general_op( WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORK_UNIT *entry; - int force, last; + int force; *completed = 0; if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) && @@ -47,17 +47,15 @@ __lsm_worker_general_op( if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) { force = F_ISSET(entry, WT_LSM_WORK_FORCE); F_CLR(entry, WT_LSM_WORK_FORCE); - last = 0; WT_ERR(__wt_lsm_get_chunk_to_flush(session, - entry->lsm_tree, force, &last, &chunk)); + entry->lsm_tree, force, &chunk)); /* * If we got a chunk to flush, checkpoint it. */ if (chunk != NULL) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Flush%s%s chunk %d %s", + "Flush%s chunk %d %s", force ? 
" w/ force" : "", - last ? " last" : "", chunk->id, chunk->uri)); ret = __wt_lsm_checkpoint_chunk( session, entry->lsm_tree, chunk); @@ -65,13 +63,6 @@ __lsm_worker_general_op( (void)WT_ATOMIC_SUB(chunk->refcnt, 1); WT_ERR(ret); } - /* - * If we flushed the last chunk for a compact, clear the - * flag so compact knows that is complete. - */ - if (last && force && - F_ISSET(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH)) - F_CLR(entry->lsm_tree, WT_LSM_TREE_COMPACT_FLUSH); } else if (entry->flags == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); else if (entry->flags == WT_LSM_WORK_BLOOM) { From d6b91e3119938978fb44f24e71f2302e2a38b24e Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Mon, 15 Sep 2014 10:05:40 -0400 Subject: [PATCH 110/132] Set compacting if we don't have a chunk to flush. #1225 --- src/lsm/lsm_tree.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index f0494e9dd77..815562f6c66 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1087,6 +1087,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) */ WT_ERR(__wt_lsm_manager_push_entry(session, WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); + } else { + /* + * If there is no chunk to flush, go straight to the + * compacting state. + */ + compacting = 1; + F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); } /* Wait for the work unit queues to drain. */ From ec953a4eac9ce51c84f4a63375ba3260dfdaedbf Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Mon, 15 Sep 2014 10:55:39 -0400 Subject: [PATCH 111/132] Ensure the wtperf threads don't start in lock-step, run each thread's RNG state 1,000 steps beyond the last thread's state, before starting the threads. 
--- bench/wtperf/wtperf.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 785f81ce668..93182550dc8 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -2106,16 +2106,30 @@ err: config_free(cfg); static int start_threads(CONFIG *cfg, - WORKLOAD *workp, CONFIG_THREAD *thread, u_int num, void *(*func)(void *)) + WORKLOAD *workp, CONFIG_THREAD *base, u_int num, void *(*func)(void *)) { - u_int i; + CONFIG_THREAD *thread; + u_int i, j; int ret; - for (i = 0; i < num; ++i, ++thread) { + /* Initialize the threads. */ + for (i = 0, thread = base; i < num; ++i, ++thread) { thread->cfg = cfg; - __wt_random_init(thread->rnd); thread->workload = workp; + /* + * We don't want the threads executing in lock-step, move each + * new RNG state further along in the sequence. + */ + if (i == 0) + __wt_random_init(thread->rnd); + else { + thread->rnd[0] = (thread - 1)->rnd[0]; + thread->rnd[1] = (thread - 1)->rnd[1]; + } + for (j = 0; j < 1000; ++j) + (void)__wt_random(thread->rnd); + /* * Every thread gets a key/data buffer because we don't bother * to distinguish between threads needing them and threads that @@ -2142,13 +2156,16 @@ start_threads(CONFIG *cfg, thread->update.min_latency = UINT32_MAX; thread->ckpt.max_latency = thread->insert.max_latency = thread->read.max_latency = thread->update.max_latency = 0; + } + /* Start the threads. */ + for (i = 0, thread = base; i < num; ++i, ++thread) if ((ret = pthread_create( &thread->handle, NULL, func, thread)) != 0) { lprintf(cfg, ret, 0, "Error creating thread"); return (ret); } - } + return (0); } From 1bdb85ecfa86e39a34b19db72a7fc0acbdc52110 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Mon, 15 Sep 2014 13:50:55 -0400 Subject: [PATCH 112/132] Separate LSM work unit types from flags. 
#1208 --- src/include/extern.h | 3 ++- src/include/lsm.h | 18 +++++++++--------- src/lsm/lsm_cursor.c | 2 +- src/lsm/lsm_manager.c | 29 +++++++++++++++-------------- src/lsm/lsm_merge.c | 2 +- src/lsm/lsm_tree.c | 8 ++++---- src/lsm/lsm_work_unit.c | 4 ++-- src/lsm/lsm_worker.c | 22 +++++++++++----------- 8 files changed, 45 insertions(+), 43 deletions(-) diff --git a/src/include/extern.h b/src/include/extern.h index af4cef7f9ea..73ce475574a 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -943,8 +943,9 @@ extern int __wt_lsm_manager_clear_tree( WT_SESSION_IMPL *session, extern int __wt_lsm_manager_pop_entry( WT_SESSION_IMPL *session, uint32_t type, WT_LSM_WORK_UNIT **entryp); -extern int __wt_lsm_manager_push_entry( WT_SESSION_IMPL *session, +extern int __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, uint32_t type, + uint32_t flags, WT_LSM_TREE *lsm_tree); extern int __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, diff --git a/src/include/lsm.h b/src/include/lsm.h index 2b8624a37c5..1dc7714b42e 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -87,9 +87,7 @@ struct __wt_lsm_chunk { #define WT_LSM_WORK_DROP 0x02 /* Drop unused chunks */ #define WT_LSM_WORK_FLUSH 0x04 /* Flush a chunk to disk */ #define WT_LSM_WORK_MERGE 0x08 /* Look for a tree merge */ -#define WT_LSM_WORK_SWITCH 0x10 /* Switch to a new in memory chunk */ -#define WT_LSM_WORK_FORCE 0x10000 /* Force last chunk flush */ -#define WT_LSM_WORK_MASK 0xffff /* Mask for work types */ +#define WT_LSM_WORK_SWITCH 0x10 /* Switch to new in-memory chunk */ /* * WT_LSM_WORK_UNIT -- @@ -97,7 +95,9 @@ struct __wt_lsm_chunk { */ struct __wt_lsm_work_unit { TAILQ_ENTRY(__wt_lsm_work_unit) q; /* Worker unit queue */ - uint32_t flags; /* The type of operation */ + uint32_t type; /* Type of operation */ +#define WT_LSM_WORK_FORCE 0x0001 /* Force operation */ + uint32_t flags; /* Flags for operation */ WT_LSM_TREE *lsm_tree; }; @@ -228,9 +228,9 @@ struct 
__wt_lsm_worker_cookie { * State for an LSM worker thread. */ struct __wt_lsm_worker_args { - WT_SESSION_IMPL *session; - WT_CONDVAR *work_cond; /* Owned by the manager */ - pthread_t tid; - u_int id; - uint32_t flags; + WT_SESSION_IMPL *session; /* Session */ + WT_CONDVAR *work_cond; /* Owned by the manager */ + pthread_t tid; /* Thread id */ + u_int id; /* My manager slot id */ + uint32_t type; /* Types of operations handled */ }; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index df2f7bba271..3ef22162254 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -92,7 +92,7 @@ __clsm_enter_update(WT_CURSOR_LSM *clsm) if (clsm->dsk_gen == lsm_tree->dsk_gen && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_SWITCH, lsm_tree)); + session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); F_SET(lsm_tree, WT_LSM_TREE_NEED_SWITCH); } WT_RET(__wt_lsm_tree_unlock(session, lsm_tree)); diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 4b3523aa6f4..46bc6ab5aad 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -231,7 +231,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) worker_args = &manager->lsm_worker_cookies[1]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; - worker_args->flags = WT_LSM_WORK_SWITCH | WT_LSM_WORK_DROP; + worker_args->type = WT_LSM_WORK_SWITCH | WT_LSM_WORK_DROP; /* Start the switch thread. */ WT_RET(__wt_lsm_worker_start(session, worker_args)); @@ -247,7 +247,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) &manager->lsm_worker_cookies[manager->lsm_workers]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers; - worker_args->flags = + worker_args->type = WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_FLUSH | @@ -259,7 +259,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) * least one thread capable of running merges. 
*/ if (manager->lsm_workers % 2 == 1) - F_SET(worker_args, WT_LSM_WORK_MERGE); + FLD_SET(worker_args->type, WT_LSM_WORK_MERGE); WT_RET(__wt_lsm_worker_start(session, worker_args)); } return (0); @@ -342,15 +342,15 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) lsm_tree->nchunks > 1) || pushms > fillms) { WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_SWITCH, lsm_tree)); + session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_DROP, lsm_tree)); + session, WT_LSM_WORK_DROP, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_FLUSH, lsm_tree)); + session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_BLOOM, lsm_tree)); + session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, lsm_tree)); + session, WT_LSM_WORK_MERGE, 0, lsm_tree)); } } } @@ -480,7 +480,7 @@ __wt_lsm_manager_pop_entry( if (!TAILQ_EMPTY(&manager->managerqh)) { entry = TAILQ_FIRST(&manager->managerqh); WT_ASSERT(session, entry != NULL); - if (F_ISSET(entry, type)) + if (FLD_ISSET(entry->type, type)) TAILQ_REMOVE(&manager->managerqh, entry, q); else entry = NULL; @@ -507,7 +507,7 @@ __wt_lsm_manager_pop_entry( for (entry = TAILQ_FIRST(&manager->appqh); entry != NULL; entry = TAILQ_NEXT(entry, q)) { - if (FLD_ISSET(type, entry->flags)) { + if (FLD_ISSET(type, entry->type)) { TAILQ_REMOVE(&manager->appqh, entry, q); break; } @@ -526,8 +526,8 @@ __wt_lsm_manager_pop_entry( * Add an entry to the end of the switch queue. 
*/ int -__wt_lsm_manager_push_entry( - WT_SESSION_IMPL *session, uint32_t type, WT_LSM_TREE *lsm_tree) +__wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, + uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree) { WT_LSM_MANAGER *manager; WT_LSM_WORK_UNIT *entry; @@ -537,12 +537,13 @@ __wt_lsm_manager_push_entry( WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts)); WT_RET(__wt_calloc_def(session, 1, &entry)); - entry->flags = type; + entry->type = type; + entry->flags = flags; entry->lsm_tree = lsm_tree; (void)WT_ATOMIC_ADD(lsm_tree->queue_ref, 1); WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created); - switch (type & WT_LSM_WORK_MASK) { + switch (type) { case WT_LSM_WORK_SWITCH: __wt_spin_lock(session, &manager->switch_lock); TAILQ_INSERT_TAIL(&manager->switchqh, entry, q); diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 363fe77b93e..964aeb9529d 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -440,7 +440,7 @@ __wt_lsm_merge( /* Schedule a pass to discard old chunks */ WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_DROP, lsm_tree)); + session, WT_LSM_WORK_DROP, 0, lsm_tree)); err: if (locked) WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 815562f6c66..99e4804861b 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -769,7 +769,7 @@ err: WT_TRET(__wt_lsm_tree_unlock(session, lsm_tree)); WT_PANIC_RET(session, ret, "Failed doing LSM switch"); else if (!first_switch) WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_FLUSH, lsm_tree)); + session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); return (ret); } @@ -1086,7 +1086,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) * chunk if the tree is being used read-only now. 
*/ WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, lsm_tree)); + WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree)); } else { /* * If there is no chunk to flush, go straight to the @@ -1119,7 +1119,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) "Compact flush retry %s chunk %u", name, chunk->id)); WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_FLUSH | WT_LSM_WORK_FORCE, + WT_LSM_WORK_FLUSH, WT_LSM_WORK_FORCE, lsm_tree)); } } @@ -1154,7 +1154,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) i < COMPACT_PARALLEL_MERGES; i++) { lsm_tree->merge_aggressiveness = 10; WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, lsm_tree)); + session, WT_LSM_WORK_MERGE, 0, lsm_tree)); } } err: diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index c0d57283400..bb32e30856a 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -315,10 +315,10 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, * Schedule a bloom filter create for our newly flushed chunk */ if (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF)) WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_BLOOM, lsm_tree)); + session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); else WT_RET(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, lsm_tree)); + session, WT_LSM_WORK_MERGE, 0, lsm_tree)); return (0); } diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index d3f46532f64..c86b294f44b 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -35,16 +35,16 @@ __lsm_worker_general_op( int force; *completed = 0; - if (!F_ISSET(cookie, WT_LSM_WORK_FLUSH) && - !F_ISSET(cookie, WT_LSM_WORK_DROP) && - !F_ISSET(cookie, WT_LSM_WORK_BLOOM)) + if (!FLD_ISSET(cookie->type, WT_LSM_WORK_FLUSH) && + !FLD_ISSET(cookie->type, WT_LSM_WORK_DROP) && + !FLD_ISSET(cookie->type, WT_LSM_WORK_BLOOM)) return (WT_NOTFOUND); if ((ret = __wt_lsm_manager_pop_entry(session, - cookie->flags, 
&entry)) != 0 || entry == NULL) + cookie->type, &entry)) != 0 || entry == NULL) return (ret); - if ((entry->flags & WT_LSM_WORK_MASK) == WT_LSM_WORK_FLUSH) { + if (entry->type == WT_LSM_WORK_FLUSH) { force = F_ISSET(entry, WT_LSM_WORK_FORCE); F_CLR(entry, WT_LSM_WORK_FORCE); WT_ERR(__wt_lsm_get_chunk_to_flush(session, @@ -63,12 +63,12 @@ __lsm_worker_general_op( (void)WT_ATOMIC_SUB(chunk->refcnt, 1); WT_ERR(ret); } - } else if (entry->flags == WT_LSM_WORK_DROP) + } else if (entry->type == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); - else if (entry->flags == WT_LSM_WORK_BLOOM) { + else if (entry->type == WT_LSM_WORK_BLOOM) { WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree)); WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, entry->lsm_tree)); + session, WT_LSM_WORK_MERGE, 0, entry->lsm_tree)); } *completed = 1; @@ -99,7 +99,7 @@ __lsm_worker(void *arg) progress = 0; /* Switches are always a high priority */ - while (F_ISSET(cookie, WT_LSM_WORK_SWITCH) && + while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_SWITCH, &entry)) == 0 && entry != NULL) @@ -114,11 +114,11 @@ __lsm_worker(void *arg) WT_ERR(ret); progress = progress || ran; - if (F_ISSET(cookie, WT_LSM_WORK_MERGE) && + if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_MERGE, &entry)) == 0 && entry != NULL) { - WT_ASSERT(session, entry->flags == WT_LSM_WORK_MERGE); + WT_ASSERT(session, entry->type == WT_LSM_WORK_MERGE); ret = __wt_lsm_merge(session, entry->lsm_tree, cookie->id); if (ret == WT_NOTFOUND) { From 9b49d2e57447dfaecaef1eeef3293748e1098cce Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 16 Sep 2014 15:30:27 +1000 Subject: [PATCH 113/132] Don't block on the data handle lock while holding the schema lock. If we can't get the data handle lock immediately, give up attempting to close the file: the sweep server will retry. 
--- src/conn/conn_dhandle.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index bb76c031925..de716433598 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -575,6 +575,7 @@ __wt_conn_dhandle_discard_single( WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *save_dhandle; WT_DECL_RET; + WT_DECL_SPINLOCK_ID(id); /* Must appear last */ conn = S2C(session); @@ -594,12 +595,21 @@ __wt_conn_dhandle_discard_single( /* * Get the schema lock (required to remove entries from the data handle - * list), get the dhandle lock to block the eviction server from walking - * the list. + * list), get the dhandle lock to block the eviction server from + * walking the list. */ F_SET(session, WT_SESSION_SCHEMA_LOCKED); __wt_spin_lock(session, &conn->schema_lock); - __wt_spin_lock(session, &conn->dhandle_lock); + + /* + * If the eviction server is running, don't block waiting for it while + * holding the schema lock. The sweep server will try again. + */ + if (final) + __wt_spin_lock(session, &conn->dhandle_lock); + else if ((ret = + __wt_spin_trylock(session, &conn->dhandle_lock, &id)) != 0) + goto unlock; /* * Check if the handle was reacquired by a session while we waited; @@ -612,7 +622,8 @@ __wt_conn_dhandle_discard_single( SLIST_REMOVE(&conn->dhlh, dhandle, __wt_data_handle, l); __wt_spin_unlock(session, &conn->dhandle_lock); - __wt_spin_unlock(session, &conn->schema_lock); + +unlock: __wt_spin_unlock(session, &conn->schema_lock); F_CLR(session, WT_SESSION_SCHEMA_LOCKED); /* From 3c1416ac0ee28c696b7c6292ee8f069ffdc11e7b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Tue, 16 Sep 2014 15:30:27 +1000 Subject: [PATCH 114/132] Wait for the oldest transaction ID to catch up to current during connection close. This avoids an EBUSY if we try to close when the eviction server is in the middle of a long-running operation such as evicting a 100MB page from an LSM tree. 
refs #1201 --- src/conn/conn_open.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c index 87118be6a51..5bcf05975e7 100644 --- a/src/conn/conn_open.c +++ b/src/conn/conn_open.c @@ -81,13 +81,27 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_NAMED_COMPRESSOR *ncomp; WT_NAMED_DATA_SOURCE *ndsrc; WT_SESSION_IMPL *s, *session; + WT_TXN_GLOBAL *txn_global; u_int i; wt_conn = &conn->iface; + txn_global = &conn->txn_global; session = conn->default_session; - /* We're shutting down. Make sure everything gets freed. */ - __wt_txn_update_oldest(session); + /* + * We're shutting down. Make sure everything gets freed. + * + * It's possible that the eviction server is in the middle of a long + * operation, with a transaction ID pinned. In that case, we will loop + * here until the transaction ID is released, when the oldest + * transaction ID will catch up with the current ID. + */ + for (;;) { + __wt_txn_update_oldest(session); + if (txn_global->oldest_id == txn_global->current) + break; + __wt_yield(); + } /* Clear any pending async ops. */ WT_TRET(__wt_async_flush(session)); From 4fac1bd28fb4916f04c141cc32c54e49980a472d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 07:45:42 -0400 Subject: [PATCH 115/132] Return an error when the configuration merge fails. 
--- src/config/config_collapse.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 5621ad492f6..b8e69835781 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -197,10 +197,11 @@ __config_merge_scan(WT_SESSION_IMPL *session, cp->entries[cp->entries_next].gen = cp->entries_next; ++cp->entries_next; } + WT_ERR_NOTFOUND_OK(ret); err: __wt_scr_free(&kb); __wt_scr_free(&vb); - return (0); + return (ret); } /* From 08edb1f6b932f646fd3d5d8f7d3b9cb4244de157 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 07:51:33 -0400 Subject: [PATCH 116/132] Fix the separator character error message to complain about the correct key. --- src/config/config_collapse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index b8e69835781..70f28b9233d 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -156,8 +156,9 @@ __config_merge_scan(WT_SESSION_IMPL *session, for (str = k.str, len = k.len; len > 0; ++str, --len) if (*str == SEPC) WT_ERR_MSG(session, EINVAL, - "key %s contains a separator character " - "(%s)", (char *)kb->data, SEP); + "key %.*s contains a '%c' separator " + "character", + (int)k.len, (char *)k.str, SEPC); /* Build the key/value strings. */ WT_ERR(__wt_buf_fmt(session, From 48b29989b891af8f1ec95b1a071eb7a7d013eb42 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 08:07:55 -0400 Subject: [PATCH 117/132] Don't use '.' as a key name nested structure separator character during configuration merge, applications can create their own nested key name space by specifying a shared library name to the "extensions" configuration string, and '.' is extremely likely to appear in that shared library name. Reference #1229. 
--- src/config/config_collapse.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c index 70f28b9233d..2c469c25996 100644 --- a/src/config/config_collapse.c +++ b/src/config/config_collapse.c @@ -80,9 +80,9 @@ err: __wt_scr_free(&tmp); * We need a character that can't appear in a key as a separator. */ #undef SEP /* separator key, character */ -#define SEP "." +#define SEP "[" #undef SEPC -#define SEPC '.' +#define SEPC '[' /* * Individual configuration entries, including a generation number used to make @@ -117,7 +117,6 @@ __config_merge_scan(WT_SESSION_IMPL *session, WT_DECL_ITEM(vb); WT_DECL_RET; size_t len; - const char *str; WT_ERR(__wt_scr_alloc(session, 0, &kb)); WT_ERR(__wt_scr_alloc(session, 0, &vb)); @@ -141,20 +140,17 @@ __config_merge_scan(WT_SESSION_IMPL *session, /* * !!! - * WiredTiger names its internal checkpoints with a trailing - * dot and a number, for example, "WiredTigerCheckpoint.37". - * We're using dot to separate names in nested structures, - * and there's an obvious conflict. This works for now because - * that's the only case of a dot in a key name, and we never - * merge configuration strings that contain checkpoint names, - * for historic reasons. For now, return an error if there's - * ever a problem. (Note, it's probably safe if the dot is in - * a quoted key, that is, a key of type WT_CONFIG_ITEM_STRING, - * but since this isn't ever supposed to happen, I'm leaving - * the test simple.) + * We're using a JSON quote character to separate the names we + * create for nested structures. That's not completely safe as + * it's possible to quote characters in JSON such that a quote + * character appears as a literal character in a key name. In + * a few cases, applications can create their own key namespace + * (for example, shared library extension names), and therefore + * it's possible for an application to confuse us. 
Error if we + * we ever see a key with a magic character. */ - for (str = k.str, len = k.len; len > 0; ++str, --len) - if (*str == SEPC) + for (len = 0; len < k.len; ++len) + if (k.str[len] == SEPC) WT_ERR_MSG(session, EINVAL, "key %.*s contains a '%c' separator " "character", From 3a174f569b21e614b54ce068cb112d1744fbb3e3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 08:30:34 -0400 Subject: [PATCH 118/132] Typo, returning 0, not "ret", don't lose the return value on failure. --- src/schema/schema_truncate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/schema/schema_truncate.c b/src/schema/schema_truncate.c index 91f38206265..da526dfe5f4 100644 --- a/src/schema/schema_truncate.c +++ b/src/schema/schema_truncate.c @@ -88,7 +88,7 @@ __truncate_dsrc(WT_SESSION_IMPL *session, const char *uri) WT_ERR_NOTFOUND_OK(ret); err: WT_TRET(cursor->close(cursor)); - return (0); + return (ret); } /* From b15a01951feadb497ea831f45ae39e944e1f6474 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 08:34:09 -0400 Subject: [PATCH 119/132] Typo, calling log_print_err() with the wrong return error value. Typo, verify_checkpoint wasn't returning the final error status. 
--- test/checkpoint/checkpointer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c index 9f12e323878..57478350b52 100644 --- a/test/checkpoint/checkpointer.c +++ b/test/checkpoint/checkpointer.c @@ -188,7 +188,7 @@ verify_checkpoint(WT_SESSION *session) continue; t_ret = cursors[i]->next(cursors[i]); if (t_ret != 0 && t_ret != WT_NOTFOUND) { - (void)log_print_err("cursor->next", ret, 1); + (void)log_print_err("cursor->next", t_ret, 1); goto err; } @@ -224,7 +224,7 @@ err: for (i = 0; i < g.ntables; i++) { "verify_checkpoint:cursor close", ret, 1); } free(cursors); - return (0); + return (ret); } /* From fd0ac9486d5761c8578a6cfbe205bc36e6322bd3 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 08:49:37 -0400 Subject: [PATCH 120/132] Typo, final return value was being lost on failure. --- src/os_posix/os_open.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 680be9677ba..a826573d99f 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -47,11 +47,12 @@ __open_directory_sync(WT_SESSION_IMPL *session, char *path) err: WT_SYSCALL_RETRY(close(fd), ret); if (ret != 0) __wt_err(session, ret, "%s: close", path); + return (ret); #else WT_UNUSED(session); WT_UNUSED(path); -#endif return (0); +#endif } /* From c6fc79ff46f89c99666d4e0b991b006d3c29fbbf Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 08:56:58 -0400 Subject: [PATCH 121/132] Add a check for a return(0) after an error label. --- dist/s_style | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/dist/s_style b/dist/s_style index 9325e3e298b..09b1b9460d2 100644 --- a/dist/s_style +++ b/dist/s_style @@ -6,26 +6,47 @@ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 cd .. -# Returns in functions after a jump to the error label. 
-for f in `find examples ext src test -name '*.[ci]'`; do +# Turn a C file into a line per function so we can use grep on it. +file_parse() +{ sed -n \ - -e '/^{$/,/^}$/{=;p;}' $f | + -e '/^{$/,/^}$/{=;p;}' $1 | sed 'N;s/\n/:/' | sed -e '/./{H;/^[0-9][0-9]*:}$/!d;}' \ -e x \ -e 's/\n/ /g' \ -e p \ - -e '{s/.*//;x;}' | + -e '{s/.*//;x;}' +} + +# Returns in functions after a jump to the error label, or an infinite loop +# where there's a jump to the error label after the error label. +for f in `find bench examples ext src test -name '*.[ci]'`; do + file_parse $f | egrep '(WT_ERR|WT_ERR_MSG|WT_ERR_NOTFOUND_OK|WT_ERR_TEST|WT_ILLEGAL_VALUE_ERR)\(.*(WT_ASSERT_RET|WT_ILLEGAL_VALUE|WT_RET|WT_RET_MSG|WT_RET_NOTFOUND_OK|WT_RET_TEST|WT_VERBOSE_RET|WT_VERBOSE_RETVAL)\(.*err:|[^a-z_]err:.*(WT_ERR|WT_ERR_MSG|WT_ERR_NOTFOUND_OK|WT_ERR_TEST|WT_ILLEGAL_VALUE_ERR)\(' | - sed 's/:.*//' > $t + sed 's/:.*//' > $t + test -s $t && { echo "$f: return after a jump to the error label or a jump to the error label after the error label" sed 's/^/function @ line:/' < $t } done +# Return of 0 in functions after a jump to the error label. 
+for f in `find bench examples ext src test -name '*.[ci]'`; do + file_parse $f | + egrep -v '[^a-z_]err:.*return \(ret|[^a-z_]err:.*WT_RET' | + egrep '[^a-z_]err:.*return \(0\);' | + sed 's/:.*//' > $t + + test -s $t && { + echo "$f: error label followed by a return of 0" + sed 's/^/function @ line:/' < $t + } +done + for f in \ - `find bench/wtperf examples ext src test -name '*.[chisy]' -o -name '*.in' | + `find bench examples ext src test -name '*.[chisy]' -o -name '*.in' | sed '/Makefile.in/d'`; do if grep "^[^}]*while (0);" $f > $t; then echo "$f: while (0) has trailing semi-colon" From dd67db2ff1d8b5057f5f7d913e4d9035c8e1d904 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 12:53:29 -0400 Subject: [PATCH 122/132] Fix a salvage bug: if salvaging a variable-length column-store page, and we split a key range inside an RLE unit, we can end up with multiple key range chunks referencing a single overflow value during the leaf-page merge phase. There's not much we can do at that point, delete the value. Reference #1222. 
--- src/btree/bt_slvg.c | 119 ++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 42 deletions(-) diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 7f66e0e623b..10366e91a0e 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -115,7 +115,7 @@ struct __wt_track { static int __slvg_cleanup(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_col_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *); static int __slvg_col_build_leaf(WT_SESSION_IMPL *, WT_TRACK *, WT_REF *); -static int __slvg_col_merge_ovfl( +static int __slvg_col_ovfl( WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint64_t, uint64_t); static int __slvg_col_range(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_col_range_missing(WT_SESSION_IMPL *, WT_STUFF *); @@ -126,13 +126,13 @@ static int __slvg_merge_block_free(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_ovfl_compare(const void *, const void *); static int __slvg_ovfl_discard(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_ovfl_reconcile(WT_SESSION_IMPL *, WT_STUFF *); -static int __slvg_ovfl_ref(WT_SESSION_IMPL *, WT_TRACK *); +static int __slvg_ovfl_ref(WT_SESSION_IMPL *, WT_TRACK *, int); static int __slvg_ovfl_ref_all(WT_SESSION_IMPL *, WT_TRACK *); static int __slvg_read(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_row_build_internal(WT_SESSION_IMPL *, uint32_t, WT_STUFF *); static int __slvg_row_build_leaf( WT_SESSION_IMPL *, WT_TRACK *, WT_REF *, WT_STUFF *); -static int __slvg_row_merge_ovfl( +static int __slvg_row_ovfl( WT_SESSION_IMPL *, WT_TRACK *, WT_PAGE *, uint32_t, uint32_t); static int __slvg_row_range(WT_SESSION_IMPL *, WT_STUFF *); static int __slvg_row_range_overlap( @@ -203,10 +203,37 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]) /* * Step 3: - * Review the relationships between the pages and the overflow items. + * Discard any page referencing a non-existent overflow page. 
We do + * this before checking overlapping key ranges on the grounds that a + * bad key range we can use is better than a terrific key range that + * references pages we don't have. On the other hand, we subsequently + * discard key ranges where there are better overlapping ranges, and + * it would be better if we let the availability of an overflow value + * inform our choices as to the key ranges we select, ideally on a + * per-key basis. + * + * A complicating problem is found in variable-length column-store + * objects, where we potentially split key ranges within RLE units. + * For example, if there's a page with rows 15-20 and we later find + * row 17 with a larger LSN, the range splits into 3 chunks, 15-16, + * 17, and 18-20. If rows 15-20 were originally a single value (an + * RLE of 6), and that record is an overflow record, we end up with + * two chunks, both of which want to reference the same overflow value. + * + * Instead of the approach just described, we're first discarding any + * pages referencing non-existent overflow pages, then we're reviewing + * our key ranges and discarding any that overlap. We're doing it that + * way for a few reasons: absent corruption, missing overflow items are + * strong arguments the page was replaced (on the other hand, some kind + * of file corruption is probably why we're here); it's a significant + * amount of additional complexity to simultaneously juggle overlapping + * ranges and missing overflow items; finally, real-world applications + * usually don't have a lot of overflow items, as WiredTiger supports + * very large page sizes, overflow items shouldn't be common. * * Step 4: - * Add unreferenced overflow page blocks to the free list. + * Add unreferenced overflow page blocks to the free list so they are + * reused immediately. 
*/ if (ss->ovfl_next != 0) { WT_ERR(__slvg_ovfl_reconcile(session, ss)); @@ -1239,7 +1266,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) /* Set the referenced flag on overflow pages we're using. */ if (page->type == WT_PAGE_COL_VAR && trk->trk_ovfl_cnt != 0) - WT_ERR(__slvg_col_merge_ovfl(session, trk, page, skip, take)); + WT_ERR(__slvg_col_ovfl(session, trk, page, skip, take)); /* * If we're missing some part of the range, the real start range is in @@ -1293,12 +1320,12 @@ err: WT_TRET(__wt_page_release(session, ref, 0)); } /* - * __slvg_col_merge_ovfl_single -- + * __slvg_col_ovfl_single -- * Find a single overflow record in the merge page's list, and mark it as * referenced. */ static int -__slvg_col_merge_ovfl_single( +__slvg_col_ovfl_single( WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL_UNPACK *unpack) { WT_TRACK *ovfl; @@ -1312,7 +1339,7 @@ __slvg_col_merge_ovfl_single( ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]]; if (unpack->size == ovfl->trk_addr_size && memcmp(unpack->data, ovfl->trk_addr, unpack->size) == 0) - return (__slvg_ovfl_ref(session, ovfl)); + return (__slvg_ovfl_ref(session, ovfl, 0)); } WT_PANIC_RET(session, @@ -1320,16 +1347,17 @@ __slvg_col_merge_ovfl_single( } /* - * __slvg_col_merge_ovfl -- + * __slvg_col_ovfl -- * Mark overflow items referenced by the merged page. */ static int -__slvg_col_merge_ovfl(WT_SESSION_IMPL *session, +__slvg_col_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint64_t skip, uint64_t take) { WT_CELL_UNPACK unpack; WT_CELL *cell; WT_COL *cip; + WT_DECL_RET; uint64_t recno, start, stop; uint32_t i; @@ -1359,9 +1387,28 @@ __slvg_col_merge_ovfl(WT_SESSION_IMPL *session, * because stop is the last record wanted, if the record number * equals stop, we want the next record. 
*/ - if (recno > start && unpack.type == WT_CELL_VALUE_OVFL) - WT_RET(__slvg_col_merge_ovfl_single( - session, trk, &unpack)); + if (recno > start && unpack.type == WT_CELL_VALUE_OVFL) { + ret = __slvg_col_ovfl_single(session, trk, &unpack); + + /* + * When handling overlapping ranges on variable-length + * column-store leaf pages, we split ranges without + * considering if we were splitting RLE units. (See + * note at the beginning of this file for explanation + * of the overall process.) If the RLE unit was on-page, + * we can simply write it again. If the RLE unit was an + * overflow value that's already been used by another + * row (from some other page created by a range split), + * there's not much to do, this row can't reference an + * overflow record we don't have: delete the row. + */ + if (ret == EBUSY) { + __wt_cell_type_reset(session, + cell, WT_CELL_VALUE_OVFL, WT_CELL_DEL); + ret = 0; + } + WT_RET(ret); + } if (recno > stop) break; } @@ -1936,7 +1983,7 @@ __slvg_row_build_leaf( /* Set the referenced flag on overflow pages we're using. */ if (trk->trk_ovfl_cnt != 0) - WT_ERR(__slvg_row_merge_ovfl(session, + WT_ERR(__slvg_row_ovfl(session, trk, page, skip_start, page->pg_row_entries - skip_stop)); /* @@ -1984,13 +2031,12 @@ err: WT_TRET(__wt_page_release(session, ref, 0)); } /* - * __slvg_row_merge_ovfl_single -- + * __slvg_row_ovfl_single -- * Find a single overflow record in the merge page's list, and mark it as * referenced. 
*/ static int -__slvg_row_merge_ovfl_single( - WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL *cell) +__slvg_row_ovfl_single(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_CELL *cell) { WT_CELL_UNPACK unpack; WT_TRACK *ovfl; @@ -2010,7 +2056,7 @@ __slvg_row_merge_ovfl_single( ovfl = trk->ss->ovfl[trk->trk_ovfl_slot[i]]; if (unpack.size == ovfl->trk_addr_size && memcmp(unpack.data, ovfl->trk_addr, unpack.size) == 0) - return (__slvg_ovfl_ref(session, ovfl)); + return (__slvg_ovfl_ref(session, ovfl, 1)); } WT_PANIC_RET(session, @@ -2018,11 +2064,11 @@ __slvg_row_merge_ovfl_single( } /* - * __slvg_row_merge_ovfl -- + * __slvg_row_ovfl -- * Mark overflow items referenced by the merged page. */ static int -__slvg_row_merge_ovfl(WT_SESSION_IMPL *session, +__slvg_row_ovfl(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_PAGE *page, uint32_t start, uint32_t stop) { WT_CELL *cell; @@ -2038,12 +2084,10 @@ __slvg_row_merge_ovfl(WT_SESSION_IMPL *session, (void)__wt_row_leaf_key_info( page, copy, NULL, &cell, NULL, NULL); if (cell != NULL) - WT_RET( - __slvg_row_merge_ovfl_single(session, trk, cell)); + WT_RET(__slvg_row_ovfl_single(session, trk, cell)); cell = __wt_row_leaf_value_cell(page, rip, NULL); if (cell != NULL) - WT_RET( - __slvg_row_merge_ovfl_single(session, trk, cell)); + WT_RET(__slvg_row_ovfl_single(session, trk, cell)); } return (0); } @@ -2113,18 +2157,6 @@ __slvg_ovfl_reconcile(WT_SESSION_IMPL *session, WT_STUFF *ss) slot = NULL; /* - * Discard any page referencing a non-existent overflow page. We do - * this before checking overlapping key ranges on the grounds that a - * bad key range we can use is better than a terrific key range that - * references pages we don't have. - * - * An alternative would be to discard only the on-page item referencing - * the missing overflow item. 
We're not doing that because: (1) absent - * corruption, a missing overflow item is a strong argument the page was - * replaced (but admittedly, corruption is probably why we're here); (2) - * it's a lot of work, and as WiredTiger supports very large page sizes, - * overflow items simply shouldn't be common. - * * If an overflow page is referenced more than once, discard leaf pages * with the lowest LSNs until overflow pages are only referenced once. * @@ -2314,12 +2346,15 @@ __slvg_merge_block_free(WT_SESSION_IMPL *session, WT_STUFF *ss) * Reference an overflow page, checking for multiple references. */ static int -__slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk) +__slvg_ovfl_ref(WT_SESSION_IMPL *session, WT_TRACK *trk, int multi_panic) { - if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) + if (F_ISSET(trk, WT_TRACK_OVFL_REFD)) { + if (!multi_panic) + return (EBUSY); WT_PANIC_RET(session, EINVAL, - "overflow record referenced multiple times during leaf " - "page merge"); + "overflow record unexpectedly referenced multiple times " + "during leaf page merge"); + } F_SET(trk, WT_TRACK_OVFL_REFD); return (0); @@ -2336,7 +2371,7 @@ __slvg_ovfl_ref_all(WT_SESSION_IMPL *session, WT_TRACK *trk) for (i = 0; i < trk->trk_ovfl_cnt; ++i) WT_RET(__slvg_ovfl_ref( - session, trk->ss->ovfl[trk->trk_ovfl_slot[i]])); + session, trk->ss->ovfl[trk->trk_ovfl_slot[i]], 1)); return (0); } From ab6f8721f936313c11ccc68497e59e4ad955d3d0 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 16 Sep 2014 13:29:27 -0400 Subject: [PATCH 123/132] Add push/pop macros for the different queues. 
#1208 --- src/lsm/lsm_manager.c | 120 ++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 76 deletions(-) diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 46bc6ab5aad..3eeb118f7b8 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -443,6 +443,24 @@ __wt_lsm_manager_clear_tree( return (0); } +/* + * We assume this is only called from __wt_lsm_manager_pop_entry and we + * have session, entry and type available to use. If the queue is empty + * we may return from the macro. + */ +#define LSM_POP_ENTRY(qh, qlock) do { \ + if (TAILQ_EMPTY(qh)) \ + return (0); \ + __wt_spin_lock(session, qlock); \ + TAILQ_FOREACH(entry, (qh), q) { \ + if (FLD_ISSET(type, entry->type)) { \ + TAILQ_REMOVE(qh, entry, q); \ + break; \ + } \ + } \ + __wt_spin_unlock(session, (qlock)); \ +} while (0) + /* * __wt_lsm_manager_pop_entry -- * Retrieve the head of the queue, if it matches the requested work @@ -459,68 +477,32 @@ __wt_lsm_manager_pop_entry( *entryp = NULL; entry = NULL; - switch (type) { - case WT_LSM_WORK_SWITCH: - if (TAILQ_EMPTY(&manager->switchqh)) - return (0); - - __wt_spin_lock(session, &manager->switch_lock); - if (!TAILQ_EMPTY(&manager->switchqh)) { - entry = TAILQ_FIRST(&manager->switchqh); - WT_ASSERT(session, entry != NULL); - TAILQ_REMOVE(&manager->switchqh, entry, q); - } - __wt_spin_unlock(session, &manager->switch_lock); - break; - case WT_LSM_WORK_MERGE: - if (TAILQ_EMPTY(&manager->managerqh)) - return (0); - - __wt_spin_lock(session, &manager->manager_lock); - if (!TAILQ_EMPTY(&manager->managerqh)) { - entry = TAILQ_FIRST(&manager->managerqh); - WT_ASSERT(session, entry != NULL); - if (FLD_ISSET(entry->type, type)) - TAILQ_REMOVE(&manager->managerqh, entry, q); - else - entry = NULL; - } - - __wt_spin_unlock(session, &manager->manager_lock); - break; - default: - /* - * The app queue is the only one that has multiple different - * work unit types, allow a request for a variety. 
- */ - WT_ASSERT(session, FLD_ISSET(type, WT_LSM_WORK_BLOOM) || - FLD_ISSET(type, WT_LSM_WORK_DROP) || - FLD_ISSET(type, WT_LSM_WORK_FLUSH)); - if (TAILQ_EMPTY(&manager->appqh)) - return (0); - - __wt_spin_lock(session, &manager->app_lock); - /* - * Find and remove the first entry in the queue that matches the - * request. - */ - for (entry = TAILQ_FIRST(&manager->appqh); - entry != NULL; - entry = TAILQ_NEXT(entry, q)) { - if (FLD_ISSET(type, entry->type)) { - TAILQ_REMOVE(&manager->appqh, entry, q); - break; - } - } - __wt_spin_unlock(session, &manager->app_lock); - break; - } + /* + * Pop the entry off the correct queue based on our work type. + */ + if (type == WT_LSM_WORK_SWITCH) + LSM_POP_ENTRY(&manager->switchqh, &manager->switch_lock); + else if (type == WT_LSM_WORK_MERGE) + LSM_POP_ENTRY(&manager->managerqh, &manager->manager_lock); + else + LSM_POP_ENTRY(&manager->appqh, &manager->app_lock); if (entry != NULL) WT_STAT_FAST_CONN_INCR(session, lsm_work_units_done); *entryp = entry; return (0); } +/* + * Push a work unit onto the appropriate queue. This macro assumes we are + * called from __wt_lsm_manager_push_entry and we have session and entry + * available for use. + */ +#define LSM_PUSH_ENTRY(qh, qlock) do { \ + __wt_spin_lock(session, qlock); \ + TAILQ_INSERT_TAIL((qh), entry, q); \ + __wt_spin_unlock(session, qlock); \ +} while (0) + /* * __wt_lsm_manager_push_entry -- * Add an entry to the end of the switch queue. 
@@ -543,26 +525,12 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, (void)WT_ATOMIC_ADD(lsm_tree->queue_ref, 1); WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created); - switch (type) { - case WT_LSM_WORK_SWITCH: - __wt_spin_lock(session, &manager->switch_lock); - TAILQ_INSERT_TAIL(&manager->switchqh, entry, q); - __wt_spin_unlock(session, &manager->switch_lock); - break; - case WT_LSM_WORK_BLOOM: - case WT_LSM_WORK_DROP: - case WT_LSM_WORK_FLUSH: - __wt_spin_lock(session, &manager->app_lock); - TAILQ_INSERT_TAIL(&manager->appqh, entry, q); - __wt_spin_unlock(session, &manager->app_lock); - break; - case WT_LSM_WORK_MERGE: - __wt_spin_lock(session, &manager->manager_lock); - TAILQ_INSERT_TAIL(&manager->managerqh, entry, q); - __wt_spin_unlock(session, &manager->manager_lock); - break; - WT_ILLEGAL_VALUE(session); - } + if (type == WT_LSM_WORK_SWITCH) + LSM_PUSH_ENTRY(&manager->switchqh, &manager->switch_lock); + else if (type == WT_LSM_WORK_MERGE) + LSM_PUSH_ENTRY(&manager->managerqh, &manager->manager_lock); + else + LSM_PUSH_ENTRY(&manager->appqh, &manager->app_lock); WT_RET(__wt_cond_signal(session, manager->work_cond)); From 093efda8f863b3f49a7d44a47028ffe9e445ced5 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Tue, 16 Sep 2014 13:43:21 -0400 Subject: [PATCH 124/132] Add length tracking and assertions for lsm work queues. 
#1208 #1230 --- src/include/lsm.h | 4 ++++ src/lsm/lsm_manager.c | 26 ++++++++++++++++++-------- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index 1dc7714b42e..8f8f531305b 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -123,6 +123,10 @@ struct __wt_lsm_manager { WT_SPINLOCK switch_lock; /* Lock for switch queue */ WT_SPINLOCK app_lock; /* Lock for application queue */ WT_SPINLOCK manager_lock; /* Lock for manager queue */ +#define LSM_MAX_WORK_QUEUE_LEN 10000 + int64_t switch_len; /* Length of switch queue */ + int64_t app_len; /* Length of application queue */ + int64_t manager_len; /* Length of manager queue */ WT_CONDVAR *work_cond; /* Used to notify worker of activity */ uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 3eeb118f7b8..5598b7f5e10 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -448,13 +448,15 @@ __wt_lsm_manager_clear_tree( * have session, entry and type available to use. If the queue is empty * we may return from the macro. */ -#define LSM_POP_ENTRY(qh, qlock) do { \ +#define LSM_POP_ENTRY(qh, qlock, qlen) do { \ if (TAILQ_EMPTY(qh)) \ return (0); \ __wt_spin_lock(session, qlock); \ TAILQ_FOREACH(entry, (qh), q) { \ if (FLD_ISSET(type, entry->type)) { \ TAILQ_REMOVE(qh, entry, q); \ + (qlen)--; \ + WT_ASSERT(session, (qlen) >= 0); \ break; \ } \ } \ @@ -481,11 +483,14 @@ __wt_lsm_manager_pop_entry( * Pop the entry off the correct queue based on our work type. 
*/ if (type == WT_LSM_WORK_SWITCH) - LSM_POP_ENTRY(&manager->switchqh, &manager->switch_lock); + LSM_POP_ENTRY(&manager->switchqh, + &manager->switch_lock, manager->switch_len); else if (type == WT_LSM_WORK_MERGE) - LSM_POP_ENTRY(&manager->managerqh, &manager->manager_lock); + LSM_POP_ENTRY(&manager->managerqh, + &manager->manager_lock, manager->manager_len); else - LSM_POP_ENTRY(&manager->appqh, &manager->app_lock); + LSM_POP_ENTRY(&manager->appqh, + &manager->app_lock, manager->app_len); if (entry != NULL) WT_STAT_FAST_CONN_INCR(session, lsm_work_units_done); *entryp = entry; @@ -497,9 +502,11 @@ __wt_lsm_manager_pop_entry( * called from __wt_lsm_manager_push_entry and we have session and entry * available for use. */ -#define LSM_PUSH_ENTRY(qh, qlock) do { \ +#define LSM_PUSH_ENTRY(qh, qlock, qlen) do { \ __wt_spin_lock(session, qlock); \ TAILQ_INSERT_TAIL((qh), entry, q); \ + (qlen)++; \ + WT_ASSERT(session, (qlen) <= LSM_MAX_WORK_QUEUE_LEN); \ __wt_spin_unlock(session, qlock); \ } while (0) @@ -526,11 +533,14 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created); if (type == WT_LSM_WORK_SWITCH) - LSM_PUSH_ENTRY(&manager->switchqh, &manager->switch_lock); + LSM_PUSH_ENTRY(&manager->switchqh, + &manager->switch_lock, manager->switch_len); else if (type == WT_LSM_WORK_MERGE) - LSM_PUSH_ENTRY(&manager->managerqh, &manager->manager_lock); + LSM_PUSH_ENTRY(&manager->managerqh, + &manager->manager_lock, manager->manager_len); else - LSM_PUSH_ENTRY(&manager->appqh, &manager->app_lock); + LSM_PUSH_ENTRY(&manager->appqh, + &manager->app_lock, manager->app_len); WT_RET(__wt_cond_signal(session, manager->work_cond)); From b17ae928762fbd3416e53959e5b53ebbe0c3f5c8 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 18:25:12 -0400 Subject: [PATCH 125/132] __wt_cursor_init has a long list of tests for cursor methods that we don't need, except for get/set-key and get/set-value, almost all of the methods 
are explicitly set, it's simpler to set them all then to leave that code in place. Two real changes besides the semantic sugar: change the config cursor reset method from not-supported to a no-op, that's the default, delete the __cursor_search function (the default search method), no code needed it. --- src/cursor/cur_backup.c | 4 +-- src/cursor/cur_config.c | 12 ++++----- src/cursor/cur_ds.c | 8 +++--- src/cursor/cur_dump.c | 2 +- src/cursor/cur_file.c | 8 +++--- src/cursor/cur_index.c | 6 ++--- src/cursor/cur_log.c | 8 +++--- src/cursor/cur_metadata.c | 8 +++--- src/cursor/cur_stat.c | 2 +- src/cursor/cur_std.c | 51 +-------------------------------------- src/lsm/lsm_cursor.c | 8 +++--- 11 files changed, 34 insertions(+), 83 deletions(-) diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c index fc3bd6e3abd..bef0b70b50c 100644 --- a/src/cursor/cur_backup.c +++ b/src/cursor/cur_backup.c @@ -103,11 +103,11 @@ __wt_curbackup_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ + __wt_cursor_get_key, /* get-key */ __wt_cursor_notsup, /* get-value */ __wt_cursor_notsup, /* set-key */ __wt_cursor_notsup, /* set-value */ - NULL, /* compare */ + __wt_cursor_notsup, /* compare */ __curbackup_next, /* next */ __wt_cursor_notsup, /* prev */ __curbackup_reset, /* reset */ diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c index 9eb9a6ef74d..b7bd05b4e24 100644 --- a/src/cursor/cur_config.c +++ b/src/cursor/cur_config.c @@ -26,14 +26,14 @@ __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ - NULL, /* compare */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ + __wt_cursor_notsup, /* compare 
*/ __wt_cursor_notsup, /* next */ __wt_cursor_notsup, /* prev */ - __wt_cursor_notsup, /* reset */ + __wt_cursor_noop, /* reset */ __wt_cursor_notsup, /* search */ __wt_cursor_notsup, /* search-near */ __wt_cursor_notsup, /* insert */ diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c index 69370f1fa6b..6fa10739856 100644 --- a/src/cursor/cur_ds.c +++ b/src/cursor/cur_ds.c @@ -448,10 +448,10 @@ __wt_curds_open( const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ __curds_compare, /* compare */ __curds_next, /* next */ __curds_prev, /* prev */ diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c index 266ef0d7207..28dffd8aff2 100644 --- a/src/cursor/cur_dump.c +++ b/src/cursor/cur_dump.c @@ -349,7 +349,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp) __curdump_get_value, /* get-value */ __curdump_set_key, /* set-key */ __curdump_set_value, /* set-value */ - NULL, /* compare */ + __wt_cursor_notsup, /* compare */ __curdump_next, /* next */ __curdump_prev, /* prev */ __curdump_reset, /* reset */ diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 08129e668f5..688419d1916 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -340,10 +340,10 @@ __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ __curfile_compare, /* compare */ __curfile_next, /* next */ __curfile_prev, /* prev */ diff --git a/src/cursor/cur_index.c 
b/src/cursor/cur_index.c index ae438618bbc..bf73b3612c1 100644 --- a/src/cursor/cur_index.c +++ b/src/cursor/cur_index.c @@ -337,11 +337,11 @@ __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ + __wt_cursor_get_key, /* get-key */ __curindex_get_value, /* get-value */ - NULL, /* set-key */ + __wt_cursor_set_key, /* set-key */ __curindex_set_value, /* set-value */ - NULL, /* compare */ + __wt_cursor_notsup, /* compare */ __curindex_next, /* next */ __curindex_prev, /* prev */ __curindex_reset, /* reset */ diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c index e67caa2f4d5..4ecbcae96dd 100644 --- a/src/cursor/cur_log.c +++ b/src/cursor/cur_log.c @@ -308,10 +308,10 @@ __wt_curlog_open(WT_SESSION_IMPL *session, { WT_CONNECTION_IMPL *conn; WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ __curlog_compare, /* compare */ __curlog_next, /* next */ __wt_cursor_notsup, /* prev */ diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c index 39512b91612..30fe3b28625 100644 --- a/src/cursor/cur_metadata.c +++ b/src/cursor/cur_metadata.c @@ -403,10 +403,10 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) { WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ __curmetadata_compare, /* compare */ __curmetadata_next, /* next */ __curmetadata_prev, /* prev */ diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index 
3dcfa638a3d..fe4660ae0a3 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -483,7 +483,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, __curstat_get_value, /* get-value */ __curstat_set_key, /* set-key */ __curstat_set_value, /* set-value */ - NULL, /* compare */ + __wt_cursor_notsup, /* compare */ __curstat_next, /* next */ __curstat_prev, /* prev */ __curstat_reset, /* reset */ diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index 095e2cbb72c..a92f4628c4b 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -433,19 +433,6 @@ err: cursor->saved_err = ret; API_END(session, ret); } -/* - * __cursor_search -- - * WT_CURSOR->search default implementation. - */ -static int -__cursor_search(WT_CURSOR *cursor) -{ - int exact; - - WT_RET(cursor->search_near(cursor, &exact)); - return ((exact == 0) ? 0 : WT_NOTFOUND); -} - /* * __wt_cursor_close -- * WT_CURSOR->close default implementation. @@ -555,42 +542,6 @@ __wt_cursor_init(WT_CURSOR *cursor, session = (WT_SESSION_IMPL *)cursor->session; - /* - * Fill in unspecified cursor methods: get/set key/value, position - * duplication, search and reconfiguration are all standard, else - * if the method isn't set, assume it's unsupported. 
- */ - if (cursor->get_key == NULL) - cursor->get_key = __wt_cursor_get_key; - if (cursor->get_value == NULL) - cursor->get_value = __wt_cursor_get_value; - if (cursor->set_key == NULL) - cursor->set_key = __wt_cursor_set_key; - if (cursor->set_value == NULL) - cursor->set_value = __wt_cursor_set_value; - if (cursor->compare == NULL) - cursor->compare = (int (*) - (WT_CURSOR *, WT_CURSOR *, int *))__wt_cursor_notsup; - if (cursor->next == NULL) - cursor->next = __wt_cursor_notsup; - if (cursor->prev == NULL) - cursor->prev = __wt_cursor_notsup; - if (cursor->reset == NULL) - cursor->reset = __wt_cursor_noop; - if (cursor->search == NULL) - cursor->search = __cursor_search; - if (cursor->search_near == NULL) - cursor->search_near = - (int (*)(WT_CURSOR *, int *))__wt_cursor_notsup; - if (cursor->insert == NULL) - cursor->insert = __wt_cursor_notsup; - if (cursor->update == NULL) - cursor->update = __wt_cursor_notsup; - if (cursor->remove == NULL) - cursor->remove = __wt_cursor_notsup; - if (cursor->close == NULL) - WT_RET_MSG(session, EINVAL, "cursor lacks a close method"); - if (cursor->uri == NULL) WT_RET(__wt_strdup(session, uri, &cursor->uri)); @@ -618,8 +569,8 @@ __wt_cursor_init(WT_CURSOR *cursor, cursor->remove = __wt_cursor_notsup; } - /* dump */ /* + * dump * If an index cursor is opened with dump, then this * function is called on the index files, with the dump * config string, and with the index cursor as an owner. 
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index df2f7bba271..6ae0bd26c1f 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -1442,10 +1442,10 @@ __wt_clsm_open(WT_SESSION_IMPL *session, { WT_CONFIG_ITEM cval; WT_CURSOR_STATIC_INIT(iface, - NULL, /* get-key */ - NULL, /* get-value */ - NULL, /* set-key */ - NULL, /* set-value */ + __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ __clsm_compare, /* compare */ __clsm_next, /* next */ __clsm_prev, /* prev */ From c14ea4132628244f5508f15a39dcc7b343786e71 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 16 Sep 2014 19:54:38 -0400 Subject: [PATCH 126/132] KNF --- src/cursor/cur_std.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index a92f4628c4b..cf03fc744b8 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -536,8 +536,8 @@ int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) { - WT_CURSOR *cdump; WT_CONFIG_ITEM cval; + WT_CURSOR *cdump; WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)cursor->session; From 39b1fa5837e6cc4d2d79856f250d1a0674f825a2 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Wed, 17 Sep 2014 15:44:17 -0400 Subject: [PATCH 127/132] Tweak the LSM manager aggressive update alg and server trigger. 
#1230 --- src/include/lsm.h | 2 +- src/lsm/lsm_manager.c | 35 ++++++++++++++++++++--------------- src/lsm/lsm_tree.c | 5 ++++- src/lsm/lsm_work_unit.c | 13 ++++++++++--- src/lsm/lsm_worker.c | 22 ++++++++++++++++------ 5 files changed, 51 insertions(+), 26 deletions(-) diff --git a/src/include/lsm.h b/src/include/lsm.h index 8f8f531305b..12b31707458 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -123,7 +123,7 @@ struct __wt_lsm_manager { WT_SPINLOCK switch_lock; /* Lock for switch queue */ WT_SPINLOCK app_lock; /* Lock for application queue */ WT_SPINLOCK manager_lock; /* Lock for manager queue */ -#define LSM_MAX_WORK_QUEUE_LEN 10000 +#define LSM_MAX_WORK_QUEUE_LEN 1000 int64_t switch_len; /* Length of switch queue */ int64_t app_len; /* Length of application queue */ int64_t manager_len; /* Length of manager queue */ diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 5598b7f5e10..4f5d3b7a0ff 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -187,7 +187,7 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) { struct timespec now; uint64_t chunk_wait, stallms; - u_int old_aggressive; + u_int new_aggressive; WT_RET(__wt_epoch(session, &now)); stallms = WT_TIMEDIFF(now, lsm_tree->last_flush_ts) / WT_MILLION; @@ -196,18 +196,22 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * been created by now. Use 10 seconds as a default if we don't have an * estimate. */ - chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? - 10000 : lsm_tree->chunk_fill_ms); - old_aggressive = lsm_tree->merge_aggressiveness; - lsm_tree->merge_aggressiveness = - (u_int)(chunk_wait / lsm_tree->merge_min); + if (lsm_tree->nchunks > lsm_tree->merge_min) + chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? 
+ 10000 : lsm_tree->chunk_fill_ms); + else + chunk_wait = 0; + new_aggressive = (u_int)(chunk_wait / lsm_tree->merge_min); - if (lsm_tree->merge_aggressiveness > old_aggressive) + if (new_aggressive > lsm_tree->merge_aggressiveness) { WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge %s got aggressive (%u), " - "%u / %" PRIu64, - lsm_tree->name, lsm_tree->merge_aggressiveness, stallms, + "LSM merge %s got aggressive (old %u new %u), " + "merge_min %d, %u / %" PRIu64, + lsm_tree->name, lsm_tree->merge_aggressiveness, + new_aggressive, lsm_tree->merge_min, stallms, lsm_tree->chunk_fill_ms)); + lsm_tree->merge_aggressiveness = new_aggressive; + } return (0); } @@ -336,11 +340,12 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) * to how often new chunks are being created add some * more. */ - if ((!lsm_tree->modified && lsm_tree->nchunks > 1) || - lsm_tree->merge_aggressiveness > 3 || - (lsm_tree->queue_ref == 0 && - lsm_tree->nchunks > 1) || - pushms > fillms) { + if (lsm_tree->nchunks > 1 && + (!lsm_tree->modified || + lsm_tree->queue_ref == 0 || + (lsm_tree->merge_aggressiveness > 3 && + !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) || + pushms > fillms)) { WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 99e4804861b..988c39607b6 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1094,6 +1094,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) */ compacting = 1; F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + WT_ERR(__wt_verbose(session, WT_VERB_LSM, + "COMPACT: Start compacting %s", lsm_tree->name)); } /* Wait for the work unit queues to drain. */ @@ -1108,7 +1110,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) { WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "Compact flush done %s chunk %u", + "Compact flush done %s chunk %u. 
" + "Start compacting", name, chunk->id)); (void)WT_ATOMIC_SUB(chunk->refcnt, 1); flushing = ref = 0; diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index bb32e30856a..5d1ec2fabca 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -146,11 +146,10 @@ __wt_lsm_work_switch( else *ran = 1; } - __wt_lsm_manager_free_work_unit(session, entry); - return (ret); } + /* * __wt_lsm_work_bloom -- * Try to create a Bloom filter for the newest on-disk chunk that doesn't @@ -187,14 +186,22 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * recheck that the chunk still needs a Bloom filter. */ if (WT_ATOMIC_CAS(chunk->bloom_busy, 0, 1)) { - if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) + if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) { ret = __lsm_bloom_create( session, lsm_tree, chunk, (u_int)i); + /* + * Push a merge work unit if we created a + * bloom filter. + */ + WT_ERR(__wt_lsm_manager_push_entry(session, + WT_LSM_WORK_MERGE, 0, lsm_tree)); + } chunk->bloom_busy = 0; break; } } +err: __lsm_unpin_chunks(session, &cookie); __wt_free(session, cookie.chunk_array); return (ret); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index c86b294f44b..90083c9c21d 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -65,11 +65,8 @@ __lsm_worker_general_op( } } else if (entry->type == WT_LSM_WORK_DROP) WT_ERR(__wt_lsm_free_chunks(session, entry->lsm_tree)); - else if (entry->type == WT_LSM_WORK_BLOOM) { + else if (entry->type == WT_LSM_WORK_BLOOM) WT_ERR(__wt_lsm_work_bloom(session, entry->lsm_tree)); - WT_ERR(__wt_lsm_manager_push_entry( - session, WT_LSM_WORK_MERGE, 0, entry->lsm_tree)); - } *completed = 1; err: __wt_lsm_manager_free_work_unit(session, entry); @@ -98,7 +95,12 @@ __lsm_worker(void *arg) while (F_ISSET(conn, WT_CONN_SERVER_RUN)) { progress = 0; - /* Switches are always a high priority */ + /* + * Workers process the different LSM work queues. Some workers + * can handle several or all work unit types. 
So they are + * prioritized so important operations happen first. + * Switches are the highest priority. + */ while (FLD_ISSET(cookie->type, WT_LSM_WORK_SWITCH) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_SWITCH, &entry)) == 0 && @@ -108,12 +110,20 @@ __lsm_worker(void *arg) /* Flag an error if the pop failed. */ WT_ERR(ret); + /* + * Next the general operations. + */ ret = __lsm_worker_general_op(session, cookie, &ran); if (ret == EBUSY || ret == WT_NOTFOUND) ret = 0; WT_ERR(ret); progress = progress || ran; + /* + * Finally see if there is any merge work we can do. This is + * last because the earlier operations may result in adding + * merge work to the queue. + */ if (FLD_ISSET(cookie->type, WT_LSM_WORK_MERGE) && (ret = __wt_lsm_manager_pop_entry( session, WT_LSM_WORK_MERGE, &entry)) == 0 && @@ -135,7 +145,7 @@ __lsm_worker(void *arg) /* Flag an error if the pop failed. */ WT_ERR(ret); - /* Don't busy wait if there isn't any work to do. */ + /* Don't busy wait if there was any work to do. */ if (!progress) { WT_ERR( __wt_cond_wait(session, cookie->work_cond, 10000)); From f43b07032ce1cfa0e844a4365082b0e001e0a83d Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Wed, 17 Sep 2014 16:58:14 -0400 Subject: [PATCH 128/132] row-search releases "child" on error, and we were updating "child" to point to pages that were the wrong pages to release on error. The code in row-random and column-search don't require the same changes, but I made them so the code continues to look roughly the same in all three functions. 
--- src/btree/col_srch.c | 40 ++++++++++++++--------------- src/btree/row_srch.c | 60 ++++++++++++++++++++++---------------------- 2 files changed, 49 insertions(+), 51 deletions(-) diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index d367157b400..e4083e2282f 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -22,7 +22,7 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head; WT_PAGE *page; WT_PAGE_INDEX *pindex; - WT_REF *child, *parent; + WT_REF *current, *descent; uint32_t base, indx, limit; int depth; @@ -35,68 +35,66 @@ __wt_col_search(WT_SESSION_IMPL *session, * page, not a full tree. */ if (leaf != NULL) { - child = leaf; + current = leaf; goto leaf_only; } /* Search the internal pages of the tree. */ - parent = child = &btree->root; + current = &btree->root; for (depth = 2;; ++depth) { -restart: page = parent->page; +restart: page = current->page; if (page->type != WT_PAGE_COL_INT) break; - WT_ASSERT(session, parent->key.recno == page->pg_intl_recno); + WT_ASSERT(session, current->key.recno == page->pg_intl_recno); pindex = WT_INTL_INDEX_COPY(page); base = pindex->entries; - child = pindex->index[base - 1]; + descent = pindex->index[base - 1]; /* Fast path appends. */ - if (recno >= child->key.recno) + if (recno >= descent->key.recno) goto descend; /* Binary search of internal pages. */ for (base = 0, limit = pindex->entries - 1; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - child = pindex->index[indx]; + descent = pindex->index[indx]; - if (recno == child->key.recno) + if (recno == descent->key.recno) break; - if (recno < child->key.recno) + if (recno < descent->key.recno) continue; base = indx + 1; --limit; } -descend: WT_ASSERT(session, child != NULL); - - /* +descend: /* * Reference the slot used for next step down the tree. * * Base is the smallest index greater than recno and may be the * (last + 1) index. The slot for descent is the one before * base. 
*/ - if (recno != child->key.recno) { + if (recno != descent->key.recno) { /* * We don't have to correct for base == 0 because the * only way for base to be 0 is if recno is the page's * starting recno. */ WT_ASSERT(session, base > 0); - child = pindex->index[base - 1]; + descent = pindex->index[base - 1]; } /* - * Swap the parent page for the child page. If the page splits - * while we're retrieving it, restart the search in the parent + * Swap the current page for the child page. If the page splits + * while we're retrieving it, restart the search in the current * page; otherwise return on error, the swap call ensures we're * holding nothing on failure. */ - switch (ret = __wt_page_swap(session, parent, child, 0)) { + switch (ret = __wt_page_swap(session, current, descent, 0)) { case 0: - parent = child; + current = descent; break; case WT_RESTART: goto restart; @@ -110,8 +108,8 @@ descend: WT_ASSERT(session, child != NULL); btree->maximum_depth = depth; leaf_only: - page = child->page; - cbt->ref = child; + page = current->page; + cbt->ref = current; cbt->recno = recno; cbt->compare = 0; diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 268e1e0f0a7..b190aaaded5 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -144,7 +144,7 @@ __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *item; WT_PAGE *page; WT_PAGE_INDEX *pindex; - WT_REF *child, *parent; + WT_REF *current, *descent; WT_ROW *rip; size_t match, skiphigh, skiplow; uint32_t base, indx, limit; @@ -182,15 +182,15 @@ __wt_row_search(WT_SESSION_IMPL *session, * page, not a full tree. */ if (leaf != NULL) { - child = leaf; + current = leaf; goto leaf_only; } /* Search the internal pages of the tree. */ cmp = -1; - parent = child = &btree->root; + current = &btree->root; for (depth = 2;; ++depth) { -restart: page = parent->page; +restart: page = current->page; if (page->type != WT_PAGE_ROW_INT) break; @@ -201,14 +201,14 @@ restart: page = parent->page; * the root page in new trees. 
*/ if (pindex->entries == 1) { - child = pindex->index[0]; + descent = pindex->index[0]; goto descend; } /* Fast-path appends. */ if (append_check) { - child = pindex->index[pindex->entries - 1]; - __wt_ref_key(page, child, &item->data, &item->size); + descent = pindex->index[pindex->entries - 1]; + __wt_ref_key(page, descent, &item->data, &item->size); WT_ERR(__wt_compare( session, collator, srch_key, item, &cmp)); if (cmp >= 0) @@ -240,9 +240,9 @@ restart: page = parent->page; if (collator == NULL) for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - child = pindex->index[indx]; + descent = pindex->index[indx]; __wt_ref_key( - page, child, &item->data, &item->size); + page, descent, &item->data, &item->size); match = WT_MIN(skiplow, skiphigh); cmp = __wt_lex_compare_skip( @@ -259,9 +259,9 @@ restart: page = parent->page; else for (; limit != 0; limit >>= 1) { indx = base + (limit >> 1); - child = pindex->index[indx]; + descent = pindex->index[indx]; __wt_ref_key( - page, child, &item->data, &item->size); + page, descent, &item->data, &item->size); WT_ERR(__wt_compare( session, collator, srch_key, item, &cmp)); @@ -273,11 +273,11 @@ restart: page = parent->page; } /* - * Set the slot to descend the tree: child is already set if + * Set the slot to descend the tree: descent is already set if * there was an exact match on the page, otherwise, base is * the smallest index greater than key, possibly (last + 1). */ - child = pindex->index[base - 1]; + descent = pindex->index[base - 1]; /* * If we end up somewhere other than the last slot, it's not a @@ -287,14 +287,14 @@ restart: page = parent->page; descend_right = 0; descend: /* - * Swap the parent page for the child page. If the page splits - * while we're retrieving it, restart the search in the parent + * Swap the current page for the child page. 
If the page splits + * while we're retrieving it, restart the search in the current * page; otherwise return on error, the swap call ensures we're * holding nothing on failure. */ - switch (ret = __wt_page_swap(session, parent, child, 0)) { + switch (ret = __wt_page_swap(session, current, descent, 0)) { case 0: - parent = child; + current = descent; break; case WT_RESTART: skiphigh = skiplow = 0; @@ -309,8 +309,8 @@ descend: /* btree->maximum_depth = depth; leaf_only: - page = child->page; - cbt->ref = child; + page = current->page; + cbt->ref = current; /* * In the case of a right-side tree descent during an insert, do a fast @@ -456,7 +456,7 @@ leaf_match: cbt->compare = 0; return (0); err: if (leaf != NULL) - WT_TRET(__wt_page_release(session, child, 0)); + WT_TRET(__wt_page_release(session, current, 0)); return (ret); } @@ -472,7 +472,7 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_INSERT *p, *t; WT_PAGE *page; WT_PAGE_INDEX *pindex; - WT_REF *child, *parent; + WT_REF *current, *descent; btree = S2BT(session); @@ -480,22 +480,22 @@ __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) restart: /* Walk the internal pages of the tree. */ - parent = child = &btree->root; + current = &btree->root; for (;;) { - page = parent->page; + page = current->page; if (page->type != WT_PAGE_ROW_INT) break; pindex = WT_INTL_INDEX_COPY(page); - child = pindex->index[ + descent = pindex->index[ __wt_random(session->rnd) % pindex->entries]; /* * Swap the parent page for the child page; return on error, * the swap function ensures we're holding nothing on failure. */ - if ((ret = __wt_page_swap(session, parent, child, 0)) == 0) { - parent = child; + if ((ret = __wt_page_swap(session, current, descent, 0)) == 0) { + current = descent; continue; } /* @@ -504,7 +504,7 @@ restart: * it and restart the search from the top of the tree. 
*/ if (ret == WT_RESTART && - (ret = __wt_page_release(session, parent, 0)) == 0) + (ret = __wt_page_release(session, current, 0)) == 0) goto restart; return (ret); } @@ -518,7 +518,7 @@ restart: * or a tree with just one big page, that's not going to work, * check for that. */ - cbt->ref = child; + cbt->ref = current; cbt->compare = 0; pindex = WT_INTL_INDEX_COPY(btree->root.page); cbt->slot = pindex->entries < 2 ? @@ -542,12 +542,12 @@ restart: break; t = WT_SKIP_NEXT(t); } - cbt->ref = child; + cbt->ref = current; cbt->compare = 0; cbt->ins = t; return (0); -err: WT_TRET(__wt_page_release(session, child, 0)); +err: WT_TRET(__wt_page_release(session, current, 0)); return (ret); } From 47037bd4398c385baa3c1fad70b010e904cc3b41 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 18 Sep 2014 12:03:20 -0400 Subject: [PATCH 129/132] Check tree's queue length and remove global assertion. #1230 --- dist/s_string.ok | 3 +++ src/include/lsm.h | 2 +- src/lsm/lsm_manager.c | 13 +++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dist/s_string.ok b/dist/s_string.ok index 9b1f6ffd9b1..4b381b68eed 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -512,6 +512,7 @@ fileop fileops filesize filesystem +fillms firstfit fixup flcs @@ -690,6 +691,7 @@ namespace namespaces nbits nbsp +nchunks nclr nd negint @@ -762,6 +764,7 @@ ps pse psp pthread +pushms putK putV pv diff --git a/src/include/lsm.h b/src/include/lsm.h index 12b31707458..972dc71b106 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -123,7 +123,6 @@ struct __wt_lsm_manager { WT_SPINLOCK switch_lock; /* Lock for switch queue */ WT_SPINLOCK app_lock; /* Lock for application queue */ WT_SPINLOCK manager_lock; /* Lock for manager queue */ -#define LSM_MAX_WORK_QUEUE_LEN 1000 int64_t switch_len; /* Length of switch queue */ int64_t app_len; /* Length of application queue */ int64_t manager_len; /* Length of manager queue */ @@ -146,6 +145,7 @@ struct __wt_lsm_tree { const char 
*collator_name; int refcnt; /* Number of users of the tree */ +#define LSM_TREE_MAX_QUEUE 100 int queue_ref; WT_RWLOCK *rwlock; TAILQ_ENTRY(__wt_lsm_tree) q; diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 4f5d3b7a0ff..dc07df52c0b 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -196,7 +196,7 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) * been created by now. Use 10 seconds as a default if we don't have an * estimate. */ - if (lsm_tree->nchunks > lsm_tree->merge_min) + if (lsm_tree->nchunks > 1) chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms); else @@ -341,6 +341,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) * more. */ if (lsm_tree->nchunks > 1 && + lsm_tree->queue_ref < LSM_TREE_MAX_QUEUE && (!lsm_tree->modified || lsm_tree->queue_ref == 0 || (lsm_tree->merge_aggressiveness > 3 && @@ -354,6 +355,15 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "MGR %s: queue %d mod %d nchunks %d" + " flags 0x%x aggressive %d pushms %" PRIu64 + " fillms %" PRIu64, + lsm_tree->name, lsm_tree->queue_ref, + lsm_tree->modified, lsm_tree->nchunks, + lsm_tree->flags, + lsm_tree->merge_aggressiveness, + pushms, fillms)); WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_MERGE, 0, lsm_tree)); } @@ -511,7 +521,6 @@ __wt_lsm_manager_pop_entry( __wt_spin_lock(session, qlock); \ TAILQ_INSERT_TAIL((qh), entry, q); \ (qlen)++; \ - WT_ASSERT(session, (qlen) <= LSM_MAX_WORK_QUEUE_LEN); \ __wt_spin_unlock(session, qlock); \ } while (0) From fdb23f23c15202fa6af6318a2c67fd7d9f1f96cf Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 18 Sep 2014 14:00:18 -0400 Subject: [PATCH 130/132] Remove manager struct lengths and add in per-queue lengths to stats. Restore some of the manager conditional. 
My changes broke normal switching and its intent is covered in the aggressiveness changes. #1230 --- dist/stat_data.py | 6 +++++ src/include/lsm.h | 3 --- src/include/stat.h | 3 +++ src/include/wiredtiger.in | 50 ++++++++++++++++++++++----------------- src/lsm/lsm_manager.c | 25 ++++++++++---------- src/support/stat.c | 5 ++++ tools/stat_data.py | 3 +++ 7 files changed, 57 insertions(+), 38 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index 88eff5f1156..8a8cfd9a4c1 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -180,6 +180,12 @@ connection_stats = [ 'sleep for LSM checkpoint throttle'), Stat('lsm_merge_throttle', 'sleep for LSM merge throttle'), Stat('lsm_rows_merged', 'rows merged in an LSM tree'), + Stat('lsm_work_queue_app', 'LSM App work units currently queued', + 'no_clear,no_scale'), + Stat('lsm_work_queue_manager', 'LSM Merge work units currently queued', + 'no_clear,no_scale'), + Stat('lsm_work_queue_switch', 'LSM Switch work units currently queued', + 'no_clear,no_scale'), Stat('lsm_work_units_created', 'LSM tree maintenance operations scheduled'), Stat('lsm_work_units_discarded', diff --git a/src/include/lsm.h b/src/include/lsm.h index 972dc71b106..666acddc124 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -123,9 +123,6 @@ struct __wt_lsm_manager { WT_SPINLOCK switch_lock; /* Lock for switch queue */ WT_SPINLOCK app_lock; /* Lock for application queue */ WT_SPINLOCK manager_lock; /* Lock for manager queue */ - int64_t switch_len; /* Length of switch queue */ - int64_t app_len; /* Length of application queue */ - int64_t manager_len; /* Length of manager queue */ WT_CONDVAR *work_cond; /* Used to notify worker of activity */ uint32_t lsm_workers; /* Current number of LSM workers */ uint32_t lsm_workers_max; diff --git a/src/include/stat.h b/src/include/stat.h index e2eedd76632..14c78715e52 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -207,6 +207,9 @@ struct __wt_connection_stats { WT_STATS
lsm_checkpoint_throttle; WT_STATS lsm_merge_throttle; WT_STATS lsm_rows_merged; + WT_STATS lsm_work_queue_app; + WT_STATS lsm_work_queue_manager; + WT_STATS lsm_work_queue_switch; WT_STATS lsm_work_units_created; WT_STATS lsm_work_units_discarded; WT_STATS lsm_work_units_done; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 46fb5acc5ee..f4beb2d9db0 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3167,50 +3167,56 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LSM_MERGE_THROTTLE 1078 /*! rows merged in an LSM tree */ #define WT_STAT_CONN_LSM_ROWS_MERGED 1079 +/*! LSM App work units currently queued */ +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1080 +/*! LSM Merge work units currently queued */ +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1081 +/*! LSM Switch work units currently queued */ +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1082 /*! LSM tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1080 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1083 /*! LSM tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1081 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1084 /*! LSM tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1082 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1085 /*! memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1083 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1086 /*! memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1084 +#define WT_STAT_CONN_MEMORY_FREE 1087 /*! memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1085 +#define WT_STAT_CONN_MEMORY_GROW 1088 /*! total read I/Os */ -#define WT_STAT_CONN_READ_IO 1086 +#define WT_STAT_CONN_READ_IO 1089 /*! page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1087 +#define WT_STAT_CONN_REC_PAGES 1090 /*! 
page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1088 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1091 /*! reconciliation failed because an update could not be included */ -#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1089 +#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1092 /*! split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1090 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1093 /*! split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1091 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1094 /*! pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1092 +#define WT_STAT_CONN_RWLOCK_READ 1095 /*! pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1093 +#define WT_STAT_CONN_RWLOCK_WRITE 1096 /*! open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1094 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1097 /*! transactions */ -#define WT_STAT_CONN_TXN_BEGIN 1095 +#define WT_STAT_CONN_TXN_BEGIN 1098 /*! transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1096 +#define WT_STAT_CONN_TXN_CHECKPOINT 1099 /*! transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1097 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1100 /*! transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1098 +#define WT_STAT_CONN_TXN_COMMIT 1101 /*! transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1099 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1102 /*! transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1100 +#define WT_STAT_CONN_TXN_ROLLBACK 1103 /*! total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1101 +#define WT_STAT_CONN_WRITE_IO 1104 /*! 
* @} diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index dc07df52c0b..c4b6b46e36f 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -340,10 +340,10 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) * to how often new chunks are being created add some * more. */ - if (lsm_tree->nchunks > 1 && - lsm_tree->queue_ref < LSM_TREE_MAX_QUEUE && - (!lsm_tree->modified || - lsm_tree->queue_ref == 0 || + if (lsm_tree->queue_ref < LSM_TREE_MAX_QUEUE && + ((!lsm_tree->modified && lsm_tree->nchunks > 1) || + (lsm_tree->queue_ref == 0 && + lsm_tree->nchunks > 1) || (lsm_tree->merge_aggressiveness > 3 && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) || pushms > fillms)) { @@ -470,8 +470,7 @@ __wt_lsm_manager_clear_tree( TAILQ_FOREACH(entry, (qh), q) { \ if (FLD_ISSET(type, entry->type)) { \ TAILQ_REMOVE(qh, entry, q); \ - (qlen)--; \ - WT_ASSERT(session, (qlen) >= 0); \ + WT_STAT_FAST_CONN_DECR(session, qlen); \ break; \ } \ } \ @@ -499,13 +498,13 @@ __wt_lsm_manager_pop_entry( */ if (type == WT_LSM_WORK_SWITCH) LSM_POP_ENTRY(&manager->switchqh, - &manager->switch_lock, manager->switch_len); + &manager->switch_lock, lsm_work_queue_switch); else if (type == WT_LSM_WORK_MERGE) LSM_POP_ENTRY(&manager->managerqh, - &manager->manager_lock, manager->manager_len); + &manager->manager_lock, lsm_work_queue_manager); else LSM_POP_ENTRY(&manager->appqh, - &manager->app_lock, manager->app_len); + &manager->app_lock, lsm_work_queue_app); if (entry != NULL) WT_STAT_FAST_CONN_INCR(session, lsm_work_units_done); *entryp = entry; @@ -520,7 +519,7 @@ __wt_lsm_manager_pop_entry( #define LSM_PUSH_ENTRY(qh, qlock, qlen) do { \ __wt_spin_lock(session, qlock); \ TAILQ_INSERT_TAIL((qh), entry, q); \ - (qlen)++; \ + WT_STAT_FAST_CONN_INCR(session, qlen); \ __wt_spin_unlock(session, qlock); \ } while (0) @@ -548,13 +547,13 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, if (type == WT_LSM_WORK_SWITCH) LSM_PUSH_ENTRY(&manager->switchqh, - &manager->switch_lock, 
manager->switch_len); + &manager->switch_lock, lsm_work_queue_switch); else if (type == WT_LSM_WORK_MERGE) LSM_PUSH_ENTRY(&manager->managerqh, - &manager->manager_lock, manager->manager_len); + &manager->manager_lock, lsm_work_queue_manager); else LSM_PUSH_ENTRY(&manager->appqh, - &manager->app_lock, manager->app_len); + &manager->app_lock, lsm_work_queue_app); WT_RET(__wt_cond_signal(session, manager->work_cond)); diff --git a/src/support/stat.c b/src/support/stat.c index 482e3d3923b..6e20c1bee63 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -412,6 +412,11 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "sleep for LSM checkpoint throttle"; stats->lsm_merge_throttle.desc = "sleep for LSM merge throttle"; stats->lsm_rows_merged.desc = "rows merged in an LSM tree"; + stats->lsm_work_queue_app.desc = "LSM App work units currently queued"; + stats->lsm_work_queue_manager.desc = + "LSM Merge work units currently queued"; + stats->lsm_work_queue_switch.desc = + "LSM Switch work units currently queued"; stats->lsm_work_units_created.desc = "LSM tree maintenance operations scheduled"; stats->lsm_work_units_discarded.desc = diff --git a/tools/stat_data.py b/tools/stat_data.py index 60319c6a846..2a7fbfbe884 100644 --- a/tools/stat_data.py +++ b/tools/stat_data.py @@ -9,6 +9,9 @@ no_scale_per_second_list = [ 'cache: pages currently held in the cache', 'files currently open', 'log: total log buffer size', + 'LSM App work units currently queued', + 'LSM Merge work units currently queued', + 'LSM Switch work units currently queued', 'split bytes currently awaiting free', 'split objects currently awaiting free', 'open cursor count', From 917a404acbec0e9ebcfea5e0645ce3c428ea86d4 Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 18 Sep 2014 15:13:09 -0400 Subject: [PATCH 131/132] Add stat to count number of times we reach queue_ref maximum. 
#1230 --- dist/stat_data.py | 1 + src/include/stat.h | 1 + src/include/wiredtiger.in | 48 ++++++++++++++++++++------------------- src/lsm/lsm_manager.c | 16 +++++++++---- src/support/stat.c | 2 ++ 5 files changed, 40 insertions(+), 28 deletions(-) diff --git a/dist/stat_data.py b/dist/stat_data.py index 8a8cfd9a4c1..e5cf75cb59a 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -184,6 +184,7 @@ connection_stats = [ 'no_clear,no_scale'), Stat('lsm_work_queue_manager', 'LSM Merge work units currently queued', 'no_clear,no_scale'), + Stat('lsm_work_queue_max', 'LSM tree queue hit maximum'), Stat('lsm_work_queue_switch', 'LSM Switch work units currently queued', 'no_clear,no_scale'), Stat('lsm_work_units_created', diff --git a/src/include/stat.h b/src/include/stat.h index 14c78715e52..55332e34250 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -209,6 +209,7 @@ struct __wt_connection_stats { WT_STATS lsm_rows_merged; WT_STATS lsm_work_queue_app; WT_STATS lsm_work_queue_manager; + WT_STATS lsm_work_queue_max; WT_STATS lsm_work_queue_switch; WT_STATS lsm_work_units_created; WT_STATS lsm_work_units_discarded; diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index f4beb2d9db0..0a375d1bbc7 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -3171,52 +3171,54 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1080 /*! LSM Merge work units currently queued */ #define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1081 +/*! LSM tree queue hit maximum */ +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1082 /*! LSM Switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1082 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1083 /*! LSM tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1083 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1084 /*! 
LSM tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1084 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1085 /*! LSM tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1085 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1086 /*! memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1086 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1087 /*! memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1087 +#define WT_STAT_CONN_MEMORY_FREE 1088 /*! memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1088 +#define WT_STAT_CONN_MEMORY_GROW 1089 /*! total read I/Os */ -#define WT_STAT_CONN_READ_IO 1089 +#define WT_STAT_CONN_READ_IO 1090 /*! page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1090 +#define WT_STAT_CONN_REC_PAGES 1091 /*! page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1091 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1092 /*! reconciliation failed because an update could not be included */ -#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1092 +#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1093 /*! split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1093 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1094 /*! split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1094 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1095 /*! pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1095 +#define WT_STAT_CONN_RWLOCK_READ 1096 /*! pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1096 +#define WT_STAT_CONN_RWLOCK_WRITE 1097 /*! open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1097 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1098 /*! transactions */ -#define WT_STAT_CONN_TXN_BEGIN 1098 +#define WT_STAT_CONN_TXN_BEGIN 1099 /*! transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1099 +#define WT_STAT_CONN_TXN_CHECKPOINT 1100 /*! 
transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1100 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1101 /*! transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1101 +#define WT_STAT_CONN_TXN_COMMIT 1102 /*! transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1102 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1103 /*! transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1103 +#define WT_STAT_CONN_TXN_ROLLBACK 1104 /*! total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1104 +#define WT_STAT_CONN_WRITE_IO 1105 /*! * @} diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index c4b6b46e36f..9dd8d7754e6 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -217,7 +217,7 @@ __lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) /* * __lsm_manager_worker_setup -- - * Do setup owned by the LSM manager thread includes starting the worker + * Do setup owned by the LSM manager thread including starting the worker * threads. */ static int @@ -231,7 +231,10 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; WT_ASSERT(session, manager->lsm_workers == 1); - + /* + * The LSM manager is worker[0]. The switch thread is worker[1]. + * Setup and start the switch/drop worker explicitly. + */ worker_args = &manager->lsm_worker_cookies[1]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; @@ -340,13 +343,16 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) * to how often new chunks are being created add some * more. 
*/ - if (lsm_tree->queue_ref < LSM_TREE_MAX_QUEUE && - ((!lsm_tree->modified && lsm_tree->nchunks > 1) || + if (lsm_tree->queue_ref >= LSM_TREE_MAX_QUEUE) + WT_STAT_FAST_CONN_INCR(session, + lsm_work_queue_max); + else if ((!lsm_tree->modified && + lsm_tree->nchunks > 1) || (lsm_tree->queue_ref == 0 && lsm_tree->nchunks > 1) || (lsm_tree->merge_aggressiveness > 3 && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) || - pushms > fillms)) { + pushms > fillms) { WT_RET(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_SWITCH, 0, lsm_tree)); WT_RET(__wt_lsm_manager_push_entry( diff --git a/src/support/stat.c b/src/support/stat.c index 6e20c1bee63..edf64b3f19b 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -415,6 +415,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) stats->lsm_work_queue_app.desc = "LSM App work units currently queued"; stats->lsm_work_queue_manager.desc = "LSM Merge work units currently queued"; + stats->lsm_work_queue_max.desc = "LSM tree queue hit maximum"; stats->lsm_work_queue_switch.desc = "LSM Switch work units currently queued"; stats->lsm_work_units_created.desc = @@ -531,6 +532,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->lsm_checkpoint_throttle.v = 0; stats->lsm_merge_throttle.v = 0; stats->lsm_rows_merged.v = 0; + stats->lsm_work_queue_max.v = 0; stats->lsm_work_units_created.v = 0; stats->lsm_work_units_discarded.v = 0; stats->lsm_work_units_done.v = 0; From 7a08b41cf692273f957a03aba36a1f020c2df9ad Mon Sep 17 00:00:00 2001 From: Susan LoVerso Date: Thu, 18 Sep 2014 15:46:05 -0400 Subject: [PATCH 132/132] Comments, cleanup and fix an error path. 
#1208 --- src/lsm/lsm_manager.c | 2 +- src/lsm/lsm_work_unit.c | 17 ++++++++++++----- src/lsm/lsm_worker.c | 16 +++++++++------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 9dd8d7754e6..fce030459a3 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -238,7 +238,7 @@ __lsm_manager_worker_setup(WT_SESSION_IMPL *session) worker_args = &manager->lsm_worker_cookies[1]; worker_args->work_cond = manager->work_cond; worker_args->id = manager->lsm_workers++; - worker_args->type = WT_LSM_WORK_SWITCH | WT_LSM_WORK_DROP; + worker_args->type = WT_LSM_WORK_DROP | WT_LSM_WORK_SWITCH; /* Start the switch thread. */ WT_RET(__wt_lsm_worker_start(session, worker_args)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 5d1ec2fabca..050c364f5d8 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -161,13 +161,14 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_LSM_WORKER_COOKIE cookie; - u_int i; + u_int i, merge; WT_CLEAR(cookie); WT_RET(__lsm_copy_chunks(session, lsm_tree, &cookie, 0)); /* Create bloom filters in all checkpointed chunks. */ + merge = 0; for (i = 0; i < cookie.nchunks; i++) { chunk = cookie.chunk_array[i]; @@ -190,16 +191,22 @@ __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) ret = __lsm_bloom_create( session, lsm_tree, chunk, (u_int)i); /* - * Push a merge work unit if we created a - * bloom filter. + * Record if we were successful so that we can + * later push a merge work unit. */ - WT_ERR(__wt_lsm_manager_push_entry(session, - WT_LSM_WORK_MERGE, 0, lsm_tree)); + if (ret == 0) + merge = 1; } chunk->bloom_busy = 0; break; } } + /* + * If we created any bloom filters, we push a merge work unit now. 
+ */ + if (merge) + WT_ERR(__wt_lsm_manager_push_entry( + session, WT_LSM_WORK_MERGE, 0, lsm_tree)); err: __lsm_unpin_chunks(session, &cookie); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 90083c9c21d..3b662e2ebee 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -13,7 +13,7 @@ static void * __lsm_worker(void *); /* * __wt_lsm_worker_start -- - * A wrapper around the LSM worker thread start + * A wrapper around the LSM worker thread start. */ int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) @@ -23,7 +23,7 @@ __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) /* * __lsm_worker_general_op -- - * Execute a single bloom, drop or flush work unit + * Execute a single bloom, drop or flush work unit. */ static int __lsm_worker_general_op( @@ -35,10 +35,12 @@ __lsm_worker_general_op( int force; *completed = 0; - if (!FLD_ISSET(cookie->type, WT_LSM_WORK_FLUSH) && - !FLD_ISSET(cookie->type, WT_LSM_WORK_DROP) && - !FLD_ISSET(cookie->type, WT_LSM_WORK_BLOOM)) - return (WT_NOTFOUND); + /* + * Return if this thread cannot process a bloom, drop or flush. + */ + if (!FLD_ISSET(cookie->type, + WT_LSM_WORK_BLOOM | WT_LSM_WORK_DROP | WT_LSM_WORK_FLUSH)) + return (WT_NOTFOUND); if ((ret = __wt_lsm_manager_pop_entry(session, cookie->type, &entry)) != 0 || entry == NULL) @@ -97,7 +99,7 @@ __lsm_worker(void *arg) /* * Workers process the different LSM work queues. Some workers - * can handle several or all work unit types. So they are + * can handle several or all work unit types. So the code is * prioritized so important operations happen first. * Switches are the highest priority. */