Compare commits
106 Commits
mongodb-3.
...
mongodb-3.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6da948fe3d | ||
|
|
acd71ffa0d | ||
|
|
30590abdf0 | ||
|
|
10da3a325c | ||
|
|
f5c08e2b5f | ||
|
|
d48181f6f4 | ||
|
|
040e3d6f76 | ||
|
|
187707a5c1 | ||
|
|
2e2d5fe23b | ||
|
|
b060bd4cc0 | ||
|
|
b11ed312ce | ||
|
|
9cf2f89d6d | ||
|
|
bb18c43915 | ||
|
|
911c940ada | ||
|
|
7d45541f3a | ||
|
|
fb6e667667 | ||
|
|
dddca65606 | ||
|
|
2969c02ab9 | ||
|
|
82323cf72f | ||
|
|
859c8eda1e | ||
|
|
24cd107c1d | ||
|
|
3dc8860451 | ||
|
|
9383fc98ee | ||
|
|
307e63aac7 | ||
|
|
6f81f19793 | ||
|
|
848e5f5c0b | ||
|
|
8b7110bfac | ||
|
|
f4954f6592 | ||
|
|
dd2a33849a | ||
|
|
a63e21b838 | ||
|
|
30e49acc90 | ||
|
|
063dbdfe45 | ||
|
|
9ee39b8aea | ||
|
|
d68800d0e9 | ||
|
|
ded2149b2c | ||
|
|
0f36f40c37 | ||
|
|
a6a64e986b | ||
|
|
234b68b116 | ||
|
|
5d215904c3 | ||
|
|
18879587af | ||
|
|
6bfcb1ca5b | ||
|
|
71c0588a77 | ||
|
|
58765850aa | ||
|
|
30d327f810 | ||
|
|
88b898e7cb | ||
|
|
7ea2631de2 | ||
|
|
039fe06082 | ||
|
|
43e885a0f9 | ||
|
|
5cdd3e320c | ||
|
|
563b7823f7 | ||
|
|
5e3a56f0ab | ||
|
|
bc929dbcf1 | ||
|
|
07966a492a | ||
|
|
67e412d4c5 | ||
|
|
3c2ad56b50 | ||
|
|
b1768d0d9f | ||
|
|
2893117baa | ||
|
|
4380cec93d | ||
|
|
21b5f9951e | ||
|
|
decd9166cc | ||
|
|
d835a0c0a8 | ||
|
|
48e1343e40 | ||
|
|
eb838c7f12 | ||
|
|
a6957512a4 | ||
|
|
197eef00fd | ||
|
|
7a4f3259b4 | ||
|
|
8326df6b76 | ||
|
|
b65381f64c | ||
|
|
0019262fed | ||
|
|
4d72349b8a | ||
|
|
4898aa408f | ||
|
|
9d375e3416 | ||
|
|
d9ec1ff8ec | ||
|
|
465dca8b46 | ||
|
|
f95877af13 | ||
|
|
62c1a7aa36 | ||
|
|
0dc3f20df6 | ||
|
|
0537648e03 | ||
|
|
3c856645c8 | ||
|
|
10208e8284 | ||
|
|
16e3e48d98 | ||
|
|
5205bb1f0f | ||
|
|
dca63120b7 | ||
|
|
0cccab30c0 | ||
|
|
578a856c19 | ||
|
|
a85c5cda41 | ||
|
|
6da2dc175b | ||
|
|
7ffa315e39 | ||
|
|
26d1ad271f | ||
|
|
fdedd3621c | ||
|
|
4187f419f8 | ||
|
|
42823c9682 | ||
|
|
fbaf1cf4f5 | ||
|
|
3d845c98cb | ||
|
|
1d2fe8a145 | ||
|
|
bdaaaec87d | ||
|
|
35cc116acd | ||
|
|
cbe0fad3e9 | ||
|
|
4f9aa1c548 | ||
|
|
1f44c05f91 | ||
|
|
e31aa8cf29 | ||
|
|
c90bc747e1 | ||
|
|
2c1b7aa80b | ||
|
|
41762ae13c | ||
|
|
f7691f63a6 | ||
|
|
9be5497753 |
20
bench/wtperf/runners/checkpoint_schema_race.wtperf
Normal file
20
bench/wtperf/runners/checkpoint_schema_race.wtperf
Normal file
@@ -0,0 +1,20 @@
|
||||
# Check create and drop behavior concurrent with checkpoints (WT-2798).
|
||||
# Setup a multiple tables and a cache size large enough that checkpoints can
|
||||
# take a long time.
|
||||
conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)"
|
||||
table_config="leaf_page_max=4k,internal_page_max=16k,type=file"
|
||||
icount=10000000
|
||||
table_count=100
|
||||
table_count_idle=100
|
||||
# Turn on create/drop of idle tables, but don't worry if individual operations
|
||||
# take a long time.
|
||||
idle_table_cycle=120
|
||||
populate_threads=5
|
||||
checkpoint_threads=0
|
||||
report_interval=5
|
||||
# 100 million
|
||||
random_range=10000000
|
||||
run_time=300
|
||||
# Setup a workload that dirties a lot of the cache
|
||||
threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1))
|
||||
value_sz=500
|
||||
3
dist/s_prototypes
vendored
3
dist/s_prototypes
vendored
@@ -42,9 +42,6 @@ proto()
|
||||
-e '# Add the warn_unused_result attribute to any external' \
|
||||
-e '# functions that return an int.' \
|
||||
-e '/^extern int /s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result))/' \
|
||||
-e '# Add the hidden attribute to any external functions without' \
|
||||
-e '# an explicit visibility.' \
|
||||
-e '/visibility/!s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")))/' \
|
||||
-e 's/$/;/' \
|
||||
-e p < $1
|
||||
}
|
||||
|
||||
@@ -440,7 +440,7 @@ __wt_async_destroy(WT_SESSION_IMPL *session)
|
||||
session, async->worker_tids[i]));
|
||||
async->worker_tids[i] = 0;
|
||||
}
|
||||
WT_TRET(__wt_cond_destroy(session, &async->flush_cond));
|
||||
__wt_cond_destroy(session, &async->flush_cond);
|
||||
|
||||
/* Close the server threads' sessions. */
|
||||
for (i = 0; i < conn->async_workers; i++)
|
||||
|
||||
@@ -60,7 +60,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
|
||||
*/
|
||||
if (mod->rec_result == WT_PM_REC_REPLACE ||
|
||||
mod->rec_result == WT_PM_REC_MULTIBLOCK)
|
||||
__wt_writelock(session, &page->page_lock);
|
||||
WT_PAGE_LOCK(session, page);
|
||||
|
||||
if (mod->rec_result == WT_PM_REC_REPLACE)
|
||||
ret = bm->compact_page_skip(bm, session,
|
||||
@@ -80,7 +80,7 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
|
||||
|
||||
if (mod->rec_result == WT_PM_REC_REPLACE ||
|
||||
mod->rec_result == WT_PM_REC_MULTIBLOCK)
|
||||
__wt_writeunlock(session, &page->page_lock);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -369,7 +369,7 @@ __cursor_col_modify(
|
||||
WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool is_remove)
|
||||
{
|
||||
return (__wt_col_modify(session,
|
||||
cbt, cbt->iface.recno, &cbt->iface.value, NULL, is_remove));
|
||||
cbt, cbt->iface.recno, &cbt->iface.value, NULL, is_remove, false));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -381,7 +381,7 @@ __cursor_row_modify(
|
||||
WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool is_remove)
|
||||
{
|
||||
return (__wt_row_modify(session,
|
||||
cbt, &cbt->iface.key, &cbt->iface.value, NULL, is_remove));
|
||||
cbt, &cbt->iface.key, &cbt->iface.value, NULL, is_remove, false));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -689,8 +689,6 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
|
||||
WT_RET(ds->f(ds, ", entries %" PRIu32, entries));
|
||||
WT_RET(ds->f(ds,
|
||||
", %s", __wt_page_is_modified(page) ? "dirty" : "clean"));
|
||||
WT_RET(ds->f(ds, ", %s", __wt_rwlock_islocked(
|
||||
session, &page->page_lock) ? "locked" : "unlocked"));
|
||||
|
||||
if (F_ISSET_ATOMIC(page, WT_PAGE_BUILD_KEYS))
|
||||
WT_RET(ds->f(ds, ", keys-built"));
|
||||
|
||||
@@ -98,7 +98,6 @@ __page_out_int(WT_SESSION_IMPL *session, WT_PAGE **pagep, bool rewrite)
|
||||
*/
|
||||
WT_ASSERT(session, !__wt_page_is_modified(page));
|
||||
WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
|
||||
WT_ASSERT(session, !__wt_rwlock_islocked(session, &page->page_lock));
|
||||
|
||||
/*
|
||||
* If a root page split, there may be one or more pages linked from the
|
||||
@@ -254,6 +253,7 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
__wt_ovfl_discard_free(session, page);
|
||||
|
||||
__wt_free(session, page->modify->ovfl_track);
|
||||
__wt_spin_destroy(session, &page->modify->page_lock);
|
||||
|
||||
__wt_free(session, page->modify);
|
||||
}
|
||||
|
||||
@@ -444,7 +444,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
|
||||
}
|
||||
|
||||
/* Initialize locks. */
|
||||
__wt_rwlock_init(session, &btree->ovfl_lock);
|
||||
WT_RET(__wt_rwlock_init(session, &btree->ovfl_lock));
|
||||
WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
|
||||
|
||||
btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */
|
||||
|
||||
@@ -90,7 +90,7 @@ __col_instantiate(WT_SESSION_IMPL *session,
|
||||
{
|
||||
/* Search the page and add updates. */
|
||||
WT_RET(__wt_col_search(session, recno, ref, cbt));
|
||||
WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false));
|
||||
WT_RET(__wt_col_modify(session, cbt, recno, NULL, upd, false, false));
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -104,7 +104,7 @@ __row_instantiate(WT_SESSION_IMPL *session,
|
||||
{
|
||||
/* Search the page and add updates. */
|
||||
WT_RET(__wt_row_search(session, key, ref, cbt, true));
|
||||
WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false));
|
||||
WT_RET(__wt_row_modify(session, cbt, key, NULL, upd, false, false));
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
@@ -1300,13 +1300,19 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
|
||||
for (;;) {
|
||||
parent = ref->home;
|
||||
|
||||
/*
|
||||
* The page will be marked dirty, and we can only lock a page
|
||||
* with a modify structure.
|
||||
*/
|
||||
WT_RET(__wt_page_modify_init(session, parent));
|
||||
|
||||
if (trylock)
|
||||
WT_RET(__wt_try_writelock(session, &parent->page_lock));
|
||||
WT_RET(WT_PAGE_TRYLOCK(session, parent));
|
||||
else
|
||||
__wt_writelock(session, &parent->page_lock);
|
||||
WT_PAGE_LOCK(session, parent);
|
||||
if (parent == ref->home)
|
||||
break;
|
||||
__wt_writeunlock(session, &parent->page_lock);
|
||||
WT_PAGE_UNLOCK(session, parent);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1329,7 +1335,7 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
|
||||
*parentp = parent;
|
||||
return (0);
|
||||
|
||||
err: __wt_writeunlock(session, &parent->page_lock);
|
||||
err: WT_PAGE_UNLOCK(session, parent);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -1345,7 +1351,7 @@ __split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard)
|
||||
if (hazard)
|
||||
ret = __wt_hazard_clear(session, parent->pg_intl_parent_ref);
|
||||
|
||||
__wt_writeunlock(session, &parent->page_lock);
|
||||
WT_PAGE_UNLOCK(session, parent);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -1559,7 +1565,7 @@ __split_multi_inmem(
|
||||
|
||||
/* Apply the modification. */
|
||||
WT_ERR(__wt_col_modify(
|
||||
session, &cbt, recno, NULL, upd, false));
|
||||
session, &cbt, recno, NULL, upd, false, true));
|
||||
break;
|
||||
case WT_PAGE_ROW_LEAF:
|
||||
/* Build a key. */
|
||||
@@ -1581,7 +1587,7 @@ __split_multi_inmem(
|
||||
|
||||
/* Apply the modification. */
|
||||
WT_ERR(__wt_row_modify(
|
||||
session, &cbt, key, NULL, upd, false));
|
||||
session, &cbt, key, NULL, upd, false, true));
|
||||
break;
|
||||
WT_ILLEGAL_VALUE_ERR(session);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,8 @@ static int __col_insert_alloc(
|
||||
*/
|
||||
int
|
||||
__wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove)
|
||||
uint64_t recno, WT_ITEM *value,
|
||||
WT_UPDATE *upd_arg, bool is_remove, bool exclusive)
|
||||
{
|
||||
WT_BTREE *btree;
|
||||
WT_DECL_RET;
|
||||
@@ -103,7 +104,7 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
|
||||
/* Serialize the update. */
|
||||
WT_ERR(__wt_update_serial(
|
||||
session, page, &cbt->ins->upd, &upd, upd_size));
|
||||
session, page, &cbt->ins->upd, &upd, upd_size, false));
|
||||
} else {
|
||||
/* Allocate the append/update list reference as necessary. */
|
||||
if (append) {
|
||||
@@ -185,11 +186,11 @@ __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
if (append)
|
||||
WT_ERR(__wt_col_append_serial(
|
||||
session, page, cbt->ins_head, cbt->ins_stack,
|
||||
&ins, ins_size, &cbt->recno, skipdepth));
|
||||
&ins, ins_size, &cbt->recno, skipdepth, exclusive));
|
||||
else
|
||||
WT_ERR(__wt_insert_serial(
|
||||
session, page, cbt->ins_head, cbt->ins_stack,
|
||||
&ins, ins_size, skipdepth));
|
||||
&ins, ins_size, skipdepth, exclusive));
|
||||
}
|
||||
|
||||
/* If the update was successful, add it to the in-memory log. */
|
||||
|
||||
@@ -15,18 +15,13 @@
|
||||
int
|
||||
__wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
{
|
||||
WT_CONNECTION_IMPL *conn;
|
||||
WT_DECL_RET;
|
||||
WT_PAGE_MODIFY *modify;
|
||||
|
||||
conn = S2C(session);
|
||||
|
||||
WT_RET(__wt_calloc_one(session, &modify));
|
||||
|
||||
/*
|
||||
* Select a spinlock for the page; let the barrier immediately below
|
||||
* keep things from racing too badly.
|
||||
*/
|
||||
modify->page_lock = ++conn->page_lock_cnt % WT_PAGE_LOCKS;
|
||||
/* Initialize the spinlock for the page. */
|
||||
WT_ERR(__wt_spin_init(session, &modify->page_lock, "btree page"));
|
||||
|
||||
/*
|
||||
* Multiple threads of control may be searching and deciding to modify
|
||||
@@ -37,8 +32,8 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
if (__wt_atomic_cas_ptr(&page->modify, NULL, modify))
|
||||
__wt_cache_page_inmem_incr(session, page, sizeof(*modify));
|
||||
else
|
||||
__wt_free(session, modify);
|
||||
return (0);
|
||||
err: __wt_free(session, modify);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -47,7 +42,8 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
*/
|
||||
int
|
||||
__wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
WT_ITEM *key, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove)
|
||||
WT_ITEM *key, WT_ITEM *value,
|
||||
WT_UPDATE *upd_arg, bool is_remove, bool exclusive)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_INSERT *ins;
|
||||
@@ -132,7 +128,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
|
||||
/* Serialize the update. */
|
||||
WT_ERR(__wt_update_serial(
|
||||
session, page, upd_entry, &upd, upd_size));
|
||||
session, page, upd_entry, &upd, upd_size, exclusive));
|
||||
} else {
|
||||
/*
|
||||
* Allocate the insert array as necessary.
|
||||
@@ -207,7 +203,7 @@ __wt_row_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
|
||||
/* Insert the WT_INSERT structure. */
|
||||
WT_ERR(__wt_insert_serial(
|
||||
session, page, cbt->ins_head, cbt->ins_stack,
|
||||
&ins, ins_size, skipdepth));
|
||||
&ins, ins_size, skipdepth, exclusive));
|
||||
}
|
||||
|
||||
if (logged)
|
||||
|
||||
@@ -312,7 +312,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
|
||||
cache->bytes_dirty_intl + cache->bytes_dirty_leaf,
|
||||
cache->pages_dirty_intl + cache->pages_dirty_leaf);
|
||||
|
||||
WT_TRET(__wt_cond_destroy(session, &cache->evict_cond));
|
||||
__wt_cond_destroy(session, &cache->evict_cond);
|
||||
__wt_spin_destroy(session, &cache->evict_pass_lock);
|
||||
__wt_spin_destroy(session, &cache->evict_queue_lock);
|
||||
__wt_spin_destroy(session, &cache->evict_walk_lock);
|
||||
|
||||
@@ -225,7 +225,7 @@ err: __wt_spin_unlock(session, &__wt_process.spinlock);
|
||||
__wt_free(session, pool_name);
|
||||
if (ret != 0 && created) {
|
||||
__wt_free(session, cp->name);
|
||||
WT_TRET(__wt_cond_destroy(session, &cp->cache_pool_cond));
|
||||
__wt_cond_destroy(session, &cp->cache_pool_cond);
|
||||
__wt_free(session, cp);
|
||||
}
|
||||
return (ret);
|
||||
@@ -391,7 +391,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
|
||||
__wt_free(session, cp->name);
|
||||
|
||||
__wt_spin_destroy(session, &cp->cache_pool_lock);
|
||||
WT_TRET(__wt_cond_destroy(session, &cp->cache_pool_cond));
|
||||
__wt_cond_destroy(session, &cp->cache_pool_cond);
|
||||
__wt_free(session, cp);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,8 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
|
||||
|
||||
*startp = false;
|
||||
|
||||
*startp = false;
|
||||
|
||||
conn = S2C(session);
|
||||
|
||||
WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
|
||||
@@ -231,7 +233,7 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
|
||||
WT_TRET(__wt_thread_join(session, conn->ckpt_tid));
|
||||
conn->ckpt_tid_set = false;
|
||||
}
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond));
|
||||
__wt_cond_destroy(session, &conn->ckpt_cond);
|
||||
|
||||
/* Close the server thread's session. */
|
||||
if (conn->ckpt_session != NULL) {
|
||||
|
||||
@@ -52,7 +52,7 @@ __wt_conn_dhandle_alloc(
|
||||
|
||||
WT_RET(__wt_calloc_one(session, &dhandle));
|
||||
|
||||
__wt_rwlock_init(session, &dhandle->rwlock);
|
||||
WT_ERR(__wt_rwlock_init(session, &dhandle->rwlock));
|
||||
dhandle->name_hash = __wt_hash_city64(uri, strlen(uri));
|
||||
WT_ERR(__wt_strdup(session, uri, &dhandle->name));
|
||||
WT_ERR(__wt_strdup(session, checkpoint, &dhandle->checkpoint));
|
||||
|
||||
@@ -62,14 +62,9 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
|
||||
WT_RET(__wt_spin_init(session, &conn->turtle_lock, "turtle file"));
|
||||
|
||||
/* Read-write locks */
|
||||
__wt_rwlock_init(session, &conn->dhandle_lock);
|
||||
__wt_rwlock_init(session, &conn->hot_backup_lock);
|
||||
__wt_rwlock_init(session, &conn->table_lock);
|
||||
|
||||
WT_RET(__wt_calloc_def(session, WT_PAGE_LOCKS, &conn->page_lock));
|
||||
for (i = 0; i < WT_PAGE_LOCKS; ++i)
|
||||
WT_RET(
|
||||
__wt_spin_init(session, &conn->page_lock[i], "btree page"));
|
||||
WT_RET(__wt_rwlock_init(session, &conn->dhandle_lock));
|
||||
WT_RET(__wt_rwlock_init(session, &conn->hot_backup_lock));
|
||||
WT_RET(__wt_rwlock_init(session, &conn->table_lock));
|
||||
|
||||
/* Setup the spin locks for the LSM manager queues. */
|
||||
WT_RET(__wt_spin_init(session,
|
||||
@@ -113,7 +108,6 @@ void
|
||||
__wt_connection_destroy(WT_CONNECTION_IMPL *conn)
|
||||
{
|
||||
WT_SESSION_IMPL *session;
|
||||
u_int i;
|
||||
|
||||
/* Check there's something to destroy. */
|
||||
if (conn == NULL)
|
||||
@@ -144,9 +138,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
|
||||
__wt_spin_destroy(session, &conn->schema_lock);
|
||||
__wt_rwlock_destroy(session, &conn->table_lock);
|
||||
__wt_spin_destroy(session, &conn->turtle_lock);
|
||||
for (i = 0; i < WT_PAGE_LOCKS; ++i)
|
||||
__wt_spin_destroy(session, &conn->page_lock[i]);
|
||||
__wt_free(session, conn->page_lock);
|
||||
|
||||
/* Free allocated memory. */
|
||||
__wt_free(session, conn->cfg);
|
||||
|
||||
@@ -880,7 +880,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
WT_RET(__wt_spin_init(session, &log->log_sync_lock, "log sync"));
|
||||
WT_RET(__wt_spin_init(session, &log->log_writelsn_lock,
|
||||
"log write LSN"));
|
||||
__wt_rwlock_init(session, &log->log_archive_lock);
|
||||
WT_RET(__wt_rwlock_init(session, &log->log_archive_lock));
|
||||
if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
|
||||
log->allocsize = (uint32_t)
|
||||
WT_MAX(conn->buffer_alignment, WT_LOG_ALIGN);
|
||||
@@ -1043,12 +1043,12 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
|
||||
}
|
||||
|
||||
/* Destroy the condition variables now that all threads are stopped */
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->log_wrlsn_cond));
|
||||
__wt_cond_destroy(session, &conn->log_cond);
|
||||
__wt_cond_destroy(session, &conn->log_file_cond);
|
||||
__wt_cond_destroy(session, &conn->log_wrlsn_cond);
|
||||
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
|
||||
__wt_cond_destroy(session, &conn->log->log_sync_cond);
|
||||
__wt_cond_destroy(session, &conn->log->log_write_cond);
|
||||
__wt_rwlock_destroy(session, &conn->log->log_archive_lock);
|
||||
__wt_spin_destroy(session, &conn->log->log_lock);
|
||||
__wt_spin_destroy(session, &conn->log->log_slot_lock);
|
||||
|
||||
@@ -648,7 +648,7 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
|
||||
WT_TRET(__wt_thread_join(session, conn->stat_tid));
|
||||
conn->stat_tid_set = false;
|
||||
}
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
|
||||
__wt_cond_destroy(session, &conn->stat_cond);
|
||||
|
||||
/* Log a set of statistics on shutdown if configured. */
|
||||
if (is_close)
|
||||
|
||||
@@ -432,7 +432,7 @@ __wt_sweep_destroy(WT_SESSION_IMPL *session)
|
||||
WT_TRET(__wt_thread_join(session, conn->sweep_tid));
|
||||
conn->sweep_tid_set = 0;
|
||||
}
|
||||
WT_TRET(__wt_cond_destroy(session, &conn->sweep_cond));
|
||||
__wt_cond_destroy(session, &conn->sweep_cond);
|
||||
|
||||
if (conn->sweep_session != NULL) {
|
||||
wt_session = &conn->sweep_session->iface;
|
||||
|
||||
@@ -486,7 +486,14 @@ __curfile_create(WT_SESSION_IMPL *session,
|
||||
WT_STAT_DATA_INCR(session, cursor_create);
|
||||
|
||||
if (0) {
|
||||
err: WT_TRET(__curfile_close(cursor));
|
||||
err: /*
|
||||
* Our caller expects to release the data handle if we fail.
|
||||
* Disconnect it from the cursor before closing.
|
||||
*/
|
||||
if (session->dhandle != NULL)
|
||||
__wt_cursor_dhandle_decr_use(session);
|
||||
cbt->btree = NULL;
|
||||
WT_TRET(__curfile_close(cursor));
|
||||
*cursorp = NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -414,18 +414,20 @@ struct __wt_page_modify {
|
||||
size_t discard_allocated;
|
||||
} *ovfl_track;
|
||||
|
||||
#define WT_PAGE_LOCK(s, p) \
|
||||
__wt_spin_lock((s), &(p)->modify->page_lock)
|
||||
#define WT_PAGE_TRYLOCK(s, p) \
|
||||
__wt_spin_trylock((s), &(p)->modify->page_lock)
|
||||
#define WT_PAGE_UNLOCK(s, p) \
|
||||
__wt_spin_unlock((s), &(p)->modify->page_lock)
|
||||
WT_SPINLOCK page_lock; /* Page's spinlock */
|
||||
|
||||
/*
|
||||
* The write generation is incremented when a page is modified, a page
|
||||
* is clean if the write generation is 0.
|
||||
*/
|
||||
uint32_t write_gen;
|
||||
|
||||
#define WT_PAGE_LOCK(s, p) \
|
||||
__wt_spin_lock((s), &S2C(s)->page_lock[(p)->modify->page_lock])
|
||||
#define WT_PAGE_UNLOCK(s, p) \
|
||||
__wt_spin_unlock((s), &S2C(s)->page_lock[(p)->modify->page_lock])
|
||||
uint8_t page_lock; /* Page's spinlock */
|
||||
|
||||
#define WT_PM_REC_EMPTY 1 /* Reconciliation: no replacement */
|
||||
#define WT_PM_REC_MULTIBLOCK 2 /* Reconciliation: multiple blocks */
|
||||
#define WT_PM_REC_REPLACE 3 /* Reconciliation: single block */
|
||||
@@ -602,13 +604,6 @@ struct __wt_page {
|
||||
|
||||
uint8_t unused[2]; /* Unused padding */
|
||||
|
||||
/*
|
||||
* Used to protect and co-ordinate splits for internal pages and
|
||||
* reconciliation for all pages. Only used to co-ordinate among the
|
||||
* uncommon cases that require exclusive access to a page.
|
||||
*/
|
||||
WT_RWLOCK page_lock;
|
||||
|
||||
/*
|
||||
* The page's read generation acts as an LRU value for each page in the
|
||||
* tree; it is used by the eviction server thread to select pages to be
|
||||
@@ -635,8 +630,6 @@ struct __wt_page {
|
||||
#define WT_READGEN_STEP 100
|
||||
uint64_t read_gen;
|
||||
|
||||
uint64_t evict_pass_gen; /* Eviction pass generation */
|
||||
|
||||
size_t memory_footprint; /* Memory attached to the page */
|
||||
|
||||
/* Page's on-disk representation: NULL for pages created in memory. */
|
||||
@@ -644,6 +637,10 @@ struct __wt_page {
|
||||
|
||||
/* If/when the page is modified, we need lots more information. */
|
||||
WT_PAGE_MODIFY *modify;
|
||||
|
||||
/* This is the 64 byte boundary, try to keep hot fields above here. */
|
||||
|
||||
uint64_t evict_pass_gen; /* Eviction pass generation */
|
||||
};
|
||||
|
||||
/*
|
||||
|
||||
@@ -175,21 +175,6 @@ struct __wt_connection_impl {
|
||||
WT_SPINLOCK turtle_lock; /* Turtle file spinlock */
|
||||
WT_RWLOCK dhandle_lock; /* Data handle list lock */
|
||||
|
||||
/*
|
||||
* We distribute the btree page locks across a set of spin locks. Don't
|
||||
* use too many: they are only held for very short operations, each one
|
||||
* is 64 bytes, so 256 will fill the L1 cache on most CPUs.
|
||||
*
|
||||
* Use a prime number of buckets rather than assuming a good hash
|
||||
* (Reference Sedgewick, Algorithms in C, "Hash Functions").
|
||||
*
|
||||
* Note: this can't be an array, we impose cache-line alignment and gcc
|
||||
* doesn't support that for arrays smaller than the alignment.
|
||||
*/
|
||||
#define WT_PAGE_LOCKS 17
|
||||
WT_SPINLOCK *page_lock; /* Btree page spinlocks */
|
||||
u_int page_lock_cnt; /* Next spinlock to use */
|
||||
|
||||
/* Connection queue */
|
||||
TAILQ_ENTRY(__wt_connection_impl) q;
|
||||
/* Cache pool queue */
|
||||
|
||||
1429
src/include/extern.h
1429
src/include/extern.h
File diff suppressed because it is too large
Load Diff
@@ -1,32 +1,32 @@
|
||||
/* DO NOT EDIT: automatically built by dist/s_prototypes. */
|
||||
|
||||
extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_file_extend( WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_os_posix(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_posix_map(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_map_preload(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, const void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_map_discard(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *map, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_posix_unmap(WT_FILE_HANDLE *fh, WT_SESSION *wt_session, void *mapped_region, size_t len, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
|
||||
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
|
||||
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
|
||||
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern bool __wt_absolute_path(const char *path);
|
||||
extern const char *__wt_path_separator(void);
|
||||
extern bool __wt_has_priv(void);
|
||||
extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
|
||||
extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
|
||||
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
|
||||
extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
|
||||
extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
|
||||
|
||||
@@ -1,35 +1,35 @@
|
||||
/* DO NOT EDIT: automatically built by dist/s_prototypes. */
|
||||
|
||||
extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern const char *__wt_path_separator(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern DWORD __wt_getlasterror(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
|
||||
extern int __wt_win_directory_list(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *directory, const char *prefix, char ***dirlistp, uint32_t *countp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_win_directory_list_free(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, char **dirlist, uint32_t count) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, bool fail, void *sym_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_win_fs_size(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_os_win(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_getenv(WT_SESSION_IMPL *session, const char *variable, const char **envp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_win_map(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_regionp, size_t *lenp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_win_unmap(WT_FILE_HANDLE *file_handle, WT_SESSION *wt_session, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern void __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs, bool (*run_func)(WT_SESSION_IMPL *), bool *signalled);
|
||||
extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
|
||||
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
|
||||
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern bool __wt_absolute_path(const char *path);
|
||||
extern const char *__wt_path_separator(void);
|
||||
extern bool __wt_has_priv(void);
|
||||
extern void __wt_stream_set_line_buffer(FILE *fp);
|
||||
extern void __wt_stream_set_no_buffer(FILE *fp);
|
||||
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
|
||||
extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
|
||||
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern DWORD __wt_getlasterror(void);
|
||||
extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
|
||||
extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error);
|
||||
extern void __wt_yield(void);
|
||||
|
||||
@@ -37,17 +37,21 @@ struct __wt_condvar {
|
||||
* Don't modify this structure without understanding the read/write locking
|
||||
* functions.
|
||||
*/
|
||||
union __wt_rwlock { /* Read/write lock */
|
||||
uint64_t u;
|
||||
struct {
|
||||
uint32_t wr; /* Writers and readers */
|
||||
} i;
|
||||
struct {
|
||||
uint16_t writers; /* Now serving for writers */
|
||||
uint16_t readers; /* Now serving for readers */
|
||||
uint16_t next; /* Next available ticket number */
|
||||
uint16_t writers_active;/* Count of active writers */
|
||||
} s;
|
||||
struct __wt_rwlock { /* Read/write lock */
|
||||
volatile union {
|
||||
uint64_t v; /* Full 64-bit value */
|
||||
struct {
|
||||
uint8_t current; /* Current ticket */
|
||||
uint8_t next; /* Next available ticket */
|
||||
uint8_t reader; /* Read queue ticket */
|
||||
uint8_t __notused; /* Padding */
|
||||
uint16_t readers_active;/* Count of active readers */
|
||||
uint16_t readers_queued;/* Count of queued readers */
|
||||
} s;
|
||||
} u;
|
||||
|
||||
WT_CONDVAR *cond_readers; /* Blocking readers */
|
||||
WT_CONDVAR *cond_writers; /* Blocking writers */
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -63,8 +67,8 @@ union __wt_rwlock { /* Read/write lock */
|
||||
#define SPINLOCK_PTHREAD_MUTEX_ADAPTIVE 3
|
||||
|
||||
struct __wt_spinlock {
|
||||
WT_CACHE_LINE_PAD_BEGIN
|
||||
#if SPINLOCK_TYPE == SPINLOCK_GCC
|
||||
WT_CACHE_LINE_PAD_BEGIN
|
||||
volatile int lock;
|
||||
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
|
||||
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\
|
||||
@@ -87,5 +91,8 @@ struct __wt_spinlock {
|
||||
int16_t stat_int_usecs_off; /* waiting server threads offset */
|
||||
|
||||
int8_t initialized; /* Lock initialized, for cleanup */
|
||||
|
||||
#if SPINLOCK_TYPE == SPINLOCK_GCC
|
||||
WT_CACHE_LINE_PAD_END
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -154,7 +154,7 @@ __col_append_serial_func(WT_SESSION_IMPL *session, WT_INSERT_HEAD *ins_head,
|
||||
static inline int
|
||||
__wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
|
||||
size_t new_ins_size, uint64_t *recnop, u_int skipdepth)
|
||||
size_t new_ins_size, uint64_t *recnop, u_int skipdepth, bool exclusive)
|
||||
{
|
||||
WT_INSERT *new_ins = *new_insp;
|
||||
WT_DECL_RET;
|
||||
@@ -165,11 +165,16 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
/* Clear references to memory we now own and must free on error. */
|
||||
*new_insp = NULL;
|
||||
|
||||
/* Acquire the page's spinlock, call the worker function. */
|
||||
WT_PAGE_LOCK(session, page);
|
||||
/*
|
||||
* Acquire the page's spinlock unless we already have exclusive access.
|
||||
* Then call the worker function.
|
||||
*/
|
||||
if (!exclusive)
|
||||
WT_PAGE_LOCK(session, page);
|
||||
ret = __col_append_serial_func(
|
||||
session, ins_head, ins_stack, new_ins, recnop, skipdepth);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
if (!exclusive)
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
|
||||
if (ret != 0) {
|
||||
/* Free unused memory on error. */
|
||||
@@ -198,7 +203,7 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
static inline int
|
||||
__wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
WT_INSERT_HEAD *ins_head, WT_INSERT ***ins_stack, WT_INSERT **new_insp,
|
||||
size_t new_ins_size, u_int skipdepth)
|
||||
size_t new_ins_size, u_int skipdepth, bool exclusive)
|
||||
{
|
||||
WT_INSERT *new_ins = *new_insp;
|
||||
WT_DECL_RET;
|
||||
@@ -220,10 +225,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
ret = __insert_simple_func(
|
||||
session, ins_stack, new_ins, skipdepth);
|
||||
else {
|
||||
WT_PAGE_LOCK(session, page);
|
||||
if (!exclusive)
|
||||
WT_PAGE_LOCK(session, page);
|
||||
ret = __insert_serial_func(
|
||||
session, ins_head, ins_stack, new_ins, skipdepth);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
if (!exclusive)
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
}
|
||||
|
||||
if (ret != 0) {
|
||||
@@ -252,7 +259,8 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
*/
|
||||
static inline int
|
||||
__wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size)
|
||||
WT_UPDATE **srch_upd, WT_UPDATE **updp, size_t upd_size,
|
||||
bool exclusive)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_UPDATE *obsolete, *upd = *updp;
|
||||
@@ -295,7 +303,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
/*
|
||||
* If there are no subsequent WT_UPDATE structures we are done here.
|
||||
*/
|
||||
if (upd->next == NULL)
|
||||
if (upd->next == NULL || exclusive)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
@@ -316,11 +324,11 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
|
||||
}
|
||||
|
||||
/* If we can't lock it, don't scan, that's okay. */
|
||||
if (__wt_try_writelock(session, &page->page_lock) != 0)
|
||||
if (WT_PAGE_TRYLOCK(session, page) != 0)
|
||||
return (0);
|
||||
|
||||
obsolete = __wt_update_obsolete_check(session, page, upd->next);
|
||||
__wt_writeunlock(session, &page->page_lock);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
if (obsolete != NULL)
|
||||
__wt_update_obsolete_free(session, page, obsolete);
|
||||
|
||||
|
||||
@@ -97,6 +97,10 @@ struct __wt_session_impl {
|
||||
*/
|
||||
TAILQ_HEAD(__tables, __wt_table) tables;
|
||||
|
||||
/* Current rwlock for callback. */
|
||||
WT_RWLOCK *current_rwlock;
|
||||
uint8_t current_rwticket;
|
||||
|
||||
WT_ITEM **scratch; /* Temporary memory for any function */
|
||||
u_int scratch_alloc; /* Currently allocated */
|
||||
size_t scratch_cached; /* Scratch bytes cached */
|
||||
|
||||
@@ -59,7 +59,6 @@ __wt_verify_build(void)
|
||||
sizeof(s) > WT_CACHE_LINE_ALIGNMENT || \
|
||||
sizeof(s) % WT_CACHE_LINE_ALIGNMENT == 0)
|
||||
WT_PADDING_CHECK(WT_LOGSLOT);
|
||||
WT_PADDING_CHECK(WT_SPINLOCK);
|
||||
WT_PADDING_CHECK(WT_TXN_STATE);
|
||||
|
||||
/*
|
||||
|
||||
@@ -268,6 +268,8 @@ struct __wt_ref;
|
||||
typedef struct __wt_ref WT_REF;
|
||||
struct __wt_row;
|
||||
typedef struct __wt_row WT_ROW;
|
||||
struct __wt_rwlock;
|
||||
typedef struct __wt_rwlock WT_RWLOCK;
|
||||
struct __wt_salvage_cookie;
|
||||
typedef struct __wt_salvage_cookie WT_SALVAGE_COOKIE;
|
||||
struct __wt_save_upd;
|
||||
@@ -302,8 +304,6 @@ union __wt_lsn;
|
||||
typedef union __wt_lsn WT_LSN;
|
||||
union __wt_rand_state;
|
||||
typedef union __wt_rand_state WT_RAND_STATE;
|
||||
union __wt_rwlock;
|
||||
typedef union __wt_rwlock WT_RWLOCK;
|
||||
/*
|
||||
* Forward type declarations for internal types: END
|
||||
* DO NOT EDIT: automatically built by dist/s_typedef.
|
||||
|
||||
@@ -334,7 +334,7 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
|
||||
__wt_spin_destroy(session, &manager->switch_lock);
|
||||
__wt_spin_destroy(session, &manager->app_lock);
|
||||
__wt_spin_destroy(session, &manager->manager_lock);
|
||||
WT_TRET(__wt_cond_destroy(session, &manager->work_cond));
|
||||
__wt_cond_destroy(session, &manager->work_cond);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -471,7 +471,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
|
||||
|
||||
/* Try to open the tree. */
|
||||
WT_RET(__wt_calloc_one(session, &lsm_tree));
|
||||
__wt_rwlock_init(session, &lsm_tree->rwlock);
|
||||
WT_ERR(__wt_rwlock_init(session, &lsm_tree->rwlock));
|
||||
|
||||
WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
|
||||
|
||||
|
||||
@@ -328,8 +328,9 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
|
||||
*/
|
||||
saved_isolation = session->txn.isolation;
|
||||
session->txn.isolation = WT_ISO_READ_UNCOMMITTED;
|
||||
WT_ERR(__wt_cache_op(session, WT_SYNC_WRITE_LEAVES));
|
||||
ret = __wt_cache_op(session, WT_SYNC_WRITE_LEAVES);
|
||||
session->txn.isolation = saved_isolation;
|
||||
WT_ERR(ret);
|
||||
|
||||
__wt_verbose(session, WT_VERB_LSM, "LSM worker checkpointing %s",
|
||||
chunk->uri);
|
||||
|
||||
@@ -153,7 +153,7 @@ err:
|
||||
* __wt_cond_destroy --
|
||||
* Destroy a condition variable.
|
||||
*/
|
||||
int
|
||||
void
|
||||
__wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
|
||||
{
|
||||
WT_CONDVAR *cond;
|
||||
@@ -161,11 +161,15 @@ __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
|
||||
|
||||
cond = *condp;
|
||||
if (cond == NULL)
|
||||
return (0);
|
||||
return;
|
||||
|
||||
if ((ret = pthread_cond_destroy(&cond->cond)) != 0)
|
||||
WT_PANIC_MSG(
|
||||
session, ret, "pthread_cond_destroy: %s", cond->name);
|
||||
|
||||
if ((ret = pthread_mutex_destroy(&cond->mtx)) != 0)
|
||||
WT_PANIC_MSG(
|
||||
session, ret, "pthread_mutex_destroy: %s", cond->name);
|
||||
|
||||
ret = pthread_cond_destroy(&cond->cond);
|
||||
WT_TRET(pthread_mutex_destroy(&cond->mtx));
|
||||
__wt_free(session, *condp);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -163,18 +163,16 @@ __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond)
|
||||
* __wt_cond_destroy --
|
||||
* Destroy a condition variable.
|
||||
*/
|
||||
int
|
||||
void
|
||||
__wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp)
|
||||
{
|
||||
WT_CONDVAR *cond;
|
||||
|
||||
cond = *condp;
|
||||
if (cond == NULL)
|
||||
return (0);
|
||||
return;
|
||||
|
||||
/* Do nothing to delete Condition Variable */
|
||||
DeleteCriticalSection(&cond->mtx);
|
||||
__wt_free(session, *condp);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
@@ -386,7 +386,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
* In-memory splits: reconciliation of an internal page cannot handle
|
||||
* a child page splitting during the reconciliation.
|
||||
*/
|
||||
__wt_writelock(session, &page->page_lock);
|
||||
WT_PAGE_LOCK(session, page);
|
||||
|
||||
oldest_id = __wt_txn_oldest_id(session);
|
||||
if (LF_ISSET(WT_EVICTING))
|
||||
@@ -405,7 +405,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
/* Initialize the reconciliation structure for each new run. */
|
||||
if ((ret = __rec_write_init(
|
||||
session, ref, flags, salvage, &session->reconcile)) != 0) {
|
||||
__wt_writeunlock(session, &page->page_lock);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
return (ret);
|
||||
}
|
||||
r = session->reconcile;
|
||||
@@ -446,7 +446,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
|
||||
WT_TRET(__rec_write_wrapup_err(session, r, page));
|
||||
|
||||
/* Release the reconciliation lock. */
|
||||
__wt_writeunlock(session, &page->page_lock);
|
||||
WT_PAGE_UNLOCK(session, page);
|
||||
|
||||
/*
|
||||
* If our caller can configure lookaside table reconciliation, flag if
|
||||
|
||||
@@ -261,8 +261,8 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
|
||||
* can get a handle without special flags.
|
||||
*/
|
||||
if (F_ISSET(dhandle, WT_DHANDLE_DISCARD | WT_DHANDLE_DISCARD_FORCE)) {
|
||||
__session_find_dhandle(session,
|
||||
dhandle->name, dhandle->checkpoint, &dhandle_cache);
|
||||
WT_SAVE_DHANDLE(session, __session_find_dhandle(session,
|
||||
dhandle->name, dhandle->checkpoint, &dhandle_cache));
|
||||
if (dhandle_cache != NULL)
|
||||
__session_discard_dhandle(session, dhandle_cache);
|
||||
}
|
||||
|
||||
@@ -27,7 +27,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst:
|
||||
* Inspired by "Spinlocks and Read-Write Locks" by Dr. Steven Fuerst:
|
||||
* http://locklessinc.com/articles/locks/
|
||||
*
|
||||
* Dr. Fuerst further credits:
|
||||
@@ -39,77 +39,46 @@
|
||||
* by John Mellor-Crummey and Michael Scott in their landmark paper "Scalable
|
||||
* Reader-Writer Synchronization for Shared-Memory Multiprocessors".
|
||||
*
|
||||
* The following is an explanation of this code. First, the underlying lock
|
||||
* structure.
|
||||
* The following is an explanation of our interpretation and implementation.
|
||||
* First, the underlying lock structure.
|
||||
*
|
||||
* volatile union {
|
||||
* uint64_t v; // Full 64-bit value
|
||||
* struct {
|
||||
* uint16_t writers; Now serving for writers
|
||||
* uint16_t readers; Now serving for readers
|
||||
* uint16_t next; Next available ticket number
|
||||
* uint16_t __notused; Padding
|
||||
* }
|
||||
* uint8_t current; // Current ticket
|
||||
* uint8_t next; // Next available ticket
|
||||
* uint8_t reader; // Read queue ticket
|
||||
* uint8_t __notused; // Padding
|
||||
* uint16_t readers_active; // Count of active readers
|
||||
* uint16_t readers_queued; // Count of queued readers
|
||||
* } s;
|
||||
* } u;
|
||||
*
|
||||
* First, imagine a store's 'take a number' ticket algorithm. A customer takes
|
||||
* a unique ticket number and customers are served in ticket order. In the data
|
||||
* structure, 'writers' is the next writer to be served, 'readers' is the next
|
||||
* reader to be served, and 'next' is the next available ticket number.
|
||||
* structure, 'next' is the ticket that will be allocated next, and 'current'
|
||||
* is the ticket being served.
|
||||
*
|
||||
* Next, consider exclusive (write) locks. The 'now serving' number for writers
|
||||
* is 'writers'. To lock, 'take a number' and wait until that number is being
|
||||
* served; more specifically, atomically copy and increment the current value of
|
||||
* 'next', and then wait until 'writers' equals that copied number.
|
||||
* Next, consider exclusive (write) locks. To lock, 'take a number' and wait
|
||||
* until that number is being served; more specifically, atomically increment
|
||||
* 'next', and then wait until 'current' equals that allocated ticket.
|
||||
*
|
||||
* Shared (read) locks are similar. Like writers, readers atomically get the
|
||||
* next number available. However, instead of waiting for 'writers' to equal
|
||||
* their number, they wait for 'readers' to equal their number.
|
||||
* Shared (read) locks are similar, except that readers can share a ticket
|
||||
* (both with each other and with a single writer). Readers with a given
|
||||
* ticket execute before the writer with that ticket. In other words, writers
|
||||
* wait for both their ticket to become current and for all readers to exit
|
||||
* the lock.
|
||||
*
|
||||
* This has the effect of queuing lock requests in the order they arrive
|
||||
* (incidentally avoiding starvation).
|
||||
* If there are no active writers (indicated by 'current' == 'next'), readers
|
||||
* can immediately enter the lock by atomically incrementing 'readers_active'.
|
||||
* When there are writers active, readers form a new queue by first setting
|
||||
* 'reader' to 'next' (i.e. readers are scheduled after any queued writers,
|
||||
* avoiding starvation), then atomically incrementing 'readers_queued'.
|
||||
*
|
||||
* Each lock/unlock pair requires incrementing both 'readers' and 'writers'.
|
||||
* In the case of a reader, the 'readers' increment happens when the reader
|
||||
* acquires the lock (to allow read-lock sharing), and the 'writers' increment
|
||||
* happens when the reader releases the lock. In the case of a writer, both
|
||||
* 'readers' and 'writers' are incremented when the writer releases the lock.
|
||||
*
|
||||
* For example, consider the following read (R) and write (W) lock requests:
|
||||
*
|
||||
* writers readers next
|
||||
* 0 0 0
|
||||
* R: ticket 0, readers match OK 0 1 1
|
||||
* R: ticket 1, readers match OK 0 2 2
|
||||
* R: ticket 2, readers match OK 0 3 3
|
||||
* W: ticket 3, writers no match block 0 3 4
|
||||
* R: ticket 2, unlock 1 3 4
|
||||
* R: ticket 0, unlock 2 3 4
|
||||
* R: ticket 1, unlock 3 3 4
|
||||
* W: ticket 3, writers match OK 3 3 4
|
||||
*
|
||||
* Note the writer blocks until 'writers' equals its ticket number and it does
|
||||
* not matter if readers unlock in order or not.
|
||||
*
|
||||
* Readers or writers entering the system after the write lock is queued block,
|
||||
* and the next ticket holder (reader or writer) will unblock when the writer
|
||||
* unlocks. An example, continuing from the last line of the above example:
|
||||
*
|
||||
* writers readers next
|
||||
* W: ticket 3, writers match OK 3 3 4
|
||||
* R: ticket 4, readers no match block 3 3 5
|
||||
* R: ticket 5, readers no match block 3 3 6
|
||||
* W: ticket 6, writers no match block 3 3 7
|
||||
* W: ticket 3, unlock 4 4 7
|
||||
* R: ticket 4, readers match OK 4 5 7
|
||||
* R: ticket 5, readers match OK 4 6 7
|
||||
*
|
||||
* The 'next' field is a 2-byte value so the available ticket number wraps at
|
||||
* 64K requests. If a thread's lock request is not granted until the 'next'
|
||||
* field cycles and the same ticket is taken by another thread, we could grant
|
||||
* a lock to two separate threads at the same time, and bad things happen: two
|
||||
* writer threads or a reader thread and a writer thread would run in parallel,
|
||||
* and lock waiters could be skipped if the unlocks race. This is unlikely, it
|
||||
* only happens if a lock request is blocked by 64K other requests. The fix is
|
||||
* to grow the lock structure fields, but the largest atomic instruction we have
|
||||
* is 8 bytes, the structure has no room to grow.
|
||||
* The 'next' field is a 1-byte value so the available ticket number wraps
|
||||
* after 256 requests. If a thread's write lock request would cause the 'next'
|
||||
* field to catch up with 'current', instead it waits to avoid the same ticket
|
||||
* being allocated to multiple threads.
|
||||
*/
|
||||
|
||||
#include "wt_internal.h"
|
||||
@@ -118,12 +87,14 @@
|
||||
* __wt_rwlock_init --
|
||||
* Initialize a read/write lock.
|
||||
*/
|
||||
void
|
||||
int
|
||||
__wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
WT_UNUSED(session);
|
||||
l->u.v = 0;
|
||||
|
||||
l->u = 0;
|
||||
WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_readers));
|
||||
WT_RET(__wt_cond_alloc(session, "rwlock wait", &l->cond_writers));
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -133,9 +104,10 @@ __wt_rwlock_init(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
void
|
||||
__wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
WT_UNUSED(session);
|
||||
l->u.v = 0;
|
||||
|
||||
l->u = 0;
|
||||
__wt_cond_destroy(session, &l->cond_readers);
|
||||
__wt_cond_destroy(session, &l->cond_writers);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -149,46 +121,34 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
|
||||
WT_STAT_CONN_INCR(session, rwlock_read);
|
||||
|
||||
new = old = *l;
|
||||
old.u.v = l->u.v;
|
||||
|
||||
/*
|
||||
* This read lock can only be granted if the lock was last granted to
|
||||
* a reader and there are no readers or writers blocked on the lock,
|
||||
* that is, if this thread's ticket would be the next ticket granted.
|
||||
* Do the cheap test to see if this can possibly succeed (and confirm
|
||||
* the lock is in the correct state to grant this read lock).
|
||||
*/
|
||||
if (old.s.readers != old.s.next)
|
||||
/* This read lock can only be granted if there are no active writers. */
|
||||
if (old.u.s.current != old.u.s.next)
|
||||
return (EBUSY);
|
||||
|
||||
/*
|
||||
* The replacement lock value is a result of allocating a new ticket and
|
||||
* incrementing the reader value to match it.
|
||||
* The replacement lock value is a result of adding an active reader.
|
||||
* Check for overflow: if the maximum number of readers are already
|
||||
* active, no new readers can enter the lock.
|
||||
*/
|
||||
new.s.readers = new.s.next = old.s.next + 1;
|
||||
return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY);
|
||||
new.u.v = old.u.v;
|
||||
if (++new.u.s.readers_active == 0)
|
||||
return (EBUSY);
|
||||
|
||||
/* We rely on this atomic operation to provide a barrier. */
|
||||
return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_readlock_spin --
|
||||
* Spin to get a read lock: only yield the CPU if the lock is held
|
||||
* exclusive.
|
||||
* __read_blocked --
|
||||
* Check whether the current read lock request should keep waiting.
|
||||
*/
|
||||
void
|
||||
__wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
static bool
|
||||
__read_blocked(WT_SESSION_IMPL *session)
|
||||
{
|
||||
/*
|
||||
* Try to get the lock in a single operation if it is available to
|
||||
* readers. This avoids the situation where multiple readers arrive
|
||||
* concurrently and have to line up in order to enter the lock. For
|
||||
* read-heavy workloads it can make a significant difference.
|
||||
*/
|
||||
while (__wt_try_readlock(session, l) != 0) {
|
||||
if (l->s.writers_active > 0)
|
||||
__wt_yield();
|
||||
else
|
||||
WT_PAUSE();
|
||||
}
|
||||
return (session->current_rwticket !=
|
||||
session->current_rwlock->u.s.current);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -198,43 +158,95 @@ __wt_readlock_spin(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
void
|
||||
__wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
uint16_t ticket;
|
||||
WT_RWLOCK new, old;
|
||||
int pause_cnt;
|
||||
int16_t writers_active;
|
||||
uint8_t ticket;
|
||||
|
||||
WT_STAT_CONN_INCR(session, rwlock_read);
|
||||
|
||||
WT_DIAGNOSTIC_YIELD;
|
||||
|
||||
/*
|
||||
* Possibly wrap: if we have more than 64K lockers waiting, the ticket
|
||||
* value will wrap and two lockers will simultaneously be granted the
|
||||
* lock.
|
||||
*/
|
||||
ticket = __wt_atomic_fetch_add16(&l->s.next, 1);
|
||||
for (pause_cnt = 0; ticket != l->s.readers;) {
|
||||
for (;;) {
|
||||
/*
|
||||
* We failed to get the lock; pause before retrying and if we've
|
||||
* paused enough, yield so we don't burn CPU to no purpose. This
|
||||
* situation happens if there are more threads than cores in the
|
||||
* system and we're thrashing on shared resources.
|
||||
* Fast path: if there is no active writer, join the current
|
||||
* group.
|
||||
*/
|
||||
if (++pause_cnt < WT_THOUSAND)
|
||||
for (old.u.v = l->u.v;
|
||||
old.u.s.current == old.u.s.next;
|
||||
old.u.v = l->u.v) {
|
||||
new.u.v = old.u.v;
|
||||
/*
|
||||
* Check for overflow: if the maximum number of readers
|
||||
* are already active, no new readers can enter the
|
||||
* lock.
|
||||
*/
|
||||
if (++new.u.s.readers_active == 0)
|
||||
goto stall;
|
||||
if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
|
||||
return;
|
||||
WT_PAUSE();
|
||||
else
|
||||
}
|
||||
|
||||
/*
|
||||
* There is an active writer: join the next group.
|
||||
*
|
||||
* Limit how many readers can queue: don't allow more readers
|
||||
* to queue than there are active writers (calculated as
|
||||
* `next - current`): otherwise, in write-heavy workloads,
|
||||
* readers can keep queuing up in front of writers and
|
||||
* throughput is unstable.
|
||||
*
|
||||
* If the maximum number of readers are already queued, wait
|
||||
* until we can get a valid ticket.
|
||||
*/
|
||||
writers_active = old.u.s.next - old.u.s.current;
|
||||
if (old.u.s.readers_queued > writers_active) {
|
||||
stall: __wt_cond_wait(session,
|
||||
l->cond_readers, 10 * WT_THOUSAND, NULL);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we are the first reader to queue, set the next read
|
||||
* group. Note: don't re-read from the lock or we could race
|
||||
* with a writer unlocking.
|
||||
*/
|
||||
new.u.v = old.u.v;
|
||||
if (new.u.s.readers_queued++ == 0)
|
||||
new.u.s.reader = new.u.s.next;
|
||||
ticket = new.u.s.reader;
|
||||
|
||||
if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
|
||||
break;
|
||||
}
|
||||
|
||||
/* Wait for our group to start. */
|
||||
for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) {
|
||||
if (pause_cnt < 1000)
|
||||
WT_PAUSE();
|
||||
else if (pause_cnt < 1200)
|
||||
__wt_yield();
|
||||
else {
|
||||
session->current_rwlock = l;
|
||||
session->current_rwticket = ticket;
|
||||
__wt_cond_wait(session,
|
||||
l->cond_readers, 10 * WT_THOUSAND, __read_blocked);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We're the only writer of the readers field, so the update does not
|
||||
* need to be atomic.
|
||||
*/
|
||||
++l->s.readers;
|
||||
|
||||
/*
|
||||
* Applications depend on a barrier here so that operations holding the
|
||||
* lock see consistent data.
|
||||
* lock see consistent data. The atomic operation above isn't
|
||||
* sufficient here because we don't own the lock until our ticket comes
|
||||
* up and whatever data we are protecting may have changed in the
|
||||
* meantime.
|
||||
*/
|
||||
WT_READ_BARRIER();
|
||||
|
||||
/* Sanity check that we (still) have the lock. */
|
||||
WT_ASSERT(session,
|
||||
ticket == l->u.s.current && l->u.s.readers_active > 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -244,13 +256,22 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
void
|
||||
__wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
WT_UNUSED(session);
|
||||
WT_RWLOCK new, old;
|
||||
|
||||
/*
|
||||
* Increment the writers value (other readers are doing the same, make
|
||||
* sure we don't race).
|
||||
*/
|
||||
(void)__wt_atomic_add16(&l->s.writers, 1);
|
||||
do {
|
||||
old.u.v = l->u.v;
|
||||
WT_ASSERT(session, old.u.s.readers_active > 0);
|
||||
|
||||
/*
|
||||
* Decrement the active reader count (other readers are doing
|
||||
* the same, make sure we don't race).
|
||||
*/
|
||||
new.u.v = old.u.v;
|
||||
--new.u.s.readers_active;
|
||||
} while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
|
||||
|
||||
if (new.u.s.readers_active == 0 && new.u.s.current != new.u.s.next)
|
||||
__wt_cond_signal(session, l->cond_writers);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -264,22 +285,44 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
|
||||
WT_STAT_CONN_INCR(session, rwlock_write);
|
||||
|
||||
old = new = *l;
|
||||
|
||||
/*
|
||||
* This write lock can only be granted if the lock was last granted to
|
||||
* a writer and there are no readers or writers blocked on the lock,
|
||||
* that is, if this thread's ticket would be the next ticket granted.
|
||||
* Do the cheap test to see if this can possibly succeed (and confirm
|
||||
* the lock is in the correct state to grant this write lock).
|
||||
* This write lock can only be granted if no readers or writers blocked
|
||||
* on the lock, that is, if this thread's ticket would be the next
|
||||
* ticket granted. Check if this can possibly succeed (and confirm the
|
||||
* lock is in the correct state to grant this write lock).
|
||||
*/
|
||||
if (old.s.writers != old.s.next)
|
||||
old.u.v = l->u.v;
|
||||
if (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0)
|
||||
return (EBUSY);
|
||||
|
||||
/* The replacement lock value is a result of allocating a new ticket. */
|
||||
++new.s.next;
|
||||
++new.s.writers_active;
|
||||
return (__wt_atomic_cas64(&l->u, old.u, new.u) ? 0 : EBUSY);
|
||||
/*
|
||||
* We've checked above that there is no writer active (since
|
||||
* `current == next`), so there should be no readers queued.
|
||||
*/
|
||||
WT_ASSERT(session, old.u.s.readers_queued == 0);
|
||||
|
||||
/*
|
||||
* The replacement lock value is a result of allocating a new ticket.
|
||||
*
|
||||
* We rely on this atomic operation to provide a barrier.
|
||||
*/
|
||||
new.u.v = old.u.v;
|
||||
new.u.s.next++;
|
||||
return (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v) ? 0 : EBUSY);
|
||||
}
|
||||
|
||||
/*
|
||||
* __write_blocked --
|
||||
* Check whether the current write lock request should keep waiting.
|
||||
*/
|
||||
static bool
|
||||
__write_blocked(WT_SESSION_IMPL *session)
|
||||
{
|
||||
WT_RWLOCK *l;
|
||||
|
||||
l = session->current_rwlock;
|
||||
return (session->current_rwticket != l->u.s.current ||
|
||||
l->u.s.readers_active != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -289,36 +332,68 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
void
|
||||
__wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
uint16_t ticket;
|
||||
WT_RWLOCK new, old;
|
||||
int pause_cnt;
|
||||
uint8_t ticket;
|
||||
|
||||
WT_STAT_CONN_INCR(session, rwlock_write);
|
||||
|
||||
/*
|
||||
* Possibly wrap: if we have more than 64K lockers waiting, the ticket
|
||||
* value will wrap and two lockers will simultaneously be granted the
|
||||
* lock.
|
||||
*/
|
||||
ticket = __wt_atomic_fetch_add16(&l->s.next, 1);
|
||||
(void)__wt_atomic_add16(&l->s.writers_active, 1);
|
||||
for (pause_cnt = 0; ticket != l->s.writers;) {
|
||||
for (;;) {
|
||||
old.u.v = l->u.v;
|
||||
|
||||
/* Allocate a ticket. */
|
||||
new.u.v = old.u.v;
|
||||
ticket = new.u.s.next++;
|
||||
|
||||
/*
|
||||
* We failed to get the lock; pause before retrying and if we've
|
||||
* paused enough, sleep so we don't burn CPU to no purpose. This
|
||||
* situation happens if there are more threads than cores in the
|
||||
* system and we're thrashing on shared resources.
|
||||
* Check for overflow: if the next ticket is allowed to catch
|
||||
* up with the current batch, two writers could be granted the
|
||||
* lock simultaneously.
|
||||
*/
|
||||
if (++pause_cnt < WT_THOUSAND)
|
||||
if (new.u.s.current == new.u.s.next) {
|
||||
__wt_cond_wait(session,
|
||||
l->cond_writers, 10 * WT_THOUSAND, NULL);
|
||||
continue;
|
||||
}
|
||||
if (__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v))
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for our group to start and any readers to drain.
|
||||
*
|
||||
* We take care here to do an atomic read of the full 64-bit lock
|
||||
* value. Otherwise, reads are not guaranteed to be ordered and we
|
||||
* could see no readers active from a different batch and decide that
|
||||
* we have the lock.
|
||||
*/
|
||||
for (pause_cnt = 0, old.u.v = l->u.v;
|
||||
ticket != old.u.s.current || old.u.s.readers_active != 0;
|
||||
pause_cnt++, old.u.v = l->u.v) {
|
||||
if (pause_cnt < 1000)
|
||||
WT_PAUSE();
|
||||
else
|
||||
__wt_sleep(0, 10);
|
||||
else if (pause_cnt < 1200)
|
||||
__wt_yield();
|
||||
else {
|
||||
session->current_rwlock = l;
|
||||
session->current_rwticket = ticket;
|
||||
__wt_cond_wait(session,
|
||||
l->cond_writers, 10 * WT_THOUSAND, __write_blocked);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Applications depend on a barrier here so that operations holding the
|
||||
* lock see consistent data.
|
||||
* lock see consistent data. The atomic operation above isn't
|
||||
* sufficient here because we don't own the lock until our ticket comes
|
||||
* up and whatever data we are protecting may have changed in the
|
||||
* meantime.
|
||||
*/
|
||||
WT_READ_BARRIER();
|
||||
|
||||
/* Sanity check that we (still) have the lock. */
|
||||
WT_ASSERT(session,
|
||||
ticket == l->u.s.current && l->u.s.readers_active == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -328,29 +403,35 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
void
|
||||
__wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
WT_RWLOCK new;
|
||||
WT_RWLOCK new, old;
|
||||
|
||||
WT_UNUSED(session);
|
||||
do {
|
||||
old.u.v = l->u.v;
|
||||
|
||||
(void)__wt_atomic_sub16(&l->s.writers_active, 1);
|
||||
/*
|
||||
* We're holding the lock exclusive, there shouldn't be any
|
||||
* active readers.
|
||||
*/
|
||||
WT_ASSERT(session, old.u.s.readers_active == 0);
|
||||
|
||||
/*
|
||||
* Ensure that all updates made while the lock was held are visible to
|
||||
* the next thread to acquire the lock.
|
||||
*/
|
||||
WT_WRITE_BARRIER();
|
||||
/*
|
||||
* Allow the next batch to start.
|
||||
*
|
||||
* If there are readers in the next group, swap queued readers
|
||||
* to active: this could race with new readlock requests, so we
|
||||
* have to spin.
|
||||
*/
|
||||
new.u.v = old.u.v;
|
||||
if (++new.u.s.current == new.u.s.reader) {
|
||||
new.u.s.readers_active = new.u.s.readers_queued;
|
||||
new.u.s.readers_queued = 0;
|
||||
}
|
||||
} while (!__wt_atomic_casv64(&l->u.v, old.u.v, new.u.v));
|
||||
|
||||
new = *l;
|
||||
|
||||
/*
|
||||
* We're the only writer of the writers/readers fields, so the update
|
||||
* does not need to be atomic; we have to update both values at the
|
||||
* same time though, otherwise we'd potentially race with the thread
|
||||
* next granted the lock.
|
||||
*/
|
||||
++new.s.writers;
|
||||
++new.s.readers;
|
||||
l->i.wr = new.i.wr;
|
||||
if (new.u.s.readers_active != 0)
|
||||
__wt_cond_signal(session, l->cond_readers);
|
||||
else if (new.u.s.current != new.u.s.next)
|
||||
__wt_cond_signal(session, l->cond_writers);
|
||||
|
||||
WT_DIAGNOSTIC_YIELD;
|
||||
}
|
||||
@@ -363,8 +444,11 @@ __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
bool
|
||||
__wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l)
|
||||
{
|
||||
WT_RWLOCK old;
|
||||
|
||||
WT_UNUSED(session);
|
||||
|
||||
return (l->s.writers != l->s.next || l->s.readers != l->s.next);
|
||||
old.u.v = l->u.v;
|
||||
return (old.u.s.current != old.u.s.next || old.u.s.readers_active != 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -257,7 +257,7 @@ __wt_thread_group_create(
|
||||
__wt_verbose(session, WT_VERB_THREAD_GROUP,
|
||||
"Creating thread group: %p", (void *)group);
|
||||
|
||||
__wt_rwlock_init(session, &group->lock);
|
||||
WT_RET(__wt_rwlock_init(session, &group->lock));
|
||||
WT_ERR(__wt_cond_alloc(
|
||||
session, "thread group cond", &group->wait_cond));
|
||||
cond_alloced = true;
|
||||
@@ -272,7 +272,7 @@ __wt_thread_group_create(
|
||||
/* Cleanup on error to avoid leaking resources */
|
||||
err: if (ret != 0) {
|
||||
if (cond_alloced)
|
||||
WT_TRET(__wt_cond_destroy(session, &group->wait_cond));
|
||||
__wt_cond_destroy(session, &group->wait_cond);
|
||||
__wt_rwlock_destroy(session, &group->lock);
|
||||
}
|
||||
return (ret);
|
||||
@@ -297,7 +297,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
|
||||
|
||||
__wt_free(session, group->threads);
|
||||
|
||||
WT_TRET(__wt_cond_destroy(session, &group->wait_cond));
|
||||
__wt_cond_destroy(session, &group->wait_cond);
|
||||
__wt_rwlock_destroy(session, &group->lock);
|
||||
|
||||
/*
|
||||
|
||||
@@ -126,7 +126,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
|
||||
n = 0;
|
||||
|
||||
/* We're going to scan the table: wait for the lock. */
|
||||
__wt_readlock_spin(session, &txn_global->scan_rwlock);
|
||||
__wt_readlock(session, &txn_global->scan_rwlock);
|
||||
|
||||
current_id = pinned_id = txn_global->current;
|
||||
prev_oldest_id = txn_global->oldest_id;
|
||||
@@ -293,7 +293,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
|
||||
|
||||
/* First do a read-only scan. */
|
||||
if (wait)
|
||||
__wt_readlock_spin(session, &txn_global->scan_rwlock);
|
||||
__wt_readlock(session, &txn_global->scan_rwlock);
|
||||
else if ((ret =
|
||||
__wt_try_readlock(session, &txn_global->scan_rwlock)) != 0)
|
||||
return (ret == EBUSY ? 0 : ret);
|
||||
@@ -768,8 +768,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
|
||||
WT_RET(__wt_spin_init(session,
|
||||
&txn_global->id_lock, "transaction id lock"));
|
||||
__wt_rwlock_init(session, &txn_global->scan_rwlock);
|
||||
__wt_rwlock_init(session, &txn_global->nsnap_rwlock);
|
||||
WT_RET(__wt_rwlock_init(session, &txn_global->scan_rwlock));
|
||||
WT_RET(__wt_rwlock_init(session, &txn_global->nsnap_rwlock));
|
||||
txn_global->nsnap_oldest_id = WT_TXN_NONE;
|
||||
TAILQ_INIT(&txn_global->nsnaph);
|
||||
|
||||
|
||||
@@ -57,6 +57,9 @@ noinst_PROGRAMS += test_wt3135_search_near_collator
|
||||
test_wt3184_dup_index_collator_SOURCES = wt3184_dup_index_collator/main.c
|
||||
noinst_PROGRAMS += test_wt3184_dup_index_collator
|
||||
|
||||
test_rwlock_SOURCES = rwlock/main.c
|
||||
noinst_PROGRAMS += test_rwlock
|
||||
|
||||
# Run this during a "make check" smoke test.
|
||||
TESTS = $(noinst_PROGRAMS)
|
||||
LOG_COMPILER = $(TEST_WRAPPER)
|
||||
|
||||
184
test/csuite/rwlock/main.c
Normal file
184
test/csuite/rwlock/main.c
Normal file
@@ -0,0 +1,184 @@
|
||||
/*-
|
||||
* Public Domain 2014-2017 MongoDB, Inc.
|
||||
* Public Domain 2008-2014 WiredTiger, Inc.
|
||||
*
|
||||
* This is free and unencumbered software released into the public domain.
|
||||
*
|
||||
* Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
* distribute this software, either in source code form or as a compiled
|
||||
* binary, for any purpose, commercial or non-commercial, and by any
|
||||
* means.
|
||||
*
|
||||
* In jurisdictions that recognize copyright laws, the author or authors
|
||||
* of this software dedicate any and all copyright interest in the
|
||||
* software to the public domain. We make this dedication for the benefit
|
||||
* of the public at large and to the detriment of our heirs and
|
||||
* successors. We intend this dedication to be an overt act of
|
||||
* relinquishment in perpetuity of all present and future rights to this
|
||||
* software under copyright law.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include "test_util.h"
|
||||
|
||||
/*
|
||||
* JIRA ticket reference: HELP-4355
|
||||
* Test rwlock collapse under load.
|
||||
*/
|
||||
#define MAX_THREADS 1000
|
||||
#define READS_PER_WRITE 10000
|
||||
//#define READS_PER_WRITE 1000000
|
||||
//#define READS_PER_WRITE 100
|
||||
|
||||
#define CHECK_CORRECTNESS 1
|
||||
//#define USE_POSIX 1
|
||||
|
||||
static WT_RWLOCK rwlock;
|
||||
static pthread_rwlock_t p_rwlock;
|
||||
static bool running;
|
||||
static uint64_t shared_counter;
|
||||
|
||||
void *thread_rwlock(void *);
|
||||
void *thread_dump(void *);
|
||||
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
TEST_OPTS *opts, _opts;
|
||||
struct timespec te, ts;
|
||||
pthread_t dump_id, id[MAX_THREADS];
|
||||
int i;
|
||||
|
||||
if (!testutil_enable_long_tests()) /* Ignore unless requested */
|
||||
return (EXIT_SUCCESS);
|
||||
|
||||
opts = &_opts;
|
||||
memset(opts, 0, sizeof(*opts));
|
||||
opts->nthreads = 100;
|
||||
opts->nops = 1000000; /* per thread */
|
||||
testutil_check(testutil_parse_opts(argc, argv, opts));
|
||||
running = true;
|
||||
|
||||
testutil_make_work_dir(opts->home);
|
||||
testutil_check(wiredtiger_open(opts->home, NULL,
|
||||
"create,session_max=1000,statistics=(fast)", &opts->conn));
|
||||
|
||||
testutil_check(__wt_rwlock_init(NULL, &rwlock));
|
||||
testutil_check(pthread_rwlock_init(&p_rwlock, NULL));
|
||||
|
||||
testutil_check(pthread_create(
|
||||
&dump_id, NULL, thread_dump, (void *)opts));
|
||||
|
||||
__wt_epoch(NULL, &ts);
|
||||
for (i = 0; i < (int)opts->nthreads; ++i)
|
||||
testutil_check(pthread_create(
|
||||
&id[i], NULL, thread_rwlock, (void *)opts));
|
||||
|
||||
while (--i >= 0)
|
||||
testutil_check(pthread_join(id[i], NULL));
|
||||
__wt_epoch(NULL, &te);
|
||||
printf("%.2lf\n", WT_TIMEDIFF_MS(te, ts) / 1000.0);
|
||||
|
||||
running = false;
|
||||
testutil_check(pthread_join(dump_id, NULL));
|
||||
|
||||
testutil_check(pthread_rwlock_destroy(&p_rwlock));
|
||||
testutil_cleanup(opts);
|
||||
return (EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Acquire a rwlock, every Nth operation, acquire exclusive.
|
||||
*/
|
||||
void *
|
||||
thread_rwlock(void *arg)
|
||||
{
|
||||
TEST_OPTS *opts;
|
||||
WT_SESSION *wt_session;
|
||||
WT_SESSION_IMPL *session;
|
||||
uint64_t i, counter;
|
||||
bool writelock;
|
||||
|
||||
opts = (TEST_OPTS *)arg;
|
||||
testutil_check(
|
||||
opts->conn->open_session(opts->conn, NULL, NULL, &wt_session));
|
||||
session = (WT_SESSION_IMPL *)wt_session;
|
||||
|
||||
printf("Running rwlock thread\n");
|
||||
for (i = 1; i <= opts->nops; ++i) {
|
||||
writelock = (i % READS_PER_WRITE == 0);
|
||||
|
||||
#ifdef USE_POSIX
|
||||
if (writelock)
|
||||
testutil_check(pthread_rwlock_wrlock(&p_rwlock));
|
||||
else
|
||||
testutil_check(pthread_rwlock_rdlock(&p_rwlock));
|
||||
#else
|
||||
if (writelock)
|
||||
__wt_writelock(session, &rwlock);
|
||||
else
|
||||
__wt_readlock(session, &rwlock);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Do a tiny amount of work inside the lock so the compiler
|
||||
* can't optimize everything away.
|
||||
*/
|
||||
(void)__wt_atomic_add64(&counter, 1);
|
||||
|
||||
#ifdef CHECK_CORRECTNESS
|
||||
if (writelock)
|
||||
counter = ++shared_counter;
|
||||
else
|
||||
counter = shared_counter;
|
||||
|
||||
__wt_yield();
|
||||
|
||||
testutil_assert(counter == shared_counter);
|
||||
#endif
|
||||
|
||||
#ifdef USE_POSIX
|
||||
testutil_check(pthread_rwlock_unlock(&p_rwlock));
|
||||
#else
|
||||
if (writelock)
|
||||
__wt_writeunlock(session, &rwlock);
|
||||
else
|
||||
__wt_readunlock(session, &rwlock);
|
||||
#endif
|
||||
|
||||
if (i % 10000 == 0) {
|
||||
printf("%s", session->id == 20 ? ".\n" : ".");
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
opts->running = false;
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
void *
|
||||
thread_dump(void *arg) {
|
||||
WT_UNUSED(arg);
|
||||
|
||||
while (running) {
|
||||
sleep(1);
|
||||
printf("\n"
|
||||
"rwlock { current %" PRIu8 ", next %" PRIu8
|
||||
", reader %" PRIu8 ", readers_active %" PRIu16
|
||||
", readers_queued %" PRIu16 " }\n",
|
||||
rwlock.u.s.current,
|
||||
rwlock.u.s.next,
|
||||
rwlock.u.s.reader,
|
||||
rwlock.u.s.readers_active,
|
||||
rwlock.u.s.readers_queued);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
Reference in New Issue
Block a user