Compare commits
108 Commits
mongodb-3.
...
2.7.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
025aacb645 | ||
|
|
15c6015019 | ||
|
|
67a0f8dc87 | ||
|
|
57c3e84ce6 | ||
|
|
59864bee3b | ||
|
|
04e0c89bea | ||
|
|
4c49043f5c | ||
|
|
f1b011ab23 | ||
|
|
dc38ce52ec | ||
|
|
b3c1d5ce3f | ||
|
|
656a1b7578 | ||
|
|
ebe74287d9 | ||
|
|
d4717fbc6e | ||
|
|
0a24dc9321 | ||
|
|
b12f6dfb13 | ||
|
|
3ff5b3d4e3 | ||
|
|
debe1b23f3 | ||
|
|
2d27b44ad1 | ||
|
|
b2517d049d | ||
|
|
c48b42748c | ||
|
|
8d27ecbc5c | ||
|
|
7cc881e987 | ||
|
|
d5092c4dd0 | ||
|
|
7b1d722bd7 | ||
|
|
9b46f3f8e4 | ||
|
|
76349d6cd8 | ||
|
|
f36b1a7aa5 | ||
|
|
e53e2795e1 | ||
|
|
2df5658da3 | ||
|
|
884c8e114c | ||
|
|
7f5b70d1d7 | ||
|
|
fc6db4dccc | ||
|
|
c1013cee06 | ||
|
|
fb7fc2f35e | ||
|
|
d04d2ba924 | ||
|
|
be3bc4918a | ||
|
|
04a09e7795 | ||
|
|
bff6525c83 | ||
|
|
97b549e75f | ||
|
|
c70b0973e1 | ||
|
|
5fe8c70e33 | ||
|
|
745eb56977 | ||
|
|
6c82703fd0 | ||
|
|
4fc3e3982e | ||
|
|
5ebfd92119 | ||
|
|
fa28552449 | ||
|
|
8cf3e9bbaf | ||
|
|
7a1050dbdd | ||
|
|
f6a2db06a9 | ||
|
|
479818af02 | ||
|
|
f1a93162f2 | ||
|
|
e731ef8ab8 | ||
|
|
e0f7961e07 | ||
|
|
81b1d09a0f | ||
|
|
f192c3903f | ||
|
|
0e93d60d0d | ||
|
|
e2a91fe5f6 | ||
|
|
cf62c714ce | ||
|
|
8cb4ecce11 | ||
|
|
264ec216ef | ||
|
|
7880ced1b7 | ||
|
|
dca1411e73 | ||
|
|
7c66f601b4 | ||
|
|
f721883c06 | ||
|
|
4415f79afe | ||
|
|
f2fa6b9283 | ||
|
|
4c49948727 | ||
|
|
8e9cc8d32c | ||
|
|
9f2e4f395e | ||
|
|
6d7d76f65d | ||
|
|
abb07da300 | ||
|
|
37655b96df | ||
|
|
494accec89 | ||
|
|
29aea6835a | ||
|
|
2dcf7b18d5 | ||
|
|
2e3a02cc52 | ||
|
|
05a3b8f3d8 | ||
|
|
6c65c86bb9 | ||
|
|
4aa7ba0d85 | ||
|
|
770ccf05c4 | ||
|
|
a6da10e9fe | ||
|
|
5dd8d4dc2e | ||
|
|
4d7c9cef69 | ||
|
|
bac122021c | ||
|
|
a5b4ace6e5 | ||
|
|
6995565214 | ||
|
|
39dfd21030 | ||
|
|
ad2500b3d8 | ||
|
|
982c5862fb | ||
|
|
8fe7bb1ece | ||
|
|
4e1844c6a2 | ||
|
|
cace179242 | ||
|
|
3b70b692f0 | ||
|
|
3f306ce74f | ||
|
|
890ee34474 | ||
|
|
9ecf70c9c0 | ||
|
|
6c7338f2e6 | ||
|
|
354c0314cd | ||
|
|
978c237f01 | ||
|
|
bc1301ad7b | ||
|
|
1e094eeee8 | ||
|
|
5a51b154c6 | ||
|
|
d55a5b1a03 | ||
|
|
c9907c6289 | ||
|
|
714ae53068 | ||
|
|
d66e84a16a | ||
|
|
c3d02dc409 | ||
|
|
801fcc687c |
231
NEWS
231
NEWS
@@ -1,3 +1,228 @@
|
||||
WiredTiger release 2.7.0, 2015-12-08
|
||||
------------------------------------
|
||||
|
||||
The WiredTiger 2.7.0 release contains new features, minor API changes and bug
|
||||
fixes.
|
||||
|
||||
New features and API changes; refer to the API documentation for full details:
|
||||
|
||||
* 959376c WT-147: Create indexes on non-empty tables.
|
||||
* 4368d39 WT-1315: Add an implementation of cursor joins via a new WT_SESSION::join API.
|
||||
* 944ccd1 WT-1350: Add a new configuration option to ::wiredtiger_open and
|
||||
WT_CONNECTION::reconfigure called "eviction_dirty_trigger" that causes eviction to start evicting
|
||||
dirty pages from cache once the given threshold has been reached.
|
||||
* ab5a8fb WT-1728: Add a WT_SESSION::reset method to release resources held by a session.
|
||||
* 263c5b7 WT-1930: Allow setting "file_manager=(close_idle_time=0)" to ::wiredtiger_open and
|
||||
WT_CONNECTION::reconfigure to disable closing idle handles.
|
||||
* 6310c3f WT-1959: Change verify to distinguish between warnings and errors. Add a new strict mode
|
||||
to verify that causes warnings to be reported as errors. Use strict mode to match earlier
|
||||
behavior. See the upgrading documentation for more information.
|
||||
* e0d6229 WT-1980: Add a new "metadata:create" URI to WT_SESSION::open_cursor for metadata cursors
|
||||
that return strings useful for passing to WT_SESSION::create.
|
||||
* 292712e WT-2065: Add a new configuration option to ::wiredtiger_open and
|
||||
WT_CONNECTION::reconfigure called "shared_cache=(quota)" that limits the amount of shared cache a
|
||||
participant can be assigned.
|
||||
* 4d0ebf4 WT-2104: Add a method to flush log files via a new WT_SESSION::log_flush API. Made
|
||||
WT_SESSION::commit_transaction configuration options match WT_SESSION::log_flush. Change the
|
||||
default WT_SESSION::transaction_sync timeout to 20 minutes rather than infinity.
|
||||
* 21b8330 WT-2151: Enhance logging configuration to allow reconfiguration and add a new
|
||||
"log=(zero_fill)" configuration option that causes WiredTiger to zero-fill log files on creation.
|
||||
* 368b307 WT-2200: Add a new configuration option to ::wiredtiger_open called "write_through" that
|
||||
causes WiredTiger to specify the FILE_FLAG_WRITE_THROUGH on Windows when writing files (default
|
||||
false, including when "direct_io" is configured).
|
||||
* 08c0fcd WT-2217: After a successful call to WT_CURSOR::insert, the key and value will be
|
||||
cleared from the cursor. See the upgrading documentation for more information.
|
||||
* d4fc69a SERVER-17078: Add a "statistics=(size)" mode to statistics cursors, which allows for
|
||||
retrieving file size only.
|
||||
* b83b901 SERVER-18356: Changed the handling of the "config_base" option to ::wiredtiger_open. See
|
||||
upgrading documentation for more information.
|
||||
|
||||
|
||||
The following statistics were removed:
|
||||
|
||||
* f1ed3b9 WT-1481: connection dhandles swept.
|
||||
* f1ed3b9 WT-1481: connection candidate referenced.
|
||||
* 4ba4518 WT-1481: failed to find a slot large enough for record.
|
||||
* 28563af WT-1989: log buffer size increases.
|
||||
* f81c70d WT-1989: slots selected for switching that were unavailable.
|
||||
* df4f69c WT-2094: log records written directly.
|
||||
* df4f69c WT-2094: record size exceeded maximum.
|
||||
* d68e078 WT-2182: pages split during eviction.
|
||||
|
||||
Lookaside table:
|
||||
|
||||
* 6a5a461 WT-1967: Allow eviction of updates required by old readers.
|
||||
* 87592ec WT-2074: Fix a race between lookaside table reconciliation and checkpoints.
|
||||
* 0390b29 WT-2149: Fix the order of creation of the lookaside table.
|
||||
* 7518a69 WT-2190: Fix transaction visibility test that is applied to the lookaside table.
|
||||
* 2cf57a6 SERVER-21585: Don't use the lookaside file until the cache is stuck full.
|
||||
|
||||
Issues fixed in MongoDB:
|
||||
|
||||
* d57dc26 SERVER-18829: Have pages start in the middle of the LRU queue for eviction.
|
||||
* b847ccc SERVER-18838: During drops, don't remove files until the metadata is durable.
|
||||
* 8f7da9a SERVER-18875: Clean up deleted pages.
|
||||
* d04083d SERVER-18899: Add unit test to simulate fsyncLock.
|
||||
* 3ec45a7 SERVER-19340: Avoid type aliasing in the random number generator.
|
||||
* 907c0ca SERVER-19445: Have the oldest transaction update the oldest tracked ID.
|
||||
* fb8739f SERVER-19522: Try to evict internal pages with no useful child pages.
|
||||
* 4545a8b SERVER-19573: Change row-store inserts to avoid page locking.
|
||||
* b52d2d3 SERVER-19751: Retry pthread_create on EAGAIN or EINTR.
|
||||
* 46b4ad5 SERVER-19954: Don't scan tracked handles during checkpoints.
|
||||
* 65abd20 SERVER-19989: Add a write barrier before data handles are added to shared lists.
|
||||
* 3e46e79 SERVER-19990: Don't assert on eviction of live updates from dead trees.
|
||||
* 38dad39 SERVER-20008: Don't reset eviction walks when hitting a busy page.
|
||||
* 3b72361 SERVER-20159: Make all readers wait while the cache is full.
|
||||
* 8be547b SERVER-20193: Fix obsolete transaction check.
|
||||
* ad56c6a SERVER-20303: Tune in-memory splits when inserting large objects.
|
||||
* 7505a02 SERVER-20385: Make WT_CURSOR::next(random) more random.
|
||||
* 35d46c3 SERVER-21027: Reverse split if there are many deleted pages.
|
||||
* a6da10e SERVER-21553: Enable fast-path truncate after splits.
|
||||
* 890ee34 SERVER-21619: Don't do internal page splits after a tree is marked DEAD.
|
||||
* 0e93d60 SERVER-21691: Avoid insert stalls.
|
||||
|
||||
Other noteworthy changes since the previous release:
|
||||
|
||||
* bc2aa57 WT-1744: Throttle worker threads based on eviction targets.
|
||||
* 55a989e WT-1845: Allow read only transactions to commit after failure.
|
||||
* df625dc WT-1869: Avoid doing in memory splits while checkpointing a tree.
|
||||
* ddac54f WT-1942: Add atomic implementations for PPC64 architecture.
|
||||
* 3866fa6 WT-1962: Make the hot_backup_lock a read/write lock.
|
||||
* 58f9e99 WT-1963: Fix backup cursor Java API.
|
||||
* 4e0fe59 WT-1964: Fix a bug in the Java API when closing handles from a different thread.
|
||||
* 60e2150 WT-1966: Change how the shared cache assigns priority to participants.
|
||||
* 76d2e73 WT-1975: Ensure previous log files are complete for forced sync.
|
||||
* e43b22a WT-1977: Improve performance of getting snapshots with many sessions.
|
||||
* 5eaf63e WT-1978: Better checking and tests for index cursor comparison.
|
||||
* 1602a4b WT-1981: Fix a signed 32-bit integer unpacking bug.
|
||||
* cd1704d WT-1982: Fix a bug where cached overflow items were freed too early.
|
||||
* 57a9f38 WT-1985: Integer packing and other fixes for Python and Java.
|
||||
* 9897eb2 WT-1986: Fix a race renaming temporary log files.
|
||||
* b10bff9 WT-1989: Improve scalability of log writes.
|
||||
* f8dc12b WT-1996: Fix a bug where we would free the first update during a page rewrite on error.
|
||||
* 144a383 WT-1998: Fixes for indexes with some rarely used key/value formats.
|
||||
* 8af8b8a WT-2002: Fix a bug in verify where it would panic when encountering a corrupted file.
|
||||
* e1d8bc7 WT-2007: Statically allocate log slot buffers to a maximum size.
|
||||
* 911158c WT-2008: Fix a bug in recovery where a file create went missing.
|
||||
* 3e2e7e6 WT-2009: Apply tracked metadata operations post-commit.
|
||||
* 1255cb2 WT-2012: Fix a bug updating the oldest ID.
|
||||
* ef9d56f WT-2013: Add gcc asm definitions for ARM64.
|
||||
* c8633e6 WT-2014: Fix a bug in checkpoints where files could be flushed in the wrong order.
|
||||
* 9b09e69 WT-2015: Fix a bug in error handling during block open.
|
||||
* 4938b8d WT-2017: Once an eviction server thread is started keep it running.
|
||||
* 298f86c WT-2019: Fix a logic bug tracking the maximum transaction ID in clean trees.
|
||||
* 7d6075c WT-2020: Clarify checksum error failure messages.
|
||||
* 7b302d3 WT-2021: Fix a bug moving the oldest ID forward (introduced by WT-1967).
|
||||
* 9df72d7 WT-2022: Fix a bug not releasing a handle when opening a non-existent index cursor.
|
||||
* 81ffc2d WT-2023: Improve locking primitives: simplify read-write lock operations.
|
||||
* 6b84722 WT-2029: Improve scalability of statistics.
|
||||
* f97cfe9 WT-2031: Log slot revamp.
|
||||
* bee11c3 WT-2032: Improve next_random cursors to work with small trees.
|
||||
* cf53696 WT-2034: Improve shared cache balancing algorithm.
|
||||
* aee1c94 WT-2035: For index cursors, keep track of which column groups need to be positioned.
|
||||
* 36310d4 WT-2036: Make handle sweeps more robust.
|
||||
* c948fbb WT-2037: Only write a checkpoint to the log on close if it wasn't.
|
||||
* e25e615 WT-2038: Avoid long scans holding the handle list lock.
|
||||
* 75a4655 WT-2039: Add error check and unit test for log records over 4 GB.
|
||||
* 5ab26af WT-2042: Only try to evict tombstones that are visible to all readers.
|
||||
* ce223ac WT-2045: Don't let the eviction server do slow reconciliation, it can stall eviction.
|
||||
* 6665618 WT-2046: Add a statistic for search restarts.
|
||||
* 98b4a28 WT-2047: Fix a bug in the random generator code to handle an uninitialized state.
|
||||
* 258e2e1 WT-2050: Show size with memory allocation errors.
|
||||
* 2e1471c WT-2053: Fix a bug in disk verify messages.
|
||||
* e316e61 WT-2056: Reorder btree cursor close so stats are maintained correctly.
|
||||
* 70f9100 WT-2057: Remove the verbose configuration when writing the base configuration file.
|
||||
* 41b6fb8 WT-2058: Fix an alignment bug in the mutex and log-slot code.
|
||||
* d72012b WT-2059: Include non-aggregated stats in cursor results.
|
||||
* 3e0c7bf WT-2062: Try harder to make progress on in-memory splits.
|
||||
* 66757f7 WT-2064: Don't spin indefinitely waiting for the handle list lock in eviction.
|
||||
* 8f42f02 WT-2066: Update the oldest transaction ID from eviction.
|
||||
* e167592 WT-2068: Protect discarding handles with the handle list lock.
|
||||
* fd72a09 WT-2075: Fix a hang in logging with parallel workload.
|
||||
* 11c0fa0 WT-2078: Fix a bug in error handling with statistics cursors.
|
||||
* 9734d85 WT-2081: Make verify progress reporting less verbose.
|
||||
* 6008b41 WT-2085: Run some of the log_server thread's operations more frequently.
|
||||
* 39a69ec WT-2086: Add a statistic to track when eviction finds a page that can be split.
|
||||
* 334e103 WT-2089: Relax restrictions on multiblock eviction and in-memory splits.
|
||||
* f13b788 WT-2090: Fix a bug in the Windows OS layer that swallowed error returns.
|
||||
* 83b8db7 WT-2092: Free log condition variables after all threads are joined.
|
||||
* d9391c0 WT-2093: Use the C99 bool type to clarify when functions return true/false.
|
||||
* f883d27 WT-2094: Eliminate direct write and record unbuffered log records.
|
||||
* 9008260 WT-2097: Reintroduce immediate waits when forced eviction is necessary.
|
||||
* ff1da28 WT-2100: Rename evict to evict_queue so it's easier to search for.
|
||||
* 41db2ee WT-2101: Don't update the logging ckpt_lsn on clean shutdown.
|
||||
* e1d6886 WT-2102: Fix a hang in log slot join when forcing log writes.
|
||||
* 0e96683 WT-2105: Fix a bug where we could reference an invalid memory address if a file is
|
||||
corrupted on disk.
|
||||
* 6a565bc WT-2108: Rework in-memory page rewrite support (WT_PM_REC_REWRITE).
|
||||
* dcb0ddb WT-2114: Make application eviction fairer.
|
||||
* 10c2f15 WT-2115: Don't skip truncated pages that are part of a checkpoint.
|
||||
* cd6ce97 WT-2116: Add diagnostic checks for stuck cache and dump the state.
|
||||
* 51cf672 WT-2119: Don't evict clean multiblock pages with overflow items during checkpoints.
|
||||
* 346ad40 WT-2126: Clean up if there is an error during splits.
|
||||
* 6831485 WT-2127: Deepen the tree more regularly to avoid wide internal pages.
|
||||
* a0b5d2b WT-2128: When decoding huffman encoding during salvage it's possible to have fewer bits
|
||||
than the symbol length during decoding, if the value has been corrupted.
|
||||
* 79f74e5 WT-2131: Switch to using a lock to control page splits to avoid starvation.
|
||||
* 02a3d9f WT-2132: Make debug dump function more robust to errors.
|
||||
* 8c223e4 WT-2134: Flush all buffered log records in log_flush.
|
||||
* d1b5e7f WT-2135: Fix log_only setting for backup cursor. Fix initialization.
|
||||
* aab8101 WT-2137: Check the sync_lsn is in the correct file before moving it forward.
|
||||
* 323af84 WT-2139: Fix a transaction visibility bug in read-uncommitted transactions.
|
||||
* 751c628 WT-2146: Improve performance when searching for short keys.
|
||||
* 62998ce WT-2148: Fix a compiler warning in encoding functions.
|
||||
* 6c16fdd WT-2153: Fix bug. Now we always need to start the log_server thread.
|
||||
* 6a5fca3 WT-2154: Make btree dump safer.
|
||||
* 0d74bc6 WT-2155: Remove last use of F_CAS_ATOMIC and the associated macro.
|
||||
* cc42bda WT-2156: Allow eviction workers to restart.
|
||||
* bf1d359 WT-2157: Fix a bug where a failed page split could lead to incomplete checkpoints.
|
||||
* ce9d265 WT-2159: Don't check the config twice in one path.
|
||||
* 544f27d WT-2162: Add null pointer check, needed after an index is dropped.
|
||||
* 0d85ebe WT-2164: Prevent another LSM chunk checkpoint while the first is still in progress.
|
||||
* a81aae8 WT-2165: Stop using FALLOC_FL_KEEP_SIZE flag when pre-allocating files.
|
||||
* 2865a76 WT-2167: Switch recovery to using an internal session.
|
||||
* 5d4c952 WT-2170: Protect the turtle file with a lock.
|
||||
* 497b744 WT-2174: Avoid the table list lock when creating a size only statistics cursor.
|
||||
* fdfa804 WT-2178: In-memory storage engine support.
|
||||
* b9bd01f WT-2179: Added decorator to mark txn13 as part of the --long test suite.
|
||||
* be544dd WT-2180: Remove cursor.{search,search-near,remove} key size validation.
|
||||
* be412b5 WT-2182: When internal pages grow large enough, split them into their parents.
|
||||
* c27e78e WT-2184: Fix log scan bug when final record has many trailing zeros.
|
||||
* 9584be3 WT-2185: Don't do reverse splits when closing a file.
|
||||
* f6b12d3 WT-2187: Add flag for flushing a slot.
|
||||
* a4545bf WT-2189: Update flag set and clear macros to be less error prone.
|
||||
* 30ab327 WT-2191: In-memory disk image no longer the same as saved updates.
|
||||
* 4ba5698 WT-2192: Fix the logic around checking whether internal page is evictable.
|
||||
* 2f0b3e2 WT-2193: Handle read-committed metadata checkpoints during snapshot transactions.
|
||||
* 9b1febc WT-2194: Java close callbacks should handle cursors that Java code did not open.
|
||||
* 438f455 WT-2195: Fix a hang after giving up on a reverse split.
|
||||
* ff27fe9 WT-2196: Fix error handling in size only statistics.
|
||||
* 0a1ee34 WT-2199: Fix transaction sync inconsistency.
|
||||
* 2ff1fd6 WT-2203: Release an allocated page on error.
|
||||
* 3b3cf2a WT-2204: Don't take a local copy of page->modify until we know the page is dirty.
|
||||
* 179d4d0 WT-2206: Change cache operations from flags to an enumeration.
|
||||
* 82514ca WT-2207: Track whenever a session has a handle exclusive.
|
||||
* 78bd4ac WT-2210: Raw compression fails if row-store recovery precedes column-store recovery.
|
||||
* c360d53 WT-2212: Add a "use_environment" config to ::wiredtiger_open.
|
||||
* a72ddb7 WT-2218: Add truncate stats.
|
||||
* ce8c091 WT-2219: Enhancements to in-memory testing.
|
||||
* e2f1130 WT-2220: Update time comparison macros.
|
||||
* 59857f9 WT-2222: Add statistics for named snapshots.
|
||||
* fb9cebe WT-2224: Track which deleted refs are discarded by a split.
|
||||
* cace179 WT-2228: Avoid unnecessary raw-compression calls.
|
||||
* 0a52a80 WT-2237: Have threads publish unique transaction IDs so that updates always become
|
||||
visible immediately on commit.
|
||||
* 6c7338f WT-2241: Use a lock to protect transaction ID allocation.
|
||||
* 39dfd21 WT-2243: Don't keep transaction IDs pinned for reading from checkpoints.
|
||||
* 4c49948 WT-2244: Trigger in-memory splits sooner.
|
||||
* 9f2e4f3 WT-2248: WT_SESSION::close is updating WT_CONNECTION_IMPL.default_session.
|
||||
* 264ec21 WT-2249: Keep eviction stuck until cache usage is under 100%.
|
||||
* dca1411 WT-2250: Minor fix. Use SET instead of increment for stat.
|
||||
* e731ef8 WT-2251: Free addresses when we discard deleted page references.
|
||||
* 4fc3e39 WT-2253: Evict pages left behind by in-memory splits.
|
||||
* 2df5658 WT-2257: Fixes when given multiple thread workload configurations.
|
||||
* 4c49043 WT-2260: Avoid adding internal pages to the eviction queue.
|
||||
|
||||
WiredTiger release 2.6.1, 2015-05-13
|
||||
------------------------------------
|
||||
|
||||
@@ -255,7 +480,7 @@ API and behavior changes:
|
||||
* Update configuration string parsing to always be case sensitive. See
|
||||
upgrading documentation for more information.
|
||||
|
||||
* Change the statistics cursor WT_CURSOR.reset method to re-load statistics
|
||||
* Change the statistics cursor WT_CURSOR::reset method to re-load statistics
|
||||
values. See upgrading documentation for more information.
|
||||
refs WT-1533
|
||||
|
||||
@@ -465,7 +690,7 @@ New features and API changes:
|
||||
See API documentation for more information.
|
||||
refs #1381
|
||||
|
||||
* Add a new WT_SESSION.strerror method, a thread-safe alternative to
|
||||
* Add a new WT_SESSION::strerror method, a thread-safe alternative to
|
||||
::wiredtiger_strerror.
|
||||
refs #1516
|
||||
|
||||
@@ -1271,7 +1496,7 @@ This is primarily a bugfix and performance tuning release. The main changes are:
|
||||
* The default behavior of the wt utility's load command has been changed to
|
||||
overwrite existing data.
|
||||
|
||||
* Add a WT_SESSION.create prefix_compression_min configuration option with a
|
||||
* Add a WT_SESSION::create prefix_compression_min configuration option with a
|
||||
default value of 4. [#624] and [#624]
|
||||
|
||||
* Fix "make install" of Python API. [#598]
|
||||
|
||||
2
README
2
README
@@ -1,4 +1,4 @@
|
||||
WiredTiger 2.7.0: (November 19, 2015)
|
||||
WiredTiger 2.7.0: (December 8, 2015)
|
||||
|
||||
This is version 2.7.0 of WiredTiger.
|
||||
|
||||
|
||||
@@ -46,7 +46,6 @@ static void config_opt_usage(void);
|
||||
#define STRING_MATCH(str, bytes, len) \
|
||||
(strncmp(str, bytes, len) == 0 && (str)[(len)] == '\0')
|
||||
|
||||
|
||||
/*
|
||||
* config_assign --
|
||||
* Assign the src config to the dest, any storage allocated in dest is
|
||||
@@ -181,6 +180,16 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
|
||||
int ret;
|
||||
|
||||
group = scan = NULL;
|
||||
if (cfg->workload != NULL) {
|
||||
/*
|
||||
* This call overrides an earlier call. Free and
|
||||
* reset everything.
|
||||
*/
|
||||
free(cfg->workload);
|
||||
cfg->workload = NULL;
|
||||
cfg->workload_cnt = 0;
|
||||
cfg->workers_cnt = 0;
|
||||
}
|
||||
/* Allocate the workload array. */
|
||||
if ((cfg->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD))) == NULL)
|
||||
return (enomem(cfg));
|
||||
@@ -201,7 +210,7 @@ config_threads(CONFIG *cfg, const char *config, size_t len)
|
||||
if ((ret = wiredtiger_config_parser_open(
|
||||
NULL, groupk.str, groupk.len, &scan)) != 0)
|
||||
goto err;
|
||||
|
||||
|
||||
/* Move to the next workload slot. */
|
||||
if (cfg->workload_cnt == WORKLOAD_MAX) {
|
||||
fprintf(stderr,
|
||||
@@ -308,7 +317,7 @@ err: if (group != NULL)
|
||||
(void)group->close(group);
|
||||
if (scan != NULL)
|
||||
(void)scan->close(scan);
|
||||
|
||||
|
||||
fprintf(stderr,
|
||||
"invalid thread configuration or scan error: %.*s\n",
|
||||
(int)len, config);
|
||||
@@ -677,7 +686,7 @@ config_print(CONFIG *cfg)
|
||||
for (i = 0, workp = cfg->workload;
|
||||
i < cfg->workload_cnt; ++i, ++workp)
|
||||
printf("\t\t%" PRId64 " threads (inserts=%" PRId64
|
||||
", reads=%" PRId64 ", updates=%" PRId64
|
||||
", reads=%" PRId64 ", updates=%" PRId64
|
||||
", truncates=% " PRId64 ")\n",
|
||||
workp->threads,
|
||||
workp->insert, workp->read,
|
||||
|
||||
@@ -8,4 +8,4 @@ run_time=500
|
||||
populate_threads=1
|
||||
# Setup three threads to insert into the oplog
|
||||
# Setup one thread to be doing truncates from the oplog
|
||||
threads=((count=3,inserts=1,throttle=2000),(count=1,truncate=1,truncate_pct=10,truncate_count=50000))
|
||||
threads=((count=3,inserts=1,throttle=4000),(count=1,truncate=1,truncate_pct=10,truncate_count=50000))
|
||||
|
||||
@@ -12,16 +12,27 @@
|
||||
# This script should be invoked with the pathname of the wtperf test
|
||||
# config to run and the number of runs.
|
||||
#
|
||||
if test "$#" -ne "2"; then
|
||||
if test "$#" -lt "2"; then
|
||||
echo "Must specify wtperf test to run and number of runs"
|
||||
exit 1
|
||||
fi
|
||||
wttest=$1
|
||||
runmax=$2
|
||||
# Jenkins removes the quotes from the passed in arg so we may
|
||||
# have 3 or 4 args.
|
||||
wtarg=""
|
||||
wtarg2=""
|
||||
if test "$#" -gt "2"; then
|
||||
wtarg=$3
|
||||
if test "$#" -eq "4"; then
|
||||
wtarg2=$4
|
||||
fi
|
||||
fi
|
||||
|
||||
home=./WT_TEST
|
||||
outfile=./wtperf.out
|
||||
rm -f $outfile
|
||||
echo "Parsed $# args: test: $wttest runmax: $runmax args: $wtarg $wtarg2" >> $outfile
|
||||
|
||||
# Each of these has an entry for each op in ops below.
|
||||
avg=(0 0 0 0)
|
||||
@@ -77,7 +88,7 @@ run=1
|
||||
while test "$run" -le "$runmax"; do
|
||||
rm -rf $home
|
||||
mkdir $home
|
||||
LD_PRELOAD=/usr/lib64/libjemalloc.so.1 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./wtperf -O $wttest
|
||||
LD_PRELOAD=/usr/lib64/libjemalloc.so.1 LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib ./wtperf -O $wttest $wtarg $wtarg2
|
||||
if test "$?" -ne "0"; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1534,8 +1534,10 @@ execute_workload(CONFIG *cfg)
|
||||
lprintf(cfg, 0, 1,
|
||||
"Starting workload #%d: %" PRId64 " threads, inserts=%"
|
||||
PRId64 ", reads=%" PRId64 ", updates=%" PRId64
|
||||
", truncate=%" PRId64, i + 1, workp->threads, workp->insert,
|
||||
workp->read, workp->update, workp->truncate);
|
||||
", truncate=%" PRId64 ", throttle=%" PRId64,
|
||||
i + 1, workp->threads, workp->insert,
|
||||
workp->read, workp->update, workp->truncate,
|
||||
workp->throttle);
|
||||
|
||||
/* Figure out the workload's schedule. */
|
||||
if ((ret = run_mix_schedule(cfg, workp)) != 0)
|
||||
@@ -1906,7 +1908,7 @@ start_run(CONFIG *cfg)
|
||||
monitor_created = ret = 0;
|
||||
/* [-Wconditional-uninitialized] */
|
||||
memset(&monitor_thread, 0, sizeof(monitor_thread));
|
||||
|
||||
|
||||
if ((ret = setup_log_file(cfg)) != 0)
|
||||
goto err;
|
||||
|
||||
@@ -2427,6 +2429,11 @@ worker_throttle(int64_t throttle, int64_t *ops, struct timespec *interval)
|
||||
if (usecs_to_complete < USEC_PER_SEC)
|
||||
(void)usleep((useconds_t)(USEC_PER_SEC - usecs_to_complete));
|
||||
|
||||
/*
|
||||
* After sleeping, set the interval to the current time.
|
||||
*/
|
||||
if (__wt_epoch(NULL, &now) != 0)
|
||||
return;
|
||||
*ops = 0;
|
||||
*interval = now;
|
||||
}
|
||||
|
||||
@@ -116,6 +116,7 @@ struct __truncate_struct {
|
||||
uint64_t last_total_inserts;
|
||||
uint64_t num_stones;
|
||||
uint64_t last_key;
|
||||
uint64_t catchup_multiplier;
|
||||
};
|
||||
|
||||
/* Queue entry for use with the Truncate Logic */
|
||||
|
||||
@@ -164,8 +164,8 @@ DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' "
|
||||
"'update' entries are the ratios of insert, read and update operations "
|
||||
"done by each worker thread; If a throttle value is provided each thread "
|
||||
"will do a maximum of that number of operations per second; multiple "
|
||||
"workload configurations may be "
|
||||
"specified; for example, a more complex threads configuration might be "
|
||||
"workload configurations may be specified per threads configuration; "
|
||||
"for example, a more complex threads configuration might be "
|
||||
"'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' "
|
||||
"which would create 2 threads doing nothing but reads and 8 threads "
|
||||
"each doing 50% inserts and 25% reads and updates. Allowed configuration "
|
||||
|
||||
@@ -54,6 +54,12 @@ setup_truncate(CONFIG *cfg, CONFIG_THREAD *thread, WT_SESSION *session) {
|
||||
session, cfg->uris[0], NULL, NULL, &cursor)) != 0)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* If we find the workload getting behind we multiply the number of
|
||||
* records to be truncated.
|
||||
*/
|
||||
trunc_cfg->catchup_multiplier = 1;
|
||||
|
||||
/* How many entries between each stone. */
|
||||
trunc_cfg->stone_gap =
|
||||
(workload->truncate_count * workload->truncate_pct) / 100;
|
||||
@@ -133,6 +139,7 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
|
||||
TRUNCATE_QUEUE_ENTRY *truncate_item;
|
||||
char *truncate_key;
|
||||
int ret, t_ret;
|
||||
uint64_t used_stone_gap;
|
||||
|
||||
ret = 0;
|
||||
trunc_cfg = &thread->trunc_cfg;
|
||||
@@ -145,11 +152,32 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
|
||||
trunc_cfg->last_total_inserts = trunc_cfg->total_inserts;
|
||||
|
||||
/* We are done if there isn't enough data to trigger a new milestone. */
|
||||
if (trunc_cfg->expected_total <= trunc_cfg->needed_stones)
|
||||
if (trunc_cfg->expected_total <= thread->workload->truncate_count)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If we are falling behind and using more than one stone per lap we
|
||||
* should widen the stone gap for this lap to try and catch up quicker.
|
||||
*/
|
||||
if (trunc_cfg->expected_total >
|
||||
thread->workload->truncate_count + trunc_cfg->stone_gap) {
|
||||
/*
|
||||
* Increase the multiplier until we create stones that are
|
||||
* almost large enough to truncate the whole expected table size
|
||||
* in one operation.
|
||||
*/
|
||||
trunc_cfg->catchup_multiplier =
|
||||
WT_MIN(trunc_cfg->catchup_multiplier + 1,
|
||||
trunc_cfg->needed_stones - 1);
|
||||
} else {
|
||||
/* Back off if we start seeing an improvement */
|
||||
trunc_cfg->catchup_multiplier =
|
||||
WT_MAX(trunc_cfg->catchup_multiplier - 1, 1);
|
||||
}
|
||||
used_stone_gap = trunc_cfg->stone_gap * trunc_cfg->catchup_multiplier;
|
||||
|
||||
while (trunc_cfg->num_stones < trunc_cfg->needed_stones) {
|
||||
trunc_cfg->last_key += trunc_cfg->stone_gap;
|
||||
trunc_cfg->last_key += used_stone_gap;
|
||||
truncate_key = calloc(cfg->key_sz, 1);
|
||||
if (truncate_key == NULL) {
|
||||
lprintf(cfg, ENOMEM, 0,
|
||||
@@ -165,7 +193,7 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
|
||||
}
|
||||
generate_key(cfg, truncate_key, trunc_cfg->last_key);
|
||||
truncate_item->key = truncate_key;
|
||||
truncate_item->diff = trunc_cfg->stone_gap;
|
||||
truncate_item->diff = used_stone_gap;
|
||||
TAILQ_INSERT_TAIL(&cfg->stone_head, truncate_item, q);
|
||||
trunc_cfg->num_stones++;
|
||||
}
|
||||
@@ -189,7 +217,6 @@ run_truncate(CONFIG *cfg, CONFIG_THREAD *thread,
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
||||
*truncatedp = 1;
|
||||
trunc_cfg->expected_total -= truncate_item->diff;
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ dnl build by dist/s_version
|
||||
VERSION_MAJOR=2
|
||||
VERSION_MINOR=7
|
||||
VERSION_PATCH=0
|
||||
VERSION_STRING='"WiredTiger 2.7.0: (November 19, 2015)"'
|
||||
VERSION_STRING='"WiredTiger 2.7.0: (December 8, 2015)"'
|
||||
|
||||
AC_SUBST(VERSION_MAJOR)
|
||||
AC_SUBST(VERSION_MINOR)
|
||||
|
||||
3
dist/s_docs
vendored
3
dist/s_docs
vendored
@@ -22,7 +22,8 @@ changelog()
|
||||
(echo "WiredTiger Change Log"
|
||||
echo "====================="
|
||||
echo
|
||||
cat ../NEWS) > ../src/docs/changelog.md
|
||||
sed -e 's, \([0-9a-f]\{7\}\) , [\1](https://github.com/wiredtiger/wiredtiger/commit/\1) ,g' \
|
||||
-e 's,\(\(WT\|SERVER\)-[0-9]*\),[\1](https://jira.mongodb.org/browse/\1),g' ../NEWS) > ../src/docs/changelog.md
|
||||
}
|
||||
|
||||
wtperf_config()
|
||||
|
||||
3
dist/s_string
vendored
3
dist/s_string
vendored
@@ -30,7 +30,8 @@ replace() {
|
||||
# check:
|
||||
# Check the spelling of an individual file.
|
||||
check() {
|
||||
aspell --lang=en $1 list < ../$2 |
|
||||
# Strip out git hashes, which are seven character hex strings.
|
||||
sed 's/ [0-9a-f]\{7\} / /g' ../$2 | aspell --lang=en $1 list |
|
||||
sort -u |
|
||||
comm -23 /dev/stdin s_string.ok > $t
|
||||
test -s $t && {
|
||||
|
||||
5
dist/s_string.ok
vendored
5
dist/s_string.ok
vendored
@@ -102,6 +102,7 @@ Encryptor
|
||||
Encryptors
|
||||
Enqueue
|
||||
Eron
|
||||
FALLOC
|
||||
FALLTHROUGH
|
||||
FH
|
||||
FLD
|
||||
@@ -224,6 +225,7 @@ Obama
|
||||
Outfmt
|
||||
PARAM
|
||||
POSIX
|
||||
PPC
|
||||
PREDEFINE
|
||||
PRIu
|
||||
PRNG
|
||||
@@ -358,6 +360,7 @@ arg
|
||||
argc
|
||||
args
|
||||
argv
|
||||
asm
|
||||
async
|
||||
asyncopp
|
||||
asyncops
|
||||
@@ -594,6 +597,7 @@ free'd
|
||||
fscanf
|
||||
fstat
|
||||
fsync
|
||||
fsyncLock
|
||||
fsyncs
|
||||
ftruncate
|
||||
func
|
||||
@@ -876,6 +880,7 @@ runtime
|
||||
rwlock
|
||||
sH
|
||||
sHQ
|
||||
scalability
|
||||
sched
|
||||
scr
|
||||
sd
|
||||
|
||||
2
dist/s_whitespace
vendored
2
dist/s_whitespace
vendored
@@ -32,7 +32,7 @@ for f in `find dist -name '*.py' -name 's_*'`; do
|
||||
done
|
||||
|
||||
# C-language sources.
|
||||
for f in `find examples ext src test \
|
||||
for f in `find bench examples ext src test \
|
||||
-name '*.[chi]' -o \
|
||||
-name '*.dox' -o \
|
||||
-name '*.in' -o \
|
||||
|
||||
@@ -97,8 +97,10 @@ csv_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
|
||||
strncpy(copy, p, len);
|
||||
copy[len] = '\0';
|
||||
if (csv_extractor->format_isnum) {
|
||||
if ((val = atoi(copy)) < 0)
|
||||
if ((val = atoi(copy)) < 0) {
|
||||
free(copy);
|
||||
return (EINVAL);
|
||||
}
|
||||
result_cursor->set_key(result_cursor, val);
|
||||
} else
|
||||
result_cursor->set_key(result_cursor, copy);
|
||||
@@ -150,7 +152,7 @@ csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session,
|
||||
return (errno);
|
||||
|
||||
*csv_extractor = *orig;
|
||||
csv_extractor->field = field_num;
|
||||
csv_extractor->field = (int)field_num;
|
||||
csv_extractor->format_isnum = (format.str[0] == 'i');
|
||||
*customp = (WT_EXTRACTOR *)csv_extractor;
|
||||
return (0);
|
||||
|
||||
@@ -77,7 +77,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
|
||||
}
|
||||
|
||||
(void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
|
||||
ret = __wt_evict(session, ref, 0);
|
||||
ret = __wt_evict(session, ref, false);
|
||||
(void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
|
||||
WT_RET_BUSY_OK(ret);
|
||||
}
|
||||
@@ -99,25 +99,18 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
|
||||
/*
|
||||
* We cannot fast-delete pages that have overflow key/value items as
|
||||
* the overflow blocks have to be discarded. The way we figure that
|
||||
* out is to check the on-page cell type for the page, cells for leaf
|
||||
* pages that have no overflow items are special.
|
||||
*
|
||||
* In some cases, the reference address may not reference an on-page
|
||||
* cell (for example, some combination of page splits), in which case
|
||||
* we can't check the original cell value and we fail.
|
||||
* out is to check the page's cell type, cells for leaf pages without
|
||||
* overflow items are special.
|
||||
*
|
||||
* To look at an on-page cell, we need to look at the parent page, and
|
||||
* that's dangerous, our parent page could change without warning if
|
||||
* the parent page were to split, deepening the tree. It's safe: the
|
||||
* page's reference will always point to some valid page, and if we find
|
||||
* any problems we simply fail the fast-delete optimization.
|
||||
*
|
||||
* !!!
|
||||
* I doubt it's worth the effort, but we could copy the cell's type into
|
||||
* the reference structure, and then we wouldn't need an on-page cell.
|
||||
*/
|
||||
parent = ref->home;
|
||||
if (__wt_off_page(parent, ref->addr) ||
|
||||
if (__wt_off_page(parent, ref->addr) ?
|
||||
((WT_ADDR *)ref->addr)->type != WT_ADDR_LEAF_NO :
|
||||
__wt_cell_type_raw(ref->addr) != WT_CELL_ADDR_LEAF_NO)
|
||||
goto err;
|
||||
|
||||
|
||||
@@ -50,15 +50,18 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
|
||||
page = *pagep;
|
||||
*pagep = NULL;
|
||||
|
||||
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
|
||||
__wt_page_modify_clear(session, page);
|
||||
|
||||
/*
|
||||
* We should never discard ...
|
||||
* We should never discard:
|
||||
* - a dirty page,
|
||||
* - a page queued for eviction, or
|
||||
* - a locked page.
|
||||
*/
|
||||
WT_ASSERT( /* ... a dirty page */
|
||||
session, !__wt_page_is_modified(page));
|
||||
WT_ASSERT( /* ... a page queued for LRU eviction */
|
||||
session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
|
||||
WT_ASSERT( /* ... a locked page */
|
||||
session, !__wt_fair_islocked(session, &page->page_lock));
|
||||
WT_ASSERT(session, !__wt_page_is_modified(page));
|
||||
WT_ASSERT(session, !F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU));
|
||||
WT_ASSERT(session, !__wt_fair_islocked(session, &page->page_lock));
|
||||
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
{
|
||||
@@ -227,7 +230,7 @@ __free_page_int(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
*/
|
||||
void
|
||||
__wt_free_ref(
|
||||
WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref, bool free_pages)
|
||||
WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages)
|
||||
{
|
||||
WT_IKEY *ikey;
|
||||
|
||||
@@ -246,8 +249,15 @@ __wt_free_ref(
|
||||
__wt_page_out(session, &ref->page);
|
||||
}
|
||||
|
||||
/* Free any key allocation. */
|
||||
switch (page->type) {
|
||||
/*
|
||||
* Optionally free row-store WT_REF key allocation. Historic versions of
|
||||
* this code looked in a passed-in page argument, but that is dangerous,
|
||||
* some of our error-path callers create WT_REF structures without ever
|
||||
* setting WT_REF.home or having a parent page to which the WT_REF will
|
||||
* be linked. Those WT_REF structures invariably have instantiated keys,
|
||||
* (they obviously cannot be on-page keys), and we must free the memory.
|
||||
*/
|
||||
switch (page_type) {
|
||||
case WT_PAGE_ROW_INT:
|
||||
case WT_PAGE_ROW_LEAF:
|
||||
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
|
||||
@@ -255,11 +265,11 @@ __wt_free_ref(
|
||||
break;
|
||||
}
|
||||
|
||||
/* Free any address allocation. */
|
||||
if (ref->addr != NULL && __wt_off_page(page, ref->addr)) {
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
/*
|
||||
* Free any address allocation; if there's no linked WT_REF page, it
|
||||
* must be allocated.
|
||||
*/
|
||||
__wt_ref_addr_free(session, ref);
|
||||
|
||||
/* Free any page-deleted information. */
|
||||
if (ref->page_del != NULL) {
|
||||
@@ -272,7 +282,7 @@ __wt_free_ref(
|
||||
|
||||
/*
|
||||
* __wt_free_ref_index --
|
||||
* Discard a page index and it's references.
|
||||
* Discard a page index and its references.
|
||||
*/
|
||||
void
|
||||
__wt_free_ref_index(WT_SESSION_IMPL *session,
|
||||
@@ -284,7 +294,8 @@ __wt_free_ref_index(WT_SESSION_IMPL *session,
|
||||
return;
|
||||
|
||||
for (i = 0; i < pindex->entries; ++i)
|
||||
__wt_free_ref(session, page, pindex->index[i], free_pages);
|
||||
__wt_free_ref(
|
||||
session, pindex->index[i], page->type, free_pages);
|
||||
__wt_free(session, pindex);
|
||||
}
|
||||
|
||||
|
||||
@@ -696,6 +696,13 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
|
||||
WT_MIN(btree->maxmempage, cache_size / 4);
|
||||
}
|
||||
|
||||
/*
|
||||
* Try in-memory splits once we hit 80% of the maximum in-memory page
|
||||
* size. This gives multi-threaded append workloads a better chance of
|
||||
* not stalling.
|
||||
*/
|
||||
btree->splitmempage = 8 * btree->maxmempage / 10;
|
||||
|
||||
/*
|
||||
* Get the split percentage (reconciliation splits pages into smaller
|
||||
* than the maximum page size chunks so we don't split every time a
|
||||
|
||||
@@ -307,10 +307,6 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
btree = S2BT(session);
|
||||
page = ref->page;
|
||||
|
||||
/* Pages are usually small enough, check that first. */
|
||||
if (page->memory_footprint < btree->maxmempage)
|
||||
return (0);
|
||||
|
||||
/* Leaf pages only. */
|
||||
if (WT_PAGE_IS_INTERNAL(page))
|
||||
return (0);
|
||||
@@ -322,6 +318,12 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
if (page->modify == NULL)
|
||||
return (0);
|
||||
|
||||
/* Pages are usually small enough, check that first. */
|
||||
if (page->memory_footprint < btree->splitmempage)
|
||||
return (0);
|
||||
else if (page->memory_footprint < btree->maxmempage)
|
||||
return (__wt_leaf_page_can_split(session, page));
|
||||
|
||||
/* Trigger eviction on the next page release. */
|
||||
__wt_page_evict_soon(page);
|
||||
|
||||
@@ -329,7 +331,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
__wt_txn_update_oldest(session, false);
|
||||
|
||||
/* If eviction cannot succeed, don't try. */
|
||||
return (__wt_page_can_evict(session, ref, true, NULL));
|
||||
return (__wt_page_can_evict(session, ref, NULL));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -326,7 +326,7 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
|
||||
*/
|
||||
if (ss->root_ref.page != NULL) {
|
||||
btree->ckpt = ckptbase;
|
||||
ret = __wt_evict(session, &ss->root_ref, 1);
|
||||
ret = __wt_evict(session, &ss->root_ref, true);
|
||||
ss->root_ref.page = NULL;
|
||||
btree->ckpt = NULL;
|
||||
}
|
||||
@@ -1290,9 +1290,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
|
||||
* would have been lost.) Clear the reference addr so eviction doesn't
|
||||
* free the underlying blocks.
|
||||
*/
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
ref->addr = NULL;
|
||||
__wt_ref_addr_free(session, ref);
|
||||
|
||||
/* Write the new version of the leaf page to disk. */
|
||||
WT_ERR(__slvg_modify_init(session, page));
|
||||
@@ -1304,7 +1302,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
|
||||
|
||||
ret = __wt_page_release(session, ref, 0);
|
||||
if (ret == 0)
|
||||
ret = __wt_evict(session, ref, 1);
|
||||
ret = __wt_evict(session, ref, true);
|
||||
|
||||
if (0) {
|
||||
err: WT_TRET(__wt_page_release(session, ref, 0));
|
||||
@@ -2013,9 +2011,7 @@ __slvg_row_build_leaf(
|
||||
* would have been lost.) Clear the reference addr so eviction doesn't
|
||||
* free the underlying blocks.
|
||||
*/
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
ref->addr = NULL;
|
||||
__wt_ref_addr_free(session, ref);
|
||||
|
||||
/* Write the new version of the leaf page to disk. */
|
||||
WT_ERR(__slvg_modify_init(session, page));
|
||||
@@ -2030,7 +2026,7 @@ __slvg_row_build_leaf(
|
||||
*/
|
||||
ret = __wt_page_release(session, ref, 0);
|
||||
if (ret == 0)
|
||||
ret = __wt_evict(session, ref, 1);
|
||||
ret = __wt_evict(session, ref, true);
|
||||
|
||||
if (0) {
|
||||
err: WT_TRET(__wt_page_release(session, ref, 0));
|
||||
|
||||
@@ -340,8 +340,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
|
||||
return (ret);
|
||||
}
|
||||
addr->size = (uint8_t)unpack.size;
|
||||
addr->type =
|
||||
unpack.raw == WT_CELL_ADDR_INT ? WT_ADDR_INT : WT_ADDR_LEAF;
|
||||
switch (unpack.raw) {
|
||||
case WT_CELL_ADDR_INT:
|
||||
addr->type = WT_ADDR_INT;
|
||||
break;
|
||||
case WT_CELL_ADDR_LEAF:
|
||||
addr->type = WT_ADDR_LEAF;
|
||||
break;
|
||||
case WT_CELL_ADDR_LEAF_NO:
|
||||
addr->type = WT_ADDR_LEAF_NO;
|
||||
break;
|
||||
WT_ILLEGAL_VALUE(session);
|
||||
}
|
||||
ref->addr = addr;
|
||||
}
|
||||
|
||||
@@ -399,17 +409,8 @@ __split_ref_move_final(
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *child;
|
||||
WT_REF *ref, *child_ref;
|
||||
uint64_t txn_new_id;
|
||||
uint32_t i;
|
||||
|
||||
/*
|
||||
* When creating new internal pages as part of a split, we set a field
|
||||
* in those pages modify structure to prevent them from being evicted
|
||||
* until all threads are known to have exited the index of the page that
|
||||
* previously "owned" the WT_REF. Set that field to a safe value.
|
||||
*/
|
||||
txn_new_id = __wt_txn_id_alloc(session, false);
|
||||
|
||||
/*
|
||||
* The WT_REF structures moved to newly allocated child pages reference
|
||||
* the wrong parent page and we have to fix that up. The problem is
|
||||
@@ -461,8 +462,6 @@ __split_ref_move_final(
|
||||
if (child_ref->home != child) {
|
||||
child_ref->home = child;
|
||||
child_ref->pindex_hint = 0;
|
||||
|
||||
child->modify->mod_split_txn = txn_new_id;
|
||||
}
|
||||
} WT_INTL_FOREACH_END;
|
||||
WT_LEAVE_PAGE_INDEX(session);
|
||||
@@ -896,6 +895,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
|
||||
*/
|
||||
WT_ASSERT(session, next_ref->page_del == NULL);
|
||||
|
||||
WT_TRET(__wt_ref_block_free(session, next_ref));
|
||||
WT_TRET(__split_safe_free(
|
||||
session, split_gen, exclusive, next_ref, sizeof(WT_REF)));
|
||||
parent_decr += sizeof(WT_REF);
|
||||
@@ -1183,8 +1183,8 @@ err: /*
|
||||
* Lock an internal page.
|
||||
*/
|
||||
static int
|
||||
__split_internal_lock(
|
||||
WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE **parentp, bool *hazardp)
|
||||
__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
|
||||
WT_PAGE **parentp, bool *hazardp)
|
||||
{
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *parent;
|
||||
@@ -1202,7 +1202,7 @@ __split_internal_lock(
|
||||
* loop until the exclusive lock is resolved). If we want to split
|
||||
* the parent, give up to avoid that deadlock.
|
||||
*/
|
||||
if (S2BT(session)->checkpointing != WT_CKPT_OFF)
|
||||
if (!trylock && S2BT(session)->checkpointing != WT_CKPT_OFF)
|
||||
return (EBUSY);
|
||||
|
||||
/*
|
||||
@@ -1227,7 +1227,10 @@ __split_internal_lock(
|
||||
if (F_ISSET_ATOMIC(parent, WT_PAGE_SPLIT_BLOCK))
|
||||
return (EBUSY);
|
||||
|
||||
WT_RET(__wt_fair_lock(session, &parent->page_lock));
|
||||
if (trylock)
|
||||
WT_RET(__wt_fair_trylock(session, &parent->page_lock));
|
||||
else
|
||||
WT_RET(__wt_fair_lock(session, &parent->page_lock));
|
||||
if (parent == ref->home)
|
||||
break;
|
||||
WT_RET(__wt_fair_unlock(session, &parent->page_lock));
|
||||
@@ -1371,7 +1374,7 @@ __split_parent_climb(WT_SESSION_IMPL *session, WT_PAGE *page, bool page_hazard)
|
||||
* locks, lock-coupling up the tree.
|
||||
*/
|
||||
WT_ERR(__split_internal_lock(
|
||||
session, ref, &parent, &parent_hazard));
|
||||
session, ref, true, &parent, &parent_hazard));
|
||||
ret = __split_internal(session, parent, page);
|
||||
WT_TRET(__split_internal_unlock(session, page, page_hazard));
|
||||
|
||||
@@ -1527,7 +1530,7 @@ __split_multi_inmem_final(WT_PAGE *orig, WT_MULTI *multi)
|
||||
* Discard allocated pages after failure.
|
||||
*/
|
||||
static void
|
||||
__split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
__split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
|
||||
{
|
||||
/*
|
||||
* We failed creating new in-memory pages. For error-handling reasons,
|
||||
@@ -1537,7 +1540,7 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
*/
|
||||
if (ref->page != NULL) {
|
||||
F_SET_ATOMIC(ref->page, WT_PAGE_UPDATE_IGNORE);
|
||||
__wt_free_ref(session, ref->page, ref, true);
|
||||
__wt_free_ref(session, ref, orig->type, true);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1635,7 +1638,7 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
*
|
||||
* Note this page has already been through an in-memory split.
|
||||
*/
|
||||
WT_ASSERT(session, __wt_page_can_split(session, page));
|
||||
WT_ASSERT(session, __wt_leaf_page_can_split(session, page));
|
||||
WT_ASSERT(session, __wt_page_is_modified(page));
|
||||
F_SET_ATOMIC(page, WT_PAGE_SPLIT_INSERT);
|
||||
|
||||
@@ -1668,6 +1671,12 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
child->state = WT_REF_MEM;
|
||||
child->addr = ref->addr;
|
||||
|
||||
/*
|
||||
* The address has moved to the replacement WT_REF. Make sure it isn't
|
||||
* freed when the original ref is discarded.
|
||||
*/
|
||||
ref->addr = NULL;
|
||||
|
||||
/*
|
||||
* Copy the first key from the original page into first ref in the new
|
||||
* parent. Pages created in memory always have a "smallest" insert
|
||||
@@ -1817,13 +1826,6 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
WT_ASSERT(session, ins != moved_ins);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save the transaction ID when the split happened. Application
|
||||
* threads will not try to forcibly evict the page again until
|
||||
* all concurrent transactions commit.
|
||||
*/
|
||||
page->modify->inmem_split_txn = __wt_txn_id_alloc(session, false);
|
||||
|
||||
/*
|
||||
* Update the page accounting.
|
||||
*
|
||||
@@ -1864,6 +1866,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
return (0);
|
||||
|
||||
err: if (split_ref[0] != NULL) {
|
||||
/*
|
||||
* The address was moved to the replacement WT_REF, restore it.
|
||||
*/
|
||||
ref->addr = split_ref[0]->addr;
|
||||
|
||||
__wt_free(session, split_ref[0]->key.ikey);
|
||||
__wt_free(session, split_ref[0]);
|
||||
}
|
||||
@@ -1891,7 +1898,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
WT_RET(__wt_verbose(
|
||||
session, WT_VERB_SPLIT, "%p: split-insert", ref->page));
|
||||
|
||||
WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
|
||||
WT_RET(__split_internal_lock(session, ref, true, &parent, &hazard));
|
||||
if ((ret = __split_insert(session, ref)) != 0) {
|
||||
WT_TRET(__split_internal_unlock(session, parent, hazard));
|
||||
return (ret);
|
||||
@@ -1962,7 +1969,7 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
|
||||
if (0) {
|
||||
err: for (i = 0; i < new_entries; ++i)
|
||||
__split_multi_inmem_fail(session, ref_new[i]);
|
||||
__split_multi_inmem_fail(session, page, ref_new[i]);
|
||||
}
|
||||
|
||||
__wt_free(session, ref_new);
|
||||
@@ -1983,7 +1990,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
|
||||
WT_RET(__wt_verbose(
|
||||
session, WT_VERB_SPLIT, "%p: split-multi", ref->page));
|
||||
|
||||
WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
|
||||
WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard));
|
||||
if ((ret = __split_multi(session, ref, closing)) != 0 || closing) {
|
||||
WT_TRET(__split_internal_unlock(session, parent, hazard));
|
||||
return (ret);
|
||||
@@ -2012,7 +2019,7 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
WT_RET(__wt_verbose(
|
||||
session, WT_VERB_SPLIT, "%p: reverse-split", ref->page));
|
||||
|
||||
WT_RET(__split_internal_lock(session, ref, &parent, &hazard));
|
||||
WT_RET(__split_internal_lock(session, ref, false, &parent, &hazard));
|
||||
ret = __split_parent(session, ref, NULL, 0, 0, false, true);
|
||||
WT_TRET(__split_internal_unlock(session, parent, hazard));
|
||||
return (ret);
|
||||
@@ -2072,6 +2079,6 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
|
||||
return (0);
|
||||
|
||||
err: __split_multi_inmem_fail(session, &new);
|
||||
err: __split_multi_inmem_fail(session, page, &new);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -244,7 +244,8 @@ ascend: /*
|
||||
* If we see any child states other than deleted, the
|
||||
* page isn't empty.
|
||||
*/
|
||||
if (ref->state != WT_REF_DELETED)
|
||||
if (ref->state != WT_REF_DELETED &&
|
||||
!LF_ISSET(WT_READ_TRUNCATE))
|
||||
empty_internal = false;
|
||||
|
||||
if (LF_ISSET(WT_READ_CACHE)) {
|
||||
@@ -270,6 +271,7 @@ ascend: /*
|
||||
WT_ERR(__wt_delete_page(session, ref, &skip));
|
||||
if (skip)
|
||||
break;
|
||||
empty_internal = false;
|
||||
} else if (LF_ISSET(WT_READ_COMPACT)) {
|
||||
/*
|
||||
* Skip deleted pages, rewriting them doesn't
|
||||
|
||||
@@ -379,7 +379,7 @@ __curfile_close(WT_CURSOR *cursor)
|
||||
* updated correctly.
|
||||
*/
|
||||
if (session->dhandle != NULL) {
|
||||
/* Increment the data-source's in-use counter. */
|
||||
/* Decrement the data-source's in-use counter. */
|
||||
__wt_cursor_dhandle_decr_use(session);
|
||||
WT_TRET(__wt_session_release_btree(session));
|
||||
}
|
||||
@@ -439,6 +439,9 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
|
||||
cursor->value_format = btree->value_format;
|
||||
cbt->btree = btree;
|
||||
|
||||
if (session->dhandle->checkpoint != NULL)
|
||||
F_SET(cbt, WT_CBT_NO_TXN);
|
||||
|
||||
if (bulk) {
|
||||
F_SET(cursor, WT_CURSTD_BULK);
|
||||
|
||||
|
||||
@@ -6,12 +6,12 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
|
||||
@section releases Releases
|
||||
|
||||
<table>
|
||||
@row{<b>WiredTiger 2.7.0</b> (previous),
|
||||
<a href="releases/wiredtiger-2.7.0.tar.bz2"><b>[Release package]</b></a>,
|
||||
<a href="2.7.0/index.html"><b>[Documentation]</b></a>}
|
||||
@row{<b>WiredTiger 2.6.1</b> (current),
|
||||
<a href="releases/wiredtiger-2.6.1.tar.bz2"><b>[Release package]</b></a>,
|
||||
<a href="2.6.1/index.html"><b>[Documentation]</b></a>}
|
||||
@row{<b>WiredTiger 2.5.3</b> (previous),
|
||||
<a href="releases/wiredtiger-2.5.3.tar.bz2"><b>[Release package]</b></a>,
|
||||
<a href="2.5.3/index.html"><b>[Documentation]</b></a>}
|
||||
@row{<b>Development branch</b>,
|
||||
<a href="https://github.com/wiredtiger/wiredtiger"><b>[Source code]</b></a>,
|
||||
<a href="develop/index.html"><b>[Documentation]</b></a>}
|
||||
|
||||
@@ -230,8 +230,9 @@ threads, and the 'insert', 'read' and 'update' entries are the ratios
|
||||
of insert, read and update operations done by each worker thread; If a
|
||||
throttle value is provided each thread will do a maximum of that
|
||||
number of operations per second; multiple workload configurations may
|
||||
be specified; for example, a more complex threads configuration might
|
||||
be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))'
|
||||
be specified per threads configuration; for example, a more complex
|
||||
threads configuration might be
|
||||
'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))'
|
||||
which would create 2 threads doing nothing but reads and 8 threads
|
||||
each doing 50% inserts and 25% reads and updates. Allowed
|
||||
configuration values are 'count', 'throttle', 'reads', 'inserts',
|
||||
|
||||
@@ -76,22 +76,16 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
|
||||
/*
|
||||
* Evict the page.
|
||||
*/
|
||||
WT_ERR(__wt_evict(session, ref, 1));
|
||||
WT_ERR(__wt_evict(session, ref, true));
|
||||
break;
|
||||
case WT_SYNC_DISCARD:
|
||||
/*
|
||||
* Dead handles may reference dirty pages; clean the
|
||||
* page, both to keep statistics correct, and to let
|
||||
* the page-discard function assert no dirty page is
|
||||
* ever discarded.
|
||||
* Discard the page regardless of whether it is dirty.
|
||||
*/
|
||||
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
|
||||
__wt_page_modify_clear(session, page);
|
||||
|
||||
WT_ASSERT(session,
|
||||
F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
|
||||
__wt_page_can_evict(session, ref, false, NULL));
|
||||
__wt_evict_page_clean_update(session, ref, 1);
|
||||
__wt_page_can_evict(session, ref, NULL));
|
||||
__wt_evict_page_clean_update(session, ref, true);
|
||||
break;
|
||||
WT_ILLEGAL_VALUE_ERR(session);
|
||||
}
|
||||
|
||||
@@ -36,6 +36,10 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
|
||||
|
||||
page = entry->ref->page;
|
||||
|
||||
/* Any page set to the oldest generation should be discarded. */
|
||||
if (page->read_gen == WT_READGEN_OLDEST)
|
||||
return (WT_READGEN_OLDEST);
|
||||
|
||||
/* Any empty page (leaf or internal), is a good choice. */
|
||||
if (__wt_page_is_empty(page))
|
||||
return (WT_READGEN_OLDEST);
|
||||
@@ -159,7 +163,8 @@ __evict_server(void *arg)
|
||||
WT_DECL_RET;
|
||||
WT_SESSION_IMPL *session;
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
struct timespec now, stuck_ts = { 0, 0 };
|
||||
struct timespec now, stuck_ts;
|
||||
uint64_t pages_evicted = 0;
|
||||
#endif
|
||||
u_int spins;
|
||||
|
||||
@@ -204,10 +209,11 @@ __evict_server(void *arg)
|
||||
/* Next time we wake up, reverse the sweep direction. */
|
||||
cache->flags ^= WT_CACHE_WALK_REVERSE;
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
stuck_ts.tv_sec = 0;
|
||||
} else if (stuck_ts.tv_sec == 0)
|
||||
pages_evicted = 0;
|
||||
} else if (pages_evicted != cache->pages_evict) {
|
||||
WT_ERR(__wt_epoch(session, &stuck_ts));
|
||||
else {
|
||||
pages_evicted = cache->pages_evict;
|
||||
} else {
|
||||
/* After being stuck for 5 minutes, give up. */
|
||||
WT_ERR(__wt_epoch(session, &now));
|
||||
if (WT_TIMEDIFF_SEC(now, stuck_ts) > 300) {
|
||||
@@ -466,6 +472,15 @@ __evict_update_work(WT_SESSION_IMPL *session)
|
||||
if (!F_ISSET(conn, WT_CONN_EVICTION_RUN))
|
||||
return (false);
|
||||
|
||||
/*
|
||||
* Setup the number of refs to consider in each handle, depending
|
||||
* on how many handles are open. We want to consider less candidates
|
||||
* from each file as more files are open. Handle the case where there
|
||||
* are no files open by adding 1.
|
||||
*/
|
||||
cache->evict_max_refs_per_file =
|
||||
WT_MAX(100, WT_MILLION / (conn->open_file_count + 1));
|
||||
|
||||
/*
|
||||
* Page eviction overrides the dirty target and other types of eviction,
|
||||
* that is, we don't care where we are with respect to the dirty target
|
||||
@@ -481,6 +496,13 @@ __evict_update_work(WT_SESSION_IMPL *session)
|
||||
goto done;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the cache has been stuck and is now under control, clear the
|
||||
* stuck flag.
|
||||
*/
|
||||
if (bytes_inuse < bytes_max)
|
||||
F_CLR(cache, WT_CACHE_STUCK);
|
||||
|
||||
dirty_inuse = __wt_cache_dirty_inuse(cache);
|
||||
if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) {
|
||||
FLD_SET(cache->state, WT_EVICT_PASS_DIRTY);
|
||||
@@ -498,6 +520,7 @@ __evict_update_work(WT_SESSION_IMPL *session)
|
||||
F_CLR(cache, WT_CACHE_WOULD_BLOCK);
|
||||
goto done;
|
||||
}
|
||||
|
||||
return (false);
|
||||
|
||||
done: if (F_ISSET(cache, WT_CACHE_STUCK))
|
||||
@@ -1169,7 +1192,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
|
||||
uint64_t pages_walked;
|
||||
uint32_t walk_flags;
|
||||
int internal_pages, restarts;
|
||||
bool enough, modified;
|
||||
bool enough, modified, would_split;
|
||||
|
||||
conn = S2C(session);
|
||||
btree = S2BT(session);
|
||||
@@ -1202,7 +1225,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
|
||||
evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND);
|
||||
ret = __wt_tree_walk(
|
||||
session, &btree->evict_ref, &pages_walked, walk_flags)) {
|
||||
enough = pages_walked > WT_EVICT_MAX_PER_FILE;
|
||||
enough = pages_walked > cache->evict_max_refs_per_file;
|
||||
if ((ref = btree->evict_ref) == NULL) {
|
||||
if (++restarts == 2 || enough)
|
||||
break;
|
||||
@@ -1237,6 +1260,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
|
||||
* eviction, skip anything that isn't marked.
|
||||
*/
|
||||
if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
|
||||
page->memory_footprint < btree->splitmempage &&
|
||||
page->read_gen != WT_READGEN_OLDEST)
|
||||
continue;
|
||||
|
||||
@@ -1254,9 +1278,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
|
||||
page->read_gen = __wt_cache_read_gen_new(session);
|
||||
|
||||
fast: /* If the page can't be evicted, give up. */
|
||||
if (!__wt_page_can_evict(session, ref, true, NULL))
|
||||
if (!__wt_page_can_evict(session, ref, &would_split))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Note: take care with ordering: if we detected that
|
||||
* the page is modified above, we expect mod != NULL.
|
||||
*/
|
||||
mod = page->modify;
|
||||
|
||||
/*
|
||||
* Additional tests if eviction is likely to succeed.
|
||||
*
|
||||
@@ -1269,12 +1299,6 @@ fast: /* If the page can't be evicted, give up. */
|
||||
*/
|
||||
if (!FLD_ISSET(cache->state,
|
||||
WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
|
||||
/*
|
||||
* Note: take care with ordering: if we detected that
|
||||
* the page is modified above, we expect mod != NULL.
|
||||
*/
|
||||
mod = page->modify;
|
||||
|
||||
/*
|
||||
* If the page is clean but has modifications that
|
||||
* appear too new to evict, skip it.
|
||||
@@ -1282,19 +1306,6 @@ fast: /* If the page can't be evicted, give up. */
|
||||
if (!modified && mod != NULL &&
|
||||
!__wt_txn_visible_all(session, mod->rec_max_txn))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the oldest transaction hasn't changed since the
|
||||
* last time this page was written, it's unlikely we
|
||||
* can make progress. Similarly, if the most recent
|
||||
* update on the page is not yet globally visible,
|
||||
* eviction will fail. These heuristics attempt to
|
||||
* avoid repeated attempts to evict the same page.
|
||||
*/
|
||||
if (modified &&
|
||||
(mod->disk_snap_min == conn->txn_global.oldest_id ||
|
||||
!__wt_txn_visible_all(session, mod->update_txn)))
|
||||
continue;
|
||||
}
|
||||
|
||||
WT_ASSERT(session, evict->ref == NULL);
|
||||
@@ -1419,7 +1430,6 @@ static int
|
||||
__evict_page(WT_SESSION_IMPL *session, bool is_server)
|
||||
{
|
||||
WT_BTREE *btree;
|
||||
WT_CACHE *cache;
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *page;
|
||||
WT_REF *ref;
|
||||
@@ -1454,26 +1464,10 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
|
||||
if (page->read_gen != WT_READGEN_OLDEST)
|
||||
page->read_gen = __wt_cache_read_gen_bump(session);
|
||||
|
||||
/*
|
||||
* If we are evicting in a dead tree, don't write dirty pages.
|
||||
*
|
||||
* Force pages clean to keep statistics correct and to let the
|
||||
* page-discard function assert that no dirty pages are ever
|
||||
* discarded.
|
||||
*/
|
||||
if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
|
||||
__wt_page_modify_clear(session, page);
|
||||
|
||||
WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, 0));
|
||||
WT_WITH_BTREE(session, btree, ret = __wt_evict(session, ref, false));
|
||||
|
||||
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
|
||||
|
||||
WT_RET(ret);
|
||||
|
||||
cache = S2C(session)->cache;
|
||||
if (F_ISSET(cache, WT_CACHE_STUCK))
|
||||
F_CLR(cache, WT_CACHE_STUCK);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
||||
@@ -1617,8 +1611,8 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
|
||||
|
||||
next_walk = NULL;
|
||||
session->dhandle = dhandle;
|
||||
while (__wt_tree_walk(session,
|
||||
&next_walk, NULL, WT_READ_CACHE | WT_READ_NO_WAIT) == 0 &&
|
||||
while (__wt_tree_walk(session, &next_walk, NULL,
|
||||
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 &&
|
||||
next_walk != NULL) {
|
||||
page = next_walk->page;
|
||||
size = page->memory_footprint;
|
||||
|
||||
@@ -55,7 +55,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *page;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
bool forced_eviction, inmem_split;
|
||||
bool clean_page, forced_eviction, inmem_split, tree_dead;
|
||||
|
||||
conn = S2C(session);
|
||||
|
||||
@@ -65,6 +65,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
page = ref->page;
|
||||
forced_eviction = page->read_gen == WT_READGEN_OLDEST;
|
||||
inmem_split = false;
|
||||
tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
|
||||
|
||||
WT_RET(__wt_verbose(session, WT_VERB_EVICT,
|
||||
"page %p (%s)", page, __wt_page_type_string(page->type)));
|
||||
@@ -105,24 +106,26 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
if (page->memory_footprint > conn->cache->evict_max_page_size)
|
||||
conn->cache->evict_max_page_size = page->memory_footprint;
|
||||
|
||||
/* Update the reference and discard the page. */
|
||||
if ((mod == NULL || mod->rec_result == 0) &&
|
||||
!F_ISSET(conn, WT_CONN_IN_MEMORY)) {
|
||||
if (__wt_ref_is_root(ref))
|
||||
__wt_ref_out(session, ref);
|
||||
else
|
||||
WT_ERR(__wt_evict_page_clean_update(
|
||||
session, ref, closing));
|
||||
/* Figure out whether reconciliation was done on the page */
|
||||
clean_page = mod == NULL || mod->rec_result == 0;
|
||||
|
||||
/* Update the reference and discard the page. */
|
||||
if (__wt_ref_is_root(ref))
|
||||
__wt_ref_out(session, ref);
|
||||
else if (tree_dead || (clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)))
|
||||
/*
|
||||
* Pages that belong to dead trees never write back to disk
|
||||
* and can't support page splits.
|
||||
*/
|
||||
WT_ERR(__wt_evict_page_clean_update(
|
||||
session, ref, tree_dead || closing));
|
||||
else
|
||||
WT_ERR(__evict_page_dirty_update(session, ref, closing));
|
||||
|
||||
if (clean_page) {
|
||||
WT_STAT_FAST_CONN_INCR(session, cache_eviction_clean);
|
||||
WT_STAT_FAST_DATA_INCR(session, cache_eviction_clean);
|
||||
} else {
|
||||
if (__wt_ref_is_root(ref))
|
||||
__wt_ref_out(session, ref);
|
||||
else
|
||||
WT_ERR(__evict_page_dirty_update(
|
||||
session, ref, closing));
|
||||
|
||||
WT_STAT_FAST_CONN_INCR(session, cache_eviction_dirty);
|
||||
WT_STAT_FAST_DATA_INCR(session, cache_eviction_dirty);
|
||||
}
|
||||
@@ -238,20 +241,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
{
|
||||
WT_ADDR *addr;
|
||||
WT_DECL_RET;
|
||||
WT_PAGE *parent;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
|
||||
parent = ref->home;
|
||||
mod = ref->page->modify;
|
||||
|
||||
WT_ASSERT(session, ref->addr == NULL);
|
||||
|
||||
switch (mod->rec_result) {
|
||||
case WT_PM_REC_EMPTY: /* Page is empty */
|
||||
/* Discard the parent's address. */
|
||||
if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the parent to reference a deleted page. The fact that
|
||||
* reconciliation left the page "empty" means there's no older
|
||||
@@ -304,12 +301,6 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
|
||||
if (!closing && __wt_eviction_dirty_target(session))
|
||||
return (EBUSY);
|
||||
|
||||
/* Discard the parent's address. */
|
||||
if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the parent to reference the replacement page.
|
||||
*
|
||||
@@ -399,6 +390,13 @@ __evict_review(
|
||||
WT_RET(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* It is always OK to evict pages from dead trees if they don't have
|
||||
* children.
|
||||
*/
|
||||
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Retrieve the modified state of the page. This must happen after the
|
||||
* check for evictable internal pages otherwise there is a race where a
|
||||
@@ -424,7 +422,7 @@ __evict_review(
|
||||
if (modified)
|
||||
__wt_txn_update_oldest(session, true);
|
||||
|
||||
if (!__wt_page_can_evict(session, ref, false, inmem_splitp))
|
||||
if (!__wt_page_can_evict(session, ref, inmem_splitp))
|
||||
return (EBUSY);
|
||||
|
||||
/*
|
||||
|
||||
@@ -198,20 +198,9 @@ struct __wt_ovfl_txnc {
|
||||
* When a page is modified, there's additional information to maintain.
|
||||
*/
|
||||
struct __wt_page_modify {
|
||||
/*
|
||||
* Track the highest transaction ID at which the page was written to
|
||||
* disk. This can be used to avoid trying to write the page multiple
|
||||
* times if a snapshot is keeping old versions pinned (e.g., in a
|
||||
* checkpoint).
|
||||
*/
|
||||
uint64_t disk_snap_min;
|
||||
|
||||
/* The first unwritten transaction ID (approximate). */
|
||||
uint64_t first_dirty_txn;
|
||||
|
||||
/* In-memory split transaction ID. */
|
||||
uint64_t inmem_split_txn;
|
||||
|
||||
/* Avoid checking for obsolete updates during checkpoints. */
|
||||
uint64_t obsolete_check_txn;
|
||||
|
||||
@@ -221,10 +210,8 @@ struct __wt_page_modify {
|
||||
/* The largest update transaction ID (approximate). */
|
||||
uint64_t update_txn;
|
||||
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
/* Check that transaction time moves forward. */
|
||||
uint64_t last_oldest_id;
|
||||
#endif
|
||||
|
||||
/* Dirty bytes added to the cache. */
|
||||
size_t bytes_dirty;
|
||||
@@ -313,17 +300,8 @@ struct __wt_page_modify {
|
||||
* so they can be discarded when no longer needed.
|
||||
*/
|
||||
WT_PAGE *root_split; /* Linked list of root split pages */
|
||||
|
||||
/*
|
||||
* When we deepen the tree, newly created internal pages cannot
|
||||
* be evicted until all threads have exited the original page
|
||||
* index structure. We set a transaction value during the split
|
||||
* that's checked during eviction.
|
||||
*/
|
||||
uint64_t split_txn; /* Split eviction transaction value */
|
||||
} intl;
|
||||
#define mod_root_split u2.intl.root_split
|
||||
#define mod_split_txn u2.intl.split_txn
|
||||
struct {
|
||||
/*
|
||||
* Appended items to column-stores: there is only a single one
|
||||
|
||||
@@ -88,7 +88,8 @@ struct __wt_btree {
|
||||
uint32_t maxleafpage; /* Leaf page max size */
|
||||
uint32_t maxleafkey; /* Leaf page max key size */
|
||||
uint32_t maxleafvalue; /* Leaf page max value size */
|
||||
uint64_t maxmempage; /* In memory page max size */
|
||||
uint64_t maxmempage; /* In-memory page max size */
|
||||
uint64_t splitmempage; /* In-memory split trigger size */
|
||||
|
||||
void *huffman_key; /* Key huffman encoding */
|
||||
void *huffman_value; /* Value huffman encoding */
|
||||
|
||||
@@ -37,6 +37,23 @@ __wt_page_is_modified(WT_PAGE *page)
|
||||
return (page->modify != NULL && page->modify->write_gen != 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_btree_block_free --
|
||||
* Helper function to free a block from the current tree.
|
||||
*/
|
||||
static inline int
|
||||
__wt_btree_block_free(
|
||||
WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
|
||||
{
|
||||
WT_BM *bm;
|
||||
WT_BTREE *btree;
|
||||
|
||||
btree = S2BT(session);
|
||||
bm = btree->bm;
|
||||
|
||||
return (bm->free(bm, session, addr, addr_size));
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_cache_page_inmem_incr --
|
||||
* Increment a page's memory footprint in the cache.
|
||||
@@ -330,6 +347,8 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
{
|
||||
uint64_t last_running;
|
||||
|
||||
WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD));
|
||||
|
||||
last_running = 0;
|
||||
if (page->modify->write_gen == 0)
|
||||
last_running = S2C(session)->txn_global.last_running;
|
||||
@@ -346,13 +365,6 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) {
|
||||
__wt_cache_dirty_incr(session, page);
|
||||
|
||||
/*
|
||||
* The page can never end up with changes older than the oldest
|
||||
* running transaction.
|
||||
*/
|
||||
if (F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT))
|
||||
page->modify->disk_snap_min = session->txn.snap_min;
|
||||
|
||||
/*
|
||||
* We won the race to dirty the page, but another thread could
|
||||
* have committed in the meantime, and the last_running field
|
||||
@@ -470,6 +482,23 @@ __wt_off_page(WT_PAGE *page, const void *p)
|
||||
p >= (void *)((uint8_t *)page->dsk + page->dsk->mem_size));
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_ref_addr_free --
|
||||
* Free the address in a reference, if necessary.
|
||||
*/
|
||||
static inline void
|
||||
__wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
{
|
||||
if (ref->addr == NULL)
|
||||
return;
|
||||
|
||||
if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) {
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
ref->addr = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_ref_key --
|
||||
* Return a reference to a row-store internal page key as cheaply as
|
||||
@@ -968,11 +997,32 @@ __wt_ref_info(WT_SESSION_IMPL *session,
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_page_can_split --
|
||||
* __wt_ref_block_free --
|
||||
* Free the on-disk block for a reference and clear the address.
|
||||
*/
|
||||
static inline int
|
||||
__wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
{
|
||||
const uint8_t *addr;
|
||||
size_t addr_size;
|
||||
|
||||
if (ref->addr == NULL)
|
||||
return (0);
|
||||
|
||||
WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
|
||||
WT_RET(__wt_btree_block_free(session, addr, addr_size));
|
||||
|
||||
/* Clear the address (so we don't free it twice). */
|
||||
__wt_ref_addr_free(session, ref);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* __wt_leaf_page_can_split --
|
||||
* Check whether a page can be split in memory.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
__wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
{
|
||||
WT_BTREE *btree;
|
||||
WT_INSERT_HEAD *ins_head;
|
||||
@@ -1003,7 +1053,7 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
* reconciliation will be wrong, so we can't evict immediately).
|
||||
*/
|
||||
if (page->type != WT_PAGE_ROW_LEAF ||
|
||||
page->memory_footprint < btree->maxmempage ||
|
||||
page->memory_footprint < btree->splitmempage ||
|
||||
!__wt_page_is_modified(page))
|
||||
return (false);
|
||||
|
||||
@@ -1046,13 +1096,12 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
* Check whether a page can be evicted.
|
||||
*/
|
||||
static inline bool
|
||||
__wt_page_can_evict(WT_SESSION_IMPL *session,
|
||||
WT_REF *ref, bool check_splits, bool *inmem_splitp)
|
||||
__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
|
||||
{
|
||||
WT_BTREE *btree;
|
||||
WT_PAGE *page;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
WT_TXN_GLOBAL *txn_global;
|
||||
bool modified;
|
||||
|
||||
if (inmem_splitp != NULL)
|
||||
*inmem_splitp = false;
|
||||
@@ -1071,20 +1120,21 @@ __wt_page_can_evict(WT_SESSION_IMPL *session,
|
||||
* detailed eviction tests. We don't need further tests since the page
|
||||
* won't be written or discarded from the cache.
|
||||
*/
|
||||
if (__wt_page_can_split(session, page)) {
|
||||
if (__wt_leaf_page_can_split(session, page)) {
|
||||
if (inmem_splitp != NULL)
|
||||
*inmem_splitp = true;
|
||||
return (true);
|
||||
}
|
||||
|
||||
modified = __wt_page_is_modified(page);
|
||||
|
||||
/*
|
||||
* If the file is being checkpointed, we can't evict dirty pages:
|
||||
* if we write a page and free the previous version of the page, that
|
||||
* previous version might be referenced by an internal page already
|
||||
* been written in the checkpoint, leaving the checkpoint inconsistent.
|
||||
*/
|
||||
if (btree->checkpointing != WT_CKPT_OFF &&
|
||||
__wt_page_is_modified(page)) {
|
||||
if (btree->checkpointing != WT_CKPT_OFF && modified) {
|
||||
WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint);
|
||||
WT_STAT_FAST_DATA_INCR(session, cache_eviction_checkpoint);
|
||||
return (false);
|
||||
@@ -1105,28 +1155,24 @@ __wt_page_can_evict(WT_SESSION_IMPL *session,
|
||||
* pages cannot be evicted until all threads are known to have exited
|
||||
* the original parent page's index, because evicting an internal page
|
||||
* discards its WT_REF array, and a thread traversing the original
|
||||
* parent page index might see a freed WT_REF. During the split we set
|
||||
* a transaction value, we can evict the created page as soon as that
|
||||
* transaction value is globally visible.
|
||||
* parent page index might see a freed WT_REF.
|
||||
*/
|
||||
if (check_splits && WT_PAGE_IS_INTERNAL(page) &&
|
||||
(F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK) ||
|
||||
!__wt_txn_visible_all(session, mod->mod_split_txn)))
|
||||
if (WT_PAGE_IS_INTERNAL(page) &&
|
||||
F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK))
|
||||
return (false);
|
||||
|
||||
/*
|
||||
* If the page was recently split in-memory, don't evict it immediately:
|
||||
* we want to give application threads that are appending a chance to
|
||||
* move to the new leaf page created by the split.
|
||||
*
|
||||
* Note the check here is similar to __wt_txn_visible_all, but ignores
|
||||
* the checkpoint's transaction.
|
||||
* If the oldest transaction hasn't changed since the last time
|
||||
* this page was written, it's unlikely we can make progress.
|
||||
* Similarly, if the most recent update on the page is not yet
|
||||
* globally visible, eviction will fail. These heuristics
|
||||
* attempt to avoid repeated attempts to evict the same page.
|
||||
*/
|
||||
if (check_splits) {
|
||||
txn_global = &S2C(session)->txn_global;
|
||||
if (WT_TXNID_LE(txn_global->oldest_id, mod->inmem_split_txn))
|
||||
return (false);
|
||||
}
|
||||
if (modified &&
|
||||
!F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) &&
|
||||
(mod->last_oldest_id == __wt_txn_oldest_id(session) ||
|
||||
!__wt_txn_visible_all(session, mod->update_txn)))
|
||||
return (false);
|
||||
|
||||
return (true);
|
||||
}
|
||||
@@ -1162,7 +1208,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
|
||||
(void)__wt_atomic_addv32(&btree->evict_busy, 1);
|
||||
|
||||
too_big = page->memory_footprint > btree->maxmempage;
|
||||
if ((ret = __wt_evict(session, ref, 0)) == 0) {
|
||||
if ((ret = __wt_evict(session, ref, false)) == 0) {
|
||||
if (too_big)
|
||||
WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
|
||||
else
|
||||
@@ -1221,7 +1267,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
|
||||
LF_ISSET(WT_READ_NO_EVICT) ||
|
||||
F_ISSET(session, WT_SESSION_NO_EVICTION) ||
|
||||
F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
|
||||
!__wt_page_can_evict(session, ref, true, NULL))
|
||||
!__wt_page_can_evict(session, ref, NULL))
|
||||
return (__wt_hazard_clear(session, page));
|
||||
|
||||
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
pages by this many increments of the
|
||||
read generation. */
|
||||
#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
|
||||
#define WT_EVICT_MAX_PER_FILE 100 /* Max pages to visit per file */
|
||||
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
|
||||
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
|
||||
|
||||
@@ -107,6 +106,7 @@ struct __wt_cache {
|
||||
uint32_t evict_slots; /* LRU list eviction slots */
|
||||
WT_DATA_HANDLE
|
||||
*evict_file_next; /* LRU next file to search */
|
||||
uint32_t evict_max_refs_per_file;/* LRU pages per file per pass */
|
||||
|
||||
/*
|
||||
* Cache pool information.
|
||||
|
||||
@@ -197,7 +197,14 @@ struct __wt_cursor_btree {
|
||||
#define WT_CBT_ITERATE_NEXT 0x04 /* Next iteration configuration */
|
||||
#define WT_CBT_ITERATE_PREV 0x08 /* Prev iteration configuration */
|
||||
#define WT_CBT_MAX_RECORD 0x10 /* Col-store: past end-of-table */
|
||||
#define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */
|
||||
#define WT_CBT_NO_TXN 0x20 /* Non-transactional cursor
|
||||
(e.g. on a checkpoint) */
|
||||
#define WT_CBT_SEARCH_SMALLEST 0x40 /* Row-store: small-key insert list */
|
||||
|
||||
#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
|
||||
(WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
|
||||
WT_CBT_MAX_RECORD | WT_CBT_SEARCH_SMALLEST)
|
||||
|
||||
uint8_t flags;
|
||||
};
|
||||
|
||||
@@ -302,7 +309,7 @@ struct __wt_cursor_join_entry {
|
||||
|
||||
WT_CURSOR_JOIN_ENDPOINT *ends; /* reference endpoints */
|
||||
size_t ends_allocated;
|
||||
size_t ends_next;
|
||||
u_int ends_next;
|
||||
|
||||
WT_JOIN_STATS stats; /* Join statistics */
|
||||
};
|
||||
|
||||
@@ -41,11 +41,7 @@ __cursor_pos_clear(WT_CURSOR_BTREE *cbt)
|
||||
cbt->cip_saved = NULL;
|
||||
cbt->rip_saved = NULL;
|
||||
|
||||
/*
|
||||
* Don't clear the active flag, it's owned by the cursor enter/leave
|
||||
* functions.
|
||||
*/
|
||||
F_CLR(cbt, ~WT_CBT_ACTIVE);
|
||||
F_CLR(cbt, WT_CBT_POSITION_MASK);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -93,7 +89,8 @@ __curfile_enter(WT_CURSOR_BTREE *cbt)
|
||||
|
||||
session = (WT_SESSION_IMPL *)cbt->iface.session;
|
||||
|
||||
WT_RET(__cursor_enter(session));
|
||||
if (!F_ISSET(cbt, WT_CBT_NO_TXN))
|
||||
WT_RET(__cursor_enter(session));
|
||||
F_SET(cbt, WT_CBT_ACTIVE);
|
||||
return (0);
|
||||
}
|
||||
@@ -112,7 +109,8 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
|
||||
|
||||
/* If the cursor was active, deactivate it. */
|
||||
if (F_ISSET(cbt, WT_CBT_ACTIVE)) {
|
||||
__cursor_leave(session);
|
||||
if (!F_ISSET(cbt, WT_CBT_NO_TXN))
|
||||
__cursor_leave(session);
|
||||
F_CLR(cbt, WT_CBT_ACTIVE);
|
||||
}
|
||||
|
||||
@@ -204,7 +202,7 @@ err: return (ret);
|
||||
|
||||
/*
|
||||
* __wt_cursor_dhandle_incr_use --
|
||||
* Increment the in-use counter in cursor's data source.
|
||||
* Increment the in-use counter in the cursor's data source.
|
||||
*/
|
||||
static inline void
|
||||
__wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session)
|
||||
@@ -221,7 +219,7 @@ __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session)
|
||||
|
||||
/*
|
||||
* __wt_cursor_dhandle_decr_use --
|
||||
* Decrement the in-use counter in cursor's data source.
|
||||
* Decrement the in-use counter in the cursor's data source.
|
||||
*/
|
||||
static inline void
|
||||
__wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
|
||||
@@ -262,7 +260,13 @@ __cursor_func_init(WT_CURSOR_BTREE *cbt, bool reenter)
|
||||
|
||||
if (!F_ISSET(cbt, WT_CBT_ACTIVE))
|
||||
WT_RET(__curfile_enter(cbt));
|
||||
__wt_txn_cursor_op(session);
|
||||
|
||||
/*
|
||||
* If this is an ordinary transactional cursor, make sure we are set up
|
||||
* to read.
|
||||
*/
|
||||
if (!F_ISSET(cbt, WT_CBT_NO_TXN))
|
||||
__wt_txn_cursor_op(session);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool vi
|
||||
extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref);
|
||||
extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref);
|
||||
extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep);
|
||||
extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_PAGE *page, WT_REF *ref, bool free_pages);
|
||||
extern void __wt_free_ref( WT_SESSION_IMPL *session, WT_REF *ref, int page_type, bool free_pages);
|
||||
extern void __wt_free_ref_index(WT_SESSION_IMPL *session, WT_PAGE *page, WT_PAGE_INDEX *pindex, bool free_pages);
|
||||
extern void __wt_free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd);
|
||||
extern int __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]);
|
||||
|
||||
@@ -70,7 +70,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
|
||||
};
|
||||
|
||||
struct __wt_txn_global {
|
||||
uint64_t alloc; /* Transaction ID to allocate. */
|
||||
WT_SPINLOCK id_lock;
|
||||
volatile uint64_t current; /* Current transaction ID. */
|
||||
|
||||
/* The oldest running transaction ID (may race). */
|
||||
|
||||
@@ -323,7 +323,6 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
|
||||
{
|
||||
WT_TXN_GLOBAL *txn_global;
|
||||
uint64_t id;
|
||||
u_int i;
|
||||
|
||||
txn_global = &S2C(session)->txn_global;
|
||||
|
||||
@@ -350,20 +349,16 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
|
||||
* global current ID, so we want post-increment semantics. Our atomic
|
||||
* add primitive does pre-increment, so adjust the result here.
|
||||
*/
|
||||
id = __wt_atomic_addv64(&S2C(session)->txn_global.alloc, 1) - 1;
|
||||
__wt_spin_lock(session, &txn_global->id_lock);
|
||||
id = txn_global->current;
|
||||
|
||||
if (publish) {
|
||||
session->txn.id = id;
|
||||
WT_SESSION_TXN_STATE(session)->id = id;
|
||||
WT_PUBLISH(WT_SESSION_TXN_STATE(session)->id, id);
|
||||
}
|
||||
|
||||
for (i = 0; txn_global->current != id; i++)
|
||||
if (i < 100)
|
||||
WT_PAUSE();
|
||||
else
|
||||
__wt_yield();
|
||||
|
||||
WT_PUBLISH(txn_global->current, id + 1);
|
||||
++txn_global->current;
|
||||
__wt_spin_unlock(session, &txn_global->id_lock);
|
||||
return (id);
|
||||
}
|
||||
|
||||
|
||||
@@ -293,7 +293,7 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
|
||||
&log->slot_pool[i].slot_buf, log->slot_buf_size));
|
||||
F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
|
||||
}
|
||||
WT_STAT_FAST_CONN_INCRV(session,
|
||||
WT_STAT_FAST_CONN_SET(session,
|
||||
log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
|
||||
/*
|
||||
* Set up the available slot from the pool the first time.
|
||||
|
||||
@@ -351,6 +351,7 @@ __wt_reconcile(WT_SESSION_IMPL *session,
|
||||
WT_PAGE *page;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
WT_RECONCILE *r;
|
||||
uint64_t oldest_id;
|
||||
|
||||
page = ref->page;
|
||||
mod = page->modify;
|
||||
@@ -361,21 +362,14 @@ __wt_reconcile(WT_SESSION_IMPL *session,
|
||||
/* We shouldn't get called with a clean page, that's an error. */
|
||||
WT_ASSERT(session, __wt_page_is_modified(page));
|
||||
|
||||
#ifdef HAVE_DIAGNOSTIC
|
||||
{
|
||||
/*
|
||||
* Check that transaction time always moves forward for a given page.
|
||||
* If this check fails, reconciliation can free something that a future
|
||||
* reconciliation will need.
|
||||
*/
|
||||
uint64_t oldest_id = __wt_txn_oldest_id(session);
|
||||
oldest_id = __wt_txn_oldest_id(session);
|
||||
WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
|
||||
mod->last_oldest_id = oldest_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Record the most recent transaction ID we will *not* write. */
|
||||
mod->disk_snap_min = session->txn.snap_min;
|
||||
|
||||
/* Initialize the reconciliation structure for each new run. */
|
||||
WT_RET(__rec_write_init(
|
||||
@@ -990,23 +984,6 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* __rec_block_free --
|
||||
* Helper function to free a block.
|
||||
*/
|
||||
static int
|
||||
__rec_block_free(
|
||||
WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
|
||||
{
|
||||
WT_BM *bm;
|
||||
WT_BTREE *btree;
|
||||
|
||||
btree = S2BT(session);
|
||||
bm = btree->bm;
|
||||
|
||||
return (bm->free(bm, session, addr, addr_size));
|
||||
}
|
||||
|
||||
/*
|
||||
* __rec_update_save --
|
||||
* Save a WT_UPDATE list for later restoration.
|
||||
@@ -1349,8 +1326,6 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
|
||||
WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
|
||||
{
|
||||
WT_PAGE_DELETED *page_del;
|
||||
size_t addr_size;
|
||||
const uint8_t *addr;
|
||||
|
||||
page_del = ref->page_del;
|
||||
|
||||
@@ -1398,16 +1373,8 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
|
||||
*/
|
||||
if (ref->addr != NULL &&
|
||||
(page_del == NULL ||
|
||||
__wt_txn_visible_all(session, page_del->txnid))) {
|
||||
WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
|
||||
WT_RET(__rec_block_free(session, addr, addr_size));
|
||||
|
||||
if (__wt_off_page(ref->home, ref->addr)) {
|
||||
__wt_free(session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
ref->addr = NULL;
|
||||
}
|
||||
__wt_txn_visible_all(session, page_del->txnid)))
|
||||
WT_RET(__wt_ref_block_free(session, ref));
|
||||
|
||||
/*
|
||||
* If the original page is gone, we can skip the slot on the internal
|
||||
@@ -2944,8 +2911,8 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
||||
break;
|
||||
case SPLIT_TRACKING_RAW:
|
||||
/*
|
||||
* We were configured for raw compression, but never actually
|
||||
* wrote anything.
|
||||
* We were configured for raw compression, and either we never
|
||||
* wrote anything, or there's a remaindered block of data.
|
||||
*/
|
||||
break;
|
||||
WT_ILLEGAL_VALUE(session);
|
||||
@@ -2998,14 +2965,27 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
||||
static int
|
||||
__rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
|
||||
{
|
||||
/* We're done reconciling - write the final page */
|
||||
if (r->raw_compression && r->entries != 0) {
|
||||
while (r->entries != 0)
|
||||
WT_RET(__rec_split_raw_worker(session, r, 0, true));
|
||||
} else
|
||||
WT_RET(__rec_split_finish_std(session, r));
|
||||
WT_BTREE *btree;
|
||||
size_t data_size;
|
||||
|
||||
return (0);
|
||||
btree = S2BT(session);
|
||||
|
||||
/*
|
||||
* We're done reconciling, write the final page. Call raw compression
|
||||
* until/unless there's not enough data to compress.
|
||||
*/
|
||||
if (r->raw_compression && r->entries != 0) {
|
||||
while (r->entries != 0) {
|
||||
data_size =
|
||||
WT_PTRDIFF32(r->first_free, r->disk_image.mem);
|
||||
if (data_size <= btree->allocsize)
|
||||
break;
|
||||
WT_RET(__rec_split_raw_worker(session, r, 0, true));
|
||||
}
|
||||
if (r->entries == 0)
|
||||
return (0);
|
||||
}
|
||||
return (__rec_split_finish_std(session, r));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -5310,7 +5290,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
|
||||
if (multi->addr.reuse)
|
||||
multi->addr.addr = NULL;
|
||||
else {
|
||||
WT_RET(__rec_block_free(session,
|
||||
WT_RET(__wt_btree_block_free(session,
|
||||
multi->addr.addr, multi->addr.size));
|
||||
__wt_free(session, multi->addr.addr);
|
||||
}
|
||||
@@ -5393,8 +5373,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
WT_BTREE *btree;
|
||||
WT_PAGE_MODIFY *mod;
|
||||
WT_REF *ref;
|
||||
size_t addr_size;
|
||||
const uint8_t *addr;
|
||||
|
||||
btree = S2BT(session);
|
||||
bm = btree->bm;
|
||||
@@ -5419,21 +5397,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
*/
|
||||
if (__wt_ref_is_root(ref))
|
||||
break;
|
||||
if (ref->addr != NULL) {
|
||||
/*
|
||||
* Free the page and clear the address (so we don't free
|
||||
* it twice).
|
||||
*/
|
||||
WT_RET(__wt_ref_info(
|
||||
session, ref, &addr, &addr_size, NULL));
|
||||
WT_RET(__rec_block_free(session, addr, addr_size));
|
||||
if (__wt_off_page(ref->home, ref->addr)) {
|
||||
__wt_free(
|
||||
session, ((WT_ADDR *)ref->addr)->addr);
|
||||
__wt_free(session, ref->addr);
|
||||
}
|
||||
ref->addr = NULL;
|
||||
}
|
||||
WT_RET(__wt_ref_block_free(session, ref));
|
||||
break;
|
||||
case WT_PM_REC_EMPTY: /* Page deleted */
|
||||
break;
|
||||
@@ -5451,7 +5415,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
* are checkpoints, and must be explicitly dropped.
|
||||
*/
|
||||
if (!__wt_ref_is_root(ref))
|
||||
WT_RET(__rec_block_free(session,
|
||||
WT_RET(__wt_btree_block_free(session,
|
||||
mod->mod_replace.addr, mod->mod_replace.size));
|
||||
|
||||
/* Discard the replacement page's address. */
|
||||
@@ -5615,7 +5579,7 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
|
||||
if (bnd->addr.reuse)
|
||||
bnd->addr.addr = NULL;
|
||||
else {
|
||||
WT_TRET(__rec_block_free(session,
|
||||
WT_TRET(__wt_btree_block_free(session,
|
||||
bnd->addr.addr, bnd->addr.size));
|
||||
__wt_free(session, bnd->addr.addr);
|
||||
}
|
||||
|
||||
@@ -206,6 +206,9 @@ __session_close(WT_SESSION *wt_session, const char *config)
|
||||
|
||||
__wt_spin_unlock(session, &conn->api_lock);
|
||||
|
||||
/* We no longer have a session, don't try to update it. */
|
||||
session = NULL;
|
||||
|
||||
err: API_END_RET_NOTFOUND_MAP(session, ret);
|
||||
}
|
||||
|
||||
|
||||
@@ -712,9 +712,11 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
|
||||
conn = S2C(session);
|
||||
|
||||
txn_global = &conn->txn_global;
|
||||
txn_global->alloc = txn_global->current =
|
||||
txn_global->last_running = txn_global->oldest_id = WT_TXN_FIRST;
|
||||
txn_global->current = txn_global->last_running =
|
||||
txn_global->oldest_id = WT_TXN_FIRST;
|
||||
|
||||
WT_RET(__wt_spin_init(session,
|
||||
&txn_global->id_lock, "transaction id lock"));
|
||||
WT_RET(__wt_rwlock_alloc(session,
|
||||
&txn_global->nsnap_rwlock, "named snapshot lock"));
|
||||
txn_global->nsnap_oldest_id = WT_TXN_NONE;
|
||||
@@ -747,6 +749,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
|
||||
if (txn_global == NULL)
|
||||
return (0);
|
||||
|
||||
__wt_spin_destroy(session, &txn_global->id_lock);
|
||||
WT_TRET(__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock));
|
||||
__wt_free(session, txn_global->states);
|
||||
|
||||
|
||||
@@ -394,7 +394,7 @@ config_lrt(void)
|
||||
* stores.
|
||||
*/
|
||||
if (g.type == FIX) {
|
||||
if (config_is_perm("long_running_txn"))
|
||||
if (g.c_long_running_txn && config_is_perm("long_running_txn"))
|
||||
die(EINVAL,
|
||||
"long_running_txn not supported with fixed-length "
|
||||
"column store");
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user